1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <bits/stdc++.h>

using namespace std;

typedef long long int LL;

const int N = 1e7 + 7;

int n, m, p;
int dp[2][N][2];
int pref[2][N][3];

void add(int &a, int b) {
	a += b;
	if(a >= p)
		a -= p;
}

int main() {
	scanf("%d %d %d", &n, &m, &p);
	for(int i = 1; i <= m; ++i) {
		dp[0][i][0] = (i == 1);
		dp[0][i][1] = (i > 1);
		
		pref[0][i][0] = 1;
		pref[0][i][1] = 1;
		pref[0][i][2] = i - 1;
	}
	
	for(int i = 1; i <= n; ++i) {
		int t = i & 1;
		int t2 = t ^ 1;

		for(int k = 1; k <= m; ++k) {
			LL tmp0 = dp[t2][k][1] * 1LL * (m - k + 1);
			tmp0 += 1LL * (m + 1) * (pref[t2][m][0] - pref[t2][k - 1][0]);
			tmp0 -= pref[t2][m][1] - pref[t2][k - 1][1];
			
			LL tmp1 = 1LL * (m - k + 1) * pref[t2][k - 1][2];
			tmp1 += 1LL * (m - k + 1) * pref[t2][k - 1][1];
			tmp1 += 1LL * (k - 1) * (m + 1) % p * (pref[t2][m][0] - pref[t2][k - 1][0]);
			tmp1 -= 1LL * (k - 1) * (pref[t2][m][1] - pref[t2][k - 1][1]);
			
			dp[t][k][0] = tmp0 % p;
			if(dp[t][k][0] < 0)
				dp[t][k][0] += p;

			dp[t][k][1] = tmp1 % p;
			if(dp[t][k][1] < 0)
				dp[t][k][1] += p;
			
//			printf("dp[%d][%d][0] = %d, dp[%d][%d][1] = %d\n", t, k, dp[t][k][0], t, k, dp[t][k][1]);
			
			pref[t][k][0] = pref[t][k - 1][0];
			add(pref[t][k][0], dp[t][k][0]);

			pref[t][k][1] = (pref[t][k - 1][1] + 1LL * k * dp[t][k][0]) % p;

			pref[t][k][2] = pref[t][k - 1][2];
			add(pref[t][k][2], dp[t][k][1]);
		}
	}
	
	printf("%d\n", pref[n & 1][m][0]);
	return 0;
}