1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <bits/stdc++.h>
#define PB push_back
#define ST first
#define ND second
#define _ ios_base::sync_with_stdio(0); cin.tie(0);
//mt19937 rng(chrono::high_resolution_clock::now().time_since_epoch().count());

using namespace std;

using ll = long long;
using pi = pair<int,int>;
using vi = vector<int>;

int n, m, mod;
vector<vi> p, k, sp, sk;

int main() {_
	cin >> m >> n >> mod;
	p.resize(m + 2);
	k.resize(m + 2);
	sp.resize(m + 2);
	sk.resize(m + 2);
	for(int i = 0; i <= m + 1; ++i) {
		p[i].resize(n + 2);
	}
	for(int i = 0; i <= m + 1; ++i) {
		k[i].resize(n + 2);
	}
	for(int i = 0; i <= m + 1; ++i) {
		sp[i].resize(n + 2);
	}
	for(int i = 0; i <= m + 1; ++i) {
		sk[i].resize(n + 2);
	}
	p[0][1] = 1;
	k[0][n] = 1;
	for(int i = 1; i <= m; ++i) {
		int all = p[i - 1][1];
		for(int a = 1; a <= n; ++a) {
			p[i][a] = ((ll)(n - a + 1) * (all - k[i - 1][a - 1]))%mod;
			p[i][a] -= sp[i - 1][a + 1];
			p[i][a] %= mod;
			if(p[i][a] < 0) p[i][a]+=mod;
		}
		for(int b = 1; b <= n; ++b) {
			k[i][b] = ((ll)(b) * (all - p[i - 1][b + 1])) % mod;
			k[i][b] -= sk[i - 1][b - 1];
			k[i][b] %= mod;
			if(k[i][b] < 0) k[i][b] += mod;
		}
			//~ for(int b = a; b <= n; ++b) {
				//~ dp[i][a][b] = (p[i - 1][1] - p[i - 1][b+1] - k[i-1][a - 1]);
				//~ dp[i][a][b] %= mod;
				//~ if(dp[i][a][b] < 0) dp[i][a][b] += mod;
				//~ p[i][a] = (p[i][a]+dp[i][a][b])%mod;
				//~ k[i][b] = (k[i][b]+dp[i][a][b])%mod;
			//~ }
		//~ }
		for(int j = n; j >= 1; --j) {
			p[i][j] = (p[i][j] + p[i][j + 1]) % mod;
			sp[i][j] = (sp[i][j + 1] + p[i][j]) % mod;
		}
		for(int j = 1; j <= n; ++j) {
			k[i][j] = (k[i][j] + k[i][j - 1]) % mod;
			sk[i][j] = (sk[i][j - 1] + k[i][j]) % mod;
		}
	}
	cout << p[m][1];
	
}