1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include<bits/stdc++.h>
using namespace std;
using ll = long long;
const int N= 1e7+1;
ll n, m, p;
int dp0[2][N];
int dp1[2][N][2]; 
int main() {
	ios_base::sync_with_stdio(0); cin.tie(0); cout.tie(0);
	cin>>n>>m>>p;
	for(ll i=1; i<=m; ++i) {
		dp0[1][i] = (dp0[1][i-1] + 1)%p;	
	}
	for(ll i=2; i<=n; ++i) {
		for(ll j=1; j<=m; ++j) {
			dp0[i&1][j] = dp0[i&1][j-1];	
			dp0[i&1][j] = (dp0[i&1][j] + (ll)(dp0[(i-1)&1][m] - dp0[(i-1)&1][j]+p) * j)%p;
			dp0[i&1][j] = (dp0[i&1][j] + (ll)(dp1[(i-1)&1][m][1] - dp1[(i-1)&1][j][1]+p) * j)%p;
			dp0[i&1][j] = (dp0[i&1][j] + (ll)(dp1[(i-1)&1][j][0]) * (m-j))%p;

			for(ll k=0; k<2; ++k) {
				ll x = ((ll)dp1[(i-1)&1][j][0] + (dp0[(i-1)&1][j] - dp0[(i-1)&1][j-1])*j +p)%p;
				dp1[i&1][j][k] = (dp1[i&1][j-1][k] + ((k==0)?(ll)x*j:(ll)x*(m-j+1)) )%p;
			}
		}
	}
	ll ans = 0;
	for(ll i=1; i<=m; ++i) {
		ans = (ans + (ll)(dp0[n&1][i] - dp0[n&1][i-1] + p) * i)%p;
		ans = (ans + (ll)(dp1[n&1][i][0] - dp1[n&1][i-1][0] + p) * (m-i+1))%p;
	}
	cout<<ans<<'\n';
}