1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#pragma GCC optimize("Ofast")
#pragma GCC optimization ("O3")
#pragma GCC optimization ("unroll-loops")
#include <bits/stdc++.h>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/pb_ds/tree_policy.hpp>
using namespace __gnu_pbds;
using namespace std;
// ---- Shorthand macros -------------------------------------------------------
// These are plain textual substitutions: each name below is replaced wherever
// it appears as a token later in the file, so none of them can be reused as an
// ordinary identifier.

// Abbreviations for common std names / members.
#define mp make_pair
#define eb emplace_back
#define pb push_back
// Short accessors for the two members of a pair.
#define e1 first
#define e2 second
// Type abbreviations (macros, not typedefs).
#define uint unsigned int
#define ll long long
#define ull unsigned long long
#define ld long double
// NOTE: redefines the keyword itself - every `float` written after this line
// becomes `long double`.
#define float long double
// Container size as a signed int. The argument is not parenthesized, so pass a
// plain name. Being a function-like macro called `size`, it also captures any
// later member call spelled `.size()`.
#define size(x) (int)x.size()
// Multi-test driver: reads a test-case count and loops over the statement that follows.
// Expands to several statements, so it only works at statement scope.
#define satori int testCases; cin>>testCases; while(testCases--)
// Detaches iostreams from C stdio and unties cin/cout.
#define fastio ios_base::sync_with_stdio(false);cin.tie(0);cout.tie(0)
// Expands to the `begin(r),end(r)` argument pair for algorithms.
#define all(r) begin(r),end(r)
// Current high_resolution_clock reading as a raw tick count.
// NOTE: replaces every later use of the identifier `time`.
#define time chrono::high_resolution_clock().now().time_since_epoch().count()
// GNU pb_ds red-black tree over int keys using the order-statistics node-update policy.
typedef tree<int,null_type,less<int>,rb_tree_tag,tree_order_statistics_node_update> ordered_set;
// Global Mersenne Twister, seeded from the clock at start-up, so its output
// differs from run to run.
mt19937 rng(chrono::high_resolution_clock().now().time_since_epoch().count());
///////////////////
// Prefix a statement with `debug` to make it conditional; with `if(1)` such
// statements always run.
#define debug if(1)
///////////////////

// Modular product: returns (a*b) mod `mod`, always in the range [0, mod).
//
// Both operands are reduced into [0, mod) before multiplying, so
//  - negative operands give a non-negative result (a bare `%` would keep the
//    sign of the dividend), and
//  - operands larger than `mod` cannot overflow the 64-bit product.
//
// Preconditions: mod > 0, and mod must fit in `int` because the result is
// returned as `int`.
int mul(long long a,long long b,long long mod){
	a%=mod;
	if(a<0)
		a+=mod;
	b%=mod;
	if(b<0)
		b+=mod;
	return static_cast<int>((a*b)%mod);
}

// Evaluates the table recurrence modulo `mod` and returns the sum of the last
// row's `l` values, reduced to [0, mod).
//
// Row 0:   l[j] = j+1,  r[j] = m-j                                (0 <= j < m)
// Row i, from row i-1 (written L, R; tot = sum of all L[t]):
//   l[j] = tot*(j+1) - sum_{t<j} L[t]*(j-t) - (j+1)*sum_{t>j} R[t]
//   r[j] = tot*(m-j) - (m-j)*sum_{t<j} L[t] - sum_{t>j} R[t]*(t-j)
//
// Requires n >= 1, m >= 0, mod >= 1. Only two rows are kept alive at a time
// because row i depends solely on row i-1.
static int computeAnswer(int n,int m,int mod){
	// 64-bit cells: the sum of two residues stays in range for any int-sized mod.
	vector<long long> prevL(m),prevR(m),curL(m),curR(m);
	// Base row, reduced so every stored value is a proper residue even when mod <= m.
	for(int j=0;j<m;j++){
		prevL[j]=(j+1)%mod;
		prevR[j]=(m-j)%mod;
	}
	for(int i=1;i<n;i++){
		long long tot=0;
		for(int j=0;j<m;j++){
			tot+=prevL[j];
			if(tot>=mod)
				tot-=mod;
		}
		// Left-to-right sweep. On entry to step j:
		//   pre    = sum_{t<j} L[t]
		//   preAcc = sum_{t<j} L[t]*(j-t)   (running sum of the prefix sums)
		long long pre=0,preAcc=0;
		for(int j=0;j<m;j++){
			long long lv=mul(tot,j+1,mod)-preAcc;
			if(lv<0)
				lv+=mod;
			long long rv=mul(tot,m-j,mod)-mul(pre,m-j,mod);
			if(rv<0)
				rv+=mod;
			curL[j]=lv;
			curR[j]=rv;
			pre+=prevL[j];
			if(pre>=mod)
				pre-=mod;
			preAcc+=pre;
			if(preAcc>=mod)
				preAcc-=mod;
		}
		// Right-to-left sweep. On entry to step j:
		//   suf    = sum_{t>j} R[t]
		//   sufAcc = sum_{t>j} R[t]*(t-j)   (running sum of the suffix sums)
		long long suf=0,sufAcc=0;
		for(int j=m-1;j>=0;j--){
			curL[j]-=mul(suf,j+1,mod);
			if(curL[j]<0)
				curL[j]+=mod;
			curR[j]-=sufAcc;
			if(curR[j]<0)
				curR[j]+=mod;
			suf+=prevR[j];
			if(suf>=mod)
				suf-=mod;
			sufAcc+=suf;
			if(sufAcc>=mod)
				sufAcc-=mod;
		}
		// The finished row becomes the input of the next iteration; the old
		// buffers are fully overwritten by the next left-to-right sweep.
		prevL.swap(curL);
		prevR.swap(curR);
	}
	long long res=0;
	for(int j=0;j<m;j++){
		res+=prevL[j];
		if(res>=mod)
			res-=mod;
	}
	return static_cast<int>(res);
}

// Reads `n m mod` from standard input and prints the result of the recurrence.
// Malformed or out-of-range input is rejected with exit status 1 instead of
// indexing outside the table or dividing by a zero modulus.
int32_t main()
{
	fastio;
	int n,m,mod;
	if(!(cin>>n>>m>>mod)||n<1||m<0||mod<1){
		cerr<<"invalid input: expected n >= 1, m >= 0, mod >= 1\n";
		return 1;
	}
	cout<<computeAnswer(n,m,mod)<<'\n';
	return 0;
}