1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#pragma GCC optimize ("O3")
#include <bits/stdc++.h>

using namespace std;

// Short type aliases used throughout the file.
typedef long long ll;        // 64-bit signed integer
typedef pair<int, int> ii;   // pair of ints
typedef vector<int> vi;      // dynamic array of ints
// 0x3f3f3f3f == 1061109567; doubling it still fits in a 32-bit signed int.
// NOTE(review): presumably an "infinity" sentinel; it is not used by the code visible here.
const int INF = 0x3f3f3f3f;

// FOR(i,b,e): declares int i and iterates over the half-open range [b, e).
#define FOR(i,b,e) for(int i = (b); i < (e); i++)
// TRAV(x,a): range-for over a, binding each element to x by reference.
#define TRAV(x,a) for(auto &x: (a))
// SZ(x): x.size() cast to int. The argument is not parenthesised inside the
// expansion, so pass a simple expression.
#define SZ(x) ((int)x.size())
// PB: shorthand for the push_back member name.
#define PB push_back
// X / Y: shorthand for the pair members first / second.
// NOTE: these replace every identifier spelled X or Y after this point.
#define X first
#define Y second

// Evaluates the row-by-row recurrence with two rolling rows and returns the
// last entry of row n-1, reduced modulo `mod`.
//
// Row 0:  row[j] = (1 + 2 + ... + (j+1)) mod `mod`.
// Row i (from the previous row `prev`), for j = 0 .. m-1:
//   row[j] = row[j-1]                                              (0 when j == 0)
//          + (j+1) * (prev[m-1] - prev[j] - prev[m-2-j])           (last term 0 when j == m-1)
//          + (j+1) * prev[j] - (prev[0] + ... + prev[j-1])
// all taken modulo `mod`.
//
// Preconditions: n >= 1, m >= 1, mod >= 1.
// All intermediate sums and products are done in 64 bits so the result is
// correct for any positive `mod` that fits in an int.
static int rolling_table_last(int n, int m, int mod){
    using i64 = long long;
    const i64 p = mod;

    std::vector<i64> prev(m), cur(m, 0);
    prev[0] = 1 % p;   // reduce the seed too, so mod == 1 yields 0
    for(int j = 1; j < m; ++j) prev[j] = (prev[j-1] + j + 1) % p;

    for(int i = 1; i < n; ++i){
        const i64 total = prev[m-1];
        i64 prefsum = 0;   // prev[0] + ... + prev[j-1], kept reduced modulo p
        for(int j = 0; j < m; ++j){
            const i64 width = j + 1;
            const i64 mirrored = (j < m-1) ? prev[m-2-j] : 0;
            // 2*p keeps the difference non-negative before the reduction.
            const i64 nad = (2*p + total - prev[j] - mirrored) % p;

            i64 val = (j > 0) ? cur[j-1] : 0;
            val = (val + nad * width) % p;
            // prefsum < p, so adding p keeps the operand non-negative.
            val = (val + prev[j] * width - prefsum + p) % p;
            cur[j] = val;

            prefsum = (prefsum + prev[j]) % p;
        }
        std::swap(prev, cur);
    }
    return static_cast<int>(prev[m-1]);
}

// Reads `n m mod` from standard input and prints the last entry of row n-1
// of the table described above, followed by a newline.
//
// Edge cases:
//   * unreadable input or mod <= 0: nothing is printed (avoids using
//     uninitialised values or dividing by zero);
//   * n <= 0 or m <= 0: prints 0 instead of indexing an empty row.
void solve(){
    int n, m, mod;
    if(!(std::cin >> n >> m >> mod) || mod <= 0) return;
    if(n <= 0 || m <= 0){
        std::cout << 0 << '\n';
        return;
    }
    std::cout << rolling_table_last(n, m, mod) << '\n';
}

// Program entry point: configures the standard streams for fast I/O and
// handles exactly one test case.
int main(){
    // Decouple the C++ streams from C stdio and stop cin from flushing cout
    // before every read.
    std::ios_base::sync_with_stdio(false);
    std::cin.tie(nullptr);

    // Single-case input. For multi-case input, read a count first and call
    // solve() that many times.
    solve();

    return 0;
}