1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <bits/stdc++.h>
using namespace std;

using ll = long long;

int main() {
  ll n, m, p;
  cin >> n >> m >> p;
  vector<ll> diag(m, 1), new_diag;
  vector<ll> sum(m), new_sum;
  vector<ll> diff(m), new_diff;
  for (int i = 0; i < m; i++) {
    sum[i] = m - i;
  }

  for (int k = 2; k <= n; k++) {
    // printf("diag "); for (auto i : diag) printf("\t%lld", i); printf("\n");
    // printf("sum "); for (auto i : sum) printf("\t%lld", i); printf("\n");
    // printf("diff "); for (auto i : diff) printf("\t%lld", i); printf("\n");
    // printf("\n");

    new_diag.assign(m, 0);
    new_sum.assign(m, 0);
    new_diff.assign(m, 0);

    new_diag[0] = sum[0];
    ll last_val = 0;
    for (auto i : sum) {
      last_val = (last_val + i) % p;
      new_sum[0] = (new_sum[0] + last_val) % p;
    }

    ll last_val_sum = diag[0];
    ll prefix_sum = diag[0];
    ll sum_sum = sum[0];
    for (int i = 1; i < m; i++) {
      sum_sum = (sum_sum + sum[i]) % p;

      new_diag[i] = (sum_sum - prefix_sum) % p;

      last_val_sum = ((last_val_sum + diff[i] * i) + diag[i]) % p;

      prefix_sum = (prefix_sum + last_val_sum) % p;

      new_sum[i] = (new_sum[i-1] - new_diag[i-1] * (m - i + 1)) % p;
      new_sum[i] = (new_sum[i] + new_diag[i] * (m - i)) % p;
      new_sum[i] = (new_sum[i] - sum[i] * (m - i)) % p;
    }

    new_diff = move(sum);


    diag.swap(new_diag);
    sum.swap(new_sum);
    diff.swap(new_diff);
  }
  ll res = 0;
  for (auto i : sum) {
    res = (res + i) % p;
  }
  if (res < 0) {
    res += p;
  }
  printf("%lld\n", res);
}