1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <iostream>
#include <vector>
using namespace std;
const int N = 1e7+3;
long long n, m, p, highest_suffix_sum[N], highest_times_index_suffix_sum[N], not_highest_prefix_sum[N];
long long fastmodulo(long long x) { return (x >= p ? x-p : x); }
int main() {
    ios_base::sync_with_stdio(0);
    cin.tie(0);
    cin >> n >> m >> p;

    vector<long long> highest(m), not_highest(m);
    for (int i=0; i<m; i++) {
        highest[i] = m-i;
        if (i) not_highest[i] = (i * (m-i)) % p;
    }
    for (int x=1; x<n; x++) {

        highest_suffix_sum[m] = 0;
        // for (int y=m-1; y>=0; y--) highest_suffix_sum[y] = (highest_suffix_sum[y+1] + highest[y]) % p;
        for (int y=m-1; y>=0; y--) highest_suffix_sum[y] = fastmodulo(highest_suffix_sum[y+1] + highest[y]);
        highest_times_index_suffix_sum[m] = 0;
        for (int y=m-1; y>=0; y--) highest_times_index_suffix_sum[y] = (highest_times_index_suffix_sum[y+1] + y * highest[y]) % p;
        not_highest_prefix_sum[0] = 0;
        // for (int y=0; y<m; y++) not_highest_prefix_sum[y+1] = (not_highest_prefix_sum[y] + not_highest[y]) % p;
        for (int y=0; y<m; y++) not_highest_prefix_sum[y+1] = fastmodulo(not_highest_prefix_sum[y] + not_highest[y]);

        vector<long long> new_highest(m), new_not_highest(m);
        for (int y=0; y<m; y++) {
            long long h = 0;
            
            // for (int e=y; e<m; e++) h = (h + highest[e] * (m-e)) % p;
            h += highest_suffix_sum[y] * m;
            h -= highest_times_index_suffix_sum[y];

            h += not_highest[y] * (m-y);
            
            new_highest[y] = h % p;
            if (new_highest[y] < 0) new_highest[y] += p;
        }
        long long ymmodp = 0;   // (y*m) % p
        for (int y=1; y<m; y++) {
            ymmodp = fastmodulo(ymmodp + m);
            long long nh = 0;
            
            // for (int e=0; e<y; e++) nh = (nh + highest[e] * (((e+1) * (m-y))) % p) % p;
            long long highest_prefix_sum = highest_suffix_sum[0] - highest_suffix_sum[y];
            long long highest_times_index_prefix_sum = highest_times_index_suffix_sum[0] - highest_times_index_suffix_sum[y];
            nh += (highest_times_index_prefix_sum + highest_prefix_sum) * (m-y);

            // for (int e=y; e<m; e++) nh = (nh + highest[e] * ((y * (m-e))) % p) % p;
            nh += highest_suffix_sum[y] * (ymmodp);
            nh -= highest_times_index_suffix_sum[y] * y;

            // for (int e=0; e<y; e++) nh = (nh + not_highest[e] * (m-y)) % p;
            nh += not_highest_prefix_sum[y] * (m-y);
            new_not_highest[y] = nh % p;
            if (new_not_highest[y] < 0) new_not_highest[y] += p;
        }
        highest.swap(new_highest);
        not_highest.swap(new_not_highest);
    }
    
    long long sum = 0;
    for (int b=0; b<m; b++) sum += highest[b];
    cout << ((sum % p) + p) % p << "\n";
    return 0;
}