1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include <cstdio>
#include <cstdlib>
#include <cstdint>

static const uint64_t MAX_N = 1024;
static const uint64_t MAX_K = 16;

struct infopair {
  uint64_t hothot, hotcold;
};

infopair data[MAX_K][MAX_N] = {0};
infopair sumData[MAX_K][MAX_N] = {0};
uint64_t binomial[MAX_N][MAX_N] = {0};

void computeBinomials(const uint64_t N, const uint64_t P) {
  binomial[0][0] = 1;
  binomial[1][0] = 1;
  binomial[1][1] = 1;
  for (uint64_t n = 2; n <= N; n++) {
    binomial[n][0] = 1;
    for (uint64_t k = 1; k < N; k++) {
      binomial[n][k] = (binomial[n - 1][k - 1] + binomial[n - 1][k]) % P;
    }
    binomial[n][n] = 1;
  }
}

inline uint64_t binom(uint64_t n, uint64_t k) {
  return binomial[n][k];
}

uint64_t solve(const uint64_t N, const uint64_t K, const uint64_t P) {
  if (K > 10) {
    // There won't be more than 10 rounds
    return 0;
  }

  if (N == 1) {
    return 0; // 1-permutations are already done
  }

  computeBinomials(N, P);

  data[0][0].hothot = 1;
  data[0][0].hotcold = 1;

  // Fill first two rows
  for (uint64_t k = 0; k <= K; k++) {
    sumData[k + 1][0].hothot = sumData[k][0].hothot + data[k][0].hothot;
    sumData[k + 1][0].hotcold = sumData[k][0].hotcold + data[k][0].hotcold;
  }

  // Fill intermediate rows
  for (uint64_t k = 1; k <= K; k++) {
    for (uint64_t ps = 1; ps < N; ps++) {
      // ps - perm size

      // First case - the biggest number is placed at the wall
      uint64_t accumhothot = 0;
      uint64_t accumhotcold = 0;

      // Other cases
      for (uint64_t pivot = 0; pivot < ps; pivot++) {
        const auto leftSize = pivot;
        const auto rightSize = ps - pivot - 1;
        const auto interleaveCount = binom(ps - 1, leftSize);

        // Could be reduced to two cases, but would be asymmetric
        const auto typeA1 = data[k][leftSize].hothot * sumData[k][rightSize].hothot;
        const auto typeA2 = sumData[k][leftSize].hothot * data[k][rightSize].hothot;
        const auto typeA3 = data[k - 1][leftSize].hothot * data[k - 1][rightSize].hothot;
        const auto sumhothot = ((typeA1 + typeA2 + typeA3) % P) * interleaveCount;
        accumhothot = (accumhothot + sumhothot) % P;

        const auto typeB1 = data[k - 1][leftSize].hothot * data[k - 1][rightSize].hotcold;
        const auto typeB2 = data[k - 1][leftSize].hothot * sumData[k - 1][rightSize].hotcold;
        const auto typeB3 = sumData[k][leftSize].hothot * data[k][rightSize].hotcold;
        const auto sumhotcold = ((typeB1 + typeB2 + typeB3) % P) * interleaveCount;
        accumhotcold = (accumhotcold + sumhotcold) % P;
      }

      data[k][ps].hothot = accumhothot;
      data[k][ps].hotcold = accumhotcold;
      sumData[k + 1][ps].hothot = (sumData[k][ps].hothot + accumhothot) % P;
      sumData[k + 1][ps].hotcold = (sumData[k][ps].hotcold + accumhotcold) % P;

      // printf("cold[%d][%d] = %d\n", (int)k, (int)ps, (int)accumhotcold);
      // printf("hot[%d][%d] = %d\n", (int)k, (int)ps, (int)accumhothot);
    }
  }

  // Compute the result
  uint64_t resultaccum = 0;
  for (uint64_t pivot = 0; pivot < N; pivot++) {
      const auto leftSize = pivot;
      const auto rightSize = N - pivot - 1;
      const auto interleaveCount = binom(N - 1, leftSize);

      const auto type1 = data[K][leftSize].hotcold * sumData[K][rightSize].hotcold;
      const auto type2 = sumData[K][leftSize].hotcold * data[K][rightSize].hotcold;
      const auto type3 = data[K][leftSize].hotcold * data[K][rightSize].hotcold;
      const auto sum = ((type1 + type2 + type3) % P) * interleaveCount;

      resultaccum = (resultaccum + sum) % P;
      // printf("binom(%d, %d) = %d\n", (int)N - 1, (int)leftSize, interleaveCount);
      // printf("%d -> %d\n", (int)sum, (int)resultaccum);
  }

  return resultaccum;
}

int main() {
  int N, K, P;
  scanf("%d %d %d", &N, &K, &P);

  const auto solution = (int)solve(N, K, P);
  printf("%d\n", solution);

  return 0;
}