1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#pragma GCC optimize ("Ofast")
#define _USE_MATH_DEFINES
#include <bits/stdc++.h>
// ---- Shorthand macros used throughout this file ----
// FOR: ascending loop over the half-open range [a, b); the index type is deduced from `a`.
#define FOR(i, a, b) for (auto i=(a); i<(b); i++)
// FORD: descending int loop starting at a and stopping before b (b itself is excluded).
#define FORD(i, a, b) for (int i=(a); i>(b); i--)
// SZ: container size converted to int.
#define SZ(x) ((int)(x).size())
// ALL: expands to the begin/end iterator pair of x (two comma-separated arguments).
#define ALL(x) (x).begin(), (x).end()
// PPC: number of set bits, via the `long long` flavour of the GCC builtin.
#define PPC(x) __builtin_popcountll(x)
// MSB: index of the highest set bit. GCC documents __builtin_clzll(0) as undefined, so x must be non-zero.
#define MSB(x) (63 - __builtin_clzll(x))
// LSB: index of the lowest set bit. GCC documents __builtin_ctzll(0) as undefined, so x must be non-zero.
#define LSB(x) __builtin_ctzll(x)
// ARG: i-th element of a tuple-like object through get<i>.
#define ARG(x, i) (get<i>(x))
// ithBit: bit i of m (the shift is applied before the `& 1`).
#define ithBit(m, i) ((m) >> (i) & 1)
// Abbreviations for push_back and the pair members first / second.
#define pb push_back
#define ft first
#define sd second
// kw: square of a; the argument is evaluated twice, so avoid side effects in it.
#define kw(a) ((a) * (a))
// CLR: clears x and then calls shrink_to_fit() on it. x is not parenthesised and the
// expansion is a comma expression, so pass a plain object name.
#define CLR(x) x.clear(), x.shrink_to_fit()
// dbg(...): when DEBUG is defined, debug.h is included (presumably it supplies dbg - not visible here);
// otherwise every dbg(...) call expands to the constant 0.
#ifdef DEBUG
#include "debug.h"
#else
#define dbg(...) 0
#endif
using namespace std;
 
/// Lowers `a` to `b` (converted to T1) when the converted value is smaller; otherwise `a` keeps its value.
template <typename T1, typename T2>
inline void remin(T1& a, T2 b)
{
	const T1 candidate = (T1)b;
	a = (candidate < a) ? candidate : a;
}
/// Raises `a` to `b` (converted to T1) when the converted value is larger; otherwise `a` keeps its value.
template <typename T1, typename T2>
inline void remax(T1& a, T2 b)
{
	const T1 candidate = (T1)b;
	a = (a < candidate) ? candidate : a;
}

// maxN bounds the global tables of this file; mod is the prime modulus shared by
// every *Mod helper below.
const int maxN = 3'003, mod = 1'000'000'007;

/// a <- (a + b) % mod. The sum is formed before the reduction, so it must fit
/// in the common type of T1 and T2.
template <typename T1, typename T2> inline void addMod(T1& a, T2 b) { a = (a + b) % mod; }

/// a <- a * b % mod. The product is formed before the reduction, so use a
/// 64-bit T1 whenever the operands can be close to mod.
template <typename T1, typename T2> inline void multMod(T1& a, T2 b) { a = a * b % mod; }

/**
 * Binary (square-and-multiply) exponentiation modulo `mod`.
 *
 * @param a base; any 64-bit value, it is reduced into [0, mod) first
 * @param b exponent; callers are expected to pass b >= 0
 * @return a^b modulo mod, in the range [0, mod)
 */
long long qpow(long long a, long long b)
{
	// Normalise the base up front: without this, the first squaring of a base
	// above ~3e9 overflows long long, and a negative base leaks a negative
	// remainder into the result.
	a %= mod;
	if (a < 0)
		a += mod;

	long long ret = 1ll;
	for (; b != 0; b /= 2)
	{
		if (b & 1)	multMod(ret, a);
		multMod(a, a);
	}
	return ret;
}

/// a <- (a - b) modulo mod; b is reduced first and mod is added back so that a
/// value of `a` in [0, mod) stays non-negative.
template <typename T1, typename T2> inline void subMod(T1& a, T2 b) { a = (a - b%mod + mod) % mod; }

/// a <- a / b modulo mod, multiplying by b^(mod-2), i.e. the inverse of b by
/// Fermat's little theorem (mod is prime). b must not be a multiple of mod.
template <typename T1, typename T2> inline void divMod(T1& a, T2 b) { a = a * qpow(b, mod-2) % mod; }


bool vis[maxN][maxN];
int P[maxN];

void solve()
{
	int n, k;
	scanf ("%d%d", &n, &k);
	FOR(_, 0, k)
		FOR(i, 1, n+1)
			scanf ("%d", P+i);
	
	if (k > 1)
	{
		long long res = n * (n - 1) / 2;
		divMod(res, 2);
		printf("%lld\n", res);
		return;
	}

	long long res = 0;

	FOR(i, 1, n+1) FOR(j, 1, i) if (!vis[i][j])
	{
		int cnt[2] = {};

		for (int a=i, b=j; !vis[a][b]; a=P[a], b=P[b])
		{
			cnt[a < b]++;
			vis[a][b] = true;
		}

		int len = cnt[0] + cnt[1];

		long long invsLocal = cnt[0] * cnt[1] % mod;
		divMod(invsLocal, len);
		addMod(res, invsLocal);
	}

	printf("%lld\n", res);
}

int main()
{
	int t = 1;
//	scanf ("%d", &t);	
	FOR(tid, 1, t+1)
	{
		//printf("Case #%d: ", tid);
		solve();
	}
	return 0;
}