// Reads a and b, then b arrays of length a. Index pairs (j, k) are merged with
// (s[j], s[k]) in a union-find structure, and a modular sum over the resulting
// classes is printed.
// #pragma GCC optimize("O3")
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")
// #pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native")
#include <bits/stdc++.h>
using namespace std;

// Shorthand macros used by the rest of this file.
// PB: shorthand for emplace_back (not used in the visible code).
#define PB emplace_back
// NOTE: from this point on every `int` token is textually replaced by
// `long long`. main is therefore spelled `signed main()`, because
// `int main()` would expand to `long long main()`.
#define int long long
#define ll long long
// vi: vector of `int`, i.e. vector<long long> once the macro above is applied.
#define vi vector<int>
// siz(a): container size converted to a signed value, so `siz(a) - 1` is -1
// for an empty container instead of wrapping around as an unsigned value.
#define siz(a) ((int) ((a).size()))
// rep: loop i upwards over the inclusive range [a, b].
#define rep(i, a, b) for (int i = (a); i <= (b); ++i)
// per: loop i downwards from a to b, both inclusive.
#define per(i, a, b) for (int i = (a); i >= (b); --i)
void print(vi n) { rep(i, 0, siz(n) - 1) cerr << n[i] << " \n"[i == siz(n) - 1]; }

// N bounds the array sizes below; mod is the modulus used for the final answer.
// (`int` is long long here because of the `#define int long long` above.)
const int N = 3000, mod = 1e9 + 7;
// a, b : the two values read first in main; a is the row length used by id(),
//        b is the number of arrays read afterwards.
// s    : the array read in the current round, indexed from 1.
// fa   : union-find parent links, indexed by id(i, j).
// s1   : per class root, the number of member pairs (i, j) with i < j.
// s2   : per class root, the total number of member pairs.
// fa, s1 and s2 each hold N * N + 5 64-bit values (about 72 MB apiece).
int a, b, s[N + 5], fa[N * N + 5], s1[N * N + 5], s2[N * N + 5];
// Modular exponentiation by repeated squaring: returns n^m modulo `mod`.
//
// n: base; it is reduced modulo `mod` before use.
// m: exponent, expected to be non-negative. The default, mod - 2, yields the
//    modular inverse of n by Fermat's little theorem (mod is 1e9 + 7, a
//    prime), provided n is not a multiple of mod.
int qp(int n, int m = mod - 2) {
	// Reduce the base first so every product below is bounded by mod^2, which
	// fits in 64 bits. Without this a large base overflows in n * n.
	n %= mod;
	int res = 1;
	for (; m; m >>= 1) {
		// Multiply in the current power of the base when this exponent bit is set.
		if (m & 1) res = res * n % mod;
		n = n * n % mod;
	}
	return res;
}
// Union-find lookup: returns the representative of the class containing n and
// re-points every node on the path from n directly at that representative.
//
// Written iteratively so the call depth does not grow with the length of the
// parent chain. Classes are linked without any rank or size heuristic, so
// chains can become long and a recursive walk could exhaust the stack.
int f(int n) {
	// Pass 1: follow parent links up to the representative.
	int root = n;
	while (fa[root] != root) root = fa[root];
	// Pass 2: walk the same path again and attach each node to the root.
	while (fa[n] != root) {
		int next = fa[n];
		fa[n] = root;
		n = next;
	}
	return root;
}
// Flattens the pair (n, m) into a single array index: rows are selected by n
// (counted from 1) and have length a, and m is the offset within the row.
int id(int n, int m) {
	const int row_start = (n - 1) * a;
	return row_start + m;
}

signed main() {
	// freopen(".in", "r", stdin);
	// freopen(".out", "w", stdout);
	ios::sync_with_stdio(0);
	cin.tie(0), cout.tie(0);
	cin >> a >> b;
	rep(i, 1, a) rep(j, 1, a) fa[id(i, j)] = id(i, j), s1[id(i, j)] = i < j, s2[id(i, j)] = 1;
	rep(i, 1, b) {
		rep(j, 1, a) cin >> s[j];
		rep(j, 1, a) rep(k, 1, a) {
			int x = f(id(j, k)), y = f(id(s[j], s[k]));
			if(x != y) fa[x] = y, s1[y] += s1[x], s2[y] += s2[x];
		}
	}
	int ans = 0;
	rep(i, 1, a) rep(j, 1, i - 1) (ans += s1[f(id(i, j))] * qp(s2[f(id(i, j))])) %= mod;
	cout << ans;
	return cerr << endl << 1.0 * clock() / CLOCKS_PER_SEC << endl, 0;
}