#include <vector> #pragma GCC target("sse4.1") #include "bits/stdc++.h" #include <smmintrin.h> using namespace std; #define rep(i,a,b) for(int i=(a); i<(b); ++i) #define all(x) x.begin(),x.end() #define sz(x) int(x.size()) typedef long long ll; typedef unsigned long long ull; typedef vector<short> vi; typedef vector<vi> vvi; const int N = 500; short dists[N][N]; short sd[N]; mt19937 rng; void solve(){ int n; cin >> n; vi ord(n); iota(all(ord),0); shuffle(all(ord),rng); vector<string> aa(n); for(auto& c : aa) cin >> c; auto a = aa; rep(i,0,n) rep(j,0,n) a[i][j] = aa[ord[i]][ord[j]]; rep(i,0,n) rep(j,0,n) dists[i][j] = N; rep(i,0,n) dists[i][i] = 0; rep(i,0,n) rep(j,0,n) if (a[i][j] == '1') dists[i][j] = 1; rep(k,0,n) rep(i,0,n) rep(j,0,n) dists[i][j] = min(dists[i][j], (short)(dists[i][k] + dists[k][j])); short ans = N; rep(u,0,n) rep(v,0,u){ short mx = 0; rep(i,0,n) sd[i] = min(dists[i][u], dists[i][v]); rep(i,0,n) { __m128i v_mx = _mm_set1_epi16(mx); __m128i v_sd_i = _mm_set1_epi16(sd[i]); int j = 0; for (; j + 8 < i; j += 8) { __m128i v_sd_j = _mm_loadu_si128((__m128i*)&sd[j]); __m128i v_sum = _mm_adds_epu16(v_sd_i, v_sd_j); __m128i v_dists = _mm_loadu_si128((__m128i*)&dists[i][j]); __m128i v_min = _mm_min_epu16(v_sum, v_dists); v_mx = _mm_max_epu16(v_mx, v_min); } short temp[8]; _mm_storeu_si128((__m128i*)temp, v_mx); for (int k = 0; k < 8; ++k) { mx = max(mx, temp[k]); } for (; j < i; ++j) { mx = max(mx, min((short)(sd[i] + sd[j]), dists[i][j])); } if (mx >= ans) break; } ans = min(ans,mx); } cout << ans << '\n'; } int main(){ cin.tie(NULL),cin.sync_with_stdio(false); rng.seed(chrono::steady_clock::now().time_since_epoch().count()); int t; cin >> t; while(t--) solve(); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | #include <vector> #pragma GCC target("sse4.1") #include "bits/stdc++.h" #include <smmintrin.h> using namespace std; #define rep(i,a,b) for(int i=(a); i<(b); ++i) #define all(x) x.begin(),x.end() #define sz(x) int(x.size()) typedef long long ll; typedef unsigned long long ull; typedef vector<short> vi; typedef vector<vi> vvi; const int N = 500; short dists[N][N]; short sd[N]; mt19937 rng; void solve(){ int n; cin >> n; vi ord(n); iota(all(ord),0); shuffle(all(ord),rng); vector<string> aa(n); for(auto& c : aa) cin >> c; auto a = aa; rep(i,0,n) rep(j,0,n) a[i][j] = aa[ord[i]][ord[j]]; rep(i,0,n) rep(j,0,n) dists[i][j] = N; rep(i,0,n) dists[i][i] = 0; rep(i,0,n) rep(j,0,n) if (a[i][j] == '1') dists[i][j] = 1; rep(k,0,n) rep(i,0,n) rep(j,0,n) dists[i][j] = min(dists[i][j], (short)(dists[i][k] + dists[k][j])); short ans = N; rep(u,0,n) rep(v,0,u){ short mx = 0; rep(i,0,n) sd[i] = min(dists[i][u], dists[i][v]); rep(i,0,n) { __m128i v_mx = _mm_set1_epi16(mx); __m128i v_sd_i = _mm_set1_epi16(sd[i]); int j = 0; for (; j + 8 < i; j += 8) { __m128i v_sd_j = _mm_loadu_si128((__m128i*)&sd[j]); __m128i v_sum = _mm_adds_epu16(v_sd_i, v_sd_j); __m128i v_dists = _mm_loadu_si128((__m128i*)&dists[i][j]); __m128i v_min = _mm_min_epu16(v_sum, v_dists); v_mx = _mm_max_epu16(v_mx, v_min); } short temp[8]; _mm_storeu_si128((__m128i*)temp, v_mx); for (int k = 0; k < 8; ++k) { mx = max(mx, temp[k]); } for (; j < i; ++j) { mx = max(mx, min((short)(sd[i] + sd[j]), dists[i][j])); } if (mx >= ans) break; } ans = min(ans,mx); } cout << ans << '\n'; } int main(){ cin.tie(NULL),cin.sync_with_stdio(false); rng.seed(chrono::steady_clock::now().time_since_epoch().count()); int t; cin >> t; while(t--) solve(); } |