Kod źródłowy zgłoszenia nr 674290

#include <bits/stdc++.h>
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")

using namespace std;
#define PB push_back
#define LL long long
#define int LL
#define FOR(i,a,b) for (int i = (a); i <= (b); i++)
#define FORD(i,a,b) for (int i = (a); i >= (b); i--)
#define REP(i,n) FOR(i,0,(int)(n)-1)
#define RE(i,n) FOR(i,1,n)
#define st first
#define nd second
#define ALL(x) (x).begin(), (x).end()
#define SZ(x) ((int)(x).size())
#define VI vector<int>
#define PII pair<int,int>
#define LD long double


template<class T> ostream &operator<<(ostream &os, vector<T> V){
    os<<"[";for(auto vv:V)os<<vv<<",";return os<<"]";
}

template<class L, class R> ostream &operator<<(ostream &os, pair<L,R> P) {
    return os << "(" << P.st << "," << P.nd << ")";
}


template<class C> void mini(C& a4, C b4) { a4 = min(a4, b4); }
template<class C> void maxi(C& a4, C b4) { a4 = max(a4, b4); }

template<class TH> void _dbg(const char *sdbg, TH h){cerr<<sdbg<<"="<<h<<"\n";}
template<class TH, class... TA> void _dbg(const char *sdbg, TH h, TA... a) {
  while(*sdbg!=',')cerr<<*sdbg++;cerr<<"="<<h<<","; _dbg(sdbg+1, a...);
}


#ifdef LOCAL
#define debug(...) _dbg(#__VA_ARGS__, __VA_ARGS__)
#else
#define debug(...) (__VA_ARGS__)
#define cerr if(0)cout
#endif
/*Precision error max_ans/1e15 (2.5e18) for (long) doubles.
So integer rounding works for doubles with answers 0.5*1e15,
e.g. for sizes 2^20 and RANDOM positive integers up to 45k.
Those values assume DBL_MANT_DIG=53 and LDBL_MANT_DIG=64.
For input in [0, M], you can decrease everything by M/2.
If there are many small vectors, uncomment "BRUTE FORCE".*/
const int mod = 1e9 + 7;
typedef double ld; // 'long double' is 2.2 times slower
struct C {
	ld real, imag;
	C operator * (const C & he) const {
		return C{real * he.real - imag * he.imag,
				real * he.imag + imag * he.real};
	}
	void operator += (const C & he) {
		real += he.real; imag += he.imag;
	}
};
void dft(vector<C> & a, bool rev) {
	const int n = a.size();
	for(int i = 1, k = 0; i < n; ++i) {
		for(int bit = n / 2; (k ^= bit) < bit; bit /= 2);;;
		if(i < k) swap(a[i], a[k]);
	}
	for(int len = 1, who = 0; len < n; len *= 2, ++who) {
		static vector<C> t[30];
		vector<C> & om = t[who];
		if(om.empty()) {
			om.resize(len);
			const ld ang = 2 * acosl(0) / len;
			REP(i, len) om[i] = i%2 || !who ?
					C{cos(i*ang), sin(i*ang)} : t[who-1][i/2];
		}
		for(int i = 0; i < n; i += 2 * len)
			REP(k, len) {
				 const C x = a[i+k], y = a[i+k+len]
						* C{om[k].real, om[k].imag * (rev ? -1 : 1)};
				a[i+k] += y;
				a[i+k+len] = C{x.real - y.real, x.imag - y.imag};
			}
	}
	if(rev) REP(i, n) a[i].real /= n;
}
template<typename T>vector<T> multiply(const vector<T> & a, const vector<T> & b, bool split = false) {
	if(a.empty() || b.empty()) return {};
	int n = a.size() + b.size();
	vector<T> ans(n - 1);
	// /* if(min(a.size(),b.size()) < 190) { // BRUTE FORCE
	// 	REP(i, a.size()) REP(j, b.size()) ans[i+j] += a[i]*b[j];
	// 	return ans; } */
	while(n&(n-1)) ++n;
	// http://codeforces.com/blog/entry/48417
	auto speed = [&](const vector<C> & w, int i, int k) {
		int j = i ? n - i : 0, r = k ? -1 : 1;
		return C{w[i].real + w[j].real * r, w[i].imag
				- w[j].imag * r} * (k ? C{0, -0.5} : C{0.5, 0});
	};
	if(!split) { // standard fast version
		vector<C> in(n), done(n);
		REP(i, a.size()) in[i].real = a[i];
		REP(i, b.size()) in[i].imag = b[i];
		dft(in, false);
		REP(i, n) done[i] = speed(in, i, 0) * speed(in, i, 1);
		dft(done, true);
		REP(i, ans.size()) ans[i] = is_integral<T>::value ?
				llround(done[i].real) : done[i].real;
	//REP(i,ans.size())err=max(err,abs(done[i].real-ans[i]));
	}
	else {
		const int M = 1 << 15;
		vector <C> t[2];
		for (int x = 0; x < 2; ++x) {
			t[x].resize(n);
			const vector <T> & in = (x ? b : a);
			for (int i = 0; i < (int) in.size(); ++i)
				t[x][i] = C{ld(in[i] % M), ld(in[i] / M)};
			dft(t[x], false);
		}
		vector <C> d1(n), d2(n);
		for (int i = 0; i < n; ++i) {
			d1[i] += speed(t[0], i, 0) * speed(t[1], i, 0);
			d1[i] += speed(t[0], i, 1) * speed(t[1], i, 1) * C{0, 1};
			d2[i] += speed(t[0], i, 0) * speed(t[1], i, 1);
			d2[i] += speed(t[0], i, 1) * speed(t[1], i, 0);
		}
		dft(d1, true);
		dft(d2, true);
		for (int i = 0; i < n; ++i) {
			d1[i].imag /= n;
		}
		for (int i = 0; i < (int) ans.size(); ++i) {
			ans[i] = (llround(d1[i].real) + llround(d2[i].real) % mod * M + llround(d1[i].imag) % mod * (M * M)) % mod;
		}
	}
	return ans;
}

int real2rep(int x, int min_value){
    return x - min_value;
}

int rep2real(int x, int min_value){
    return x + min_value;
}

int brute_force(VI a){
    unordered_map<int, int> cnt;
    REP(i, SZ(a)){
        int sum = 0;
        FOR(j, i, SZ(a) - 1){
            sum += a[j];
            cnt[sum]++;
        }
    }
    int res = 0;
    for(PII p1 : cnt){
        for(PII p2: cnt){
            int val = -(p1.st + p2.st);
            if (cnt.count(val)){
                res += p1.nd * p2.nd * cnt[val];
            }
        }
    }
    for(PII p: cnt){
        int val = -2 * p.st;
        if (cnt.count(val)){
            res -= p.nd * cnt[val] * 3;
        }
    }
    res += cnt[0] * 2;
    res /= 6;

    return res;
}

int32_t main() {
    ios_base::sync_with_stdio(0);
    cin.tie(0);
    cout << fixed << setprecision(11);
    cerr << fixed << setprecision(6);
    int n;
    cin >> n;
    VI a(n);
    REP(i, n) cin >> a[i];
    int max_abs_val = max(abs(*min_element(ALL(a))), abs(*max_element(ALL(a))));
    int single_min_bound = -max_abs_val * n;
    VI cnt(max_abs_val * n * 2 + 1, 0);
    if(SZ(cnt) > 5e5){
        debug("BRUTE");
        cout << brute_force(a) << "\n";
        return 0;
    }
    REP(i, n){
        int sum = real2rep(0, single_min_bound);
        FOR(j, i, n - 1){
            sum += a[j];
            cnt[sum]++;
        }
    }
    REP(i, SZ(cnt)){
        if(cnt[i]){
            int real_i = rep2real(i, single_min_bound);
            debug(i, real_i, cnt[i]);
        }
    }

    // FFT fft(SZ(cnt));
    debug(SZ(cnt));
    VI res = multiply(cnt, cnt);

    int min_bound = 2 * single_min_bound;
    debug("FFT result");
    REP(i, SZ(res)){
        if(res[i]){
            int real_i = rep2real(i, min_bound);
            debug(i, real_i, res[i]);
        }
    }
    int ans = 0;
    FOR(i, real2rep(single_min_bound, min_bound), real2rep(-single_min_bound, min_bound)){
        // i is representation
        int val = rep2real(i, min_bound);
        int seeked_val = real2rep(-val, single_min_bound);
        ans += res[i] * cnt[seeked_val];
    }
    debug("unordered triplets");
    debug(ans);
    REP(i, SZ(cnt)){
        int real_val = rep2real(i, single_min_bound);
        int double_val = 2 * real_val;
        int double_val_rep = real2rep(double_val, single_min_bound);
        if (double_val_rep < 0 || double_val_rep >= SZ(cnt)) continue;
        ans -= cnt[i] * cnt[real2rep(-double_val, single_min_bound)] * 3;
    }
    ans += cnt[real2rep(0, single_min_bound)] * 2;
    ans /= 6;
    cout << ans << "\n";
}

#include <bits/stdc++.h>
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")

using namespace std;
#define PB push_back
#define LL long long
#define int LL
#define FOR(i,a,b) for (int i = (a); i <= (b); i++)
#define FORD(i,a,b) for (int i = (a); i >= (b); i--)
#define REP(i,n) FOR(i,0,(int)(n)-1)
#define RE(i,n) FOR(i,1,n)
#define st first
#define nd second
#define ALL(x) (x).begin(), (x).end()
#define SZ(x) ((int)(x).size())
#define VI vector<int>
#define PII pair<int,int>
#define LD long double


template<class T> ostream &operator<<(ostream &os, vector<T> V){
    os<<"[";for(auto vv:V)os<<vv<<",";return os<<"]";
}

template<class L, class R> ostream &operator<<(ostream &os, pair<L,R> P) {
    return os << "(" << P.st << "," << P.nd << ")";
}


template<class C> void mini(C& a4, C b4) { a4 = min(a4, b4); }
template<class C> void maxi(C& a4, C b4) { a4 = max(a4, b4); }

template<class TH> void _dbg(const char *sdbg, TH h){cerr<<sdbg<<"="<<h<<"\n";}
template<class TH, class... TA> void _dbg(const char *sdbg, TH h, TA... a) {
  while(*sdbg!=',')cerr<<*sdbg++;cerr<<"="<<h<<","; _dbg(sdbg+1, a...);
}


#ifdef LOCAL
#define debug(...) _dbg(#__VA_ARGS__, __VA_ARGS__)
#else
#define debug(...) (__VA_ARGS__)
#define cerr if(0)cout
#endif
/*Precision error max_ans/1e15 (2.5e18) for (long) doubles.
So integer rounding works for doubles with answers 0.5*1e15,
e.g. for sizes 2^20 and RANDOM positive integers up to 45k.
Those values assume DBL_MANT_DIG=53 and LDBL_MANT_DIG=64.
For input in [0, M], you can decrease everything by M/2.
If there are many small vectors, uncomment "BRUTE FORCE".*/
const int mod = 1e9 + 7;
typedef double ld; // 'long double' is 2.2 times slower
struct C {
	ld real, imag;
	C operator * (const C & he) const {
		return C{real * he.real - imag * he.imag,
				real * he.imag + imag * he.real};
	}
	void operator += (const C & he) {
		real += he.real; imag += he.imag;
	}
};
void dft(vector<C> & a, bool rev) {
	const int n = a.size();
	for(int i = 1, k = 0; i < n; ++i) {
		for(int bit = n / 2; (k ^= bit) < bit; bit /= 2);;;
		if(i < k) swap(a[i], a[k]);
	}
	for(int len = 1, who = 0; len < n; len *= 2, ++who) {
		static vector<C> t[30];
		vector<C> & om = t[who];
		if(om.empty()) {
			om.resize(len);
			const ld ang = 2 * acosl(0) / len;
			REP(i, len) om[i] = i%2 || !who ?
					C{cos(i*ang), sin(i*ang)} : t[who-1][i/2];
		}
		for(int i = 0; i < n; i += 2 * len)
			REP(k, len) {
				 const C x = a[i+k], y = a[i+k+len]
						* C{om[k].real, om[k].imag * (rev ? -1 : 1)};
				a[i+k] += y;
				a[i+k+len] = C{x.real - y.real, x.imag - y.imag};
			}
	}
	if(rev) REP(i, n) a[i].real /= n;
}
template<typename T>vector<T> multiply(const vector<T> & a, const vector<T> & b, bool split = false) {
	if(a.empty() || b.empty()) return {};
	int n = a.size() + b.size();
	vector<T> ans(n - 1);
	// /* if(min(a.size(),b.size()) < 190) { // BRUTE FORCE
	// 	REP(i, a.size()) REP(j, b.size()) ans[i+j] += a[i]*b[j];
	// 	return ans; } */
	while(n&(n-1)) ++n;
	// http://codeforces.com/blog/entry/48417
	auto speed = [&](const vector<C> & w, int i, int k) {
		int j = i ? n - i : 0, r = k ? -1 : 1;
		return C{w[i].real + w[j].real * r, w[i].imag
				- w[j].imag * r} * (k ? C{0, -0.5} : C{0.5, 0});
	};
	if(!split) { // standard fast version
		vector<C> in(n), done(n);
		REP(i, a.size()) in[i].real = a[i];
		REP(i, b.size()) in[i].imag = b[i];
		dft(in, false);
		REP(i, n) done[i] = speed(in, i, 0) * speed(in, i, 1);
		dft(done, true);
		REP(i, ans.size()) ans[i] = is_integral<T>::value ?
				llround(done[i].real) : done[i].real;
	//REP(i,ans.size())err=max(err,abs(done[i].real-ans[i]));
	}
	else {
		const int M = 1 << 15;
		vector <C> t[2];
		for (int x = 0; x < 2; ++x) {
			t[x].resize(n);
			const vector <T> & in = (x ? b : a);
			for (int i = 0; i < (int) in.size(); ++i)
				t[x][i] = C{ld(in[i] % M), ld(in[i] / M)};
			dft(t[x], false);
		}
		vector <C> d1(n), d2(n);
		for (int i = 0; i < n; ++i) {
			d1[i] += speed(t[0], i, 0) * speed(t[1], i, 0);
			d1[i] += speed(t[0], i, 1) * speed(t[1], i, 1) * C{0, 1};
			d2[i] += speed(t[0], i, 0) * speed(t[1], i, 1);
			d2[i] += speed(t[0], i, 1) * speed(t[1], i, 0);
		}
		dft(d1, true);
		dft(d2, true);
		for (int i = 0; i < n; ++i) {
			d1[i].imag /= n;
		}
		for (int i = 0; i < (int) ans.size(); ++i) {
			ans[i] = (llround(d1[i].real) + llround(d2[i].real) % mod * M + llround(d1[i].imag) % mod * (M * M)) % mod;
		}
	}
	return ans;
}

int real2rep(int x, int min_value){
    return x - min_value;
}

int rep2real(int x, int min_value){
    return x + min_value;
}

int brute_force(VI a){
    unordered_map<int, int> cnt;
    REP(i, SZ(a)){
        int sum = 0;
        FOR(j, i, SZ(a) - 1){
            sum += a[j];
            cnt[sum]++;
        }
    }
    int res = 0;
    for(PII p1 : cnt){
        for(PII p2: cnt){
            int val = -(p1.st + p2.st);
            if (cnt.count(val)){
                res += p1.nd * p2.nd * cnt[val];
            }
        }
    }
    for(PII p: cnt){
        int val = -2 * p.st;
        if (cnt.count(val)){
            res -= p.nd * cnt[val] * 3;
        }
    }
    res += cnt[0] * 2;
    res /= 6;

    return res;
}

int32_t main() {
    ios_base::sync_with_stdio(0);
    cin.tie(0);
    cout << fixed << setprecision(11);
    cerr << fixed << setprecision(6);
    int n;
    cin >> n;
    VI a(n);
    REP(i, n) cin >> a[i];
    int max_abs_val = max(abs(*min_element(ALL(a))), abs(*max_element(ALL(a))));
    int single_min_bound = -max_abs_val * n;
    VI cnt(max_abs_val * n * 2 + 1, 0);
    if(SZ(cnt) > 5e5){
        debug("BRUTE");
        cout << brute_force(a) << "\n";
        return 0;
    }
    REP(i, n){
        int sum = real2rep(0, single_min_bound);
        FOR(j, i, n - 1){
            sum += a[j];
            cnt[sum]++;
        }
    }
    REP(i, SZ(cnt)){
        if(cnt[i]){
            int real_i = rep2real(i, single_min_bound);
            debug(i, real_i, cnt[i]);
        }
    }

    // FFT fft(SZ(cnt));
    debug(SZ(cnt));
    VI res = multiply(cnt, cnt);

    int min_bound = 2 * single_min_bound;
    debug("FFT result");
    REP(i, SZ(res)){
        if(res[i]){
            int real_i = rep2real(i, min_bound);
            debug(i, real_i, res[i]);
        }
    }
    int ans = 0;
    FOR(i, real2rep(single_min_bound, min_bound), real2rep(-single_min_bound, min_bound)){
        // i is representation
        int val = rep2real(i, min_bound);
        int seeked_val = real2rep(-val, single_min_bound);
        ans += res[i] * cnt[seeked_val];
    }
    debug("unordered triplets");
    debug(ans);
    REP(i, SZ(cnt)){
        int real_val = rep2real(i, single_min_bound);
        int double_val = 2 * real_val;
        int double_val_rep = real2rep(double_val, single_min_bound);
        if (double_val_rep < 0 || double_val_rep >= SZ(cnt)) continue;
        ans -= cnt[i] * cnt[real2rep(-double_val, single_min_bound)] * 3;
    }
    ans += cnt[real2rep(0, single_min_bound)] * 2;
    ans /= 6;
    cout << ans << "\n";
}