【BZOJ 4589】Hard Nim

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=4589

解题报告

我们考虑用SG函数来暴力DP
显然可以用FWT来优化多项式快速幂
总的时间复杂度:$O(n \log n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 100009; 
const int MOD = 1000000007;
const int REV = 500000004;

bool vis[N];
int arr[N];

inline int read() {
	char c = getchar(); int ret = 0, f = 1;
	for (; c < '0' || c > '9'; f = c == '-'? -1: 1, c = getchar());
	for (; '0' <= c && c <= '9'; ret = ret * 10 + c - '0', c = getchar());
	return ret * f;
}

inline int Pow(int w, int t) {
	int ret = 1;
	for (; t; t >>= 1, w = (LL)w * w % MOD) {
		if (t & 1) {
			ret = (LL)ret * w % MOD;
		}
	}
	return ret;
}

inline void FWT(int *a, int len, int opt = 1) {
	for (int d = 1; d < len; d <<= 1) {
		for (int i = 0; i < len; i += d << 1) {
			for (int j = 0; j < d; j++) {
				int t1 = a[i + j], t2 = a[i + j + d];
				if (opt == 1) {
					a[i + j] = (t1 + t2) % MOD;
					a[i + j + d] = (t1 - t2) % MOD;
				} else {
					a[i + j] = (LL)(t1 + t2) * REV % MOD;
					a[i + j + d] = (LL)(t1 - t2) * REV % MOD;
				}
			}
		}
	}
}

int main() {
	for (int n, m; ~scanf("%d %d", &n, &m); ) {
		memset(arr, 0, sizeof(arr));
		for (int i = 2; i <= m; i++) {
			if (!vis[i]) {
				arr[i] = 1;
				for (int j = i << 1; 0 <= j && j <= m; j += i) {
					vis[j] = 1;
				}
			}
		}
		int len = 1; 
		for (; len <= m; len <<= 1);
		FWT(arr, len);
		for (int i = 0; i < len; i++) {
			arr[i] = Pow(arr[i], n);
		}
		FWT(arr, len, -1);
		printf("%d\n", (arr[0] + MOD) % MOD);
	}
	return 0;
}

【日常小测】航海舰队

相关链接

题目传送门:https://oi.qizy.tech/wp-content/uploads/2017/06/claris_contest_day1-statements.pdf
官方题解:https://oi.qizy.tech/wp-content/uploads/2017/06/claris_contest_day1-solutions.pdf

解题报告

之前在BZOJ上看过这道题,然后当时嫌麻烦没有写
今天刚好考到,然后就被细节给搞死了_(:з」∠)_

一句话题解就是:用FFT做矩阵匹配
详细一点的话,大概就是:

先用FFT做一遍0/1矩阵匹配,求出能放阵型的位置
然后BFS出能到达的位置
最后再做一遍FFT求出每个点被覆盖了多少次

Code

#include<bits/stdc++.h>
#define LL long long
#define CP complex<double>
using namespace std;

const int N = 709;
const int M = 5000000;
const double EPS = 0.5;
const double PI = acos(-1);

char mp[N][N];
int n, m, vis[M], sfe[M];
int dx[] = {1, 0, -1, 0}, dy[] = {0, 1, 0, -1};
CP a[M], b[M], c[M];

inline void FFT(CP *arr, int len, int ty) {
	static int pos[M], init = 0;
	if (init != len) {
		for (int i = 1; i < len; ++i) {
			pos[i] = (pos[i >> 1] >> 1) | ((i & 1)? (len >> 1): 0); 
		}
		init = len;
	}
	for (int i = 0; i < len; i++) {
		if (pos[i] < i) {
			swap(arr[i], arr[pos[i]]);
		}
	}
	for (int i = 1; i < len; i <<= 1) {
		CP wn(cos(PI / i), sin(PI / i) * ty);
		for (int j = 0; j < len; j += i + i) {
			CP w(1, 0);
			for (int k = 0; k < i; k++, w *= wn) {
				CP tmp = arr[j + i + k] * w;
				arr[j + i + k] = arr[j + k] - tmp;
				arr[j + k] = arr[j + k] + tmp;
			}
		}
	}
	if (ty == -1) {
		for (int i = 0; i < len; i++) {
			arr[i] /= len;
		}
	}
}

inline void BFS(int sx, int sy, int lx, int ly) {
	vis[sy * n + sx] = 1;
	queue<pair<int, int> > que;
	for (que.push(make_pair(sx, sy)); !que.empty(); que.pop()) {
		int x = que.front().first;
		int y = que.front().second;
		for (int i = 0; i < 4; i++) {
			int nx = x + dx[i];
			int ny = y + dy[i];
			if (0 <= nx && nx + lx - 1 < n && 0 <= ny && ny + ly - 1 < m && sfe[ny * n + nx] && !vis[ny * n + nx]) {
				que.push(make_pair(nx, ny));
				vis[ny * n + nx] = 1;
			}
		}
	}
} 

int main() {
	freopen("sailing.in", "r", stdin);
	freopen("sailing.out", "w", stdout);
	cin >> m >> n;
	int mnx = n, mny = m, mxx = 0, mxy = 0; 
	for (int j = 0; j < m; j++) {
		scanf("%s", mp[j]);
		for (int i = 0; i < n; i++) {
			if (mp[j][i] == 'o') {
				mnx = min(i, mnx);
				mxx = max(i, mxx);
				mny = min(j, mny);
				mxy = max(j, mxy);
			}
		}
	}
	for (int j = 0; j < m; ++j) {
		for (int i = 0; i < n; ++i) {
			if (mp[j][i] == 'o') {
				b[(j - mny) * n + i - mnx] = CP(1, 0);
			} else if (mp[j][i] == '#') {
				a[j * n + i] = CP(1, 0);
			}
		}
	}
	int cur = n * m, len = 1;
	for (; len < cur * 2; len <<= 1);
	for (int l = 0, r = cur - 1; l < r; ++l, --r) {
		swap(b[l], b[r]);
	}
	FFT(a, len, 1);
	FFT(b, len, 1);
	for (int i = 0; i < len; i++) {
		a[i] *= b[i];
	}
	FFT(a, len, -1);
	for (int i = 0; i < cur; i++) {
		if (a[i + cur - 1].real() < EPS) {
			sfe[i] = 1;
		}
	}
	BFS(mnx, mny, mxx - mnx + 1, mxy - mny + 1); 
	memset(b, 0, sizeof(b));
	for (int j = 0; j < m; j++) {
		for (int i = 0; i < n; i++) {
			c[j * n + i] = vis[j * n + i]? CP(1, 0): CP(0, 0);
			b[(j - mny) * n + i - mnx] = mp[j][i] == 'o'? CP(1, 0): CP(0, 0);
		}
	}
	FFT(c, len, 1);
	FFT(b, len, 1);
	for (int i = 0; i < len; i++) {
		c[i] *= b[i];
	}
	FFT(c, len, -1);
	int ans = 0;
	for (int i = 0; i < cur; i++) {
		ans += c[i].real() > EPS; 
	}
	printf("%d\n", ans);
	return 0;
}

—————————— UPD 2017.6.30 ——————————
B站题号:4624

【BZOJ 4827】[HNOI2017] 礼物

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=4827
神犇题解:https://blog.sengxian.com/solutions/hnoi-2017-day1

解题报告

这题怎么讲呢?如果你不知道循环卷积,那么你只有$70$分
比如说:我 _(:з」∠)_

百度百科上循环卷积这个词条的例题就是这题
所以知道循环卷积的定义,然后写一个$FFT$就可以了

Code

#include<bits/stdc++.h>
#define LL long long
#define CPL complex<double>
using namespace std;

const int N = 200000;
const int INF = 2e9;
const double EPS = 0.5;
const double PI = acos(-1);

int n,m,B,C,mx,len,tt,f[N],pos[N];
CPL a[N],b[N];

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
} 

inline void FFT(CPL *arr, int ty) {
	for (int i=0;i<len;i++) {
		if (pos[i] < i) {
			swap(arr[pos[i]], arr[i]);
		}
	}
	for (int ll=1;ll<len;ll<<=1) {
		CPL wn(cos(PI/ll), sin(PI/ll)*ty);
		for (int i=0;i<len;i+=ll*2) {
			CPL w(1,0);
			for (int j=0;j<ll;j++,w=w*wn) {
				CPL tmp = arr[i+j+ll] * w;
				arr[i+j+ll] = arr[i+j] - tmp;
				arr[i+j] = arr[i+j] + tmp;
			}
		}
	}
	if (ty == -1) {
		for (int i=0;i<len;i++) {
			arr[i] /= len;
		}
	}
}

int main() {
	n = read(); m = read();
	for (int i=0,tmp;i<n;i++) {
		B += (tmp = read()) << 1;
		C += tmp * tmp;
		a[i] = CPL{tmp, 0};
	}
	for (int i=0,tmp;i<n;i++) {
		B -= (tmp = read()) << 1;
		C += tmp * tmp;
		b[n-i] = CPL{tmp, 0};
	}
	
	for (len=1,tt=-1;len<n*2;len<<=1,++tt);
	for (int i=1;i<len;i++) {
		pos[i] = pos[i>>1]>>1;
		if (i&1) {
			pos[i] |= 1 << tt;
		}
	}
	FFT(a, 1);
	FFT(b, 1);
	for (int i=0;i<len;i++) {
		a[i] = a[i] * b[i];
	}
	FFT(a, -1);
	for (int i=n;i<len;i++) {
		a[i%n] += a[i];
	}
	for (int i=0;i<n;i++) {
		f[i] = a[i].real() + EPS;
		mx = max(f[i], mx);
	}
	C = C - 2 * mx;
	int p1 = -1.0 * B / 2 / n + EPS;
	int p2 = -1.0 * B / 2 / n - EPS;
	printf("%d\n",min(p1*p1*n+B*p1, p2*p2*n+B*p2) + C);
	return 0;
}

【HDU 4626】Jinkeloid

相关链接

题目传送门:http://acm.hdu.edu.cn/showproblem.php?pid=4626
数据生成器:http://paste.ubuntu.com/24454724/
神犇题解Ⅰ:https://blog.sengxian.com/solutions/hdoj-4626
神犇题解Ⅱ:http://blog.csdn.net/u012345506/article/details/52040466
FWT相关:http://blog.csdn.net/liangzhaoyang1/article/details/52819835

解题报告

我们使用状压$DP$的话,每次询问相当于强制某些位为$0$
于是我们可以先$FWT$一发,搞出每个状态的方案数
然后我们可以再做一发子集$DP$,搞出每个状态的超集
然后询问就可以$O(1)$回答了
于是总的时间复杂度是:$O(20 \cdot 2^{20} + m + n)$

另外的话,因为本题的$k$非常的小
于是之前$FWT$那一块还可以用容斥来解决
只不过这样单次询问的复杂度是:$O(3^k)$的

Code

这个代码里$DP$超集那一块还是很妙妙的

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 3000000;
const int M = 100009;
const int UNIVERSE = (1 << 20) - 1; 
const int SGZ = 20;

char pat[M];
int n,m,sta;
LL a[N],f[N],zero;

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

inline void FWT(LL *arr, int len, int ty) {
	for (int d=1;d<len;d<<=1) {
		for (int i=0;i<=len;i+=d*2) {
			for (int j=0;j<d;j++) {
				LL t1 = arr[i+j], t2 = arr[i+j+d];
				arr[i+j] = t1 + t2;
				arr[i+j+d] = t1 - t2;
				if (ty == -1) {
					arr[i+j] /= 2;
					arr[i+j+d] /= 2;
				}
			}
		}
	}
}	

int main() {
	for (int T=read();T;T--) {
		memset(a, 0, sizeof(a));
		scanf("%s",pat);
		n = strlen(pat);
		a[0]++; sta = zero = 0;
		for (int i=0;i<n;i++) {
			sta ^= 1 << pat[i] - 'a';
			zero += a[sta]++;
		}  
		FWT(a, UNIVERSE, 1);
		for (int i=0;i<=UNIVERSE;i++) {
			a[i] = a[i] * a[i];
		}
		FWT(a, UNIVERSE, -1);
		a[0] = zero;
		for (int i=1;i<=UNIVERSE;i++) {
			a[i] /= 2;
		}
		for (int i=0;i<=UNIVERSE;i++) {
			if ((i ^ UNIVERSE) < i) {
				swap(a[i], a[i^UNIVERSE]); 
			}
		}
		for (int j=0;j<SGZ;j++) {
			for (int i=0;i<=UNIVERSE;i++) {
				if (i & (1<<j)) {
					a[i^(1<<j)] += a[i];
				}
			}
		}	
		m = read();
		for (int tt=1;tt<=m;tt++) {
			int k = read(), sta = 0;
			for (int i=1;i<=k;i++) {
				scanf("%s",pat);
				sta |= 1 << pat[0] - 'a';
			}
			printf("%lld\n",a[sta]);
		}
	}
	return 0;
}

【BZOJ 3451】Tyvj1953 Normal

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=3451
神犇题解:http://wyfcyx.is-programmer.com/posts/74735.html

解题报告

考虑一个点$u$,如果点分到点$v$的时候会产生贡献
那么点分$v$的时候,$v \to u$这个路径上还没有其他点被点分
换一句话来讲,点$v$应该是$v \to u$这条路径上第一个被点分的点
因为每一个点被选的概率一样,所以贡献的概率是$\frac{1}{dis_{u \to v} + 1}$
于是最后答案就是$\sum\limits_{u,v \in [1,n]}{\frac{1}{dis_{u \to v}+1}}$

然后这个东西我们可以使用点分治加上FFT来解决
具体来讲就是在点分的时候统计$dis_{u \to v}=x$的方案数,然后计入答案
时间复杂度:$O(n \log^2 n)$

—————————— UPD 2017.4.11 ————————————
找到这题的序列版了:http://www.lydsy.com/JudgeOnline/problem.php?id=2769
在具体的做法方面,用分个块,然后块内暴力,块间FFT即可

【日常小测】图片加密

题目大意

给定两个数字串$A,B$,长度分别为$n,m(m \le n \le 250000)$
串中的元素为$<512$的自然数,你可以花费一个单位的代价改变一个元素在二进制下的最低位
询问能否通过修改操作使得$B$在$A$中出现
如果可以,请输出最小的花费和在花费最小的前提下最左边的匹配位置

解题报告

我们发现高位不可改,于是我们用$KMP$统计一下高位的合法位置
至于低位,我们可以拆成两次$FFT$
于是搞一搞就可以了

至于为什么我写的是$SA$?
因为考场上忘掉$KMP$怎么写了 QwQ

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;
 
const int N = 550000;
const int M = N << 1;
const int INF = 1e9;
const int MOD = 1004535809;
 
int n,m,L,a1[M],a2[N],o1[N],o2[N],leg[N],ans[N]; 
  
inline int read() {
    char c=getchar(); int ret=0,f=1;
    while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
    while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
    return ret*f;
}
 
inline int Read() {
    static char pat[20]; scanf("%s",pat+1); int ret = 0;
    for (int i=8,w=1;i;i--,w<<=1) ret += (pat[i]-'0') * w; 
    return ret;
}
 
 
class Suffix_Array{
    int len,tot,*rak,*que,bot[N],sa[M],arr1[M],arr2[M],height[N];
    public:
        inline void build() {
            rak = arr1; que = arr2; len = n + m + 1;
            a1[0] = -1; a1[len+1] = -2;
            for (int i=1;i<=len;i++) ++bot[a1[i]];
            for (int i=1;i<=100000;i++) bot[i] += bot[i-1];
            for (int i=1;i<=len;i++) sa[bot[a1[i]]--] = i;
            rak[sa[1]] = tot = 1;
            for (int i=2;i<=len;i++) rak[sa[i]] = (a1[sa[i]]==a1[sa[i-1]])? tot: ++tot;
            for (int l=1,cnt;cnt=0,l<=len;l<<=1) {
                for (int i=len-l+1;i<=len;i++) que[++cnt] = i;
                for (int i=1;i<=len;i++) if (sa[i]>l) que[++cnt] = sa[i] - l;
                for (int i=0;i<=tot;i++) bot[i] = 0;
                for (int i=1;i<=len;i++) bot[rak[i]]++;
                for (int i=1;i<=tot;i++) bot[i] += bot[i-1];
                for (int i=len;i;i--) sa[bot[rak[que[i]]]--] = que[i];
                swap(que, rak); rak[sa[1]] = tot = 1;
                for (int i=2;i<=len;i++) 
                    if (que[sa[i]]==que[sa[i-1]]&&que[sa[i]+l]==que[sa[i-1]+l]) rak[sa[i]] = tot;
                    else rak[sa[i]] = ++tot;
                if (tot >= len) break;
            }
            GetHeight();
        }
        inline void solve() {
            for (int i=rak[n+2];height[i]>=m;i--) if (sa[i] <= n) leg[sa[i]] = 1;
            for (int i=rak[n+2]+1;height[i]>=m;i++) if (sa[i] <= n) leg[sa[i]] = 1;
        }
    private:
        inline void GetHeight() {
            for (int i=1;i<=len;i++) {
                int len = max(0, height[rak[i-1]] - 1);
                int p1 = i + len, p2 = sa[rak[i]-1] + len;
                while (a1[p1++] == a1[p2++]) len++;
                height[rak[i]] = len;
            }
        }
}SA;
 
class Number_Theoretic_Transform{
    int pos[M],REV,G;
    public:
        inline void init() {
            for (L=1;L<n+m;L<<=1); G = 3;
            int len = 1,tt=-1; while (len < L) len<<=1, ++tt; REV = Pow(L, MOD-2);
            for (int i=1;i<len;i++) pos[i] = (pos[i>>1]>>1)|((1<<tt)*(i&1));
        }
        inline void trans(int *a, int t=1) {
            for (int i=0;i<L;i++) if (pos[i] < i) swap(a[pos[i]], a[i]);
            for (int l=2;l<=L;l<<=1) {
                int wn = Pow(G, MOD/l); if (t == -1) wn = Pow(wn, MOD-2);
                for (int i=0;i<L;i+=l) {
                    for (int j=0,w=1;j<l/2;j++,w=(LL)w*wn%MOD) {
                        int tmp = (LL)w * a[i+l/2+j] % MOD;
                        a[i+l/2+j] = (a[i+j] - tmp) % MOD;
                        a[i+j] = (a[i+j] + tmp) % MOD;
                    }
                }
            }
            if (t == -1) for (int i=0,rev=Pow(L,MOD-2);i<L;i++) a[i] = (LL)a[i] * rev % MOD; 
            for (int i=0;i<L;i++) a[i] = a[i]<0? a[i] + MOD: a[i];
        } 
    private:
        inline int Pow(int w, int t) {
            int ret = 1;
            for (;t;t>>=1,w=(LL)w*w%MOD)
                if (t&1) ret=(LL)ret*w%MOD;
            return ret;
        }
}NTT;
 
int main() {
    n = read(); m = read();
    for (int i=1;i<=n;i++) a1[i] = o1[i] = Read();
    for (int i=1;i<=m;i++) a2[i] = o2[i] = Read();
     
    //find out legal positions
    for (int i=1;i<=m;i++) a1[n+i+1] = a2[i];
    for (int i=1;i<=n+m+1;i++) a1[i] >>= 1, a1[i]++;
    a1[n+1] = 0; SA.build(); 
    SA.solve();
     
    //calculate the cost
    NTT.init();
    memset(a1,0,sizeof(a1)); memset(a2,0,sizeof(a2));
    for (int i=0;i<n;i++) a1[i] = o1[i+1] & 1;
    for (int i=0;i<m;i++) a2[i] = (o2[m-i] & 1) ^ 1;
    NTT.trans(a1); NTT.trans(a2);
    for (int i=0;i<L;i++) a1[i] = (LL)a1[i] * a2[i] % MOD;
    NTT.trans(a1, -1);
    for (int i=1;i<=n;i++) ans[i] = a1[i+m-2];
    memset(a1,0,sizeof(a1)); memset(a2,0,sizeof(a2));
    for (int i=0;i<n;i++) a1[i] = (o1[i+1] & 1) ^ 1;
    for (int i=0;i<m;i++) a2[i] = o2[m-i] & 1;
    NTT.trans(a1); NTT.trans(a2);
    for (int i=0;i<L;i++) a1[i] = (LL)a1[i] * a2[i] % MOD;
    NTT.trans(a1, -1);
    for (int i=1;i<=n;i++) ans[i] += a1[i+m-2];
     
    //output the answer
    int pos=-1, cost=INF;
    for (int i=1;i<=n;i++) if (leg[i] && ans[i] < cost) cost = ans[i], pos = i;
    if (!~pos) puts("No");
    else printf("Yes\n%d %d\n",cost,pos);
    return 0;
}

【日常小测】Hope

题目大意

给定排列的长度$n(n\le10^5)$和常数$k(k\le10^5)$
对于单个排列$A$中的每$i$位:我们连一条从$i$出发,到其右边第一个比它大的位,如果没有就不连
定义$A$的花费为 $\sum$其中每一个连通块的大小$^k$
询问所有长度为$n$的排列的花费和$(\bmod 998244353)$

解题报告

设$f_i$为排列长度为$i$的答案,假设我们已经求出了$f_{i-1}$
我们考虑枚举$i$放在哪里,那么$i$之前的数全部连向$i$,之后的数是递归一个子问题
于是我们可以写出暴力的$DP$:${f_i} = \sum\limits_{j = 1}^i {{j^k}(j – 1)!C_{i – 1}^{j – 1}{f_{i – j}}} $
我们化一化式子可以得到$\frac{f_i}{(i-1)!}=\sum\limits_{j=1}^{i}{j^k \cdot \frac{f_{i-j}}{(i-j)!}}$
这显然是一个卷积的形式,于是可以用$NTT$优化

另外,对于$f_i$来讲,$f_{1 \sim i-1}$都会对它有贡献
那么这有是一个典型的$CDQ$分治的模型,于是我们还需要套上一个$CDQ$分治

在另外的话,我们回顾一下推出暴力$DP$的过程
不难发现我们是以最大值的插入作为突破口
那么这种题目与最大值相关的问题,这应该算是套路之一吧!
例如:HDU 5575

—————————— UPD 2017.3.17 ——————————
Claris说这题在OEIS上能找到$O(n)$的递推式 QwQ
Claris太强啦! _(:з」∠)_

【模板库】FWT

参考例题:http://www.lydsy.com/JudgeOnline/problem.php?id=4589

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 100009; 
const int MOD = 1000000007;
const int REV = 500000004;

bool vis[N];
int arr[N];

inline int read() {
	char c = getchar(); int ret = 0, f = 1;
	for (; c < '0' || c > '9'; f = c == '-'? -1: 1, c = getchar());
	for (; '0' <= c && c <= '9'; ret = ret * 10 + c - '0', c = getchar());
	return ret * f;
}

inline int Pow(int w, int t) {
	int ret = 1;
	for (; t; t >>= 1, w = (LL)w * w % MOD) {
		if (t & 1) {
			ret = (LL)ret * w % MOD;
		}
	}
	return ret;
}

inline void FWT(int *a, int len, int opt = 1) {
	for (int d = 1; d < len; d <<= 1) {
		for (int i = 0; i < len; i += d << 1) {
			for (int j = 0; j < d; j++) {
				int t1 = a[i + j], t2 = a[i + j + d];
				//XOR
				if (opt == 1) {
					a[i + j] = (t1 + t2) % MOD;
					a[i + j + d] = (t1 - t2) % MOD;
				} else {
					a[i + j] = (LL)(t1 + t2) * REV % MOD;
					a[i + j + d] = (LL)(t1 - t2) * REV % MOD;
				}
				/*
				AND
				if (opt == 1) {
					arr[i + j] = t1 + t2;
					//arr[i + j + d] = t2;
				} else {
					arr[i + j] = t1 - r2;
					//arr[i + j + d] = t2;
				}
				*/
				/*
				OR
				if (opt == 1) {
					//arr[i + j] = t1;
					arr[i + j + d] = t2 + t1;
				} else {
					//arr[i + j] = t1;
					arr[i + j + d] = t2 - t1;
				}
				*/
			}
		}
	}
}

int main() {
	for (int n, m; ~scanf("%d %d", &n, &m); ) {
		memset(arr, 0, sizeof(arr));
		for (int i = 2; i <= m; i++) {
			if (!vis[i]) {
				arr[i] = 1;
				for (int j = i << 1; 0 <= j && j <= m; j += i) {
					vis[j] = 1;
				}
			}
		}
		int len = 1; 
		for (; len <= m; len <<= 1);
		FWT(arr, len);
		for (int i = 0; i < len; i++) {
			arr[i] = Pow(arr[i], n);
		}
		FWT(arr, len, -1);
		printf("%d\n", (arr[0] + MOD) % MOD);
	}
	return 0;
}

【日常小测】生日礼物

相关链接

题目传送门:https://oi.qizy.tech/wp-content/uploads/2017/03/4237864728368.png
斯特林数相关:https://en.wikipedia.org/wiki/Stirling_number

解题报告

答案显然就是两个序列卷积起来
而其中一个序列就是一个组合数一样的东西
另一个序列则需要处理出第二类斯特林数
众所周知,预处理特殊的第二类斯特林数可以用$NTT$优化
于是就搞两次$NTT$就可以了,时间复杂度$O(n \log n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;
 
const int N = 600009;
const int MOD = 786433;
const int RT = 13;
 
int g[N],f[N],POW[N],REV[N],pos[N];
int n1,n2,m,q,vis[N],a[N],b[N];
 
inline int read() {
    char c=getchar(); int ret=0,f=1;
    while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
    while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
    return ret*f;
}
 
inline int Pow(int w, int t) {
    int ret = 1;
    for (;t;t>>=1,w=(LL)w*w%MOD)
        if(t&1)ret=(LL)ret*w%MOD;
    return ret;
}   
 
inline int C(int a, int b) {
    if (a > b) return 0;
    return ((LL)POW[b] * REV[a] % MOD) * REV[b-a] % MOD;
}
 
inline void prework() {
    for (int i=2;i<N;i++) {
        for (int j=i*2;j<N;j+=i) {
            vis[j] = 1;
        }
    } vis[1] = 1;
    POW[0] = REV[0] = 1;
    for (int i=1;i<N;i++) POW[i] = (LL)POW[i-1] * i % MOD;
    REV[N-1] = Pow(POW[N-1], MOD-2);
    for (int i=N-2;i;i--) REV[i] = REV[i+1] * (i+1ll) % MOD;
}

inline void ntt(int *a, int len, int rev = 0) {
	for (int i=0;i<len;i++) if (pos[i]<i) swap(a[i], a[pos[i]]);
	for (int l=2;l<=len;l<<=1) {
		int wn = Pow(RT, MOD / l);
		if (rev) wn = Pow(wn, MOD-2);
		for (int i=0,w=1,tmp;i<len;i+=l,w=1) {
			for (int j=0;j<(l>>1);j++,w=(LL)w*wn%MOD) {
				tmp = (LL)w * a[i+j+(l>>1)] % MOD;
				a[i+j+(l>>1)] = (a[i+j] - tmp) % MOD;
				a[i+j] = (a[i+j] + tmp) % MOD;
			}
		}
	}
	if (rev) for (int i=1,Rev=Pow(len,MOD-2);i<=len;i++) 
		a[i] = ((LL)a[i] * Rev % MOD + MOD) % MOD;
}

inline void solve(int l, int *a, int *b) {
	int t = -1, len = 1;
	while (len < (l+1)) len <<= 1, t++;
	for (int i=0;i<len;i++) {
		pos[i] = pos[i>>1]>>1;
		if (i&1) pos[i] |= 1<<t;
	} 
	ntt(a, len); ntt(b, len);
	for (int i=0;i<len;i++) a[i] = (LL)a[i] * b[i] % MOD;
	ntt(a, len, 1);
}
 
inline void update() {
	memset(f,0,sizeof(f)); memset(g,0,sizeof(g));
	memset(a,0,sizeof(a)); memset(b,0,sizeof(b));
    for (int i=1;i<=n2;i++) 
        g[i] = (LL)C(i-1, n2-1) * C(i, m) % MOD;
    for (int i=0;i<=n1;i++) {
		a[i] = (i&1)? MOD-REV[i]: REV[i];
		b[i] = (LL)Pow(i, n1) * REV[i] % MOD;
	}
	solve(n1 << 1, a, b);
    for (int i=1;i<=n1;i++) {
        if (vis[i]) f[i] = i==n1? 1: C(i-1, n1);
        else f[i] = a[i]; 
    }
}
 
int main() {
    prework();
    for (int T=read();T;T--) {
        n1 = read(); n2 = read(); 
        m = read(); update();
        solve(n1 + n2, f, g);
        for (int q=read();q;q--) 
            printf("%d\n",(f[read()]+MOD)%MOD);
    }
    return 0;
}

【BZOJ 3509】[CodeChef] COUNTARI

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=3509
原题传送门:https://www.codechef.com/problems/COUNTARI
神犇题解:http://blog.miskcoo.com/2015/04/bzoj-3509

解题报告

这题如果没有$i<j<k$那么撸一发FFT就可以了
现在考虑$i<j<k$的限制,我们可以分块!

设块的大小为$S$,那么对于$j,k$或$i,j$在同一个块内的,我们可以$O(S^2)$暴力
对于$i,k$都不与$j$在同一个块的情况,我们可以用FFT做到$O(\frac{n}{S} \cdot v \log v)$
然后复杂度分析要准确的话应该搞倒数,但我不会QwQ

XeHoTh似乎用一份巨强暴力,卡到了BZOJ和CC的Rank 1
伏地膜啊!太强大了 _(:з」∠)_

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;
 
const int M = 70009;
const int N = 100009;
const int T = 15;
const int L = 1 << T + 1;
const double EPS = 0.5;
const double PI = acos(-1);
 
int n,S,ed,arr[N],tot[M],pos[M];
struct COMPLEX{
	double a,b;
	inline COMPLEX() {}
	inline COMPLEX(double x, double y):a(x),b(y) {}
	inline COMPLEX operator - (const COMPLEX &B) {return COMPLEX(a-B.a,b-B.b);}
	inline COMPLEX operator + (const COMPLEX &B) {return COMPLEX(a+B.a,b+B.b);}
	inline COMPLEX operator * (const COMPLEX &B) {return COMPLEX(a*B.a-b*B.b,a*B.b+b*B.a);}
}a1[M],a2[M],bas(1,0);
LL vout,cnt[M];
 
inline int read() {
    char c=getchar(); int ret=0,f=1;
    while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
    while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
    return ret*f;
}
 
inline void FFT(COMPLEX *a, int T = 1) {
    for (int i=0;i<L;i++) if (pos[i]<i) swap(a[pos[i]],a[i]);
    for (int l=2;l<=L;l<<=1) {
        COMPLEX wn(cos(2*PI/l),sin(2*PI/l)*T),w(1,0);
        for (int i=0;i<L;i+=l,w=bas) {
            for (int j=i;j<(i+(l>>1));j++,w=w*wn) {
                COMPLEX tmp = w * a[j+(l>>1)];
                a[j+(l>>1)] = a[j] - tmp;
                a[j] = a[j] + tmp;
            }
        }
    }   
}
 
int main() {
    n = read(); S = 4000;
    for (int i=1;i<=n;i++) arr[i] = read();
    for (int i=1;i<L;i++) pos[i] = (pos[i>>1]>>1)|((i&1)<<T);
    for (int i=S+1;i+S<=n;i+=S) { 
        memset(a1,0,sizeof(a1));
        memset(a2,0,sizeof(a2));
        for (int j=1;j<i;j++) a1[arr[j]] = a1[arr[j]] + bas;
        for (int j=i+S;j<=n;j++) a2[arr[j]] = a2[arr[j]] + bas;
        FFT(a1); FFT(a2);
        for (int j=0;j<L;j++) a1[j] = a1[j] * a2[j];
        FFT(a1, -1);
        for (int j=0;j<L;j++) cnt[j] = a1[j].a / L + EPS;
        for (int j=i;j<i+S;j++) vout += cnt[arr[j]<<1];
    }
    for (int i=1,t;i<=n;i+=S) {
        ed = max(ed, i);
        for (int j=i,lim=min(n+1,i+S);j<lim;j++) {
            for (int k=j+1;k<lim;k++) {
                t = (arr[j] << 1) - arr[k];
                vout += t<M&&t>0? tot[t]: 0;
            }
            tot[arr[j]]++;
        }
    }
    memset(tot,0,sizeof(tot));
    for (int i=ed,t;i>0;i-=S) {
        for (int j=min(n,i+S-1);j>=i;j--) {
            for (int k=j-1;k>=i;k--) {
                t = (arr[j] << 1) - arr[k];
                vout += t<M&&t>0? tot[t]: 0;
            }
        }
        for (int j=min(n,i+S-1);j>=i;j--) tot[arr[j]]++;
    } 
    printf("%lld\n",vout);
    return 0;
}