【BZOJ 4197】[NOI2015] 寿司晚宴

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=4197

解题报告

考虑把小于$\sqrt{n}$的因数状压起来
然后将所有数按照大于$\sqrt{n}$的因数分组
最后分组$DP$即可
总时间复杂度:$O(500 \cdot 3^8)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 509;
const int M = 6561;

int pri[] = {2, 3, 5, 7, 11, 13, 17, 19};
int n, gpri[N], spri[N], sta1[M], sta2[M], tt[M][N][3];
LL MOD, *f, *g, *h, arr1[M], arr2[M], arr3[M], ori[M];
vector<int> sta[N];

inline void relax(LL &a, LL b) {
	a = (a + b) % MOD;
}

inline int num(int x, int t) {
	for (; t; x /= 3, t--);
	return x % 3;
}

inline int SET(int w, int t, int v) {
	static int buf[] = {1, 3, 9, 27, 81, 243, 729, 2187};
	int ret = 0;
	for (int i = 0; i < 8; i++, w /= 3, t >>= 1) {
		if (t & 1) {
			ret += buf[i] * v;
		} else {
			ret += buf[i] * (w % 3);
		}
	}
	return ret;
}

int main() {
	freopen("dinner.in", "r", stdin);
	freopen("dinner.out", "w", stdout);
	cin>>n>>MOD; 
	for (int i = 0; i < M; i++) {
		for (int j = 0; j < 8; j++) {
			int t = num(i, j);
			if (t == 1) {
				sta1[i] |= 1 << j;
			} else if (t == 2) {
				sta2[i] |= 1 << j;
			}
		}
	}
	for (int i = 0; i < M; i++) {
		for (int j = 0; j < (1 << 8); j++) {
			for (int k = 1; k <= 2; k++) {
				tt[i][j][k] = SET(i, j, k);
			}
		}
	}
	for (int i = 2; i <= n; i++) {
		gpri[i] = i;
		for (int j = 0; j < 8; j++) {
			if (gpri[i] % pri[j] == 0) {
				spri[i] |= (1 << j);
				while (gpri[i] % pri[j] == 0) {
					gpri[i] /= pri[j];
				}
			}
		}
	}
	f = arr1, g = arr2, h = arr3;
	g[0] = f[0] = 1;
	for (int i = 2; i <= n; i++) {
		if (gpri[i] == 1) {
			for (int j = M - 1; ~j; j--) {
				if (g[j]) {
					int sta = 0;
					for (int k = 0; k < 8; k++) {
						if (spri[i] >> k & 1) {
							sta |= num(j, k);
						}
					}
					if (sta == 0) {
						relax(f[tt[j][spri[i]][1]], g[j]);
						relax(f[tt[j][spri[i]][2]], g[j]);
					} else if (sta < 3) {
						relax(f[tt[j][spri[i]][sta]], g[j]);
					}
				}
			}
			memcpy(g, f, sizeof(arr1));
			swap(f, g);
		} else {
			sta[gpri[i]].push_back(spri[i]);
		}
	}
	for (int i = 2; i <= n; i++) {
		if (!sta[i].empty()) {
			memcpy(h, g, sizeof(arr1));
			memcpy(ori, g, sizeof(arr1));
			for (int j = 0; j < (int)sta[i].size(); j++) {
				int vv = sta[i][j];
				for (int k = M - 1; ~k; k--) {
					if (g[k]) {
						int s1 = vv & sta1[k];
						if (!s1) {
							relax(f[tt[k][vv][2]], g[k]);
						} 
					}
				}
				memcpy(g, f, sizeof(arr1));
				swap(f, g);
			}
			memcpy(f, h, sizeof(arr1));
			for (int j = 0; j < (int)sta[i].size(); j++) {
				int vv = sta[i][j];
				for (int k = M - 1; ~k; k--) {
					if (h[k]) {
						int s2 = vv & sta2[k];
						if (!s2) {
							relax(f[tt[k][vv][1]], h[k]);
						}
					}
				}
				memcpy(h, f, sizeof(arr1));
				swap(f, h);
			}
			for (int k = 0; k < M; k++) {
				f[k] = g[k] = (f[k] + g[k] - ori[k]) % MOD + MOD;
			}
		}
	}
	LL ans = 0;
	for (int i = 0; i < M; i++) {
		relax(ans, f[i]);
	}
	printf("%lld\n", ans);
	return 0;
}

【Codeforces 815C】Karen and Supermarket

相关链接

题目传送门:http://codeforces.com/contest/815/problem/C

解题报告

这题就是考察树DP优化复杂度的一种常用技巧
考虑暴力DP的话,复杂度是:$O(n^3)$的
但如果在父亲结点那里记录一个儿子节点的子树大小的前缀和,复杂度就变成了$O(n^2)$
证明也很简单,对于任意两个结点,只会在$LCA$处产生$1$的花费

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 5009;
const LL INF = 1e14;

int n, head[N], nxt[N], to[N], sz[N];
LL b, f[N][N], g[N][N], c[N], d[N], t1[N], t2[N];

inline int read() {
	char c=getchar(); int ret=0,f=1;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret*f;
}

inline void AddEdge(int u, int v) {
	static int E = 1;
	to[++E] = v; nxt[E] = head[u]; head[u] = E;	
}

inline void relax(LL &a, LL b) {
	a = a > b? b: a;
}

inline void DFS(int w) {
	f[w][0] = g[w][0] = 0;
	for (int i = head[w]; i; i = nxt[i]) {
		DFS(to[i]);
		memcpy(t1, f[w], sizeof(t1));
		memcpy(t2, g[w], sizeof(t2));
		for (int j = 0; j <= sz[w]; j++) {
			for (int k = 0; k <= sz[to[i]]; k++) {
				relax(f[w][j + k], t1[j] + f[to[i]][k]);
				relax(g[w][j + k], t2[j] + g[to[i]][k]);
			}
		}
		sz[w] += sz[to[i]];
	}
	sz[w]++;
	for (int i = sz[w] - 1; ~i; i--) {
		g[w][i + 1] = g[w][i] + c[w] - d[w];
		relax(f[w][i + 1], f[w][i] + c[w]);
		relax(g[w][i + 1], f[w][i + 1]);
	}
	g[w][0] = 0;
}

int main() {
	n = read(); b = read();
	c[1] = read(); d[1] = read();
	for (int i = 2; i <= n; i++) {
		c[i] = read(); d[i] = read();
		AddEdge(read(), i);
	}
	for (int i = 1; i <= n; i++) {
		for (int j = 1; j <= n; j++) {
			f[i][j] = g[i][j] = INF;
		}
	}
	DFS(1);
	for (int i = n; ~i; i--) {
		if (g[1][i] <= b) {
			printf("%d\n", i);
			exit(0);
		}
	}
	return 0;
}

【Codeforces 736C】Ostap and Tree

相关链接

题目传送门:http://codeforces.com/contest/736/problem/C
官方题解:http://codeforces.com/blog/entry/48659

解题报告

就简单DP一下
记录子树中最深的、还未与黑点匹配的点
记录可以子树中的黑点可以向上覆盖多少
时间复杂度:$O(nm^2)$

Code

#include<bits/stdc++.h>
#define LL long long
#define relax(a, b, c) (a = (a + (LL)b * c) % MOD)
using namespace std;

const int N = 300;
const int MOD = 1000000007;
const int BAS = 120;

int n, K, head[N], nxt[N], to[N];
int *f[N], arr_back[N][N];

inline int read() {
	char c = getchar();
	int ret = 0, f = 1;
	while (c < '0' || c > '9') {
		f = c == '-'? -1: 1;
		c = getchar();
	}
	while ('0' <= c && c <= '9') {
		ret = ret * 10 + c - '0';
		c = getchar();
	}
	return ret * f;
}

inline void AddEdge(int u, int v) {
	static int E = 1;
	to[++E] = v; nxt[E] = head[u]; head[u] = E;
	to[++E] = u; nxt[E] = head[v]; head[v] = E;
}

inline void DFS(int w, int fa) {
	static int arr1_back[N], *tmp = arr1_back + BAS;
	f[w][-1] = f[w][K] = 1;
	for (int i = head[w], t; i; i = nxt[i]) {
		if ((t = to[i]) != fa) {
			DFS(t, w);
			for (int j = -K; j <= K; j++) {
				tmp[j] = f[w][j];
				f[w][j] = 0;
			}
			for (int j = -K; j <= K; j++) {
				for (int k = -K - 1; k < K; k++) {
					if (j >= 0 && k >= 0) {
						relax(f[w][max(j, k)], tmp[j], f[t][k + 1]);	
					} else if ((j < 0 && k >= 0) || (j >= 0 && k < 0)) {
						int ge = max(j, k), le = min(j, k);
						if (ge + 1 >= -le) {
							relax(f[w][ge], tmp[j], f[t][k + 1]);	
						} else {
							relax(f[w][le], tmp[j], f[t][k + 1]);
						}
					} else {
						relax(f[w][min(j, k)], tmp[j], f[t][k + 1]);
					}
				}
			}
		}
	}
}

int main() {
	n = read(); K = read();
	for (int i = 1; i < n; i++) {
		AddEdge(read(), read());
	}
	for (int i = 1; i <= n; i++) {
		f[i] = arr_back[i] + BAS;
	}
	DFS(1, 1);
	int ans = 0;
	for (int i = 0; i <= K; i++) {
		ans = (ans + f[1][i]) % MOD;
	} 
	printf("%d\n", ans);
	return 0;
}

【Codeforces 814E】An unavoidable detour for home

相关链接

题目传送门:http://codeforces.com/contest/814/problem/E
官方题解:http://codeforces.com/blog/entry/52449
神犇题解Ⅰ:https://loj.ac/article/37
神犇题解Ⅱ:http://kczno1.blog.uoj.ac/blog/2660

解题报告

我们考虑分层图的话
那每一层的一个点一定会与上一层的某个点相连,剩下的度数只能层内消化
又因为这题对祖先有严格的限制,所以每一层都是原序列中连续的一段
于是$O(n^5)$的暴力$DP$就可以写了

至于$O(n^3)$的$DP$,就是$DP$套$DP$
预处理出转移的$buf$,然后转移的时候直接乘就好了
但这个预处理的时候要讨论很多情况啊,反正我自己考场上是想不出来的_(:з」∠)_

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 52;
const int MOD = 1000000007;
const int REV2 = 500000004;

int n, f[N][N], s2[N], s3[N], buf[N][N][N], C[N][N], prm[N]; 

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

int main() {
	n = read();
	f[read() + 1][1] = 1;
	for (int i = 2; i <= n; i++) {
		s2[i] = s2[i - 1];
		s3[i] = s3[i - 1];
		if (read() == 2) {
			s2[i]++;
		} else {
			s3[i]++;
		}
	}
	C[0][0] = 1;
	for (int i = 1; i <= n; i++) {
		C[i][0] = 1;
		for (int j = 1; j <= i; j++) {
			C[i][j] = (C[i - 1][j - 1] + C[i - 1][j]) % MOD;
		}
	}
	prm[2] = 1;
	for (int i = 3; i <= n; i++) {
		prm[i] = REV2;
		for (int j = 2; j < i; j++) {
			prm[i] = (LL)prm[i] * j % MOD;
		}
	}
	for (int i = 0; i <= n; i++) {
		for (int j = 0; j <= n; j++) {
			for (int k = 0; k <= n; k++) {
				if (i == 0 && j == 0 && k == 0) {
					buf[i][j][k] = 1;
				} else if (i == 0 && j == 0) {
					for (int l = 2; l < k; l++) {
						(buf[i][j][k] += ((LL)buf[i][j][k - 1 - l] * C[k - 1][l] % MOD) * prm[l + 1] % MOD) %= MOD;
					}
				} else if (i == 0) {
					buf[i][j][k] = j > 1? (LL)(j - 1) * buf[i][j - 2][k] % MOD: 0;
					(buf[i][j][k] += k? (LL)k * buf[i][j][k - 1] % MOD: 0) %= MOD;
				} else {
					buf[i][j][k] = j? (LL)j * buf[i - 1][j - 1][k] % MOD: 0;
					(buf[i][j][k] += k? (LL)k * buf[i - 1][j + 1][k - 1] % MOD: 0) % MOD;
				} 
			}
		}
	} 
	for (int i = 1; i <= n; i++) {
		for (int j = 0; j < i; j++) {
			if (f[i][j]) {
				int t2 = s2[i] - s2[j];
				int t3 = s3[i] - s3[j];
				for (int k = i + 1; k <= n; k++) {
					f[k][i] = (f[k][i] + (LL)f[i][j] * buf[k - i][t2][t3]) % MOD;
				}
			}
		}
	} 
	int ans = 0;
	for (int i = 1; i <= n; i++) {
		ans = (ans + (LL)f[n][i] * buf[0][s2[n] - s2[i]][s3[n] - s3[i]]) % MOD;
	}
	cout<<ans<<endl;
	return 0;
}

【Codeforces 799D】Field expansion

相关链接

题目传送门:http://codeforces.com/contest/799/problem/D

解题报告

不难发现,至多只会使用$O(2 \log 10^5)$个物品
于是定义$f_i$表示长为$i$时,宽最多为多少
暴力转移即可,时间复杂度:$O(10^5 \log 10^5)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 100009;
const int INF = 1e9;

int n,v[N],f[N];

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

inline bool cmp(int a, int b) {
	return a > b;
}

inline int solve(int a, int b, int h, int w) {
	memset(f, 0, sizeof(f));
	f[min(h, a)] = min(b, w);
	int ans = 1;
	for (int j=1;j<=n;j++,ans++) {
		for (int i=a;i;i--) {
			if (f[i]) {
				int tt = min((LL)a, (LL)i * v[j]);
				f[tt] = max(f[tt], f[i]);
				if (tt == a && f[i] == b) {
					return ans;
				}
				tt = min((LL)b, (LL)f[i] * v[j]);
				f[i] = tt;
				if (i == a && tt == b) {
					return ans;
				}
			}
		}
	}
	return INF;
}

int main() {
	int a = read(), b = read();
	int h = read(), w = read();
	n = read();
	for (int i=1;i<=n;i++) {
		v[i] = read();
	}
	sort(v+1, v+1+n, cmp);
	n = min(n, 34);
	if ((h >= a && w >= b) || (h >= b && w >= a)) {
		puts("0");
		exit(0);
	}
	int ans1 = solve(a, b, h, w);
	int ans2 = solve(a, b, w, h);
	if (ans1 >= INF && ans2 >= INF) {
		puts("-1");
	} else {
		printf("%d\n",min(ans1, ans2));
	}
	return 0;
}

【TopCoder SRM713】CoinsQuery

相关链接

题目传送门:https://community.topcoder.com/stat?c=problem_statement&pm=14572&rd=16882

题目大意

给$n(n \le 100)$类物品,第$i$类物品重量为$w_i(w_i \le 100)$,价值为$v_i(v_i \le 10^9)$,数量无限
给定$m(m \le 100)$个询问,第$i$询问请你回答总重量恰好为$q_i(q_i \le 10^9)$的物品,价值和最大为多少
你还需要求出使价值最大的方案数是多少(同类物品视作一样,摆放顺序不同算不同)

解题报告

规定每个物品重量不超过$100$那么我们就可以矩乘
但有一个问题:我们不仅要让价值最大,还要求方案数

但类比倍增Floyd:在一定条件,矩乘重载运算符之后仍然满足结合律
比如说这个题,我们可以:

重载加法为:两种方案取最优
重载乘法为:将两种方案拼起来(方案数相乘,价值相加)

然后直接做是$O(m n^3 \log n)$的,会在第$21$个点$TLE$
于是我们预处理转移矩阵的幂次,然后对于每个询问就是向量与矩阵相乘,单次复杂度是$O(n^2)$的
于是总的时间复杂度优化到:$O(m n^2 \log n + n^3 \log n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int MOD = 1000000007;
const int N = 101;

struct Data{
	LL val,chs;
	inline Data() {val = chs = -1;}
	inline Data(LL a, LL b):val(a),chs(b) {}
	inline Data operator + (const Data &D) {
		if (chs == -1 || D.chs == -1) {
			return chs != -1? *this: D;
		} else {
			Data ret(max(val, D.val), 0);
			(ret.chs += (val == ret.val? chs: 0)) %= MOD;
			(ret.chs += (D.val == ret.val? D.chs: 0)) %= MOD;
			return ret;
		}
	}
	inline Data operator * (const Data &D) {
		if (!~chs || !~D.chs) return Data(-1, -1);
		return Data(val + D.val, chs * D.chs % MOD);
	}
}e(0,1);
struct Matrix{
	Data a[N][N]; int x,y;
	inline Matrix() {x = y = 0;}
	inline Matrix(int X, int Y):x(X),y(Y) {}
	inline Matrix operator * (const Matrix &M) {
		Matrix ret(M.x, y);
		for (int i=1;i<=M.x;i++) {
			for (int k=1;k<=x;k++) {
				for (int j=1;j<=y;j++) {
					ret.a[i][j] = ret.a[i][j] + (a[k][j] * M.a[i][k]);
				}
			}
		}
		return ret;
	}
}tra[32];

class CoinsQuery {
    public:
    	vector<LL> query(vector<int> w, vector<int> v, vector<int> query) {
    	    int m = query.size(), n = w.size();
			tra[0].x = tra[0].y = 100;
    	    for (int i=0;i<n;i++) {
				tra[0].a[w[i]][1] = tra[0].a[w[i]][1] + Data(v[i], 1);
			}
			for (int i=2;i<=100;i++) {
				tra[0].a[i-1][i] = e;
			}
			for (int i=1;i<=30;i++) {
				tra[i] = tra[i-1] * tra[i-1];
			}
    	    
			vector<LL> ret;
    	    for (int tt=0;tt<m;tt++) {
				Matrix ans(100, 1);
				ans.a[1][1] = e;
				int cur = query[tt];
				for (int i=0;cur;cur>>=1,++i) {
					if (cur & 1) {
						ans = ans * tra[i];
					}
				}
				ret.push_back(ans.a[1][1].val);
				ret.push_back(ans.a[1][1].chs);
			}
    	    return ret;
   		}
   	private:
};

【BZOJ 4828】[HNOI2017] 大佬

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=4828

解题报告

先$DP$出最多可以有多少天不做题
然后我们发现两次$D$人其实是独立的
于是我们再$DP$出攻击达到$x$的最小天数

最后回答每个询问的时候
用个双指针扫一扫

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 109;
const int M = 930000;

int n,m,mc,tot,MX,ispri[N],w[N],a[N],f[N][N];
pair<int,int> itm[M];

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

void DFS(int t, LL sum) {
	if (t > MX) return;
	else {
		DFS(t + 1, sum);
		if (ispri[t]) {
			for (;sum*=t,sum<=1e8;) {
				itm[++tot].first = sum;
				DFS(t + 1, sum);
			}
		}
	}
}

inline int cal(int x) {
	int ret = x + 1;
	for (int i=min(MX,x-1),cnt,tmp;i;i--) {
		if (x % i) continue;
		cnt = i + 1; tmp = x;
		for (int j=i;j>1&&tmp>1;j--) {
			while (tmp % j == 0) {
				tmp /= j;
				++cnt;
			}
		}
		if (tmp == 1) {
			ret = min(ret, cnt);
		} else {
			break;
		}
	}
	return ret;
}

int main() {
	n = read(); m = read(); mc = read();
	for (int i=1;i<=n;i++) {
		a[i] = read();
	}
	for (int j=1;j<=n;j++) {
		w[j] = read();
	}
	//DP最多空出来的天数 
	memset(f, -1, sizeof(f));
	f[1][mc] = 0;
	for (int i=1;i<=n;i++) {
		for (int j=a[i];j<=mc;j++) {
			if (f[i][j] == -1) continue;
			int t1 = min(j - a[i] + w[i], mc), t2 = j - a[i];
			if (t1 >= 0) {
				f[i + 1][t1] = max(f[i + 1][t1], f[i][j]);
			}
			if (t2 >= 0) {
				f[i + 1][t2] = max(f[i + 1][t2], f[i][j] + 1);
			}
		}
	}
	MX = -1; 
	for (int j=2;j<=n+1;j++) {
		for (int i=0;i<=mc;i++) {
			MX = max(MX, f[j][i]);
		}
	} 
	//搞出每一个物品的最小花费 
	for (int j=2;j<=100;j++) {
		ispri[j] = 1;
		for (int i=2;i*i<=j;i++) {
			if (j % i == 0) {
				ispri[j] = 0;
			}
		}
	}
	DFS(1, 1); 
	for (int i=1;i<=tot;i++) {
		itm[i].second = cal(itm[i].first);
	}
	itm[++tot] = make_pair(0, 0);
	sort(itm+1, itm+1+tot);
	//对于每个询问用一个双端队列
	for (int tt=1;tt<=m;tt++) {
		int C = read(), ans = 0;
		for (int i=tot,pfx=1,cur=-1e9;i;i--) {
			while (pfx <= tot && itm[pfx].first <= C - itm[i].first) {
				cur = max(cur, itm[pfx].first - itm[pfx].second);
				pfx++;
			}
			if (cur + itm[i].first - itm[i].second >= C - MX) {
				ans = 1; 
				break;
			}
		}
		printf("%d\n",ans);
	} 
	return 0;
}

【TopCoder SRM713】DFSCount

相关链接

题目传送门:https://community.topcoder.com/stat?c=problem_statement&pm=14588&rd=16882

题目大意

给定一个$n(n \le 14)$的简单无向图,问$DFS$序总共可能有多少种不同情况

解题报告

这是一个非常妙妙的$DP$
考虑定义$f_{i,j}$表示当前在第$i$个点,已经走过的点压成二进制状态为$j$
但我们发现这样无法实现$DFS$模拟当中的退回操作

但我们考虑一下$DFS$树:这货是没有横叉边的,只有返祖边
这意味着我们如果决定朝一个方向走,那么一定是把那一个连通块走完才会退回来
于是我么可以不一步一步转移,我们可以把一整块连通块一起转移,这样就不需要模拟$DFS$的回退操作了

至于这份代码是:$O(n^2 2^n)$的
实现得精细一点可以做到$O(n 2^n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;
 
int n,id[15];
bool g[15][15],vis[15];
LL f[15][17000],pw[15],pol[15];

class DFSCount {
    public:
    	long long count(vector<string> G) {
    		pw[0] = 1;
    		for (int i=1;i<=14;i++) {
				pw[i] = pw[i-1] * i;
			}
    	    n = G.size();
    	    for (int i=0;i<n;i++) {
				for (int j=0;j<n;j++) {
					if (G[i][j] == 'Y') g[i][j] = 1;
					else g[i][j] = 0;
				}
			}
    	    memset(f, 0, sizeof(f));
			for (int i=0;i<n;i++) {
				f[i][(1<<n)-1] = 1;
			}
			for (int sta=(1<<n)-2;sta>0;sta--) {
				for (int i=0;i<n;i++) {
					if (!(sta&(1<<i))) continue;
					for (int j=0;j<n;j++) {
						vis[j] = (sta & (1 << j));
					}
					int cnt = 0;
					for (int j=0;j<n;j++) {
						if (g[i][j] && !(sta&(1<<j))) {
							if (!vis[j]) {
								DFS(j, ++cnt);
								pol[cnt] = 0;
							}
							pol[id[j]] += f[j][(1<<j)|sta];
						}
					}
					f[i][sta] = pw[cnt];
					for (int j=1;j<=cnt;j++) {
						f[i][sta] *= pol[j];
					}
				}
			}
			LL ret = 0;
			for (int i=0;i<n;i++) {
				ret += f[i][1<<i];
			}
			return ret;
   		}
   	private:
   		void DFS(int w, int ID) {
			vis[w] = 1; 
			id[w] = ID;
			for (int i=0;i<n;i++) {
				if (g[w][i] && !vis[i]) {
					DFS(i, ID);
				} 
			}   
		}
};

【HDU 4626】Jinkeloid

相关链接

题目传送门:http://acm.hdu.edu.cn/showproblem.php?pid=4626
数据生成器:http://paste.ubuntu.com/24454724/
神犇题解Ⅰ:https://blog.sengxian.com/solutions/hdoj-4626
神犇题解Ⅱ:http://blog.csdn.net/u012345506/article/details/52040466
FWT相关:http://blog.csdn.net/liangzhaoyang1/article/details/52819835

解题报告

我们使用状压$DP$的话,每次询问相当于强制某些位为$0$
于是我们可以先$FWT$一发,搞出每个状态的方案数
然后我们可以再做一发子集$DP$,搞出每个状态的超集
然后询问就可以$O(1)$回答了
于是总的时间复杂度是:$O(20 \cdot 2^{20} + m + n)$

另外的话,因为本题的$k$非常的小
于是之前$FWT$那一块还可以用容斥来解决
只不过这样单次询问的复杂度是:$O(3^k)$的

Code

这个代码里$DP$超集那一块还是很妙妙的

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 3000000;
const int M = 100009;
const int UNIVERSE = (1 << 20) - 1; 
const int SGZ = 20;

char pat[M];
int n,m,sta;
LL a[N],f[N],zero;

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

inline void FWT(LL *arr, int len, int ty) {
	for (int d=1;d<len;d<<=1) {
		for (int i=0;i<=len;i+=d*2) {
			for (int j=0;j<d;j++) {
				LL t1 = arr[i+j], t2 = arr[i+j+d];
				arr[i+j] = t1 + t2;
				arr[i+j+d] = t1 - t2;
				if (ty == -1) {
					arr[i+j] /= 2;
					arr[i+j+d] /= 2;
				}
			}
		}
	}
}	

int main() {
	for (int T=read();T;T--) {
		memset(a, 0, sizeof(a));
		scanf("%s",pat);
		n = strlen(pat);
		a[0]++; sta = zero = 0;
		for (int i=0;i<n;i++) {
			sta ^= 1 << pat[i] - 'a';
			zero += a[sta]++;
		}  
		FWT(a, UNIVERSE, 1);
		for (int i=0;i<=UNIVERSE;i++) {
			a[i] = a[i] * a[i];
		}
		FWT(a, UNIVERSE, -1);
		a[0] = zero;
		for (int i=1;i<=UNIVERSE;i++) {
			a[i] /= 2;
		}
		for (int i=0;i<=UNIVERSE;i++) {
			if ((i ^ UNIVERSE) < i) {
				swap(a[i], a[i^UNIVERSE]); 
			}
		}
		for (int j=0;j<SGZ;j++) {
			for (int i=0;i<=UNIVERSE;i++) {
				if (i & (1<<j)) {
					a[i^(1<<j)] += a[i];
				}
			}
		}	
		m = read();
		for (int tt=1;tt<=m;tt++) {
			int k = read(), sta = 0;
			for (int i=1;i<=k;i++) {
				scanf("%s",pat);
				sta |= 1 << pat[0] - 'a';
			}
			printf("%lld\n",a[sta]);
		}
	}
	return 0;
}

【HDU 4623】Crime

相关链接

题目传送门:http://acm.hdu.edu.cn/showproblem.php?pid=4623
神犇题解:http://blog.csdn.net/keshuai19940722/article/details/49455357

解题报告

脑袋里一直想着去年$ZJOI$的小星星
然后就各种优化,最后还是$T$成狗 QwQ

然后正解就是把每个数按照因数种类不同分组
最后搞下来只有$14$种不同的,然后暴力状压$DP$就可以了

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 3000000;
const int M = 16;

int n,mx,MX,MOD,f[N][M],cnt[M],bit[M],cur[M],sym[M],suf[M],leg[M][M];
int ty[]={0,1,2,3,2,4,5,6,2,3,7,8,5,9,10,11,2,1,5,1,7,12,13,1,5,4,14,3,10};

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

int gcd(int a, int b) {
	return b? gcd(b, a%b): a;
}

inline void decode(int w, int *arr) {
	for (int i=mx;i;i--) {
		arr[i] = w % bit[i];
		w /= bit[i];
	}
}

inline int solve() {
	memset(cnt,0,sizeof(cnt));
	mx = MX = 0;
	for (int i=1;i<=n;i++) {
		cnt[ty[i]]++;
		mx = max(mx, ty[i]);
	}
	for (int i=1;i<=mx;i++) {
		bit[i] = cnt[i] + 1;
		MX = MX * bit[i] + cnt[i];
	}
	suf[mx] = 1;
	for (int i=mx-1;i;i--) {
		suf[i] = suf[i+1] * bit[i+1];
	}
	memset(f,0,sizeof(f));
	f[0][0] = 1;
	for (int i=0;i<MX;i++) {
		decode(i, cur);
		for (int j=0;j<=mx;j++) {
			if (!f[i][j]) continue;
			for (int k=1;k<=mx;k++) {
				if ((leg[k][j] || !j) && cur[k] < cnt[k]) {
					int to = i + suf[k];
					f[to][k] = (f[to][k] + (LL)f[i][j] * (cnt[k] - cur[k])) % MOD;
				}
			}
		}
	}
	int ret = 0;
	for (int i=1;i<=mx;i++) {
		ret = (ret + f[MX][i]) % MOD;
	}
	return ret;
}

int main() {
	for (int i=1;i<=28;i++) {
		if (sym[ty[i]]) continue;
		sym[ty[i]] = i;
	}
	for (int i=1;i<=14;i++) {
		for (int j=1;j<=14;j++) {
			if (gcd(sym[i], sym[j]) == 1) {
				leg[i][j] = 1;
			}
		} 
	}
	for (int T=read();T;T--) {
		n = read(); MOD = read();
		printf("%d\n",solve());
	}
	return 0;
}

【HDU 4628】Pieces

相关链接

题目传送门:http://acm.hdu.edu.cn/showproblem.php?pid=4628

解题报告

这题只是划分子序列,于是我们可以状压DP
用枚举子集的话,时间复杂度就是:$O(3^n)$的

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

char s[20];
int n,f[1<<17];

inline int read() {
    char c=getchar(); int f=1,ret=0;
    while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
    while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
    return ret * f;
}

inline bool judge(int x) {
    int tot=0; char tmp[20];
    for (int i=1;i<=n;i++,x>>=1) {
        if (x&1) {
            tmp[++tot] = s[i];
        }
    }
    for (int i=1,j=tot;i<j;i++,j--) {
        if (tmp[i] != tmp[j]) {
            return 0;
        }
    }
    return 1;
}

int main() {
    for (int T=read();T;T--) {
        memset(f,60,sizeof(f));
        scanf("%s",s+1); 
        n = strlen(s+1);
        for (int i=1,MX=1<<n;i<MX;i++) {
            if (judge(i)) {
                f[i] = 1;
            } else {
                for (int j=i;j;j=(j-1)&i) {
                    f[i] = min(f[i], f[j] + f[i^j]);
                }
            } 
        }    
        printf("%d\n",f[(1<<n)-1]);
    } 
    return 0;
}

【BZOJ 4861】[BeiJing2017] 魔法咒语

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=4861

解题报告

对于$L \le 100$的情况,我们定义$f_{i,j}$表示长度为$i$,在$ac$自动机上匹配到点$j$的答案
然后暴力转移就可以了,时间复杂度:$O(10^4L)$

对于其他的点,我们构造转移矩阵,用矩阵快速幂来优化
至于如何把长度为$2$的和为$1$的给统一起来?我们可以加一个点缓存一下
时间复杂度:$O(200^3 \log L)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int MOD = 1000000007;

int n,m,L,len[109];
char s1[109][109],s2[109][109];

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

class AC_Automaton{
	int cnt,ch[109][26],vis[109],fail[109];
	vector<int> G[109];
	queue<int> que;
	public:
		inline AC_Automaton() {
			cnt=1;
		}
		inline void insert(char *s) {
			int l = strlen(s + 1), w = 1;
			for (int i=1;i<=l;i++) {
				int c = s[i] - 'a';
				if (!ch[w]) ch[w] = ++cnt;
				w = ch[w];
			}
			vis[w] = 1;
		}
		inline void build() {
			for (int i=0;i<26;i++) {
				if (!ch[1][i]) ch[1][i] = 1;
				else fail[ch[1][i]] = 1, que.push(ch[1][i]);
			}
			while (!que.empty()) { 
				int w = que.front(); que.pop(); 
				G[fail[w]].push_back(w);
				for (int i=0;i<26;i++) {
					if (ch[w][i] && ch[w][i] != 1) {
						que.push(ch[w][i]);
						fail[ch[w][i]] = ch[fail[w]][i];
					} else {
						ch[w][i] = ch[fail[w]][i];
					}
				}
			} 
			DFS(1, 0);
		}
		inline int size() {
			return cnt;
		}
		inline int MOVE(int w, int j) {
			if (vis[j]) return -1;
			for (int i=1;i<=len[w];i++) {
				j = ch[j][s1[w][i]-'a'];
				if (vis[j]) return -1;
			}
			return j;
		}
	private:
		void DFS(int w, int t) {
			t |= vis[w];
			if (t) vis[w] = 1; 
			for (int i=G[w].size()-1;~i;i--) {
				if (G[w][i] != i) { 
					DFS(G[w][i], t);
				}
			}
		}
}ac;

namespace SUBTASK_ONE{
	int f[109][109],tot,ans;
	int tra[109][109];
	
	inline void solve() {
		for (int i=1;i<=n;i++) {
			scanf("%s",s1[i]+1);
			len[i] = strlen(s1[i]+1);
		}
		for (int i=1;i<=m;i++) {
			scanf("%s",s2[i]+1);
			ac.insert(s2[i]);
		}
		ac.build(); 
		for (int i=1;i<=n;i++) {
			for (int j=1;j<=ac.size();j++) {
				int tmp = ac.MOVE(i, j);
				if (tmp != -1) tra[j][i] = tmp; 
			}
		}
		f[0][1] = 1;
		for (int i=0;i<L;i++) {
			for (int j=1;j<=ac.size();j++) {
				if (f[i][j]) {
					for (int k=1,t1,t2;k<=n;k++) {
						if ((t1=tra[j][k]) != -1 && (t2=i + len[k]) <= L) {
							f[t2][t1] = (f[t2][t1] + f[i][j]) % MOD; 
						}
					}
				}
			}
		}
		for (int i=1;i<=ac.size();i++) {
			ans = (ans + f[L][i]) % MOD;
		}
		printf("%d\n",ans);
	} 
};

namespace SUBTASK_TWO{
	int sz,tot,tra[109][109*109+109];
	struct Matrix{
		int a[209][209];
		inline Matrix() {
			memset(a,0,sizeof(a));
		}
		inline Matrix(int x) {
			memset(a,0,sizeof(a));
			for (int i=1;i<=sz;i++) a[i][i] = x;
		}
		inline Matrix operator * (const Matrix &M) {
			Matrix ret;
			for (int i=1;i<=sz;i++) {
				for (int j=1;j<=sz;j++) {
					for (int k=1;k<=sz;k++) {
						ret.a[i][j] = (ret.a[i][j] + (LL)a[k][j] * M.a[i][k]) % MOD;	
					}
				}
			}
			return ret;
		}
		inline Matrix operator ^ (int t) {
			Matrix ret(1),tmp;
			for (int i=1;i<=sz;i++) {
				memcpy(tmp.a[i],a[i],sizeof(a[i]));
			}
			for (;t;t>>=1,tmp=tmp*tmp) {
				if (t&1) ret = ret * tmp;
			}
			return ret;
		}
		inline void init() {
			memset(a,0,sizeof(a));
		}
	}ans,trans;
	
	inline void solve() {
		for (int i=1;i<=n;i++) {
			scanf("%s",s1[i]+1);
			len[i] = strlen(s1[i]+1);
		}
		for (int i=1;i<=m;i++) {
			scanf("%s",s2[i]+1);
			ac.insert(s2[i]);
		}
		ac.build(); sz = ac.size();
		for (int i=1;i<=n;i++) {
			for (int j=1;j<=sz;j++) {
				int tmp = ac.MOVE(i, j);
				if (tmp != -1) tra[j][i] = tmp; 
			}
		}
		ans.a[1][1] = 1; int ori = sz; 
		for (int i=1;i<=ori;i++) trans.a[i][++sz]++;
		for (int i=1;i<=ori;i++) {
			for (int j=1;j<=n;j++) {
				if (!tra[i][j]) continue;
				if (len[j] == 1) ++trans.a[tra[i][j]][i];
				else trans.a[tra[i][j]+ori][i]++;
			}
		}
		trans = trans ^ L;
		ans = ans * trans;
		int vout = 0;
		for (int i=1;i<=ori;i++) vout = (vout + ans.a[i][1]) % MOD;
		cout<<vout<<endl;
	}
};

int main() {
	n = read(); m = read(); L = read();
	if (L <= 100) {
		SUBTASK_ONE::solve();
	} else {
		SUBTASK_TWO::solve();
	}
	return 0;
}

【TopCoder SRM712】AverageVarianceSubtree

相关链接

题目传送门:https://community.topcoder.com/stat?c=problem_statement&pm=14568&rd=16881

题目大意

给定一棵$n(n \le 50)$个结点的树,每个点上有一个点权$a_i(a_i \le 10^9)$
让你从中随意选出一个连通子图,询问这个连通子图中的每一个点的权值组成的序列的方差的期望是多少
小数输出,最终误差不得大于$\frac{1}{10^9}$

解题报告

将方差的式子化一化,发现我们大概要维护$E(\sum{a_i^2})$和$E((\sum{a_i})^2)$
第一个可以直接维护
第二个那一坨,我们还是使用期望的线性将其拆开,分别维护即可
最后再做一个树$DP$,总的复杂度大概在$O(n^3)$

Code

long double 最后一个点被卡精度了,只能用__float128

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 59;
const int M = N << 1;

class AverageVarianceSubtree {
	int n,E,head[N],nxt[M],to[M];
	__float128 ans,tot,val[N],s1[N][N],s2[N][N],s3[N][N],cnt[N][N];
    public:
    	double average(vector<int> p, vector<int> weight) {
    		E = 1; ans = tot = 0; memset(head,0,sizeof(head));
    		memset(s1,0,sizeof(s1)); memset(s2,0,sizeof(s2));
    		memset(s3,0,sizeof(s3)); memset(cnt,0,sizeof(cnt));
    	     
    	    n = weight.size();
			for (int i=1;i<=n;i++) val[i] = weight[i - 1];
			for (int i=0;i<n-1;i++) AddEdge(i + 2, p[i] + 1);
			DFS(1, 1);
			for (int i=1;i<=n;i++) {
				for (int j=1;j<=n;j++) {
					ans += (s2[i][j] - s3[i][j] / j) / j;
					tot += cnt[i][j];
				}
			} 
			return ans / tot;
   		}
   	private:
   		inline void AddEdge(int u, int v) {
			to[++E] = v; nxt[E] = head[u]; head[u] = E;
			to[++E] = u; nxt[E] = head[v]; head[v] = E;
		}
		void DFS(int w, int f) {
			cnt[w][0] = cnt[w][1] = 1; 
			s1[w][1] = val[w]; 
			s2[w][1] = s3[w][1] = val[w] * val[w];
			for (int i=head[w];i;i=nxt[i]) {
				if (to[i] != f) {
					DFS(to[i], w);
					for (int t=n;t;t--) {
						for (int a=1,b;b=t-a,a<t;a++) {
							s3[w][t] += s3[w][a] * cnt[to[i]][b] + s3[to[i]][b] * cnt[w][a] + 2 * s1[w][a] * s1[to[i]][b];
							s1[w][t] += s1[w][a] * cnt[to[i]][b] + s1[to[i]][b] * cnt[w][a];
							s2[w][t] += s2[w][a] * cnt[to[i]][b] + s2[to[i]][b] * cnt[w][a];
							cnt[w][t] += cnt[w][a] * cnt[to[i]][b];
						}
					}
				}
			}
		}
};

【日常小测】迷宫

题目大意

给定一个$n \times 6(n \le 10^9)$的方格纸,你需要在每一个方格中填上一个$[1,6]$之间的数
要求任意一个数与它↙,←,↖,↗,→,↘这六个方向的数既不能相同,也不能和为7
并且规定左上角必须为$1$,且按照先从上往下逐行再从左往右的顺序,第一个$2$必须要出现在$5$之前,第一个$3$必须要出现在$4$之前
问有多少种合法的填色方案,输出对$1004535809$取模

解题报告

先不考虑最后题解的关于出现顺序的限制。这样的话,显然可以矩阵快速幂
但直接压状态的的话,矩阵大概长成$10^4 \times 10^4$,单次矩阵乘法都无法满足
不过我们仔细观察即可发现,对于其实1,6是等价的,同理2,53,4
于是我们每一个格子的状态就只有三种了,最后有效的状态一行只有96
这样就可以直接矩阵快速幂了

现在考虑最后的那个限制,我们可以容斥一下
我们可以先排除掉不包含2,5这一对的方案数,然后除以二
同理3,4也一样处理

于是最后就是一个状压DP用矩阵快速幂优化
最后再容斥一下,时间复杂度:$O(96 ^ 3 \log n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int MOD = 1004535809;

int n,tot,vout,sta[100][7],cur[7];
struct Matrix{
	int f[100][100];
	inline Matrix() {memset(f,0,sizeof(f));}
	inline Matrix(int x) {memset(f,0,sizeof(f));for(int i=1;i<=tot;i++)f[i][i]=x;}
	inline Matrix(const Matrix &M) {for(int i=1;i<=tot;i++)for(int j=1;j<=tot;j++)f[i][j]=M.f[i][j];}
	inline Matrix operator * (const Matrix &M) {
		Matrix ret;
		for (int i=1;i<=tot;i++) for (int j=1;j<=tot;j++) for (int k=1;k<=tot;k++) 
			ret.f[i][k] = (ret.f[i][k] + (LL)f[j][k] * M.f[i][j]) % MOD;
		return ret;
	}
	inline Matrix operator ^ (int t) {
		Matrix ret(1),tmp(*this);
		for (;t;t>>=1,tmp=tmp*tmp)
			if (t&1) ret=ret*tmp;
		return ret;
	}
	inline void out() {
		for (int i=1;i<=tot;i++) {
			for (int j=1;j<=tot;j++) {
				printf("%d ",f[j][i]);
			} cout<<endl;
		}
	}
}ans,trans;

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

void DFS(int w, int v) {
	if (w == 7) {
		memcpy(sta[++tot],cur,sizeof(cur));
	} else {
		for (int i=1;i<=3;i++) if (i != v) 
			cur[w] = i, DFS(w+1, i);
	}
}

inline int Pow(int w, LL t) {
	int ret = 1;
	for (;t;t>>=1,w=(LL)w*w%MOD)
		if (t&1) ret=(LL)ret*w%MOD;
	return ret;
}

inline bool check(int a, int b) {
	for (int i=1;i<=6;i++) {
		if (i > 1 && sta[a][i-1] == sta[b][i]) return 0;
		if (i < tot && sta[a][i+1] == sta[b][i]) return 0;
	} return 1;
}

int main() {
	freopen("board.in","r",stdin);
	freopen("board.out","w",stdout);
	n = read(); DFS(1, -1);
	for (int i=1;i<=tot;i++) for (int j=1;j<=tot;j++)
		if (check(i, j)) trans.f[j][i] = 1 << 6;
	for (int i=1;i<=tot;i++) if (sta[i][1] == 1) ans.f[i][1] = 1 << 5;
	trans = trans ^ (n - 1); ans = ans * trans;
	for (int i=1;i<=tot;i++) vout = (vout + ans.f[i][1]) % MOD; 
	vout = (vout - 2ll * Pow(2, n * 6ll - 1)) % MOD; 
	vout = (LL)vout * Pow(4, MOD-2) % MOD;
	vout = (vout + Pow(2, n * 6ll - 1)) % MOD; 
	printf("%d\n",(vout+MOD)%MOD);
	return 0;
}

【BZOJ 3612】[HEOI2014] 平衡

相关链接

题目传送门:http://www.lydsy.com/JudgeOnline/problem.php?id=3612
神犇题解:http://blog.csdn.net/Vmurder/article/details/42551603

解题报告

读题后我们发现需要求解这么一个问题:

用$k$个不同的数,来凑成$n$的方案数

这个看起来只能$O(kn^2)$来做$DP$
但我们可以将其优化到$O(nk)$
具体来说,考虑所有$k$个数,一起加或一起减
这样的话,我们就可以不用枚举新加入答案集合的数是哪一个了

回到原题,我们枚举左右两边取出来的重量,和左边用了几块橡皮
然后总的复杂度主要还是在$DP$那一块,复杂度是:$O(k^2n)$的

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 100009;
const int M = 11;

int n,K,MOD,f[N][M];

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

int main() {
	for (int T=read(),ans;ans=0,T;T--) {
		n = read(); K = read(); MOD = read();
		memset(f,0,sizeof(f)); f[0][0] = 1;
		for (int i=1,lim=n*K;i<=lim;i++) {
			for (int k=1,LIM=min(i,K);k<=LIM;k++) {
				f[i][k] = (f[i-k][k] + f[i-k][k-1]) % MOD;
				if (i > n) f[i][k] = (f[i][k] - f[i-(n+1)][k-1]) % MOD;
			}
		}
		for (int i=0,lim=n*K;i<=lim;i++) {
			for (int k=0;k<K;k++) {
				ans = (ans + (LL)f[i][k] * f[i][K-k]) % MOD;
				ans = (ans + (LL)f[i][k] * f[i][K-k-1]) % MOD;
			}
		}
		printf("%d\n",(ans+MOD)%MOD);
	}
	return 0;
}

【Codeforces 800C】Vulnerable Kerbals

相关链接

题目传送门:http://codeforces.com/contest/800/problem/C

解题报告

写一写发现前缀积与$m$的$\gcd$是一个递增的关系,依次为倍数
于是我们将$0 \sim m-1$按照与$m$的$\gcd$分类
然后就是在拓扑图上搞一搞$DP$,输出的时候用拓展欧几里得求一发逆元
总的时间复杂度:$O(n \log n)$

Code

#include<bits/stdc++.h>
#define LL long long
using namespace std;

const int N = 5000000;
const int M = 10000;

int n,m,tot,ans,vis[N],id[N],in[M],num[M];
int head[M],nxt[N],to[N],f[M],itr[M];
vector<int> que[M],vout;

inline int read() {
	char c=getchar(); int f=1,ret=0;
	while (c<'0'||c>'9') {if(c=='-')f=-1;c=getchar();}
	while (c<='9'&&c>='0') {ret=ret*10+c-'0';c=getchar();}
	return ret * f;
}

int gcd(int a, int b) {
	return b? gcd(b, a%b): a;
}

inline void AddEdge(int u, int v) {
	static int E = 1; in[u]++; 
	to[++E] = u; nxt[E] = head[v]; head[v] = E;
}

void solve(int w) { 
	f[w] = que[w].size() + (itr[w]?f[itr[w]]:0);
	ans = max(ans, f[w]); 
	for (int i=head[w];i;i=nxt[i]) {
		if (f[w] > f[itr[to[i]]]) itr[to[i]] = w;
		if (--in[to[i]] == 0) solve(to[i]);
	}
}

void exgcd(int a, int b, LL &x, LL &y) {
	if (b) {exgcd(b, a%b, y, x); y -= a / b * x;}
	else {x = 1; y = 0;}
}

inline int REV(int a, int z) {
	int tmp = gcd(a, m), b; LL x, y; 
	a /= tmp; z /= tmp; b = m / tmp;
	exgcd(a, b, x, y);
	return x * z % m;
}

void print(int w, int v) {
	for (int i=0;i<=que[w].size()-1;i++) {
		int nv = que[w][i], rev = REV(v, nv);
		vout.push_back((rev+m)%m); v = nv;
	}
	if (itr[w]) print(itr[w], v);
}

int main() {
	n = read(); m = read();
	for (int i=1;i<=n;i++) vis[read()] = 1;
	for (int i=1;i<m;i++) if (!vis[i]) {
		int tmp = gcd(m, i); 
		if (!id[tmp]) id[tmp] = ++tot, num[tot] = tmp;
		que[id[tmp]].push_back(i);
	}
	for (int i=1;i<=tot;i++) {
		for (int j=1;j<=tot;j++) {
			if (num[j] > num[i] && num[j] % num[i] == 0) {
				AddEdge(i, j); 
			}
		}
	}
	for (int i=1;i<=tot;i++) if (!in[i]) solve(i);
	for (int i=1;i<=tot;i++) if (f[i] == ans) {print(i, 1); break;}
	if (!vis[0]) vout.push_back(0); cout<<vout.size()<<endl;
	for (int i=0;i<=vout.size()-1;i++) printf("%d ",vout[i]); 
	return 0;
}