acwing-345. 牛站

考点

  • Floyd
  • 矩阵乘法
  • 快速幂
  • 线性DP
  • 离散化

题解

见思路

思路

普通DP

代码一

dp[i][j]代表到达点j恰好经过i条边的最短路,显然有如下状态转移方程:

dp[i][j] = min(dp[i][j], dp[i - 1][k] + edge(k, j)),其中k点与j点有连边。

第一份代码构思如下,显然会MLE,因为数组接近上亿。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include <bits/stdc++.h>
using namespace std;
const int maxn = 2e2 + 50;
int n, t, s, e, m, tot = -1;
int head[maxn], nxt[maxn], edge[maxn], ver[maxn];
int a[maxn], b[maxn], c[maxn], dis[maxn];
int dp[(int)1e6 + 50][maxn];

void add(int u, int v, int w) {
ver[++tot] = v, edge[tot] = w, nxt[tot] = head[u], head[u] = tot;
}

// 离散化
void init() {
dis[++m] = s, dis[++m] = e;
for (int u, v, w, i = 1; i <= t; ++i) {
scanf("%d%d%d", &w, &u, &v);
dis[++m] = u, dis[++m] = v;
a[i] = u, b[i] = v, c[i] = w;
}
sort(dis + 1, dis + 1 + m);
m = unique(dis + 1, dis + 1 + m) - 1 - dis;
memset(head, -1, sizeof(head));
s = lower_bound(dis + 1, dis + 1 + m, s) - dis;
e = lower_bound(dis + 1, dis + 1 + m, e) - dis;
for (int i = 1; i <= t; ++i) {
int u = lower_bound(dis + 1, dis + 1 + m, a[i]) - dis;
int v = lower_bound(dis + 1, dis + 1 + m, b[i]) - dis;
add(u, v, c[i]), add(v, u, c[i]);
}
}

int main() {
scanf("%d%d%d%d", &n, &t, &s, &e);
init();
memset(dp, 0x3f, sizeof(dp));
dp[0][s] = 0;
for (int i = 1; i <= n; ++i) {
for (int u = 1; u <= m; ++u) {
for (int j = head[u]; ~j; j = nxt[j]) {
int v = ver[j], w = edge[j];
dp[i][u] = min(dp[i][u], dp[(i - 1)][v] + w);
}
}
}
cout << dp[n][e] << endl;
return 0;
}

代码二

第二份代码修改如下,使用滚动数组去掉第一维后,出现TLE,说明常数过大

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#include <bits/stdc++.h>
using namespace std;
const int maxn = 2e2 + 50;
int n, t, s, e, m, tot = -1;
int head[maxn], nxt[maxn], edge[maxn], ver[maxn];
int a[maxn], b[maxn], c[maxn], dis[maxn];
int dp[2][maxn];

void add(int u, int v, int w) {
ver[++tot] = v, edge[tot] = w, nxt[tot] = head[u], head[u] = tot;
}

// 离散化
void init() {
dis[++m] = s, dis[++m] = e;
for (int u, v, w, i = 1; i <= t; ++i) {
scanf("%d%d%d", &w, &u, &v);
dis[++m] = u, dis[++m] = v;
a[i] = u, b[i] = v, c[i] = w;
}
sort(dis + 1, dis + 1 + m);
m = unique(dis + 1, dis + 1 + m) - 1 - dis;
memset(head, -1, sizeof(head));
s = lower_bound(dis + 1, dis + 1 + m, s) - dis;
e = lower_bound(dis + 1, dis + 1 + m, e) - dis;
for (int i = 1; i <= t; ++i) {
int u = lower_bound(dis + 1, dis + 1 + m, a[i]) - dis;
int v = lower_bound(dis + 1, dis + 1 + m, b[i]) - dis;
add(u, v, c[i]), add(v, u, c[i]);
}
}

int main() {
freopen("in.txt", "r", stdin), freopen("out.txt", "w", stdout);
scanf("%d%d%d%d", &n, &t, &s, &e);
init();
memset(dp, 0x3f, sizeof(dp));
dp[0][s] = 0;
for (int i = 1; i <= n; ++i) {
for (int u = 1; u <= m; ++u) {
// 覆盖旧数据
dp[i & 1][u] = 0x3f3f3f3f;
for (int j = head[u]; ~j; j = nxt[j]) {
int v = ver[j], w = edge[j];
dp[i & 1][u] = min(dp[i & 1][u], dp[(i - 1) & 1][v] + w);
}
}
}
cout << dp[n & 1][e] << endl;
return 0;
}

代码三

常数优化在如下方面:

  1. 滚动数组本质是用两个数组交替保存结果,那么直接额外开pq两个数组,以减少上亿次&运算的开销
  2. 上亿次的min操作常数开销也极大,部分情况min的内部实现会变成递归;改用if判断
  3. 去掉不必要的中间变量定义

得到AC代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include <bits/stdc++.h>
using namespace std;
const int maxn = 2e2 + 50;
int n, t, s, e, m, tot = -1;
int head[maxn], nxt[maxn], edge[maxn], ver[maxn];
int a[maxn], b[maxn], c[maxn], dis[maxn];
int p[maxn], q[maxn];

void add(int u, int v, int w) {
ver[++tot] = v, edge[tot] = w, nxt[tot] = head[u], head[u] = tot;
}

// 离散化
void init() {
dis[++m] = s, dis[++m] = e;
for (int u, v, w, i = 1; i <= t; ++i) {
scanf("%d%d%d", &w, &u, &v);
dis[++m] = u, dis[++m] = v;
a[i] = u, b[i] = v, c[i] = w;
}
sort(dis + 1, dis + 1 + m);
m = unique(dis + 1, dis + 1 + m) - 1 - dis;
memset(head, -1, sizeof(head));
s = lower_bound(dis + 1, dis + 1 + m, s) - dis;
e = lower_bound(dis + 1, dis + 1 + m, e) - dis;
for (int i = 1; i <= t; ++i) {
int u = lower_bound(dis + 1, dis + 1 + m, a[i]) - dis;
int v = lower_bound(dis + 1, dis + 1 + m, b[i]) - dis;
add(u, v, c[i]), add(v, u, c[i]);
}
}

int main() {
scanf("%d%d%d%d", &n, &t, &s, &e);
init();
memset(p, 0x3f, sizeof(p));
p[s] = 0;
for (int i = 1; i <= n; ++i) {
memcpy(q, p, sizeof(q));
memset(p, 0x3f, sizeof(p));
for (int u = 1; u <= m; ++u) {
for (int j = head[u]; ~j; j = nxt[j]) {
if (p[u] > q[ver[j]] + edge[j]) p[u] = q[ver[j]] + edge[j];
}
}
}
cout << p[e] << endl;
return 0;
}

代码四

根据代码三得知,实际上就是一个广义的Bellman-Ford算法。

所以不用耗时去建图,直接对全图所有边遍历n次即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include <bits/stdc++.h>
using namespace std;
const int maxn = 2e2 + 50;
int n, t, s, e, m, tot = -1;
int a[maxn], b[maxn], c[maxn], dis[maxn];
int p[maxn], q[maxn];

// 离散化
void init() {
dis[++m] = s, dis[++m] = e;
for (int u, v, w, i = 1; i <= t; ++i) {
scanf("%d%d%d", &w, &u, &v);
dis[++m] = u, dis[++m] = v;
a[i] = u, b[i] = v, c[i] = w;
}
sort(dis + 1, dis + 1 + m);
m = unique(dis + 1, dis + 1 + m) - 1 - dis;
s = lower_bound(dis + 1, dis + 1 + m, s) - dis;
e = lower_bound(dis + 1, dis + 1 + m, e) - dis;
for (int i = 1; i <= t; ++i) {
a[i] = lower_bound(dis + 1, dis + 1 + m, a[i]) - dis;
b[i] = lower_bound(dis + 1, dis + 1 + m, b[i]) - dis;
}
}

int main() {
scanf("%d%d%d%d", &n, &t, &s, &e);
init();
memset(p, 0x3f, sizeof(p));
p[s] = 0;
for (int i = 1; i <= n; ++i) {
memcpy(q, p, sizeof(q));
memset(p, 0x3f, sizeof(p));
for (int u, v, w, j = 1; j <= t; ++j) {
u = a[j], v = b[j], w = c[j];
// 双向边,所以两个方向
if (p[u] > q[v] + w) p[u] = q[v] + w;
if (p[v] > q[u] + w) p[v] = q[u] + w;
}
}
cout << p[e] << endl;
return 0;
}

边为对象的Floyd

如果用邻接矩阵A存边时,每次取:

1
2
A[a[i]][b[i]] = min(A[a[i]][b[i]], c[i]);
A[b[i]][a[i]] = min(A[b[i]][a[i]], c[i]);

那么初始时的A[i][j]显然代表i到j刚好经过1条边的最短路

假设我们去寻找一个i到j的中间点k,一定有: \[ \forall i,j\in \left[ 1,m \right] , B\left[ i,j \right] =\min_{1\leqslant k\leqslant m} \left\{ A\left[ i,k \right] +A\left[ k,j \right] \right\} \] A[i][k]代表点i到点k刚好经过1条边的最短路,A[k][j]代表点k到点j刚好经过1条边的最短路,两个矩阵合并得到的新矩阵B就代表i到j刚好经过2条边的最短路,且该合并操作显然满足结合律,(AB)CA(BC)显然没区别。

以此类推,1条边、2条边、4条边、8条边...显然可以采用快速幂的方式加速矩阵运算,将n拆分成若干个2的幂次即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include <bits/stdc++.h>
using namespace std;
const int maxn = 2e2 + 50;
int n, t, s, e, m;
int a[maxn], b[maxn], c[maxn], dis[maxn];
int A[maxn][maxn], ans[maxn][maxn];

// 离散化
void init() {
dis[++m] = s, dis[++m] = e;
for (int u, v, w, i = 1; i <= t; ++i) {
scanf("%d%d%d", &w, &u, &v);
dis[++m] = u, dis[++m] = v;
a[i] = u, b[i] = v, c[i] = w;
}
sort(dis + 1, dis + 1 + m);
m = unique(dis + 1, dis + 1 + m) - 1 - dis;
s = lower_bound(dis + 1, dis + 1 + m, s) - dis;
e = lower_bound(dis + 1, dis + 1 + m, e) - dis;
memset(A, 0x3f, sizeof(A));
for (int i = 1; i <= t; ++i) {
a[i] = lower_bound(dis + 1, dis + 1 + m, a[i]) - dis;
b[i] = lower_bound(dis + 1, dis + 1 + m, b[i]) - dis;
// i到j刚好走了1条边时的最短路
A[a[i]][b[i]] = min(A[a[i]][b[i]], c[i]);
A[b[i]][a[i]] = min(A[b[i]][a[i]], c[i]);
}
}

// a = a * b
void mul(int a[maxn][maxn], int b[maxn][maxn]) {
int res[maxn][maxn];
memset(res, 0x3f, sizeof(res));
for (int i = 1; i <= m; ++i)
for (int j = 1; j <= m; ++j)
for (int k = 1; k <= m; ++k)
if (res[i][j] > a[i][k] + b[k][j]) res[i][j] = a[i][k] + b[k][j];
memcpy(a, res, sizeof(res));
}

int main() {
scanf("%d%d%d%d", &n, &t, &s, &e), init();
// 初始时,i到j刚好走了0条边的最短路,点到点本身距离为0
memset(ans, 0x3f, sizeof(ans));
for (int i = 1; i <= m; ++i)
for (int j = 1; j <= m; ++j)
if (i == j) ans[i][i] = 0;
// 矩阵快速幂
while (n) {
if (n & 1) mul(ans, A);
mul(A, A);
n >>= 1;
}
cout << ans[s][e] << endl;
return 0;
}