G - Abs Sum Editorial by en_translator
Let \(\displaystyle f(X,Y)=\sum_{i=1}^X \sum_{j=1}^Y |A_i-B_j|\).
This problem can be solved with the algorithm called Mo’s Algorithm (Query square root decomposition). The overview of Mo’s algorithm is as follows:
- Set some constant \(M\).
- Divide the queries into blocks by \(\displaystyle \left\lfloor \frac{X_k}{M} \right\rfloor\).
- For each block, do the following:
- Sort the queries in the block in ascending order of \(Y_k\).
- Find the answer to them one by one, based on the answer to the previous query and the delta.
If \(f(X+1,Y)-f(X,Y)\) and \(f(X,Y+1)-f(X,Y)\) in this problem can be computed in \(O(\alpha)\) time, the algorithm above runs in \(\displaystyle O\left(\max\left(MK\alpha,\frac{N^2\alpha}M\right)\right)\) time. By setting \(\displaystyle M=O\left(\frac{N}{\sqrt K} \right)\), the overall complexity will be \(O\left(N\sqrt K\alpha \right)\). For more details on Mo’s algorithm, refer to the following articles (Japanese):
Mo’s algorithm - アルゴリズムとデータ構造大全
Now we consider how to find \(f(X+1,Y)-f(X,Y)\) and \(f(X,Y+1)-f(X,Y)\) fast.
Let \(C\) be the number of elements strictly less than \(A_{X+1}\) among \(B_1,B_2,\ldots,B_{Y}\), and \(S\) be their sum; then
\[ \begin{aligned} \displaystyle f(X+1,Y)-f(X,Y)&=\sum_{j=1}^Y |A_{X+1}-B_j|\\ &=\sum_{j=1}^Y\left( A_{X+1}+B_j-2\min(A_{X+1},B_j) \right)\\ &=YA_{X+1}+\sum_{j=1}^Y B_j -2\left\{(Y-C)A_{X+1}+S \right\}. \end{aligned} \]
Here, these \(C\) and \(S\) can be found with coordinate compression and a Fenwick Tree. For more details, please refer to the following sample code.
By implementing it appropriately, this problem can be solved in a total of \(O(N\sqrt{K}\log N)\) time.
from atcoder.fenwicktree import FenwickTree
n = int(input())
A = list(map(int, input().split()))
B = list(map(int, input().split()))
q = int(input())
query = []
for i in range(q):
x, y = map(int, input().split())
query.append((x, y, i))
w = 1000
query.sort(key=lambda x: (x[0] // w, x[1] if (x[0] // w) % 2 == 0 else -x[1]))
s = sorted(list(set(A + B)))
d = {}
sz = len(s)
for i in range(sz):
d[s[i]] = i
a = [d[i] for i in A]
b = [d[i] for i in B]
fta0 = FenwickTree(sz)
fta1 = FenwickTree(sz)
ftb0 = FenwickTree(sz)
ftb1 = FenwickTree(sz)
ans = [0] * q
x = 0
y = 0
now, asum, bsum = 0, 0, 0
for xi, yi, i in query:
while x < xi:
c = ftb0._sum(a[x])
s = ftb1._sum(a[x])
now += y * A[x] + bsum - 2 * ((y - c) * A[x] + s)
asum += A[x]
fta0.add(a[x], 1)
fta1.add(a[x], A[x])
x += 1
while x > xi:
x -= 1
c = ftb0._sum(a[x])
s = ftb1._sum(a[x])
now -= y * A[x] + bsum - 2 * ((y - c) * A[x] + s)
asum -= A[x]
fta0.add(a[x], -1)
fta1.add(a[x], -A[x])
while y < yi:
c = fta0._sum(b[y])
s = fta1._sum(b[y])
now += x * B[y] + asum - 2 * ((x - c) * B[y] + s)
bsum += B[y]
ftb0.add(b[y], 1)
ftb1.add(b[y], B[y])
y += 1
while y > yi:
y -= 1
c = fta0._sum(b[y])
s = fta1._sum(b[y])
now -= x * B[y] + asum - 2 * ((x - c) * B[y] + s)
bsum -= B[y]
ftb0.add(b[y], -1)
ftb1.add(b[y], -B[y])
ans[i] = now
print(*ans, sep="\n")
#include <iostream>
#include <vector>
#include <algorithm>
#include <map>
#include <atcoder/all>
using namespace std;
using ll = long long;
int main() {
int n;
cin >> n;
vector<ll> A(n), B(n);
for (int i = 0; i < n; i++) cin >> A[i];
for (int i = 0; i < n; i++) cin >> B[i];
int q;
cin >> q;
vector<tuple<int, int, int>> query(q);
for (int i = 0; i < q; i++) {
int x, y;
cin >> x >> y;
query[i] = {x, y, i};
}
int w = 1000;
sort(query.begin(), query.end(), [w](const auto& a, const auto& b) {
int block_a = get<0>(a) / w;
int block_b = get<0>(b) / w;
if (block_a != block_b) return block_a < block_b;
return (block_a % 2 == 0) ? (get<1>(a) < get<1>(b)) : (get<1>(a) > get<1>(b));
});
vector<ll> s(A.begin(), A.end());
s.insert(s.end(), B.begin(), B.end());
sort(s.begin(), s.end());
s.erase(unique(s.begin(), s.end()), s.end());
map<ll, int> d;
int sz = s.size();
for (int i = 0; i < sz; i++) d[s[i]] = i;
vector<int> a(n), b(n);
for (int i = 0; i < n; i++) a[i] = d[A[i]];
for (int i = 0; i < n; i++) b[i] = d[B[i]];
atcoder::fenwick_tree<ll> fta0(sz), ftb0(sz);
atcoder::fenwick_tree<ll> fta1(sz), ftb1(sz);
vector<ll> ans(q);
int x = 0, y = 0;
ll now = 0, asum = 0, bsum = 0;
for (const auto& [xi, yi, i] : query) {
while (x < xi) {
ll c = ftb0.sum(0, a[x] + 1);
ll s = ftb1.sum(0, a[x] + 1);
now += y * A[x] + bsum - 2 * ((y - c) * A[x] + s);
asum += A[x];
fta0.add(a[x], 1);
fta1.add(a[x], A[x]);
x++;
}
while (x > xi) {
x--;
ll c = ftb0.sum(0, a[x] + 1);
ll s = ftb1.sum(0, a[x] + 1);
now -= y * A[x] + bsum - 2 * ((y - c) * A[x] + s);
asum -= A[x];
fta0.add(a[x], -1);
fta1.add(a[x], -A[x]);
}
while (y < yi) {
ll c = fta0.sum(0, b[y] + 1);
ll s = fta1.sum(0, b[y] + 1);
now += x * B[y] + asum - 2 * ((x - c) * B[y] + s);
bsum += B[y];
ftb0.add(b[y], 1);
ftb1.add(b[y], B[y]);
y++;
}
while (y > yi) {
y--;
ll c = fta0.sum(0, b[y] + 1);
ll s = fta1.sum(0, b[y] + 1);
now -= x * B[y] + asum - 2 * ((x - c) * B[y] + s);
bsum -= B[y];
ftb0.add(b[y], -1);
ftb1.add(b[y], -B[y]);
}
ans[i] = now;
}
for (const auto& res : ans) cout << res << "\n";
return 0;
}
posted:
last update: