Official

G - Abs Sum Editorial by en_translator


Let \(\displaystyle f(X,Y)=\sum_{i=1}^X \sum_{j=1}^Y |A_i-B_j|\).

This problem can be solved with the algorithm called Mo’s Algorithm (Query square root decomposition). The overview of Mo’s algorithm is as follows:

  • Set some constant \(M\).
  • Divide the queries into blocks by \(\displaystyle \left\lfloor \frac{X_k}{M} \right\rfloor\).
  • For each block, do the following:
    • Sort the queries in the block in ascending order of \(Y_k\).
    • Find the answer to them one by one, based on the answer to the previous query and the delta.

If \(f(X+1,Y)-f(X,Y)\) and \(f(X,Y+1)-f(X,Y)\) in this problem can be computed in \(O(\alpha)\) time, the algorithm above runs in \(\displaystyle O\left(\max\left(MK\alpha,\frac{N^2\alpha}M\right)\right)\) time. By setting \(\displaystyle M=O\left(\frac{N}{\sqrt K} \right)\), the overall complexity will be \(O\left(N\sqrt K\alpha \right)\). For more details on Mo’s algorithm, refer to the following articles (Japanese):

Mo’s algorithm - アルゴリズムとデータ構造大全

Mo’s algorithm - ei1333の日記

Now we consider how to find \(f(X+1,Y)-f(X,Y)\) and \(f(X,Y+1)-f(X,Y)\) fast.

Let \(C\) be the number of elements strictly less than \(A_{X+1}\) among \(B_1,B_2,\ldots,B_{Y}\), and \(S\) be their sum; then

\[ \begin{aligned} \displaystyle f(X+1,Y)-f(X,Y)&=\sum_{j=1}^Y |A_{X+1}-B_j|\\ &=\sum_{j=1}^Y\left( A_{X+1}+B_j-2\min(A_{X+1},B_j) \right)\\ &=YA_{X+1}+\sum_{j=1}^Y B_j -2\left\{(Y-C)A_{X+1}+S \right\}. \end{aligned} \]

Here, these \(C\) and \(S\) can be found with coordinate compression and a Fenwick Tree. For more details, please refer to the following sample code.

By implementing it appropriately, this problem can be solved in a total of \(O(N\sqrt{K}\log N)\) time.

Sample code (Python 3)

from atcoder.fenwicktree import FenwickTree

n = int(input())
A = list(map(int, input().split()))
B = list(map(int, input().split()))
q = int(input())
query = []
for i in range(q):
    x, y = map(int, input().split())
    query.append((x, y, i))
w = 1000
query.sort(key=lambda x: (x[0] // w, x[1] if (x[0] // w) % 2 == 0 else -x[1]))

s = sorted(list(set(A + B)))
d = {}
sz = len(s)
for i in range(sz):
    d[s[i]] = i
a = [d[i] for i in A]
b = [d[i] for i in B]

fta0 = FenwickTree(sz)
fta1 = FenwickTree(sz)
ftb0 = FenwickTree(sz)
ftb1 = FenwickTree(sz)

ans = [0] * q
x = 0
y = 0

now, asum, bsum = 0, 0, 0

for xi, yi, i in query:
    while x < xi:
        c = ftb0._sum(a[x])
        s = ftb1._sum(a[x])
        now += y * A[x] + bsum - 2 * ((y - c) * A[x] + s)
        asum += A[x]
        fta0.add(a[x], 1)
        fta1.add(a[x], A[x])
        x += 1
    while x > xi:
        x -= 1
        c = ftb0._sum(a[x])
        s = ftb1._sum(a[x])
        now -= y * A[x] + bsum - 2 * ((y - c) * A[x] + s)
        asum -= A[x]
        fta0.add(a[x], -1)
        fta1.add(a[x], -A[x])
    while y < yi:
        c = fta0._sum(b[y])
        s = fta1._sum(b[y])
        now += x * B[y] + asum - 2 * ((x - c) * B[y] + s)
        bsum += B[y]
        ftb0.add(b[y], 1)
        ftb1.add(b[y], B[y])
        y += 1
    while y > yi:
        y -= 1
        c = fta0._sum(b[y])
        s = fta1._sum(b[y])
        now -= x * B[y] + asum - 2 * ((x - c) * B[y] + s)
        bsum -= B[y]
        ftb0.add(b[y], -1)
        ftb1.add(b[y], -B[y])
    ans[i] = now

print(*ans, sep="\n")

Sample code (C++)

#include <iostream>
#include <vector>
#include <algorithm>
#include <map>
#include <atcoder/all>

using namespace std;
using ll = long long;

int main() {
    int n;
    cin >> n;
    vector<ll> A(n), B(n);
    for (int i = 0; i < n; i++) cin >> A[i];
    for (int i = 0; i < n; i++) cin >> B[i];

    int q;
    cin >> q;
    vector<tuple<int, int, int>> query(q);
    for (int i = 0; i < q; i++) {
        int x, y;
        cin >> x >> y;
        query[i] = {x, y, i};
    }

    int w = 1000;
    sort(query.begin(), query.end(), [w](const auto& a, const auto& b) {
        int block_a = get<0>(a) / w;
        int block_b = get<0>(b) / w;
        if (block_a != block_b) return block_a < block_b;
        return (block_a % 2 == 0) ? (get<1>(a) < get<1>(b)) : (get<1>(a) > get<1>(b));
    });

    vector<ll> s(A.begin(), A.end());
    s.insert(s.end(), B.begin(), B.end());
    sort(s.begin(), s.end());
    s.erase(unique(s.begin(), s.end()), s.end());

    map<ll, int> d;
    int sz = s.size();
    for (int i = 0; i < sz; i++) d[s[i]] = i;

    vector<int> a(n), b(n);
    for (int i = 0; i < n; i++) a[i] = d[A[i]];
    for (int i = 0; i < n; i++) b[i] = d[B[i]];

    atcoder::fenwick_tree<ll> fta0(sz), ftb0(sz);
    atcoder::fenwick_tree<ll> fta1(sz), ftb1(sz);

    vector<ll> ans(q);
    int x = 0, y = 0;
    ll now = 0, asum = 0, bsum = 0;

    for (const auto& [xi, yi, i] : query) {
        while (x < xi) {
            ll c = ftb0.sum(0, a[x] + 1);
            ll s = ftb1.sum(0, a[x] + 1);
            now += y * A[x] + bsum - 2 * ((y - c) * A[x] + s);
            asum += A[x];
            fta0.add(a[x], 1);
            fta1.add(a[x], A[x]);
            x++;
        }
        while (x > xi) {
            x--;
            ll c = ftb0.sum(0, a[x] + 1);
            ll s = ftb1.sum(0, a[x] + 1);
            now -= y * A[x] + bsum - 2 * ((y - c) * A[x] + s);
            asum -= A[x];
            fta0.add(a[x], -1);
            fta1.add(a[x], -A[x]);
        }
        while (y < yi) {
            ll c = fta0.sum(0, b[y] + 1);
            ll s = fta1.sum(0, b[y] + 1);
            now += x * B[y] + asum - 2 * ((x - c) * B[y] + s);
            bsum += B[y];
            ftb0.add(b[y], 1);
            ftb1.add(b[y], B[y]);
            y++;
        }
        while (y > yi) {
            y--;
            ll c = fta0.sum(0, b[y] + 1);
            ll s = fta1.sum(0, b[y] + 1);
            now -= x * B[y] + asum - 2 * ((x - c) * B[y] + s);
            bsum -= B[y];
            ftb0.add(b[y], -1);
            ftb1.add(b[y], -B[y]);
        }
        ans[i] = now;
    }

    for (const auto& res : ans) cout << res << "\n";

    return 0;
}

posted:
last update: