Submission #67791514
Source Code Expand
// #undef YOSUPO_LOCAL
#if 0 and !defined(__clang__)
#include <vector>
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
#pragma GCC optimize("Ofast")
#endif
#include <stdio.h>
#include <unistd.h>
#include <algorithm>
#include <array>
#include <bit>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
namespace yosupo {
namespace internal {
template <class T>
using is_signed_int128 =
typename std::conditional<std::is_same<T, __int128_t>::value ||
std::is_same<T, __int128>::value,
std::true_type,
std::false_type>::type;
template <class T>
using is_unsigned_int128 =
typename std::conditional<std::is_same<T, __uint128_t>::value ||
std::is_same<T, unsigned __int128>::value,
std::true_type,
std::false_type>::type;
template <class T>
using make_unsigned_int128 =
typename std::conditional<std::is_same<T, __int128_t>::value,
__uint128_t,
unsigned __int128>;
template <class T>
using is_integral =
typename std::conditional<std::is_integral<T>::value ||
internal::is_signed_int128<T>::value ||
internal::is_unsigned_int128<T>::value,
std::true_type,
std::false_type>::type;
template <class T>
using is_signed_int = typename std::conditional<(is_integral<T>::value &&
std::is_signed<T>::value) ||
is_signed_int128<T>::value,
std::true_type,
std::false_type>::type;
template <class T>
using is_unsigned_int =
typename std::conditional<(is_integral<T>::value &&
std::is_unsigned<T>::value) ||
is_unsigned_int128<T>::value,
std::true_type,
std::false_type>::type;
template <class T>
using to_unsigned = typename std::conditional<
is_signed_int128<T>::value,
make_unsigned_int128<T>,
typename std::conditional<std::is_signed<T>::value,
std::make_unsigned<T>,
std::common_type<T>>::type>::type;
template <class T>
using is_integral_t = std::enable_if_t<is_integral<T>::value>;
template <class T>
using is_signed_int_t = std::enable_if_t<is_signed_int<T>::value>;
template <class T>
using is_unsigned_int_t = std::enable_if_t<is_unsigned_int<T>::value>;
template <class T> using to_unsigned_t = typename to_unsigned<T>::type;
} // namespace internal
} // namespace yosupo
namespace yosupo {
struct Scanner {
public:
Scanner(const Scanner&) = delete;
Scanner& operator=(const Scanner&) = delete;
Scanner(FILE* fp) : fd(fileno(fp)) { line[0] = 127; }
void read() {}
template <class H, class... T> void read(H& h, T&... t) {
bool f = read_single(h);
assert(f);
read(t...);
}
int read_unsafe() { return 0; }
template <class H, class... T> int read_unsafe(H& h, T&... t) {
bool f = read_single(h);
if (!f) return 0;
return 1 + read_unsafe(t...);
}
int close() { return ::close(fd); }
private:
static constexpr int SIZE = 1 << 15;
int fd = -1;
std::array<char, SIZE + 1> line;
int st = 0, ed = 0;
bool eof = false;
bool read_single(std::string& ref) {
if (!skip_space()) return false;
ref = "";
while (true) {
char c = top();
if (c <= ' ') break;
ref += c;
st++;
}
return true;
}
bool read_single(double& ref) {
std::string s;
if (!read_single(s)) return false;
ref = std::stod(s);
return true;
}
template <class T,
std::enable_if_t<std::is_same<T, char>::value>* = nullptr>
bool read_single(T& ref) {
if (!skip_space<50>()) return false;
ref = top();
st++;
return true;
}
template <class T,
internal::is_signed_int_t<T>* = nullptr,
std::enable_if_t<!std::is_same<T, char>::value>* = nullptr>
bool read_single(T& sref) {
using U = internal::to_unsigned_t<T>;
if (!skip_space<50>()) return false;
bool neg = false;
if (line[st] == '-') {
neg = true;
st++;
}
U ref = 0;
do {
ref = 10 * ref + (line[st++] & 0x0f);
} while (line[st] >= '0');
sref = neg ? -ref : ref;
return true;
}
template <class U,
internal::is_unsigned_int_t<U>* = nullptr,
std::enable_if_t<!std::is_same<U, char>::value>* = nullptr>
bool read_single(U& ref) {
if (!skip_space<50>()) return false;
ref = 0;
do {
ref = 10 * ref + (line[st++] & 0x0f);
} while (line[st] >= '0');
return true;
}
bool reread() {
if (ed - st >= 50) return true;
if (st > SIZE / 2) {
std::memmove(line.data(), line.data() + st, ed - st);
ed -= st;
st = 0;
}
if (eof) return false;
auto u = ::read(fd, line.data() + ed, SIZE - ed);
if (u == 0) {
eof = true;
line[ed] = '\0';
u = 1;
}
ed += int(u);
line[ed] = char(127);
return true;
}
char top() {
if (st == ed) {
bool f = reread();
assert(f);
}
return line[st];
}
template <int TOKEN_LEN = 0> bool skip_space() {
while (true) {
while (line[st] <= ' ') st++;
if (ed - st > TOKEN_LEN) return true;
if (st > ed) st = ed;
for (auto i = st; i < ed; i++) {
if (line[i] <= ' ') return true;
}
if (!reread()) return false;
}
}
};
struct Printer {
public:
template <char sep = ' ', bool F = false> void write() {}
template <char sep = ' ', bool F = false, class H, class... T>
void write(const H& h, const T&... t) {
if (F) write_single(sep);
write_single(h);
write<true>(t...);
}
template <char sep = ' ', class... T> void writeln(const T&... t) {
write<sep>(t...);
write_single('\n');
}
Printer(FILE* _fp) : fd(fileno(_fp)) {}
~Printer() { flush(); }
int close() {
flush();
return ::close(fd);
}
void flush() {
if (pos) {
auto res = ::write(fd, line.data(), pos);
assert(res != -1);
pos = 0;
}
}
private:
static std::array<std::array<char, 2>, 100> small;
static std::array<unsigned long long, 20> tens;
static constexpr size_t SIZE = 1 << 15;
int fd;
std::array<char, SIZE> line;
size_t pos = 0;
std::stringstream ss;
template <class T,
std::enable_if_t<std::is_same<char, T>::value>* = nullptr>
void write_single(const T& val) {
if (pos == SIZE) flush();
line[pos++] = val;
}
template <class T,
internal::is_signed_int_t<T>* = nullptr,
std::enable_if_t<!std::is_same<char, T>::value>* = nullptr>
void write_single(const T& val) {
using U = internal::to_unsigned_t<T>;
if (val == 0) {
write_single('0');
return;
}
if (pos > SIZE - 50) flush();
U uval = val;
if (val < 0) {
write_single('-');
uval = -uval;
}
write_unsigned(uval);
}
template <class U,
internal::is_unsigned_int_t<U>* = nullptr,
std::enable_if_t<!std::is_same<char, U>::value>* = nullptr>
void write_single(U uval) {
if (uval == 0) {
write_single('0');
return;
}
if (pos > SIZE - 50) flush();
write_unsigned(uval);
}
static int calc_len(uint64_t x) {
int i = ((63 - std::countl_zero(x)) * 3 + 3) / 10;
if (x < tens[i])
return i;
else
return i + 1;
}
template <class U,
internal::is_unsigned_int_t<U>* = nullptr,
std::enable_if_t<2 >= sizeof(U)>* = nullptr>
void write_unsigned(U uval) {
size_t len = calc_len(uval);
pos += len;
char* ptr = line.data() + pos;
while (uval >= 100) {
ptr -= 2;
memcpy(ptr, small[uval % 100].data(), 2);
uval /= 100;
}
if (uval >= 10) {
memcpy(ptr - 2, small[uval].data(), 2);
} else {
*(ptr - 1) = char('0' + uval);
}
}
template <class U,
internal::is_unsigned_int_t<U>* = nullptr,
std::enable_if_t<4 == sizeof(U)>* = nullptr>
void write_unsigned(U uval) {
std::array<char, 8> buf;
memcpy(buf.data() + 6, small[uval % 100].data(), 2);
memcpy(buf.data() + 4, small[uval / 100 % 100].data(), 2);
memcpy(buf.data() + 2, small[uval / 10000 % 100].data(), 2);
memcpy(buf.data() + 0, small[uval / 1000000 % 100].data(), 2);
if (uval >= 100000000) {
if (uval >= 1000000000) {
memcpy(line.data() + pos, small[uval / 100000000 % 100].data(),
2);
pos += 2;
} else {
line[pos] = char('0' + uval / 100000000);
pos++;
}
memcpy(line.data() + pos, buf.data(), 8);
pos += 8;
} else {
size_t len = calc_len(uval);
memcpy(line.data() + pos, buf.data() + (8 - len), len);
pos += len;
}
}
template <class U,
internal::is_unsigned_int_t<U>* = nullptr,
std::enable_if_t<8 == sizeof(U)>* = nullptr>
void write_unsigned(U uval) {
size_t len = calc_len(uval);
pos += len;
char* ptr = line.data() + pos;
while (uval >= 100) {
ptr -= 2;
memcpy(ptr, small[uval % 100].data(), 2);
uval /= 100;
}
if (uval >= 10) {
memcpy(ptr - 2, small[uval].data(), 2);
} else {
*(ptr - 1) = char('0' + uval);
}
}
template <
class U,
std::enable_if_t<internal::is_unsigned_int128<U>::value>* = nullptr>
void write_unsigned(U uval) {
static std::array<char, 50> buf;
size_t len = 0;
while (uval > 0) {
buf[len++] = char((uval % 10) + '0');
uval /= 10;
}
std::reverse(buf.begin(), buf.begin() + len);
memcpy(line.data() + pos, buf.data(), len);
pos += len;
}
void write_single(const std::string& s) {
for (char c : s) write_single(c);
}
void write_single(const char* s) {
size_t len = strlen(s);
for (size_t i = 0; i < len; i++) write_single(s[i]);
}
template <class T> void write_single(const std::vector<T>& val) {
auto n = val.size();
for (size_t i = 0; i < n; i++) {
if (i) write_single(' ');
write_single(val[i]);
}
}
};
inline std::array<std::array<char, 2>, 100> Printer::small = [] {
std::array<std::array<char, 2>, 100> table;
for (int i = 0; i <= 99; i++) {
table[i][1] = char('0' + (i % 10));
table[i][0] = char('0' + (i / 10 % 10));
}
return table;
}();
inline std::array<unsigned long long, 20> Printer::tens = [] {
std::array<unsigned long long, 20> table;
for (int i = 0; i < 20; i++) {
table[i] = 1;
for (int j = 0; j < i; j++) {
table[i] *= 10;
}
}
return table;
}();
} // namespace yosupo
#include <concepts>
#include <cstdlib>
#include <cmath>
namespace yosupo {
using i8 = int8_t;
using u8 = uint8_t;
using i16 = int16_t;
using u16 = uint16_t;
using i32 = int32_t;
using u32 = uint32_t;
using i64 = int64_t;
using u64 = uint64_t;
using i128 = __int128;
using u128 = unsigned __int128;
using f32 = float;
using f64 = double;
} // namespace yosupo
namespace yosupo {
// x * inv_u32(x) = 1 (mod 2^32)
inline constexpr u32 inv_u32(const u32 x) {
u32 inv = 1;
for (int i = 0; i < 5; i++) {
inv *= 2u - inv * x;
}
return inv;
}
inline constexpr u64 pow_mod_u64(u64 x, u64 n, u64 m) {
if (m == 1) return 0;
u64 r = 1;
u64 y = x % m;
while (n) {
if (n & 1) r = (u64)((u128)(1) * r * y % m);
y = (u64)((u128)(1) * y * y % m);
n >>= 1;
}
return r;
}
inline u64 isqrt(u64 a) {
if (a == u64(-1)) return (u64(1) << 32) - 1;
u64 x = (u64)std::sqrt((double)a);
return x * x > a ? x - 1 : x;
}
// integer k-th root: floor(a^(1/k))
inline u64 iroot(u64 a, int k) {
if (k == 2) return isqrt(a);
if (k == 1 || a == 0) return a;
if (k >= 64 || a < (u64(1) << k)) return 1;
// is (b^k <= a)?
auto check = [&](u64 b) {
u128 result = 1;
for (int i = 0; i < k; i++) {
result *= b;
if (result > a) return false;
}
return true;
};
// lw^k <= a < up^k
u64 lw = 2;
u64 up = 1LL << ((64 + k - 1) / k);
while (up - lw > 1) {
u64 mid = (lw + up) / 2;
if (check(mid)) {
lw = mid;
} else {
up = mid;
}
}
return lw;
}
} // namespace yosupo
#include <initializer_list>
#include <limits>
namespace yosupo {
inline constexpr bool is_prime(u32 n) {
if (n == 2) return true;
if (n % 2 == 0) return false;
u64 d = n - 1;
while (d % 2 == 0) d /= 2;
for (u64 a : {2, 7, 61}) {
if (a % n == 0) return true;
u64 t = d;
u64 y = pow_mod_u64(a, t, n);
while (t != n - 1 && y != 1 && y != n - 1) {
y = (u64)((u128)(1) * y * y % n);
t <<= 1;
}
if (y != n - 1 && t % 2 == 0) {
return false;
}
}
return true;
}
inline constexpr bool is_prime(u64 n) {
if (n <= std::numeric_limits<u32>::max()) {
return is_prime((u32)n);
}
if (n % 2 == 0) return false;
u64 d = n - 1;
while (d % 2 == 0) d /= 2;
for (u64 a : {2, 325, 9375, 28178, 450775, 9780504, 1795265022}) {
if (a % n == 0) return true;
u64 t = d;
u64 y = pow_mod_u64(a, t, n);
while (t != n - 1 && y != 1 && y != n - 1) {
y = (u64)((u128)(1) * y * y % n);
t <<= 1;
}
if (y != n - 1 && t % 2 == 0) {
return false;
}
}
return true;
}
inline bool is_prime(i32 n) {
if (n <= 1) return false;
return is_prime((u32)n);
}
inline bool is_prime(i64 n) {
if (n <= 1) return false;
return is_prime((u64)n);
}
// lpf_table[i] = the smallest prime factor of i
inline std::vector<i32> lpf_table(i32 n) {
std::vector<i32> table(n + 1, -1);
std::vector<int> primes;
for (int d = 2; d <= n; d++) {
if (table[d] == -1) {
primes.push_back(d);
table[d] = d;
}
for (int p : primes) {
if (p * d > n || p > table[d]) break;
table[p * d] = p;
}
}
return table;
}
// enumerate primes no more than n
inline std::vector<i32> primes(i32 n) {
std::vector<i32> table = lpf_table(n);
std::vector<i32> p;
for (int i = 2; i <= n; i++) {
if (table[i] == i) p.push_back(i);
}
return p;
}
} // namespace yosupo
namespace yosupo {
template <i32 MOD>
requires requires {
MOD % 2 == 1 && 1 <= MOD&& MOD <= (1 << 30) - 1;
is_prime(MOD);
}
struct ModInt {
static constexpr i32 mod() { return MOD; }
constexpr ModInt() : x(0) {}
// It's OK because _x * B2 < 2**32 * MOD
constexpr ModInt(u32 _x) : x(reduce_mul(_x, B2)) {}
constexpr ModInt(std::signed_integral auto _x)
: ModInt((u32)(_x % MOD + MOD)) {}
constexpr ModInt(std::unsigned_integral auto _x)
: ModInt((u32)(_x % MOD)) {}
constexpr i32 val() const {
u32 y = reduce_mul(x, 1);
return y < MOD ? y : y - MOD;
}
constexpr ModInt operator+() const { return *this; }
constexpr ModInt operator-() const { return ModInt() -= *this; }
constexpr ModInt& operator+=(const ModInt& rhs) {
x += rhs.x;
x = std::min(x, x - 2 * MOD);
return *this;
}
constexpr friend ModInt operator+(const ModInt& lhs, const ModInt& rhs) {
return ModInt(lhs) += rhs;
}
constexpr ModInt& operator-=(const ModInt& rhs) {
x += 2 * MOD - rhs.x;
x = std::min(x, x - 2 * MOD);
return *this;
}
constexpr friend ModInt operator-(const ModInt& lhs, const ModInt& rhs) {
return ModInt(lhs) -= rhs;
}
constexpr ModInt& operator*=(const ModInt& rhs) {
x = reduce_mul(x, rhs.x);
return *this;
}
constexpr friend ModInt operator*(const ModInt& lhs, const ModInt& rhs) {
return ModInt(lhs) *= rhs;
}
constexpr ModInt& operator/=(const ModInt& rhs) {
return *this *= rhs.inv();
}
constexpr friend ModInt operator/(const ModInt& lhs, const ModInt& rhs) {
return ModInt(lhs) /= rhs;
}
friend bool operator==(const ModInt& lhs, const ModInt& rhs) {
return std::min(lhs.x, lhs.x - MOD) == std::min(rhs.x, rhs.x - MOD);
}
constexpr ModInt pow(u64 n) const {
ModInt v = *this, r = 1;
while (n) {
if (n & 1) r *= v;
v *= v;
n >>= 1;
}
return r;
}
constexpr ModInt inv() const { return pow(MOD - 2); }
std::string dump() const { return std::to_string(val()); }
private:
u32 x; // [0, 2 * MOD)
static constexpr u32 B = ((u64(1) << 32)) % MOD;
static constexpr u32 B2 = u64(1) * B * B % MOD;
static constexpr u32 INV = -inv_u32(MOD);
// Input: (l * r) must be no more than (2^32 * MOD)
// Output: ((l * r) / 2^32) % MOD
static constexpr u32 reduce_mul(u32 l, u32 r) {
u64 x = u64(1) * l * r;
x += u64(u32(x) * INV) * MOD;
return u32(x >> 32);
}
};
using ModInt998244353 = ModInt<998244353>;
using ModInt1000000007 = ModInt<1000000007>;
} // namespace yosupo
#include <cstddef>
#include <functional>
#include <queue>
#include <utility>
#include <immintrin.h>
#include <span>
namespace yosupo {
inline constexpr u32 smallest_primitive_root(u32 m) {
if (m == 2) return 1;
u32 divs[20] = {};
int cnt = 0;
u32 x = (m - 1) / 2;
for (int i = 2; (u64)(i)*i <= x; i += 2) {
if (x % i == 0) {
divs[cnt++] = i;
while (x % i == 0) {
x /= i;
}
}
}
if (x > 1) {
divs[cnt++] = x;
}
for (u32 g = 2;; g++) {
bool ok = true;
for (int i = 0; i < cnt; i++) {
if (pow_mod_u64(g, (m - 1) / divs[i], m) == 1) {
ok = false;
break;
}
}
if (ok) return g;
}
}
} // namespace yosupo
#include <xmmintrin.h>
#include <map>
#include <set>
#include <tuple>
namespace yosupo {
inline std::string dump(const std::string& t) { return t; }
inline std::string dump(const char* t) { return t; }
template <std::integral T> std::string dump(T t) { return std::to_string(t); }
inline std::string dump(const u128& t) {
if (t == 0) {
return "0";
}
std::string s;
u128 x = t;
while (x) {
s += char(x % 10 + '0');
x /= 10;
}
std::ranges::reverse(s);
return s;
}
inline std::string dump(const i128& t) {
if (t < 0) {
return "-" + dump((u128)(-t));
} else {
return dump((u128)(t));
}
}
template <std::floating_point T> std::string dump(T t) {
return std::to_string(t);
}
template <class T>
requires requires(T t) { t.dump(); }
std::string dump(T t);
template <class T>
requires(!requires(T t) { t.dump(); }) && (requires(T t) { t.val(); })
std::string dump(T t);
template <class T, std::size_t N> std::string dump(const std::array<T, N>&);
template <class T> std::string dump(const std::vector<T>&);
template <class T1, class T2> std::string dump(const std::pair<T1, T2>&);
template <class K, class V> std::string dump(const std::map<K, V>&);
template <class T> std::string dump(const std::set<T>&);
template <class... Ts> std::string dump(const std::tuple<Ts...>& t);
template <class T>
requires requires(T t) { t.dump(); }
std::string dump(T t) {
return dump(t.dump());
}
template <class T>
requires(!requires(T t) { t.dump(); }) && (requires(T t) { t.val(); })
std::string dump(T t) {
return dump(t.val());
}
template <class T, std::size_t N> std::string dump(const std::array<T, N>& a) {
std::string s = "[";
for (size_t i = 0; i < N; i++) {
if (i) {
s += ", ";
}
s += dump(a[i]);
}
s += "]";
return s;
}
template <class T> std::string dump(const std::vector<T>& v) {
std::string s = "[";
for (std::size_t i = 0; i < v.size(); ++i) {
s += dump(v[i]);
if (i + 1 != v.size()) {
s += ", ";
}
}
s += "]";
return s;
}
template <class T1, class T2> std::string dump(const std::pair<T1, T2>& p) {
std::string s = "(";
s += dump(p.first);
s += ", ";
s += dump(p.second);
s += ")";
return s;
}
template <class K, class V> std::string dump(const std::map<K, V>& m) {
std::string s = "{";
for (auto it = m.begin(); it != m.end(); ++it) {
if (it != m.begin()) {
s += ", ";
}
s += dump(it->first);
s += ": ";
s += dump(it->second);
}
s += "}";
return s;
}
template <class T> std::string dump(const std::set<T>& s) {
std::string str = "{";
for (auto it = s.begin(); it != s.end(); ++it) {
if (it != s.begin()) {
str += ", ";
}
str += dump(*it);
}
str += "}";
return str;
}
template <class... Ts> std::string dump(const std::tuple<Ts...>& t) {
std::string s = "(";
[&]<std::size_t... I>(std::index_sequence<I...>) {
((s += dump(std::get<I>(t)) + ((I < sizeof...(Ts) - 1) ? ", " : "")),
...);
}(std::make_index_sequence<sizeof...(Ts)>());
s += ")";
return s;
}
} // namespace yosupo
namespace yosupo {
template <i32 MOD> struct ModInt8 {
private:
using modint = ModInt<MOD>;
static_assert(sizeof(modint) == 4);
alignas(32) std::array<modint, 8> x;
using m256i_u = __m256i_u;
__attribute__((target("avx2"))) m256i_u to_m256i_u() const {
return _mm256_loadu_si256((m256i_u*)x.data());
}
__attribute__((target("avx2"))) explicit ModInt8(m256i_u _x) {
_mm256_storeu_si256((m256i_u*)x.data(), _x);
}
public:
__attribute__((target("avx2"))) ModInt8() : x() {}
__attribute__((target("avx2"))) explicit ModInt8(modint _x) { x.fill(_x); }
__attribute__((target("avx2"))) ModInt8(std::span<const modint, 8> _x) {
std::copy_n(_x.begin(), 8, x.begin());
}
__attribute__((target("avx2"))) ModInt8(modint x0,
modint x1,
modint x2,
modint x3,
modint x4,
modint x5,
modint x6,
modint x7)
: ModInt8(std::array<modint, 8>{x0, x1, x2, x3, x4, x5, x6, x7}) {}
__attribute__((target("avx2"))) std::array<u32, 8> val() const {
auto _x = to_m256i_u();
auto a = reduce_mul(_x, _mm256_set1_epi32(1));
alignas(32) std::array<u32, 8> b;
_mm256_storeu_si256((__m256i_u*)b.data(),
min(a, _mm256_sub_epi32(a, MOD_X())));
return b;
}
__attribute__((target("avx2"))) std::array<modint, 8> to_array() const {
return x;
}
__attribute__((target("avx2"))) friend ModInt8 operator+(
const ModInt8& lhs,
const ModInt8& rhs) {
auto _x = _mm256_add_epi32(lhs.to_m256i_u(), rhs.to_m256i_u());
return ModInt8(min(_x, _mm256_sub_epi32(_x, MOD2_X())));
}
__attribute__((target("avx2"))) ModInt8& operator+=(const ModInt8& rhs) {
return *this = *this + rhs;
}
__attribute__((target("avx2"))) friend ModInt8 operator-(
const ModInt8& lhs,
const ModInt8& rhs) {
auto _x = _mm256_sub_epi32(lhs.to_m256i_u(), rhs.to_m256i_u());
return ModInt8(min(_x, _mm256_add_epi32(_x, MOD2_X())));
}
__attribute__((target("avx2"))) ModInt8& operator-=(const ModInt8& rhs) {
return *this = *this - rhs;
}
__attribute__((target("avx2"))) friend ModInt8 operator*(
const ModInt8& lhs,
const ModInt8& rhs) {
return ModInt8(reduce_mul(lhs.to_m256i_u(), rhs.to_m256i_u()));
}
__attribute__((target("avx2"))) ModInt8& operator*=(const ModInt8& rhs) {
return *this = *this * rhs;
}
__attribute__((target("avx2"))) ModInt8 operator-() const {
return ModInt8() - *this;
}
__attribute__((target("avx2"))) friend bool operator==(const ModInt8& lhs,
const ModInt8& rhs) {
auto _x = lhs.to_m256i_u();
auto _y = rhs.to_m256i_u();
_x = min(_x, _mm256_sub_epi32(_x, MOD_X()));
_y = min(_y, _mm256_sub_epi32(_y, MOD_X()));
auto z = _mm256_xor_si256(_x, _y);
return _mm256_testz_si256(z, z);
}
// a.permutevar(idx)[i] = a[idx[i] % 8]
__attribute__((target("avx2"))) ModInt8
permutevar(const std::array<u32, 8>& idx) const {
return ModInt8(_mm256_permutevar8x32_epi32(
to_m256i_u(), _mm256_loadu_si256((const m256i_u*)idx.data())));
}
template <u8 MASK>
__attribute__((target("avx2"))) friend ModInt8 blend(const ModInt8& lhs,
const ModInt8& rhs) {
return ModInt8(
_mm256_blend_epi32(lhs.to_m256i_u(), rhs.to_m256i_u(), MASK));
}
__attribute__((target("avx2"))) std::string dump() const {
return yosupo::dump(val());
}
private:
static constexpr u32 B2 = pow_mod_u64(2, 64, MOD);
static constexpr u32 INV = -inv_u32(MOD);
__attribute__((target("avx2"))) static m256i_u MOD_X() {
return _mm256_set1_epi32(MOD);
}
__attribute__((target("avx2"))) static m256i_u MOD2_X() {
return _mm256_set1_epi32(2 * MOD);
}
__attribute__((target("avx2"))) static m256i_u N_INV_X() {
return _mm256_set1_epi32(INV);
}
// Input: l * r <= 2^32 * MOD
// Output: l * r >>= 2^32
__attribute__((target("avx2"))) static m256i_u reduce_mul(
const m256i_u& l,
const m256i_u& r) {
auto x0 = mul_even(l, r);
auto x1 = mul_even(_mm256_shuffle_epi32(l, 0xf5),
_mm256_shuffle_epi32(r, 0xf5));
x0 += mul_even(mul_even(x0, N_INV_X()), MOD_X());
x1 += mul_even(mul_even(x1, N_INV_X()), MOD_X());
return _mm256_blend_epi32(_mm256_srli_epi64(x0, 32), x1, 0b10101010);
}
/* SIMD utils */
__attribute__((target("avx2"))) static __m256i_u min(const __m256i_u& l,
const __m256i_u& r) {
return _mm256_min_epu32(l, r);
}
__attribute__((target("avx2"))) static __m256i_u max(const __m256i_u& l,
const __m256i_u& r) {
return _mm256_max_epu32(l, r);
}
// (lr[0], lr[2], lr[4], lr[6])
__attribute__((target("avx2"))) static __m256i_u mul_even(
const __m256i_u& l,
const __m256i_u& r) {
return _mm256_mul_epu32(l, r);
}
};
} // namespace yosupo
#include <ranges>
namespace yosupo {
template <class T> bool chmin(T& a, const T& b) {
if (a > b) {
a = b;
return true;
}
return false;
}
template <class T> bool chmax(T& a, const T& b) {
if (a < b) {
a = b;
return true;
}
return false;
}
template <class T> T floor_div(T x, T y) {
auto d = x / y;
auto r = x % y;
if (r == 0) return d;
if ((r > 0) == (y > 0)) return d;
return d - 1;
}
template <class T> T ceil_div(T x, T y) {
auto d = x / y;
auto r = x % y;
if (r == 0) return d;
if ((r > 0) == (y > 0)) return d + 1;
return d;
}
template <std::ranges::input_range R>
std::vector<std::ranges::range_value_t<R>> to_vec(R&& r) {
auto common = r | std::views::common;
return std::vector(common.begin(), common.end());
}
template <class T, class Comp = std::equal_to<>>
void dedup(std::vector<T>& v, Comp comp = Comp{}) {
auto it = std::ranges::unique(v, comp);
v.erase(it.begin(), it.end());
}
template <size_t N, class T> std::span<T, N> subspan(std::span<T> a, int idx) {
return a.subspan(idx).template first<N>();
}
inline auto rep(int l, int r) {
if (l > r) return std::views::iota(l, l);
return std::views::iota(l, r);
}
} // namespace yosupo
namespace yosupo {
template <i32 MOD> struct FFTInfo {
using modint = ModInt<MOD>;
using modint8 = ModInt8<MOD>;
static constexpr u32 g = smallest_primitive_root(MOD);
static constexpr int ord2 = std::countr_zero((u32)(MOD - 1));
// w[i]^(2^i) == 1 : w[i] is the fft omega of n=2^i
static constexpr std::array<modint, ord2 + 1> w = []() {
std::array<modint, ord2 + 1> v;
v[ord2] = modint(g).pow((MOD - 1) >> ord2);
for (int i = ord2 - 1; i >= 0; i--) {
v[i] = v[i + 1] * v[i + 1];
}
return v;
}();
static constexpr std::array<modint, ord2 + 1> iw = []() {
std::array<modint, ord2 + 1> v;
v[ord2] = w[ord2].inv();
for (int i = ord2 - 1; i >= 0; i--) {
v[i] = v[i + 1] * v[i + 1];
}
return v;
}();
static constexpr std::array<modint, ord2 + 1> rot8 = []() {
std::array<modint, std::max(0, ord2 + 1)> v;
for (int i = 3; i <= ord2; i++) {
v[i] = w[i];
for (int j = 3; j < i; j++) {
v[i] *= iw[j];
}
}
return v;
}();
static constexpr std::array<modint, ord2 + 1> irot8 = []() {
std::array<modint, std::max(0, ord2 + 1)> v;
for (int i = 3; i <= ord2; i++) {
v[i] = iw[i];
for (int j = 3; j < i; j++) {
v[i] *= w[j];
}
}
return v;
}();
// rot[i] * rot_shift8(i) = rot[i + 8]
modint rot_shift8(u32 i) const { return rot8[std::countr_one(i >> 3) + 3]; }
modint irot_shift8(u32 i) const {
return irot8[std::countr_one(i >> 3) + 3];
}
static constexpr std::array<modint, ord2 + 1> rot4 = []() {
std::array<modint, std::max(0, ord2 + 1)> v;
for (int i = 4; i <= ord2; i++) {
v[i] = w[i];
for (int j = 4; j < i; j++) {
v[i] *= iw[j];
}
}
return v;
}();
static constexpr std::array<modint, ord2 + 1> irot4 = []() {
std::array<modint, std::max(0, ord2 + 1)> v;
for (int i = 4; i <= ord2; i++) {
v[i] = iw[i];
for (int j = 4; j < i; j++) {
v[i] *= w[j];
}
}
return v;
}();
std::array<std::array<modint, 8>, ord2 + 1> rot16i = []() {
std::array<std::array<modint, 8>, ord2 + 1> v;
for (int i = 4; i <= ord2; i++) {
std::array<modint, 8> buf;
buf[0] = 1;
for (int j = 1; j < 8; j++) {
buf[j] = buf[j - 1] * rot4[i];
}
v[i] = buf;
}
return v;
}();
std::array<std::array<modint, 8>, ord2 + 1> irot16i = []() {
std::array<std::array<modint, 8>, ord2 + 1> v;
for (int i = 4; i <= ord2; i++) {
std::array<modint, 8> buf;
buf[0] = 1;
for (int j = 1; j < 8; j++) {
buf[j] = buf[j - 1] * irot4[i];
}
v[i] = buf;
}
return v;
}();
// rot[i * j] * rot_shift16i(i)[j] = rot[(i + 8) * j]
__attribute__((target("avx2"))) modint8 rot_shift16i(u32 i) const {
return modint8(rot16i[std::countr_one(i >> 4) + 4]);
}
__attribute__((target("avx2"))) modint8 irot_shift16i(u32 i) const {
return modint8(irot16i[std::countr_one(i >> 4) + 4]);
}
};
template <i32 MOD> const FFTInfo<MOD> fft_info = FFTInfo<MOD>();
template <i32 MOD>
__attribute__((target("avx2"))) void butterfly(std::vector<ModInt<MOD>>& a) {
const int n = int(a.size());
const int lg = std::countr_zero((u32)n);
assert(n == (1 << lg));
using modint8 = ModInt8<MOD>;
const FFTInfo<MOD>& info = fft_info<MOD>;
if (n == 1) {
return;
}
if (n == 2) {
auto a0 = a[0], a1 = a[1];
a[0] = a0 + a1;
a[1] = a0 - a1;
return;
}
if (n == 4) {
auto a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3];
auto x = (a1 - a3) * info.w[2];
a[0] = (a0 + a2) + (a1 + a3);
a[1] = (a0 + a2) - (a1 + a3);
a[2] = (a0 - a2) + x;
a[3] = (a0 - a2) - x;
return;
}
int h = lg;
if (h % 2 == 0) {
// 2-base
int len = n / 2;
for (int i = 0; i < len; i += 8) {
auto l = modint8(subspan<8>(std::span{a}, 0 * len + i));
auto r = modint8(subspan<8>(std::span{a}, 1 * len + i));
std::copy_n((l + r).to_array().data(), 8,
subspan<8>(std::span{a}, 0 * len + i).data());
std::copy_n((l - r).to_array().data(), 8,
subspan<8>(std::span{a}, 1 * len + i).data());
}
h--;
}
while (h >= 5) {
// 4-base
const modint8 w2x = modint8(info.w[2]);
modint8 rotx = modint8(1);
for (int start = 0; start < n; start += (1 << h)) {
const modint8 rot2x = rotx * rotx;
const modint8 rot3x = rot2x * rotx;
int len = 1 << (h - 2);
for (int i = 0; i < len; i += 8) {
auto a0 =
modint8(subspan<8>(std::span{a}, start + 0 * len + i));
auto a1 =
modint8(subspan<8>(std::span{a}, start + 1 * len + i)) *
rotx;
auto a2 =
modint8(subspan<8>(std::span{a}, start + 2 * len + i)) *
rot2x;
auto a3 =
modint8(subspan<8>(std::span{a}, start + 3 * len + i)) *
rot3x;
auto x = (a1 - a3) * w2x;
std::copy_n(
(a0 + a2 + a1 + a3).to_array().data(), 8,
subspan<8>(std::span{a}, start + 0 * len + i).data());
std::copy_n(
(a0 + a2 - a1 - a3).to_array().data(), 8,
subspan<8>(std::span{a}, start + 1 * len + i).data());
std::copy_n(
(a0 - a2 + x).to_array().data(), 8,
subspan<8>(std::span{a}, start + 2 * len + i).data());
std::copy_n(
(a0 - a2 - x).to_array().data(), 8,
subspan<8>(std::span{a}, start + 3 * len + i).data());
}
rotx *= modint8(info.rot_shift8(8 * (start >> h)));
}
h -= 2;
}
{
// fft each element
const ModInt<MOD> w4 = info.w[2], w8 = info.w[3];
const auto step4 = ModInt8<MOD>(1, 1, 1, w4, 1, 1, 1, w4);
const auto step8 =
ModInt8<MOD>(1, 1, 1, 1, 1, w8, w8 * w8, w8 * w8 * w8);
modint8 rotxi = modint8(1);
for (int i = 0; i < n; i += 8) {
auto x = modint8(subspan<8>(std::span{a}, i)) * rotxi;
x = (blend<0b11110000>(x, -x) +
x.permutevar({4, 5, 6, 7, 0, 1, 2, 3})) *
step8;
x = (blend<0b11001100>(x, -x) +
x.permutevar({2, 3, 0, 1, 6, 7, 4, 5})) *
step4;
x = (blend<0b10101010>(x, -x) +
x.permutevar({1, 0, 3, 2, 5, 4, 7, 6}));
std::copy_n(x.to_array().data(), 8,
subspan<8>(std::span{a}, i).data());
if (i + 8 < n) rotxi *= info.rot_shift16i(2 * i);
}
}
}
template <i32 MOD>
__attribute__((target("avx2"))) void butterfly_inv(
std::vector<ModInt<MOD>>& a) {
const int n = int(a.size());
const int lg = std::countr_zero((u32)n);
assert(n == (1 << lg));
using modint8 = ModInt8<MOD>;
const FFTInfo<MOD>& info = fft_info<MOD>;
if (n == 1) {
return;
}
if (n == 2) {
auto a0 = a[0], a1 = a[1];
a[0] = a0 + a1;
a[1] = a0 - a1;
return;
}
if (n == 4) {
auto a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3];
auto x0 = a0 + a1;
auto x1 = a0 - a1;
auto x2 = a2 + a3;
auto x3 = (a2 - a3) * info.iw[2];
a[0] = x0 + x2;
a[1] = x1 + x3;
a[2] = x0 - x2;
a[3] = x1 - x3;
return;
}
{
// ifft each element
const ModInt<MOD> iw4 = info.iw[2], iw8 = info.iw[3];
const auto step4 = ModInt8<MOD>(1, 1, 1, iw4, 1, 1, 1, iw4);
const auto step8 =
ModInt8<MOD>(1, 1, 1, 1, 1, iw8, iw8 * iw8, iw8 * iw8 * iw8);
modint8 irotxi = modint8(1);
for (int i = 0; i < n; i += 8) {
auto x = modint8(subspan<8>(std::span{a}, i));
x = (blend<0b10101010>(x, -x) +
x.permutevar({1, 0, 3, 2, 5, 4, 7, 6})) *
step4;
x = (blend<0b11001100>(x, -x) +
x.permutevar({2, 3, 0, 1, 6, 7, 4, 5})) *
step8;
x = (blend<0b11110000>(x, -x) +
x.permutevar({4, 5, 6, 7, 0, 1, 2, 3}));
std::copy_n((x * irotxi).to_array().begin(), 8,
subspan<8>(std::span{a}, i).begin());
if (i + 8 < n) irotxi *= info.irot_shift16i(2 * i);
}
}
int h = 3;
while (h + 2 <= lg) {
h += 2;
// 4-base
const modint8 w2 = modint8(info.iw[2]);
modint8 rotx = modint8(1);
for (int start = 0; start < n; start += (1 << h)) {
const auto rot2x = rotx * rotx;
const auto rot3x = rot2x * rotx;
int len = 1 << (h - 2);
for (int i = 0; i < len; i += 8) {
auto a0 =
modint8(subspan<8>(std::span{a}, start + 0 * len + i));
auto a1 =
modint8(subspan<8>(std::span{a}, start + 1 * len + i));
auto a2 =
modint8(subspan<8>(std::span{a}, start + 2 * len + i));
auto a3 =
modint8(subspan<8>(std::span{a}, start + 3 * len + i));
auto x0 = a0 + a1;
auto x1 = a0 - a1;
auto x2 = a2 + a3;
auto x3 = (a2 - a3) * w2;
std::copy_n(
(x0 + x2).to_array().begin(), 8,
subspan<8>(std::span{a}, start + 0 * len + i).begin());
std::copy_n(
((x1 + x3) * rotx).to_array().begin(), 8,
subspan<8>(std::span{a}, start + 1 * len + i).begin());
std::copy_n(
((x0 - x2) * rot2x).to_array().begin(), 8,
subspan<8>(std::span{a}, start + 2 * len + i).begin());
std::copy_n(
((x1 - x3) * rot3x).to_array().data(), 8,
subspan<8>(std::span{a}, start + 3 * len + i).begin());
}
rotx *= modint8(info.irot_shift8(8 * (start >> h)));
}
}
if (h + 1 == lg) {
// 2-base
int len = n / 2;
for (int i = 0; i < len; i += 8) {
auto l = modint8(subspan<8>(std::span{a}, 0 * len + i));
auto r = modint8(subspan<8>(std::span{a}, 1 * len + i));
std::copy_n((l + r).to_array().begin(), 8,
subspan<8>(std::span{a}, 0 * len + i).begin());
std::copy_n((l - r).to_array().begin(), 8,
subspan<8>(std::span{a}, 1 * len + i).begin());
}
h++;
}
}
template <i32 MOD>
__attribute__((target("avx2"))) std::vector<ModInt<MOD>> convolution_fft(
std::vector<ModInt<MOD>> a,
std::vector<ModInt<MOD>> b) {
if (a.empty() || b.empty()) return {};
int n = int(a.size()), m = int(b.size());
int z = (int)std::bit_ceil((unsigned int)(n + m - 1));
a.resize(z);
butterfly(a);
b.resize(z);
butterfly(b);
for (int i = 0; i < z; i++) {
a[i] *= b[i];
}
butterfly_inv(a);
a.resize(n + m - 1);
auto iz = ModInt<MOD>(z).inv();
for (int i = 0; i < n + m - 1; i++) a[i] *= iz;
return a;
}
template <i32 MOD>
__attribute__((target("avx2"))) std::vector<ModInt<MOD>> convolution_naive(
const std::vector<ModInt<MOD>>& a,
const std::vector<ModInt<MOD>>& b) {
if (a.empty() || b.empty()) return {};
int n = int(a.size()), m = int(b.size());
int m8 = (m + 7) / 8 * 8;
std::vector<ModInt<MOD>> ans(n + m8 - 1);
using modint = ModInt<MOD>;
using modint8 = ModInt8<MOD>;
modint8 b_last = [&]() {
std::array<modint, 8> b_vec = {};
std::copy(b.begin() + m8 - 8, b.end(), b_vec.begin());
return modint8(b_vec);
}();
for (int i = 0; i < n; ++i) {
modint ai = a[i];
modint8 ai_vec(ai);
for (int j = 0; j < m8; j += 8) {
modint8 b_vec =
(j < m8 - 8) ? modint8(subspan<8>(std::span{b}, j)) : b_last;
modint8 ans_vec(subspan<8>(std::span{ans}, i + j));
modint8 prod = ai_vec * b_vec;
modint8 updated_ans = ans_vec + prod;
std::copy_n(updated_ans.to_array().data(), 8,
subspan<8>(std::span{ans}, i + j).data());
}
}
ans.resize(n + m - 1);
return ans;
}
template <i32 MOD>
__attribute__((target("avx2"))) std::vector<ModInt<MOD>> convolution(
const std::vector<ModInt<MOD>>& a,
const std::vector<ModInt<MOD>>& b) {
if (a.empty() || b.empty()) return {};
int n = int(a.size()), m = int(b.size());
if (std::min(n, m) < 100) {
if (n < m) return convolution_naive(a, b);
return convolution_naive(b, a);
}
return convolution_fft(a, b);
}
} // namespace yosupo
namespace yosupo {
template <i32 MOD> struct ModVec {
private:
using modint = ModInt<MOD>;
using modint8 = ModInt8<MOD>;
public:
ModVec() {}
explicit ModVec(size_t n) : v(n) {}
ModVec(const std::vector<modint>& _v) : v(_v) {}
ModVec(std::initializer_list<modint> init) : v(init) {}
modint& operator[](size_t i) { return v[i]; }
const modint& operator[](size_t i) const { return v[i]; }
bool operator==(const ModVec& rhs) const { return v == rhs.v; }
ModVec& operator+=(const ModVec& rhs) {
if (size() < rhs.size()) v.resize(rhs.size());
// TODO: simd
for (int i = 0; i < std::ssize(rhs); i++) {
v[i] += rhs.v[i];
}
return *this;
}
friend ModVec operator+(const ModVec& lhs, const ModVec& rhs) {
return ModVec(lhs) += rhs;
}
ModVec& operator-=(const ModVec& rhs) {
if (size() < rhs.size()) v.resize(rhs.size());
// TODO: simd
for (int i = 0; i < std::ssize(rhs); i++) {
v[i] -= rhs.v[i];
}
return *this;
}
friend ModVec operator-(const ModVec& lhs, const ModVec& rhs) {
return ModVec(lhs) -= rhs;
}
friend ModVec operator*(const ModVec& lhs, const ModVec& rhs) {
return ModVec(convolution(lhs.v, rhs.v));
}
ModVec& operator*=(const ModVec& rhs) { return *this = *this * rhs; }
size_t size() const { return v.size(); }
void resize(size_t n) { v.resize(n); }
modint& front() { return v.front(); }
const modint& front() const { return v.front(); }
modint& back() { return v.back(); }
const modint& back() const { return v.back(); }
void push_back(const modint& x) { v.push_back(x); }
void pop_back() { v.pop_back(); }
std::string dump() const { return ::yosupo::dump(v); }
static ModVec prod(std::vector<ModVec> pols) {
if (pols.empty()) return ModVec({modint(1)});
if (pols.size() == 1) return pols[0];
// sort by the first
std::priority_queue<std::pair<int, int>,
std::vector<std::pair<int, int>>, std::greater<>>
que;
for (int i = 0; i < std::ssize(pols); i++) {
que.emplace(pols[i].size(), i);
}
while (que.size() > 1) {
auto [a, i] = que.top();
que.pop();
auto [b, j] = que.top();
que.pop();
pols[i] *= pols[j];
pols[j] = ModVec();
que.emplace(pols[i].size(), i);
}
auto [_, i] = que.top();
return pols[i];
}
private:
std::vector<modint> v;
};
using ModVec998244353 = ModVec<998244353>;
using ModVec1000000007 = ModVec<1000000007>;
} // namespace yosupo
#include <random>
namespace yosupo {
struct Xoshiro256StarStar {
public:
using result_type = u64;
Xoshiro256StarStar() : Xoshiro256StarStar(0) {}
explicit Xoshiro256StarStar(u64 seed) {
// use splitmix64
// Reference: http://xoshiro.di.unimi.it/splitmix64.c
for (int i = 0; i < 4; i++) {
u64 z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s[i] = z ^ (z >> 31);
}
}
static constexpr result_type min() { return 0; }
static constexpr result_type max() { return -1; }
result_type operator()() {
const u64 result_starstar = rotl(s[1] * 5, 7) * 9;
const u64 t = s[1] << 17;
s[2] ^= s[0];
s[3] ^= s[1];
s[1] ^= s[2];
s[0] ^= s[3];
s[2] ^= t;
s[3] = rotl(s[3], 45);
return result_starstar;
}
private:
// Use xoshiro256**
// Refereces: http://xoshiro.di.unimi.it/xoshiro256starstar.c
static u64 rotl(const u64 x, int k) { return (x << k) | (x >> (64 - k)); }
std::array<u64, 4> s;
};
// https://github.com/wangyi-fudan/wyhash
struct WYRand {
public:
using result_type = u64;
explicit WYRand(u64 seed) : s(seed) {}
static constexpr result_type min() { return 0; }
static constexpr result_type max() { return -1; }
result_type operator()() {
s += 0x2d358dccaa6c78a5;
auto x = (u128)s * (s ^ 0x8bb84b93962eacc9);
return (u64)(x ^ (x >> 64));
}
private:
uint64_t s;
};
using Random = WYRand;
inline Random get_random() { return Random(std::random_device()()); }
namespace internal {
inline Random global_gen = get_random();
}
inline Random& global_gen() { return internal::global_gen; }
template <class G>
concept random_64 = std::uniform_random_bit_generator<G> &&
std::same_as<u64, std::invoke_result_t<G&>> &&
G::min() == u64(0) && G::max() == u64(-1);
namespace internal {
// random choice from [0, upper]
template <random_64 G> u64 uniform_u64(u64 upper, G& gen) {
if (upper == 0) return 0;
u64 mask = (std::bit_floor(upper) << 1) - 1;
while (true) {
u64 r = gen() & mask;
if (r <= upper) return r;
}
}
// random choice from [0, upper], faster than uniform_u64
template <random_64 G> u64 random_u64(u64 upper, G& gen) {
return (u64)(((u128)(upper) + 1) * gen() >> 64);
}
} // namespace internal
template <class T, random_64 G> T uniform(T lower, T upper, G& gen) {
return T(lower + internal::uniform_u64(u64(upper) - u64(lower), gen));
}
template <class T> T uniform(T lower, T upper) {
return uniform(lower, upper, global_gen());
}
template <std::unsigned_integral T, random_64 G> T uniform(G& gen) {
return T(gen());
}
template <std::signed_integral T, random_64 G> T uniform(G& gen) {
return T(gen() + (u64)std::numeric_limits<T>::min());
}
template <class T, random_64 G>
requires requires {
{ T::mod() } -> std::integral;
}
T uniform(G& gen) {
return T(uniform(0, T::mod() - 1, gen));
}
template <class T> T uniform() { return uniform<T>(global_gen()); }
template <class T, random_64 G> T random(T lower, T upper, G& gen) {
return T(lower + internal::random_u64(u64(upper) - u64(lower), gen));
}
template <class T> T random(T lower, T upper) {
return random(lower, upper, global_gen());
}
template <random_64 G> bool uniform_bool(G& gen) { return gen() & 1; }
inline bool uniform_bool() { return uniform_bool(global_gen()); }
// select 2 elements from [lower, uppper]
template <class T, random_64 G>
std::pair<T, T> uniform_pair(T lower, T upper, G& gen) {
assert(upper - lower >= 1);
T a, b;
do {
a = uniform(lower, upper, gen);
b = uniform(lower, upper, gen);
} while (a == b);
if (a > b) std::swap(a, b);
return {a, b};
}
template <class T> std::pair<T, T> uniform_pair(T lower, T upper) {
return uniform_pair(lower, upper, global_gen());
}
// random 0.0 <= X < 1.0
template <class G> inline double random_01(G& gen) {
constexpr double inv = 1.0 / ((double)(u64(1) << 63) * 2);
return double(gen()) * inv;
}
inline double random_01() { return random_01(global_gen()); }
} // namespace yosupo
namespace yosupo {
namespace internal {
template <class T> struct CombState {
int size = 1;
std::vector<T> fact = {T(1)};
std::vector<T> inv_fact = {T(1)};
std::vector<T> inv = {T(0)};
void extend() {
fact.resize(2 * size);
inv_fact.resize(2 * size);
inv.resize(2 * size);
for (int i = size; i < 2 * size; i++) {
fact[i] = fact[i - 1] * T(i);
}
inv_fact[2 * size - 1] = fact[2 * size - 1].inv();
for (int i = 2 * size - 1; i >= size + 1; i--) {
inv_fact[i - 1] = inv_fact[i] * T(i);
}
for (int i = size; i < 2 * size; i++) {
inv[i] = inv_fact[i] * fact[i - 1];
}
size *= 2;
}
};
template <class T> CombState<T>& get_comb_state(int n) {
static CombState<T> state;
while (state.size <= n) state.extend();
return state;
}
} // namespace internal
template <class T> T fact(int x) {
assert(0 <= x);
return internal::get_comb_state<T>(x).fact[x];
}
template <class T> T inv_fact(int x) {
assert(0 <= x);
return internal::get_comb_state<T>(x).inv_fact[x];
}
template <class T> T inv(int x) {
assert(0 <= x);
return internal::get_comb_state<T>(x).inv[x];
}
namespace internal {
template <class T> T comb(int n, int k) {
return fact<T>(n) * inv_fact<T>(k) * inv_fact<T>(n - k);
}
} // namespace internal
template <class T> T comb(int n, int k) {
assert(0 <= k);
if (0 <= n && n < k) return 0;
if (n >= 0) {
return internal::comb<T>(n, k);
}
T x = internal::comb<T>(k - n - 1, k);
if (k % 2) x = -x;
return x;
}
} // namespace yosupo
#include <bitset>
#include <iostream>
using namespace yosupo;
using std::abs, std::pow, std::sqrt;
using std::array, std::vector, std::string, std::queue, std::deque;
using std::countl_zero, std::countl_one, std::countr_zero, std::countr_one;
using std::istream, std::ostream, std::cerr, std::endl;
using std::min, std::max, std::swap;
using std::pair, std::tuple, std::bitset;
using std::popcount;
using std::priority_queue, std::set, std::multiset, std::map;
using std::views::iota, std::views::reverse;
namespace ranges = std::ranges;
using ranges::sort, ranges::copy_n;
using uint = unsigned int;
using ll = long long;
using ull = unsigned long long;
constexpr ll TEN(int n) { return (n == 0) ? 1 : 10 * TEN(n - 1); }
template <class T> using V = vector<T>;
template <class T> using VV = V<V<T>>;
#ifdef YOSUPO_LOCAL
struct PrettyOS {
ostream& os;
bool first;
template <class T> auto operator<<(T&& x) {
if (!first) os << ", ";
first = false;
os << yosupo::dump(x);
return *this;
}
};
template <class... T> void dbg0(T&&... t) {
(PrettyOS{cerr, true} << ... << t);
}
#define dbg(...) \
do { \
cerr << __LINE__ << " : " << #__VA_ARGS__ << " = "; \
dbg0(__VA_ARGS__); \
cerr << endl; \
} while (false);
#else
#define dbg(...)
#endif
using mint = ModInt998244353;
using mvec = ModVec<998244353>;
Scanner sc = Scanner(stdin);
Printer pr = Printer(stdout);
V<mint> gen0(int h, int s, int t, int turn) {
const int len = 2 * (h + 1);
const int rt = h + (h - t);
const int B = 1000;
mvec base = {1, 1, 1};
mvec base2 = {1};
V<mvec> bases(B + 1);
bases[0] = {1};
for (int i : iota(0, B)) {
bases[i + 1] = bases[i] * base;
}
mvec dp(len);
dp[s] = 1;
V<mint> vec(turn + 1);
int phase_start = 0;
while (phase_start <= turn) {
dbg(phase_start);
for (int step: iota(0, min(turn - phase_start + 1, B))) {
int phase = phase_start + step;
int k = int(bases[step].size());
{
int p = (t + phase) % len;
while (p < int(dp.size()) + k) {
for (int i : iota(0, min(p + 1, k))) {
if (p - i < 0) continue;
if (p - i >= int(dp.size())) continue;
vec[phase] += dp[p - i] * bases[step][i];
}
p += len;
}
}
{
int p = (rt + phase) % len;
while (p < int(dp.size()) + k) {
for (int i : iota(0, min(p + 1, k))) {
if (p - i < 0) continue;
if (p - i >= int(dp.size())) continue;
vec[phase] -= dp[p - i] * bases[step][i];
}
p += len;
}
}
}
dp *= bases[B];
while (int(dp.size()) > len) {
mint x = dp.back();
dp[(int(dp.size()) - 1) % len] += x;
dp.pop_back();
}
phase_start += B;
}
return vec;
}
// O(HT)
V<mint> gen1(int h, int s, int t, int turn) {
V<mint> dp(h);
dp[s] = 1;
V<mint> vec(turn + 1);
for (int phase : iota(0, turn + 1)) {
vec[phase] = dp[t];
V<mint> ndp = dp;
for (int i : iota(0, h)) {
if (i > 0) ndp[i - 1] += dp[i];
if (i < h - 1) ndp[i + 1] += dp[i];
}
dp = ndp;
}
return vec;
}
V<mint> gen(int h, int s, int t, int turn) {
if (h >= 3000) {
// For large h, use the faster gen0
return gen0(h, s, t, turn);
} else {
// For small h, use the more straightforward gen1
return gen1(h, s, t, turn);
}
}
void test() {
for (int _ : iota(0, 1000)) {
int h = uniform(2, 100);
int s = uniform(0, h - 1);
int t = uniform(0, h - 1);
int turn = uniform(0, 100);
auto v0 = gen0(h, s, t, turn);
auto v1 = gen1(h, s, t, turn);
assert(v0 == v1);
}
}
int main() {
//test();
//return 0;
int h, w, turn, a, b, c, d;
sc.read(h, w, turn, a, b, c, d);
a--; b--; c--; d--;
auto v0 = gen(h, a, c, turn);
auto v1 = gen(w, b, d, turn);
//dbg(v0, v1);
auto v = V<mint>(turn + 1);
for (int i : iota(0, turn + 1)) {
v[i] = v0[i] * v1[i];
}
// for (int t : iota(0, turn + 1)) {
// for (int i : iota(0, t)) {
// v[t] -= v[i] * comb<mint>(t, i);
// }
// }
// mint ans = v[turn];
mint result = 0;
for (int k = 0; k <= turn; ++k) {
mint term = comb<mint>(turn, k) * v[k];
if ((turn - k) % 2 == 1) {
result -= term;
} else {
result += term;
}
}
mint ans = result;
pr.writeln(ans.val());
return 0;
}
Submission Info
Submission Time |
|
Task |
D - King |
User |
yosupo |
Language |
C++ 23 (gcc 12.2) |
Score |
1000 |
Code Size |
59199 Byte |
Status |
AC |
Exec Time |
13770 ms |
Memory |
26040 KiB |
Compile Error
Main.cpp: In function ‘void test()’:
Main.cpp:1992:14: warning: unused variable ‘_’ [-Wunused-variable]
1992 | for (int _ : iota(0, 1000)) {
| ^
Judge Result
Set Name |
Sample |
All |
Score / Max Score |
0 / 0 |
1000 / 1000 |
Status |
|
|
Set Name |
Test Cases |
Sample |
00_sample_00.txt, 00_sample_01.txt |
All |
00_sample_00.txt, 00_sample_01.txt, 01_random_00.txt, 01_random_01.txt, 01_random_02.txt, 01_random_03.txt, 01_random_04.txt, 01_random_05.txt, 02_max_00.txt, 02_max_01.txt, 02_max_02.txt, 03_corner_00.txt, 03_corner_01.txt, 04_sqrt_00.txt, 04_sqrt_01.txt, 04_sqrt_02.txt, 04_sqrt_03.txt, 04_sqrt_04.txt |
Case Name |
Status |
Exec Time |
Memory |
00_sample_00.txt |
AC |
1 ms |
3468 KiB |
00_sample_01.txt |
AC |
1 ms |
3380 KiB |
01_random_00.txt |
AC |
6819 ms |
18804 KiB |
01_random_01.txt |
AC |
8150 ms |
18364 KiB |
01_random_02.txt |
AC |
3245 ms |
13816 KiB |
01_random_03.txt |
AC |
4156 ms |
14800 KiB |
01_random_04.txt |
AC |
27 ms |
13704 KiB |
01_random_05.txt |
AC |
31 ms |
13792 KiB |
02_max_00.txt |
AC |
13719 ms |
26024 KiB |
02_max_01.txt |
AC |
13701 ms |
26040 KiB |
02_max_02.txt |
AC |
13770 ms |
25936 KiB |
03_corner_00.txt |
AC |
4108 ms |
20428 KiB |
03_corner_01.txt |
AC |
8372 ms |
24536 KiB |
04_sqrt_00.txt |
AC |
149 ms |
13768 KiB |
04_sqrt_01.txt |
AC |
295 ms |
13668 KiB |
04_sqrt_02.txt |
AC |
555 ms |
13792 KiB |
04_sqrt_03.txt |
AC |
1136 ms |
13792 KiB |
04_sqrt_04.txt |
AC |
2370 ms |
13688 KiB |