Submission #30965301


Source Code Expand

#include <bits/stdc++.h>
using namespace std;
using i64 = long long;
#define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
#define all(x) x.begin(),x.end()
#define STRINGIFY(n) #n
#define TOSTRING(n) STRINGIFY(n)
#define PREFIX "#" TOSTRING(__LINE__) "| "
#define debug(x) \
{ \
  std::cout << PREFIX << #x << " = " << x << std::endl; \
}
std::ostream& output_indent(std::ostream& os, int ind) {
  for(int i = 0; i < ind; i++) os << " ";
  return os;
}
template<class S, class T> std::ostream& operator<<(std::ostream& os, const std::pair<S, T>& p);
template<class T> std::ostream& operator<<(std::ostream& os, const std::vector<T>& v);
template<class S, class T> std::ostream& operator<<(std::ostream& os, const std::pair<S, T>& p) {
  return (os << "(" << p.first << ", " << p.second << ")");
}
template<class T> std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
  os << "[";
  for(int i = 0;i < v.size();i++) os << v[i] << ", ";
  return (os << "]");
}
template<class T>
static inline std::vector<T> ndvec(size_t&& n, T val) { return std::vector<T>(n, std::forward<T>(val)); }
template<class... Tail>
static inline auto ndvec(size_t&& n, Tail&&... tail) {
  return std::vector<decltype(ndvec(std::forward<Tail>(tail)...))>(n, ndvec(std::forward<Tail>(tail)...));
}

#include <iostream>

template<uint32_t M>
struct montgomery_modint {
  using Self = montgomery_modint<M>;
  using i32 = int32_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  static constexpr u32 get_r() {
    u32 res = M;
    for(int i = 0; i < 4; i++) res *= 2 - M * res;
    return res;
  }
  static constexpr u32 reduce(u64 a) {
    return (a + u64(u32(a) * u32(-r)) * M) >> 32;
  }

  static constexpr u32 r = get_r();
  static constexpr u32 n2 = -u64(M) % M;

  u32 a;

  constexpr montgomery_modint() : a(0) {}
  constexpr montgomery_modint(int64_t a) : a(reduce(u64(a % M + M) * n2)) {}

  constexpr u32 val() const {
    u32 res = reduce(a);
    return res >= M ? res - M : res;
  }
  constexpr Self pow(u64 r) const {
    Self ans(1); Self aa = *this;
    while(r) { if(r & 1) ans *= aa; aa *= aa; r >>= 1; }
    return ans;
  }
  constexpr Self inv() const { return this->pow(M - 2); }
  constexpr Self& operator+=(const Self& r) {
    if(i32(a += r.a - 2 * M) < 0) a += 2 * M;
    return *this;
  }
  constexpr Self& operator-=(const Self& r) {
    if(i32(a -= r.a) < 0) a += 2 * M;
    return *this;
  }
  constexpr Self& operator*=(const Self& r) {
    a = reduce(u64(a) * r.a);
    return *this;
  }
  constexpr Self& operator/=(const Self& r) {
    *this *= r.inv();
    return *this;
  }
  constexpr Self operator+(const Self r) const { return Self(*this) += r; }
  constexpr Self operator-(const Self r) const { return Self(*this) -= r; }
  constexpr Self operator-() const { return Self() - Self(*this); }
  constexpr Self operator*(const Self r) const { return Self(*this) *= r; }
  constexpr Self operator/(const Self r) const { return Self(*this) /= r; }
  constexpr bool operator==(const Self& r) const {
    return (a >= M ? a - M : a) == (r.a >= M ? r.a - M : r.a);
  }
  constexpr bool operator!=(const Self& r) const {
    return (a >= M ? a - M : a) == (r.a >= M ? r.a - M : r.a);
  }
};

template<uint32_t M>
std::ostream& operator<<(std::ostream& os, const montgomery_modint<M>& m) {
  return os << m.val();
}
template<uint32_t M>
std::istream& operator>>(std::istream& is, montgomery_modint<M>& m) {
  int64_t t;
  is >> t;
  m = montgomery_modint<M>(t);
  return is;
}

template<uint32_t mod>
using modint = montgomery_modint<mod>;

#include <vector>
using namespace std;
using i64 = long long;


constexpr i64 NTT_PRIMES[][2] = {
    {1224736769, 3}, // 2^24 * 73 + 1,
    {1053818881, 7}, // 2^20 * 3 * 5 * 67 + 1
    {1051721729, 6}, // 2^20 * 17 * 59 + 1
    {1045430273, 3}, // 2^20 * 997 + 1
    {1012924417, 5}, // 2^21 * 3 * 7 * 23 + 1
    {1007681537, 3}, // 2^20 * 31^2 + 1
    {1004535809, 3}, // 2^21 * 479 + 1
    {998244353, 3},  // 2^23 * 7 * 17 + 1
    {985661441, 3},  // 2^22 * 5 * 47 + 1
    {976224257, 3},  // 2^20 * 7^2 * 19 + 1
    {975175681, 17}, // 2^21 * 3 * 5 * 31 + 1
    {962592769, 7},  // 2^21 * 3^3 * 17 + 1
    {950009857, 7},  // 2^21 * 4 * 151 + 1
    {943718401, 7},  // 2^22 * 3^2 * 5^2 + 1
    {935329793, 3},  // 2^22 * 223 + 1
    {924844033, 5},  // 2^21 * 3^2 * 7^2 + 1
    {469762049, 3},  // 2^26 * 7 + 1
    {167772161, 3},  // 2^25 * 5 + 1
};

template<const i64 mod, const i64 primitive>
vector<modint<mod>> number_theoretic_transform4(vector<modint<mod>> a) {
  i64 n = a.size();
  vector<modint<mod>> b(a.size());
  auto unit_i = modint<mod>(primitive).pow((mod - 1) / 4);
  for(i64 s = 1, m = n; s < n; s <<= 1, m >>= 1) {
    if(m == 2) {
      for(i64 j = 0;j < s;j++) {
        auto x = a[j + 0];
        auto y = a[j + s];
        b[j + 0] = x + y;
        b[j + s] = x - y;
      }
    }
    else {
      modint<mod> zi1 = 1;
      modint<mod> zi2 = 1;
      modint<mod> zi3 = 1;
      i64 m1 = m >> 2;
      i64 m2 = m >> 1;
      i64 m3 = m1 | m2;
      modint<mod> zeta = modint<mod>(primitive).pow((mod - 1) / m);
      for(i64 i = 0;i < m1;i++) {
        for(i64 j = 0;j < s;j++) {
          auto w = a[j + s * (i + 0)];
          auto x = a[j + s * (i + m1)];
          auto y = a[j + s * (i + m2)];
          auto z = a[j + s * (i + m3)];
          auto wy1 = w + y;
          auto wy2 = w - y;
          auto xz1 = x + z;
          auto xz2 = (x - z) * unit_i;
          b[j + s * (4 * i + 0)] =  wy1 + xz1;
          b[j + s * (4 * i + 1)] = (wy2 + xz2) * zi1;
          b[j + s * (4 * i + 2)] = (wy1 - xz1) * zi2;
          b[j + s * (4 * i + 3)] = (wy2 - xz2) * zi3;
        }
        zi1 = zi1 * zeta;
        zi2 = zi1 * zi1;
        zi3 = zi1 * zi2;
      }
      s <<= 1;
      m >>= 1;
    }
    swap(a, b);
  }
  return a;
}

template<const i64 mod, const i64 primitive>
vector<modint<mod>> inverse_number_theoretic_transform4(vector<modint<mod>> a) {
  i64 n = a.size();
  vector<modint<mod>> b(a.size());
  auto unit_i = modint<mod>(primitive).pow((mod - 1) / 4).inv();
  i64 s = n;
  i64 m = 1;
  if(__builtin_ctzll(n) & 1) {
    s >>= 1;
    m <<= 1;
    for(i64 j = 0;j < s;j++) {
      auto x = a[j + 0];
      auto y = a[j + s];
      b[j + 0] = x + y;
      b[j + s] = x - y;
    }
    swap(a, b);
  }
  for(; s >>= 2, m <<= 2, s >= 1;) {
    {
      modint<mod> zi1 = 1;
      modint<mod> zi2 = 1;
      modint<mod> zi3 = 1;
      i64 m1 = m >> 2;
      i64 m2 = m >> 1;
      i64 m3 = m1 | m2;
      modint<mod> zeta = modint<mod>(primitive).pow((mod - 1) / m).inv();
      for(i64 i = 0;i < m1;i++) {
        for(i64 j = 0;j < s;j++) {
          auto w = a[j + s * (4 * i + 0)];
          auto x = a[j + s * (4 * i + 1)] * zi1;
          auto y = a[j + s * (4 * i + 2)] * zi2;
          auto z = a[j + s * (4 * i + 3)] * zi3;
          auto wy1 = w + y;
          auto wy2 = x + z;
          auto xz1 = w - y;
          auto xz2 = (x - z) * unit_i;
          b[j + s * (i + 0)]  = wy1 + wy2;
          b[j + s * (i + m1)] = xz1 + xz2;
          b[j + s * (i + m2)] = wy1 - wy2;
          b[j + s * (i + m3)] = xz1 - xz2;
        }
        zi1 = zi1 * zeta;
        zi2 = zi1 * zi1;
        zi3 = zi1 * zi2;
      }
    }
    swap(a, b);
  }
  auto inv_n = modint<mod>(n).pow(mod - 2);
  for(int i = 0;i < n;i++) a[i] *= inv_n;
  return a;
}

#include <vector>
using i64 = long long;

std::size_t bound_pow2(std::size_t sz) {
  return 1ll << (__lg(sz - 1) + 1);
}

template<class fps_multiply>
struct FPS {
  struct DFT {
    using C = typename fps_multiply::conv_type;
    std::vector<C> coef;
    DFT(std::vector<C> c): coef(std::move(c)) {}
    DFT clone() const {
      return DFT(coef);
    }
    std::size_t size() const {
      return coef.size();
    }
    C& operator[](int i) {
      return coef[i];
    }
    const C& operator[](int i) const {
      return coef[i];
    }
    DFT& operator+=(const DFT& r) {
      assert(coef.size() == r.size());
      for(int i = 0;i < coef.size(); i++) {
        coef[i] += r[i];
      }
      return (*this);
    }
    DFT& operator-=(const DFT& r) {
      assert(coef.size() == r.size());
      for(int i = 0;i < coef.size(); i++) {
        coef[i] -= r[i];
      }
      return (*this);
    }
    DFT& operator*=(const DFT& r) {
      assert(coef.size() == r.size());
      for(int i = 0;i < coef.size(); i++) {
        coef[i] *= r[i];
      }
      return (*this);
    }
    DFT& operator*=(const C& t) {
      for(int i = 0; i < coef.size(); i++) {
        coef[i] *= t;
      }
      return (*this);
    }
    DFT operator+(const DFT& r) const { return DFT(*this) += r; }
    DFT operator-(const DFT& r) const { return DFT(*this) -= r; }
    DFT operator*(const DFT& r) const { return DFT(*this) *= r; }
    DFT operator*(const C& r) const { return DFT(*this) *= r; }
    FPS idft(int n = -1) && {
      auto res = fps_multiply::idft(std::move(coef));
      if(n > 0) {
        res.resize(n);
      }
      return FPS(std::move(res));
    }
  };
  using T = typename fps_multiply::fps_type;
  std::vector<T> coef;
  FPS(std::vector<T> f): coef(std::move(f)) {}
  void resize(int n) { coef.resize(n, T(0)); }
  FPS resized(int n) const {
    std::vector<T> res(n);
    for(int i = 0;i < n && i < coef.size();i++) res[i] = coef[i];
    return FPS(std::move(res));
  }
  FPS clone() const {
    return FPS(coef);
  }
  std::size_t size() const {
    return coef.size();
  }
  T& operator[](int i) {
    return coef[i];
  }
  const T& operator[](int i) const {
    return coef[i];
  }
  FPS& operator+=(const FPS& r) {
    if(coef.size() < r.size()) coef.resize(r.size());
    for(int i = 0;i < coef.size() && i < r.size(); i++) {
      coef[i] += r[i];
    }
    return (*this);
  }
  FPS& operator-=(const FPS& r) {
    if(coef.size() < r.size()) coef.resize(r.size());
    for(int i = 0;i < coef.size() && i < r.size(); i++) {
      coef[i] -= r[i];
    }
    return (*this);
  }
  FPS& operator*=(const T& t) {
    for(int i = 0; i < coef.size(); i++) {
      coef[i] *= t;
    }
  }
  FPS operator+(const FPS& r) const { return FPS(*this) += r; }
  FPS operator-(const FPS& r) const { return FPS(*this) -= r; }
  FPS operator*(const T& r) const { return FPS(*this) *= r; }
  DFT dft(int n) && {
    assert(!(n & (n - 1)));
    coef.resize(n);
    return DFT(fps_multiply::dft(std::move(coef)));
  }

  FPS inv(int n) const {
    assert(!(n & (n - 1)));
    FPS g = FPS(std::vector<T>{ T(1) / (*this)[0] });
    for(int i = 1;i < n;i <<= 1) {
      DFT gdft = g.resized(i << 1).dft(i << 1);
      FPS e = (gdft * this->resized(i << 1).dft(i << 1)).idft();
      for(int j = 0;j < i;j++) {
        e[j] = T(0);
        e[j + i] = e[j + i] * T(-1);
      }
      FPS f = std::move(gdft *= std::move(e).dft(i << 1)).idft();
      for(int j = 0;j < i;j++) {
        f[j] = g[j];
      }
      g = std::move(f);
    }
    return g;
  }

  FPS diff(int n) const {
    std::vector<T> res(n);
    for(int i = 0; i + 1 < this->size() && i < n; i++) {
      res[i] = coef[i + 1] * T(i + 1);
    }
    return FPS(std::move(res));
  }

  FPS integral(int n) const {
    std::vector<T> res(n);
    int m = std::min(n, int(coef.size() + 1));
    res[0] = T(1);
    for(int i = 1; i < m - 1; i++) {
      res[i] = res[i - 1] * T(i);
    }
    T finv = T(1) / res[m - 1];
    for(int i = m; i --> 1;) {
      res[i] = coef[i - 1] * finv * res[i - 1];
      finv *= T(i);
    }
    res[0] = T(0);
    return FPS(std::move(res));
  }

  // F(0) must not be 0
  FPS log(int n) const {
    assert(!(n & (n - 1)));
    FPS in = this->inv(n);
    FPS di = this->diff(n);
    return (std::move(di).dft(n * 2) * std::move(in).dft(n * 2)).idft().integral(n);
  }

  FPS exp(int n) const {
    assert(!(n & (n - 1)));
    FPS f(std::vector<T>{ T(1) });
    for(i64 i = 1;i < n;i <<= 1 ) {
      FPS flog = f.log(i << 1);
      for(int j = 0; j < (i << 1); j++) {
        flog[j] = (j < coef.size() ? coef[j] - flog[j] : -flog[j]);
      }
      flog[0] += T(1);
      f = (std::move(f).dft(i << 2) * std::move(flog).dft(i << 2)).idft();
      f.resize(1 << i);
    }
    return f;
  }
};

template<const i64 mod, const i64 primitive>
struct fps_ntt_multiply {
  using fps_type = modint<mod>;
  using conv_type = modint<mod>;
  static std::vector<conv_type> dft(std::vector<fps_type> arr) {
    return number_theoretic_transform4<mod, primitive>(std::move(arr));
  }
  static std::vector<fps_type> idft(std::vector<conv_type> arr) {
    return inverse_number_theoretic_transform4<mod, primitive>(std::move(arr));
  }
  static std::vector<conv_type> multiply(std::vector<conv_type> a, const std::vector<conv_type>& b) {
    for(int i = 0;i < a.size();i++) a[i] *= b[i];
    return a;
  }
  static std::vector<conv_type> self_multiply(std::vector<conv_type> a) {
    for(int i = 0;i < a.size();i++) a[i] *= a[i];
    return a;
  }
};

using fp = modint<998244353>;
using fps = FPS<fps_ntt_multiply<998244353, 3>>;

int main() {
  i64 N;
  cin >> N;

  fps A({ fp(0) });
  fps E({ fp(1) });
  E.resize(bound_pow2(N + 1));
  int en = E.size();
  auto de = std::move(E).dft(E.size());
  while(A.size() <= N) {
    i64 n = A.size();
    auto da = A.clone().dft(n * 4);
    fps::DFT da2(vector<fp>(n * 2));
    rep(i,0,n * 2) {
      da2[i] = da[i * 2] * da[i * 2];
    }
    auto db = da2;
    i64 diff = de.size() / db.size();
    rep(i,0,db.size()) {
      db[i] += da[i * 2] * fp(3) + de[i * diff];
    }
    auto B = std::move(db).idft();
    db = std::move(B).dft(n * 4);
    auto B2 = (db * db).idft(2 * n);
    for(int i = B2.size(); i --> 1;) {
      B2[i] = B2[i - 1];
    }
    B2[0] = fp(0);
    B2.resize(4 * n);
    auto C = std::move(db *= (da - std::move(B2).dft(n * 4))).idft(2 * n);
    C.resize(4 * n);
    
    auto dd = da2;
    rep(i,0,dd.size()) {
      dd[i] = dd[i] * fp(3) + da[i * 2] * 3;
    }
    auto D = std::move(dd).idft();
    D[0] -= fp(1);
    auto iD = D.inv(2 * n);
    A.resize(2 * n);
    A += (std::move(C).dft(4 * n) * std::move(iD).dft(4 * n)).idft(2 * n);
  }
  cout << A[N] << endl;
}

Submission Info

Submission Time
Task H - Beautiful Binary Tree
User niuez
Language C++ (GCC 9.2.1)
Score 0
Code Size 14548 Byte
Status TLE
Exec Time 3313 ms
Memory 204204 KiB

Compile Error

./Main.cpp: In function ‘int main()’:
./Main.cpp:455:18: warning: comparison of integer expressions of different signedness: ‘std::size_t’ {aka ‘long unsigned int’} and ‘i64’ {aka ‘long long int’} [-Wsign-compare]
  455 |   while(A.size() <= N) {
      |         ~~~~~~~~~^~~~
./Main.cpp:4:28: warning: unnecessary parentheses in declaration of ‘i’ [-Wparentheses]
    4 | #define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
      |                            ^
./Main.cpp:459:5: note: in expansion of macro ‘rep’
  459 |     rep(i,0,n * 2) {
      |     ^~~
./Main.cpp:4:28: warning: unnecessary parentheses in declaration of ‘i’ [-Wparentheses]
    4 | #define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
      |                            ^
./Main.cpp:464:5: note: in expansion of macro ‘rep’
  464 |     rep(i,0,db.size()) {
      |     ^~~
./Main.cpp:4:42: warning: comparison of integer expressions of different signedness: ‘i64’ {aka ‘long long int’} and ‘std::size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
    4 | #define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
      |                                      ~~~~^~~~~
./Main.cpp:464:5: note: in expansion of macro ‘rep’
  464 |     rep(i,0,db.size()) {
      |     ^~~
./Main.cpp:4:28: warning: unnecessary parentheses in declaration of ‘i’ [-Wparentheses]
    4 | #define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
      |                            ^
./Main.cpp:479:5: note: in expansion of macro ‘rep’
  479 |     rep(i,0,dd.size()) {
      |     ^~~
./Main.cpp:4:42: warning: comparison of integer expressions of different signedness: ‘i64’ {aka ‘long long int’} and ‘std::size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
    4 | #define rep(i,s,e) for(i64 (i) = (s);(i) < (e);(i)++)
      |                                      ~~~~^~~~~
./Main.cpp:479:5: note: in expansion of macro ‘rep’
  479 |     rep(i,0,dd.size()) {
      |     ^~~
./Main.cpp:453:7: warning: unused variable ...

Judge Result

Set Name Sample All
Score / Max Score 0 / 0 0 / 600
Status
AC × 4
AC × 23
TLE × 12
Set Name Test Cases
Sample 00_sample_00.txt, 00_sample_01.txt, 00_sample_02.txt, 00_sample_03.txt
All 00_sample_00.txt, 00_sample_01.txt, 00_sample_02.txt, 00_sample_03.txt, 01_small_00.txt, 01_small_01.txt, 01_small_02.txt, 01_small_03.txt, 01_small_04.txt, 01_small_05.txt, 01_small_06.txt, 01_small_07.txt, 02_medium_00.txt, 02_medium_01.txt, 02_medium_02.txt, 02_medium_03.txt, 02_medium_04.txt, 02_medium_05.txt, 02_medium_06.txt, 02_medium_07.txt, 02_medium_08.txt, 02_medium_09.txt, 03_large_00.txt, 03_large_01.txt, 03_large_02.txt, 03_large_03.txt, 03_large_04.txt, 04_max_00.txt, 04_max_01.txt, 04_max_02.txt, 05_random_00.txt, 05_random_01.txt, 05_random_02.txt, 05_random_03.txt, 05_random_04.txt
Case Name Status Exec Time Memory
00_sample_00.txt AC 8 ms 3616 KiB
00_sample_01.txt AC 2 ms 3428 KiB
00_sample_02.txt AC 4 ms 3444 KiB
00_sample_03.txt AC 273 ms 19392 KiB
01_small_00.txt AC 3 ms 3584 KiB
01_small_01.txt AC 2 ms 3416 KiB
01_small_02.txt AC 2 ms 3564 KiB
01_small_03.txt AC 3 ms 3564 KiB
01_small_04.txt AC 2 ms 3456 KiB
01_small_05.txt AC 2 ms 3560 KiB
01_small_06.txt AC 1 ms 3508 KiB
01_small_07.txt AC 3 ms 3560 KiB
02_medium_00.txt AC 4 ms 3648 KiB
02_medium_01.txt AC 3 ms 3456 KiB
02_medium_02.txt AC 4 ms 3620 KiB
02_medium_03.txt AC 6 ms 3568 KiB
02_medium_04.txt AC 5 ms 3580 KiB
02_medium_05.txt AC 4 ms 3688 KiB
02_medium_06.txt AC 5 ms 3604 KiB
02_medium_07.txt AC 2 ms 3564 KiB
02_medium_08.txt AC 4 ms 3648 KiB
02_medium_09.txt AC 5 ms 3652 KiB
03_large_00.txt TLE 3313 ms 204124 KiB
03_large_01.txt TLE 3313 ms 204064 KiB
03_large_02.txt TLE 3313 ms 204204 KiB
03_large_03.txt TLE 3313 ms 204144 KiB
03_large_04.txt TLE 3313 ms 204108 KiB
04_max_00.txt TLE 3313 ms 204116 KiB
04_max_01.txt TLE 3313 ms 204172 KiB
04_max_02.txt TLE 3313 ms 204116 KiB
05_random_00.txt TLE 3313 ms 183684 KiB
05_random_01.txt TLE 3313 ms 204024 KiB
05_random_02.txt TLE 3313 ms 204172 KiB
05_random_03.txt TLE 3313 ms 204112 KiB
05_random_04.txt AC 2383 ms 132012 KiB