test/src/convolution/convolution_large/convolution_large.test.cpp

View this file on GitHub
Last update: 2023-05-11 13:19:06+09:00
Problem: https://judge.yosupo.jp/problem/convolution_mod_large
Link: https://judge.yosupo.jp/submission/21623

Depends on

Code

#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod_large"

#include <iostream>
#include <cstring>

#include "library/convolution/convolution_large.hpp"

using mint = atcoder::modint998244353;

std::istream& operator>>(std::istream& in, mint& a) {
    long long e; in >> e; a = e;
    return in;
}

// from https://judge.yosupo.jp/submission/21623
namespace nyaan {
    namespace fastio {
        static constexpr uint32_t SZ = 1 << 17;
        char ibuf[SZ];
        char obuf[SZ];
        uint32_t pil = 0, pir = 0, por = 0;

        struct Pre {
            char num[40000];
            constexpr Pre() : num() {
                for (int i = 0; i < 10000; i++) {
                    int n = i;
                    for (int j = 3; j >= 0; j--) {
                        num[i * 4 + j] = n % 10 + '0';
                        n /= 10;
                    }
                }
            }
        } constexpr pre;

        __attribute__((target("avx2"), optimize("O3"))) inline void load() {
            memcpy(ibuf, ibuf + pil, pir - pil);
            pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
            pil = 0;
        }

        __attribute__((target("avx2"), optimize("O3"))) inline void flush() {
            fwrite(obuf, 1, por, stdout);
            por = 0;
        }

        inline void rd(char& c) { c = ibuf[pil++]; }

        template <typename T>
        __attribute__((target("avx2"), optimize("O3"))) inline void rd(T& x) {
            if (pil + 32 > pir) load();
            char c;
            do
                rd(c);
            while (c < '-');
            bool minus = 0;
            if constexpr (std::is_signed<T>::value) {
                if (c == '-') {
                    minus = 1;
                    rd(c);
                }
            }
            x = 0;
            while (c >= '0') {
                x = x * 10 + (c & 15);
                rd(c);
            }
            if constexpr (std::is_signed<T>::value) {
                if (minus) x = -x;
            }
        }

        inline void wt(char c) { obuf[por++] = c; }
        template <typename T>
        __attribute__((target("avx2"), optimize("O3"))) inline void wt(T x) {
            if (por + 32 > SZ) flush();
            if (!x) {
                wt('0');
                return;
            }
            if constexpr (std::is_signed<T>::value) {
                if (x < 0) {
                    wt('-');
                    x = -x;
                }
            }
            if (x >= 10000000000000000) {
                uint32_t r1 = x % 100000000;
                uint64_t q1 = x / 100000000;
                uint32_t r2 = q1 % 100000000;
                uint32_t q2 = q1 / 100000000;
                uint32_t n1 = r1 % 10000;
                uint32_t n2 = r1 / 10000;
                uint32_t n3 = r2 % 10000;
                uint32_t n4 = r2 / 10000;
                if (x >= 1000000000000000000) {
                    memcpy(obuf + por, pre.num + (q2 << 2) + 1, 3);
                    memcpy(obuf + por + 3, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 7, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 11, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 15, pre.num + (n1 << 2), 4);
                    por += 19;
                } else if (x >= 100000000000000000) {
                    uint32_t q3 = (q2 * 205) >> 11;
                    uint32_t r3 = q2 - q3 * 10;
                    obuf[por + 0] = '0' + q3;
                    obuf[por + 1] = '0' + r3;
                    memcpy(obuf + por + 2, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 6, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 10, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 14, pre.num + (n1 << 2), 4);
                    por += 18;
                } else {
                    obuf[por + 0] = '0' + q2;
                    memcpy(obuf + por + 1, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 5, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 9, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 13, pre.num + (n1 << 2), 4);
                    por += 17;
                }
            } else {
                int i = 8;
                char buf[12];
                while (x >= 10000) {
                    memcpy(buf + i, pre.num + (x % 10000) * 4, 4);
                    x /= 10000;
                    i -= 4;
                }
                if (x < 100) {
                    if (x < 10) {
                        wt(char('0' + x));
                    } else {
                        obuf[por + 0] = '0' + x / 10;
                        obuf[por + 1] = '0' + x % 10;
                        por += 2;
                    }
                } else {
                    if (x < 1000) {
                        memcpy(obuf + por, pre.num + (x << 2) + 1, 3);
                        por += 3;
                    } else {
                        memcpy(obuf + por, pre.num + (x << 2), 4);
                        por += 4;
                    }
                }
                memcpy(obuf + por, buf + i + 4, 8 - i);
                por += 8 - i;
            }
        }

        struct Dummy {
            Dummy() { atexit(flush); }
        } dummy;
    }  // namespace fastio
}

int main() {
    std::size_t n, m;
    nyaan::fastio::rd(n);
    nyaan::fastio::rd(m);

    std::vector<mint> a(n), b(m);
    for (auto& e : a) {
        int v;
        nyaan::fastio::rd(v);
        e = v;
    }
    for (auto& e : b) {
        int v;
        nyaan::fastio::rd(v);
        e = v;
    }

    std::vector<mint> c = suisen::convolution_large(a, b);
    for (std::size_t i = 0; i < n + m - 1; ++i) {
        nyaan::fastio::wt(c[i].val());
        nyaan::fastio::wt('\n');
    }
    return 0;
}

#line 1 "test/src/convolution/convolution_large/convolution_large.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod_large"

#include <iostream>
#include <cstring>

#line 1 "library/convolution/convolution_large.hpp"



#include <atcoder/convolution>

#line 1 "library/convolution/convolution_naive.hpp"



#include <vector>

namespace suisen::internal {
    template <typename T, typename R = T>
    std::vector<R> convolution_naive(const std::vector<T>& a, const std::vector<T>& b) {
        const int n = a.size(), m = b.size();
        std::vector<R> c(n + m - 1);
        if (n < m) {
            for (int j = 0; j < m; j++) for (int i = 0; i < n; i++) c[i + j] += R(a[i]) * b[j];
        } else {
            for (int i = 0; i < n; i++) for (int j = 0; j < m; j++) c[i + j] += R(a[i]) * b[j];
        }
        return c;
    }
} // namespace suisen



#line 7 "library/convolution/convolution_large.hpp"

namespace suisen {
    template <typename mint, atcoder::internal::is_static_modint_t<mint>* = nullptr>
    std::vector<mint> convolution_large(const std::vector<mint>& a, const std::vector<mint>& b) {
        static constexpr int max_size = (mint::mod() - 1) & -(mint::mod() - 1);
        static constexpr int half_size = max_size >> 1;
        static constexpr int inv_max_size = atcoder::internal::inv_gcd(max_size, mint::mod()).second;

        const int n = int(a.size()), m = int(b.size());
        if (n + m - 1 <= max_size) return atcoder::convolution(a, b);
        if (n == 0 or m == 0) return {};
        if (std::min(n, m) <= 60) return internal::convolution_naive(a, b);

        const int dn = (n + half_size - 1) / half_size;
        const int dm = (m + half_size - 1) / half_size;

        std::vector<std::vector<mint>> as(dn), bs(dm);
        for (int i = 0; i < dn; ++i) {
            const int offset = half_size * i;
            as[i] = std::vector<mint>(a.begin() + offset, a.begin() + std::min(n, offset + half_size));
            as[i].resize(max_size);
            atcoder::internal::butterfly(as[i]);
        }
        for (int j = 0; j < dm; ++j) {
            const int offset = half_size * j;
            bs[j] = std::vector<mint>(b.begin() + offset, b.begin() + std::min(m, offset + half_size));
            bs[j].resize(max_size);
            atcoder::internal::butterfly(bs[j]);
        }
        std::vector<std::vector<mint>> cs(dn + dm - 1, std::vector<mint>(max_size));
        for (int i = 0; i < dn; ++i) for (int j = 0; j < dm; ++j) {
            for (int k = 0; k < max_size; ++k) cs[i + j][k] += as[i][k] * bs[j][k];
        }
        std::vector<mint> res(n + m - 1);
        for (int i = 0; i < dn + dm - 1; ++i) {
            atcoder::internal::butterfly_inv(cs[i]);
            const int offset = half_size * i;
            const int jmax = std::min(n + m - 1 - offset, max_size);
            for (int j = 0; j < jmax; ++j) {
                res[offset + j] += cs[i][j] * mint::raw(inv_max_size);
            }
        }
        return res;
    }
} // namespace suisen



#line 7 "test/src/convolution/convolution_large/convolution_large.test.cpp"

using mint = atcoder::modint998244353;

std::istream& operator>>(std::istream& in, mint& a) {
    long long e; in >> e; a = e;
    return in;
}

// from https://judge.yosupo.jp/submission/21623
namespace nyaan {
    namespace fastio {
        static constexpr uint32_t SZ = 1 << 17;
        char ibuf[SZ];
        char obuf[SZ];
        uint32_t pil = 0, pir = 0, por = 0;

        struct Pre {
            char num[40000];
            constexpr Pre() : num() {
                for (int i = 0; i < 10000; i++) {
                    int n = i;
                    for (int j = 3; j >= 0; j--) {
                        num[i * 4 + j] = n % 10 + '0';
                        n /= 10;
                    }
                }
            }
        } constexpr pre;

        __attribute__((target("avx2"), optimize("O3"))) inline void load() {
            memcpy(ibuf, ibuf + pil, pir - pil);
            pir = pir - pil + fread(ibuf + pir - pil, 1, SZ - pir + pil, stdin);
            pil = 0;
        }

        __attribute__((target("avx2"), optimize("O3"))) inline void flush() {
            fwrite(obuf, 1, por, stdout);
            por = 0;
        }

        inline void rd(char& c) { c = ibuf[pil++]; }

        template <typename T>
        __attribute__((target("avx2"), optimize("O3"))) inline void rd(T& x) {
            if (pil + 32 > pir) load();
            char c;
            do
                rd(c);
            while (c < '-');
            bool minus = 0;
            if constexpr (std::is_signed<T>::value) {
                if (c == '-') {
                    minus = 1;
                    rd(c);
                }
            }
            x = 0;
            while (c >= '0') {
                x = x * 10 + (c & 15);
                rd(c);
            }
            if constexpr (std::is_signed<T>::value) {
                if (minus) x = -x;
            }
        }

        inline void wt(char c) { obuf[por++] = c; }
        template <typename T>
        __attribute__((target("avx2"), optimize("O3"))) inline void wt(T x) {
            if (por + 32 > SZ) flush();
            if (!x) {
                wt('0');
                return;
            }
            if constexpr (std::is_signed<T>::value) {
                if (x < 0) {
                    wt('-');
                    x = -x;
                }
            }
            if (x >= 10000000000000000) {
                uint32_t r1 = x % 100000000;
                uint64_t q1 = x / 100000000;
                uint32_t r2 = q1 % 100000000;
                uint32_t q2 = q1 / 100000000;
                uint32_t n1 = r1 % 10000;
                uint32_t n2 = r1 / 10000;
                uint32_t n3 = r2 % 10000;
                uint32_t n4 = r2 / 10000;
                if (x >= 1000000000000000000) {
                    memcpy(obuf + por, pre.num + (q2 << 2) + 1, 3);
                    memcpy(obuf + por + 3, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 7, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 11, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 15, pre.num + (n1 << 2), 4);
                    por += 19;
                } else if (x >= 100000000000000000) {
                    uint32_t q3 = (q2 * 205) >> 11;
                    uint32_t r3 = q2 - q3 * 10;
                    obuf[por + 0] = '0' + q3;
                    obuf[por + 1] = '0' + r3;
                    memcpy(obuf + por + 2, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 6, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 10, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 14, pre.num + (n1 << 2), 4);
                    por += 18;
                } else {
                    obuf[por + 0] = '0' + q2;
                    memcpy(obuf + por + 1, pre.num + (n4 << 2), 4);
                    memcpy(obuf + por + 5, pre.num + (n3 << 2), 4);
                    memcpy(obuf + por + 9, pre.num + (n2 << 2), 4);
                    memcpy(obuf + por + 13, pre.num + (n1 << 2), 4);
                    por += 17;
                }
            } else {
                int i = 8;
                char buf[12];
                while (x >= 10000) {
                    memcpy(buf + i, pre.num + (x % 10000) * 4, 4);
                    x /= 10000;
                    i -= 4;
                }
                if (x < 100) {
                    if (x < 10) {
                        wt(char('0' + x));
                    } else {
                        obuf[por + 0] = '0' + x / 10;
                        obuf[por + 1] = '0' + x % 10;
                        por += 2;
                    }
                } else {
                    if (x < 1000) {
                        memcpy(obuf + por, pre.num + (x << 2) + 1, 3);
                        por += 3;
                    } else {
                        memcpy(obuf + por, pre.num + (x << 2), 4);
                        por += 4;
                    }
                }
                memcpy(obuf + por, buf + i + 4, 8 - i);
                por += 8 - i;
            }
        }

        struct Dummy {
            Dummy() { atexit(flush); }
        } dummy;
    }  // namespace fastio
}

int main() {
    std::size_t n, m;
    nyaan::fastio::rd(n);
    nyaan::fastio::rd(m);

    std::vector<mint> a(n), b(m);
    for (auto& e : a) {
        int v;
        nyaan::fastio::rd(v);
        e = v;
    }
    for (auto& e : b) {
        int v;
        nyaan::fastio::rd(v);
        e = v;
    }

    std::vector<mint> c = suisen::convolution_large(a, b);
    for (std::size_t i = 0; i < n + m - 1; ++i) {
        nyaan::fastio::wt(c[i].val());
        nyaan::fastio::wt('\n');
    }
    return 0;
}