如何加快系列一代?系列

2023-09-11 02:04:03 作者:ー個亽の江湖

问题需要生成一个序列类似于Fibonacci序列的第n 元素。然而,这是一个有点棘手,因为 N 是非常大的(1< = N< = 10 ^ 9)。答案然后模1000000007.的序列被定义如下:

使用生成函数,我得到下式:

如果我使用序列的方法,那么答案可能是模,但它运行非常缓慢。事实上,我得到了期限超过很多次。我还试图用一个表来pre-产生一些初始值(高速缓存),它仍然不够快。此外,与10 ^ 9比较的元素,我可以在阵列/矢量(C ++)存储的最大数量是太小了,所以我想这个方法不擦出火花。 如果我用的是直接式那么它运行速度非常快,但仅适用于 N 这是小的。对于 N 大,双会得到截断,再加上我不能mod我的答案与数字,因为模只适用于整数。 我跑出去的想法,我认为必须有一个很好的伎俩来解决这个问题,遗憾的是我不能想到一个。任何想法是极大的AP preciated。

下面是我最初的方法:

 的#include<的iostream>
#包括<载体>
#包括<字符串>
#包括<算法>
#包括< CMATH>
#包括<&了cassert GT;
#包括<位集合>
#包括< fstream的>
#包括<了iomanip>
#包括<集>
#包括<栈>
#包括< sstream>
#包括< cstdio>
#包括<地图>
#包括< CMATH>

使用名字空间std;

的typedef无符号长长ULL;

ULL count_fair_coins_by_generating_function(ULL N){
    N--;
    返回
        (SQRT(3.0)+ 1)/((SQRT(3.0) -  1)* 2 * SQRT(3.0))* POW(2 /(SQRT(3.0) -  1)中,n * 1.0)
        +
        (1  - 的sqrt(3.0))/((SQRT(3.0)+ 1)* 2 * SQRT(3.0))* POW(-2 /(SQRT(3.0)+ 1)中,n * 1.0);
}

ULL count_fair_coins(ULL N){
    如果(正== 1){
        返回1;
    }
    否则,如果(N == 2){
        返回3;
    }
    其他 {
        ULL A1 = 1;
        ULL A2 = 3;
        ULL结果;
        对于(ULL I = 3; I< = N; ++ I){
            结果=(2 * A2 + 2 * a1)的%1000000007;
            A1 = A2;
            A2 =结果;
        }

        返回结果;
    }
}

无效inout_my_fair_coins(){
    INT test_cases;
    CIN>> test_cases;

    地图< ULL,ULL>高速缓存;
    ULL N;
    而(test_cases--){
        CIN>> N;
        COUT<< count_fair_coins_by_generating_function(N)其中​​;< ENDL;
        COUT<< count_fair_coins(N)其中​​;< ENDL;
    }
}

诠释的main(){
    inout_my_fair_coins();
    返回0;
}
 
Redmi手机 AIoT双引擎提速,发布X系列三大新品

更新 由于比赛结束后,我贴我的解决方案基于 tskuzzy 理念,为那些有兴趣谁。再次感谢 tskuzzy 。 你可以在这里查看原始问题陈述: HTTP://www.$c$cchef.com/problems/CSUMD 首先,你需要找出那些概率 1的硬币 2硬币,然后得到一些初始值来获得的序列。 完整的解决方案是在这里:

 的#include<的iostream>
#包括<载体>
#包括<字符串>
#包括<算法>
#包括< CMATH>
#包括<&了cassert GT;
#包括<位集合>
#包括< fstream的>
#包括<了iomanip>
#包括<集>
#包括<栈>
#包括< sstream>
#包括< cstdio>
#包括<地图>
#包括< CMATH>

使用名字空间std;

的typedef无符号长长ULL;

常数ULL special_prime = 1000000007;

/ *
    使用生成功能复发:
           | 1如果n = 1
    A_N = | 3如果n = 2
           | 2a_ {N-1} + 2a_ {N-2}如果n> 2

    这种方法可能是最快的,但它不会工作
    因为当n很大,加倍只是不能负担得起。加,
    使用这个公式,我们可以不适用mod的浮点数。
    1< = N< = 21
* /
ULL count_fair_coins_by_generating_function(ULL N){
    N--;
    返回
        (SQRT(3.0)+ 1)/((SQRT(3.0) -  1)* 2 * SQRT(3.0))* POW(2 /(SQRT(3.0) -  1)中,n * 1.0)
        +
        (1  - 的sqrt(3.0))/((SQRT(3.0)+ 1)* 2 * SQRT(3.0))* POW(-2 /(SQRT(3.0)+ 1)中,n * 1.0);
}

/ *
    幼稚的做法,它的作品,但速度很慢。
    有用的测试。
* /
ULL count_fair_coins(ULL N){
    如果(正== 1){
        返回1;
    }
    否则,如果(N == 2){
        返回3;
    }
    其他 {
        ULL A1 = 1;
        ULL A2 = 3;
        ULL结果;
        对于(ULL I = 3; I< = N; ++ I){
            结果=(2 * A2 + 2 * a1)的%1000000007;
            A1 = A2;
            A2 =结果;
        }

        返回结果;
    }
}

结构matrix_2_by_2 {
    ULL米[2] [2];
    ULL一个[2] [2];
    ULL B〔2] [2];

    明确matrix_2_by_2(ULL A00,A01 ULL,ULL A10,A11 ULL){
        米[0] [0] = A00;
        米[0] [1] = A01;
        米[1] [0] = a10中;
        米[1] [1] = A11;
    }

    matrix_2_by_2运算符*(常量matrix_2_by_2和放大器; RHS)const的{
        matrix_2_by_2结果(0,0,0,0);
        result.m [0] [0] =(米[0] [0] * rhs.m [0] [0])+(米[0] [1] * rhs.m [1] [0]);
        result.m [0] [1] =(米[0] [0] * rhs.m [0] [1])+(米[0] [1] * rhs.m [1] [1]);
        result.m [1] [0] =(米[1] [0] * rhs.m [0] [0])+(米[1] [1] * rhs.m [1] [0]);
        result.m [1] [1] =(米[1] [0] * rhs.m [0] [1])+(米[1] [1] * rhs.m [1] [1]);
        返回结果;
    }

    空方(){
        一个[0] [0] = B [0] [0] = M [0] [0];
        一个[0] [1] = B [0] [1] = M [0] [1];
        一个[1] [0] = B [1] [0] = M [1] [0];
        一个[1] [1] = B [1] [1] = M [1] [1];

        米[0] [0] =(一个[0] [0] * B [0] [0])+(一个[0] [1] * B [1] [0]);
        米[0] [1] =(一个[0] [0] * B [0] [1])+(一个[0] [1] * B [1] [1]);
        米[1] [0] =(一个[1] [0] * B [0] [0])+(一个[1] [1] * B [1] [0]);
        米[1] [1] =(一个[1] [0] * B [0] [1])+(一个[1] [1] * B [1] [1]);
    }

    无效MOD(ULL N){
        米[0] [0](%)= N;
        米[0] [1](%)= N;
        米[1] [0](%)= N;
        米[1] [1](%)= N;
    }

    / *
        幂的平方算法
                | 1如果n = 0
                | (1 / X)的n次方如果n< 0
        X ^ N = | x.x ^({(N-1)/ 2})^ 2,如果n是奇数
                | (X ^ {N / 2})^ 2当n为偶数

        下面的算法计算^ P%M
        INT模(INT A,INT磷,INT M){
            很长很长X ​​= ​​1;
            长长的Y = A;

            同时的(p 0){
                如果(第2%== 1){
                    X =(X * Y)%米;
                }

                //平方基地
                Y =(Y * Y)%米;
                P / = 2;
            }

            返回X%C;
        }

        申请矩阵,我们需要一个身份是
        相当于1,然后执行乘法矩阵
        以类似的方式。因此,该算法被定义
        如下:
    * /
    void运算符^ =(ULL P){
        matrix_2_by_2身份(1,0,0,1);

        同时的(p 0){
            如果(第2%){
                身份=运算符*(身份);
                identity.mod(special_prime);
            }

            这 - >正方形();
            这 - > MOD(special_prime);
            P / = 2;
        }

        米[0] [0] = identity.m [0] [0];
        米[0] [1] = identity.m [0] [1];
        米[1] [0] = identity.m [1] [0];
        米[1] [1] = identity.m [1] [1];
    }

    朋友
    ostream的&放大器;运营商的LT;<(ostream的&放大器;出来,常量matrix_2_by_2和放大器; RHS){
        出<< rhs.m [0] [0]&其中;&其中; ''<< rhs.m [0] [1];&其中; '\ N';
        出<< rhs.m [1] [0]&其中;&其中; ''<< rhs.m [1] [1];&其中; '\ N';
        返回了;
    }
};

/ *
    | A_ {N + 2} | = | 2 2 | n次方X | 3 |
    | A_ {N + 1} | | 1 0 | | 1 |
* /
ULL count_fair_coins_by_matrix(ULL N){
    如果(正== 1){
        返回1;
    } 其他 {
        matrix_2_by_2米(2,2,1,0);
        平方公尺=(N  -  1);
        返回(M.M [1] [0] * 3 + M.M [1] [1])%1000000007;
    }
}

无效inout_my_fair_coins(){
    INT test_cases;
    scanf函数(%d个,和放大器; test_cases);

    ULL N;
    而(test_cases--){
        scanf函数(%LLU,和放大器; N);
        的printf(%D \ N,count_fair_coins_by_matrix(N));
    }
}

诠释的main(){
    inout_my_fair_coins();
    返回0;
}
 

解决方案

您可以在矩阵指数函数的角度写的序中的条款:

可以使用幂的平方快速评估。这就导致了一个 O(log n)的解决方案,它应该解决的问题以及在时间限制之内。

只是以供将来参考,如果您需要做乘法有大量(不适用在这种情况下,因为答案是采取模1000000007),你应该看看到的 Karatsuba的算法。这给你分二次时期多元化。

The problem requires to generate the n-th element of a sequence that is similar to Fibonacci sequence. However, it's a bit tricky because n is very large (1 <= n <= 10^9). The answer then modulo 1000000007. The sequence is defined as follows:

Using generating function, I obtain the following formula:

If I use the sequence approach then the answer can be modulo, but it run extremely slow. In fact, I got time limit exceed many times. I also tried to use a table to pre-generate some initial values (cache), still it was not fast enough. In addition, the maximum number of elements that I can store in an array/vector (C++) is too small compared with 10^9, so I guess this approach doesn't work either. If I use the direct formula then it run extremely fast but only for n that is small. For n large, double will got truncated, plus I won't be able to mod my answer with that number because modulo only works with integer. I ran out of idea, and I think there must be a very nice trick to work around this problem, unfortunately I just can't think of one. Any idea would be greatly appreciated.

Here's my initial approach:

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    cin >> test_cases;

    map<ull, ull> cache;
    ull n;
    while (test_cases--) {
        cin >> n;
        cout << count_fair_coins_by_generating_function(n) << endl;
        cout << count_fair_coins(n) << endl;
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
} 

Update Since the contest was over, I posted my solution based on tskuzzy idea for those who are interested. Once again, thanks tskuzzy. You can view the original problem statement here: http://www.codechef.com/problems/CSUMD First, you need to figure out the probability of those 1 coin and 2 coin, then get some initial values to obtain the sequence. The complete solution is here:

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

const ull special_prime = 1000000007;

/*
    Using generating function for the recurrence:
           | 1                     if n = 1
    a_n =  | 3                     if n = 2
           | 2a_{n-1} + 2a_{n-2}     if n > 2

    This method is probably the fastest one but it won't work 
    because when n is large, double just can't afford it. Plus,
    using this formula, we can't apply mod for floating point number.
    1 <= n <= 21
*/
ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

/*
    Naive approach, it works but very slow. 
    Useful for testing.
*/
ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

struct matrix_2_by_2 {
    ull m[2][2];
    ull a[2][2];
    ull b[2][2];

    explicit matrix_2_by_2(ull a00, ull a01, ull a10, ull a11) {
        m[0][0] = a00;
        m[0][1] = a01;
        m[1][0] = a10;
        m[1][1] = a11;
    }

    matrix_2_by_2 operator *(const matrix_2_by_2& rhs) const {
        matrix_2_by_2 result(0, 0, 0, 0);
        result.m[0][0] = (m[0][0] * rhs.m[0][0]) + (m[0][1] * rhs.m[1][0]);
        result.m[0][1] = (m[0][0] * rhs.m[0][1]) + (m[0][1] * rhs.m[1][1]);
        result.m[1][0] = (m[1][0] * rhs.m[0][0]) + (m[1][1] * rhs.m[1][0]);
        result.m[1][1] = (m[1][0] * rhs.m[0][1]) + (m[1][1] * rhs.m[1][1]);
        return result;
    }

    void square() {
        a[0][0] = b[0][0] = m[0][0];
        a[0][1] = b[0][1] = m[0][1];
        a[1][0] = b[1][0] = m[1][0];
        a[1][1] = b[1][1] = m[1][1];

        m[0][0] = (a[0][0] * b[0][0]) + (a[0][1] * b[1][0]);
        m[0][1] = (a[0][0] * b[0][1]) + (a[0][1] * b[1][1]);
        m[1][0] = (a[1][0] * b[0][0]) + (a[1][1] * b[1][0]);
        m[1][1] = (a[1][0] * b[0][1]) + (a[1][1] * b[1][1]);
    }

    void mod(ull n) {
        m[0][0] %= n;
        m[0][1] %= n;
        m[1][0] %= n;
        m[1][1] %= n;
    }

    /*
        exponentiation by squaring algorithm
                | 1                    if n = 0 
                | (1/x)^n              if n < 0 
        x^n =   | x.x^({(n-1)/2})^2    if n is odd
                | (x^{n/2})^2          if n is even

        The following algorithm calculate a^p % m
        int modulo(int a, int p, int m){
            long long x = 1;
            long long y = a; 

            while (p > 0) {
                if (p % 2 == 1){
                    x = (x * y) % m;
                }

                // squaring the base
                y = (y * y) % m; 
                p /= 2;
            }

            return x % c;
        }

        To apply for matrix, we need an identity which is
        equivalent to 1, then perform multiplication for matrix 
        in similar manner. Thus the algorithm is defined 
        as follows:
    */
    void operator ^=(ull p) {
        matrix_2_by_2 identity(1, 0, 0, 1);

        while (p > 0) {
            if (p % 2) {
                identity = operator*(identity);
                identity.mod(special_prime);
            }

            this->square();
            this->mod(special_prime);
            p /= 2;
        }

        m[0][0] = identity.m[0][0];
        m[0][1] = identity.m[0][1];
        m[1][0] = identity.m[1][0];
        m[1][1] = identity.m[1][1];
    }

    friend
    ostream& operator <<(ostream& out, const matrix_2_by_2& rhs) {
        out << rhs.m[0][0] << ' ' << rhs.m[0][1] << '\n';
        out << rhs.m[1][0] << ' ' << rhs.m[1][1] << '\n';
        return out;
    }
};

/*
    |a_{n+2}| = |2 2|^n  x |3| 
    |a_{n+1}|   |1 0|      |1|
*/
ull count_fair_coins_by_matrix(ull n) {
    if (n == 1) {
        return 1;
    } else {
        matrix_2_by_2 m(2, 2, 1, 0);
        m ^= (n - 1);
        return (m.m[1][0] * 3 + m.m[1][1]) % 1000000007;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    scanf("%d", &test_cases);

    ull n;
    while (test_cases--) {
        scanf("%llu", &n);
        printf("%d\n", count_fair_coins_by_matrix(n));
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
}  

解决方案

You can write the terms of the sequence in terms of matrix exponentials:

which can be quickly evaluated using exponentiation by squaring. This leads to an O(log n) solution which should solve the problem well within the time constraints.

Just for future reference, if you are required to do multiplication with large numbers (not applicable in this situation since the answer is taken modulo 1000000007), you should look into the Karatsuba algorithm. This gives you sub-quadratic time multiplication.