#include #include #include #include "int128g.c" #define LONG long long int #define ULONG unsigned long long int /******************************************************************************************/ /* Zp utilities */ /******************************************************************************************/ #define UINT64 unsigned long long //typedef struct { // UINT64 s; /* shift */ // UINT64 v; /* reciprocal of d */ // UINT64 d0; /* divisor shifted up */ // UINT64 d1; //} recint; // //recint recip1(UINT64 p); //UINT64 mulrec64(UINT64 a, UINT64 b, recint v); ULONG seed; ULONG mult; LONG rand64s(LONG p) { LONG x,y; extern ULONG seed, mult; seed = mult*seed; x = seed >> 32; seed = mult*seed; y = seed >> 32; x = (x<<31) | y; x = x % p; return(x); } int min32s(int a, int b) { if( ab ) return a; else return b; } LONG add64s(LONG a, LONG b, LONG p) { LONG t; t = (a-p)+b; t += (t>>63) & p; return t; } LONG sub64s(LONG a, LONG b, LONG p) { LONG t; t = a-b; t += (t>>63) & p; return t; } LONG neg64s(LONG a, LONG p) { return (a==0) ? 0 : p-a; } LONG mul64s(LONG a, LONG b, LONG p) { LONG q, r; __asm__ __volatile__( \ " mulq %%rdx \n\t" \ " divq %4 \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "rm"(p)); return r; } /* z += a1:a0 */ #define zadd(z,a0,a1) __asm__(\ " addq %4, %0 \n\t" \ " adcq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z -= a1:a0 */ #define zsub(z,a0,a1) __asm__(\ " subq %4, %0 \n\t" \ " sbbq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z = a*b */ #define zmul(z,a,b) __asm__(\ " mulq %%rdx \n\t" \ : "=a"(z[0]), "=d"(z[1]) : "a"(a), "d"(b)) /* z += a*b */ #define zfma(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " addq %%rax, %0 \n\t" \ " adcq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z -= a*b */ #define zfms(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " subq %%rax, %0 \n\t" \ " sbbq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z[0] = z % p */ /* z[1] = z / p */ /* quotient can overflow */ #define zdiv(z,p) __asm__(\ " divq %4 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1]), "r"(p)) /* z = z % p safe */ #define zmod(z,p) __asm__(\ " divq %4 \n\t" \ " xorq %0, %0 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1] < p ? z[1] : z[1] % p), "r"(p)) /* z = z << s */ #define zshl(z,s) __asm__(\ " shldq %%cl, %0, %1 \n\t" \ " shlq %%cl, %0 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "c"(s)) /* c^(-1) mod p assuming 0 < c < p < 2^63 */ LONG modinv64s( LONG c, LONG p ) { LONG d,r,q,r1,c1,d1; d = p; c1 = 1; d1 = 0; while( d != 0 ) { q = c / d; r = c - q*d; r1 = c1 - q*d1; c = d; c1 = d1; d = r; d1 = r1; } if( c!=1 ) return( 0 ); if( c1 < 0 ) c1 += p; return( c1 ); } /* a^n mod p assuming 0 <= a < p < 2^63 */ LONG powmod64s( LONG a, LONG n, LONG p ) { LONG r,s; a += (a>>63) & p; // protect from bad input if( n==0 ) return 1; if( n==1 ) return a; for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mul64s(r,s,p); s = mul64s(s,s,p); } return r; } /******************************************************************************************/ /* Polynomial routines */ /******************************************************************************************/ void vecfill64s( LONG x, LONG *A, int n ) { int i; for( i=0; i0; i-- ) if( A[i]!=0 ) printf("%lld*x^%d+",A[i],i); printf("%lld;\n",A[0]); return; } LONG poleval64s(LONG *a, int d, LONG x, LONG p) { int i; LONG r; recint P; P = recip1(p); if( d==-1 ) return 0; // a[0]+x(a[1]+x(a[2]+x(a[3]))) for( r=a[d],i=d-1; i>=0; i-- ) r = add64s(a[i], mulrec64(x,r,P), p); return r; } void polmultieval64s(LONG *a, int d, LONG *x, LONG *y, int n, LONG p) { int i; for( i=0; i=0 && c[da]==0 ) da--; return da; } if( da=0 && c[da]==0 ) da--; return da; } if( da>db ) { for ( i=db+1; i<=da; i++ ) c[i] = a[i]; return da; } else { for( i=da+1; i<=db; i++ ) c[i] = neg64s(b[i],p); return db; } } int polsubmul( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p ) { // compute A = A - (ax+b) B efficiently LONG t; int i; ULONG z[2]; if( dB==-1 ) return dA; // B = 0 z[0] = z[1] = 0LL; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mul64s(b,B[0],p) ; A[0] = sub64s(A[0],t,p); for( i=1; i<=dB; i++ ) { zmul(z,a,B[i-1]); zfma(z,b,B[i]); zmod(z,p); t = A[i]-z[0]; A[i] = t + ((t>>63)&p); } // update leading term from B t = mul64s(a,B[dB],p); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; return dA; } /* compute gcd(A,B) and put gcd in A and return it's degree */ int polsubmulP( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p, recint P ) { // compute A = A - (ax+b) B efficiently LONG s,t; int i, d; if( dB==-1 ) return dA; // B = 0 d = dA; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mulrec64(b,B[0],P); A[0] = sub64s(A[0],t,p); //for( i=1; i<=dB; i++ ) { t = mul64s(a,B[i-1],p); t = add64s(t,mul64s(b,B[i],p),p); A[i] = sub64s(A[i],t,p); } for( i=1; i<=dB; i++ ) { t = mulrec64(a,B[i-1],P); t = add64s(t,mulrec64(b,B[i],P),p); A[i] = sub64s(A[i],t,p); } // update leading term from B t = mulrec64(a,B[dB],P); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; if( dA==d ) printf("polsubmul failure: dAin=%d dAout=%d\n",d,dA); return dA; } /* compute C(x) = A(x)^2 mod p and return deg(C) */ /* we allow C to overwrite A i.e. polsqr64s(A,A,d,p) */ int polsqr64s( LONG * A, LONG * C, int d, LONG p ) { int i,k,m,dc; ULONG z[2]; if( d<0 ) return d; for( k=2*d; k>=0; k-- ) { m = min32s(k,d); i = max32s(0,k-d); z[0] ^= z[0]; // = 0? z[1] ^= z[1]; // = 0? while( i=p ) z[1] -= p; zfma(z,A[i++],A[m--]); } if( i=p ) z[1] -= p; } zadd(z,z[0],z[1]); if( z[1]>=p ) z[1] -= p; if( i==m ) zfma(z,A[i],A[i]); zmod(z,p); C[k] = z[0]; } for( dc = 2*d; dc>=0 && C[dc]==0; dc-- ); // Why is this loop here? Z_p has no zero-divisors. // Because p may not be prime!! return( dc ); } /* compute C(x) = A(x) * B(x) mod p and return deg(C) */ /* we allow C to overwrite either A or B i.e. polmul64s(A,B,A,da,db,p) */ int polmul64s( LONG * A, LONG * B, LONG * C, int da, int db, LONG p) { int i,k,m; ULONG z[2]; if( da<0 || db<0 ) return da; int dc = da+db; for( k=dc; k>=0; k-- ) { i = max32s(0,k-db); m = min32s(k,da); z[0] ^= z[0]; // = 0? z[1] ^= z[1]; // = 0? while( i=p ) z[1] -= p; zfma(z,A[i],B[k-i]); i++; } if( i==m ) { zfma(z,A[i],B[k-i]); if( z[1]>=p ) z[1] -= p; } zmod(z,p); C[k] = z[0]; } for( ; dc>=0 && C[dc]==0; dc-- ); return( dc ); } /* divide A by B and put the remainder and quotient in A */ /* return the degree of the remainder */ int poldiv64s( LONG * A, LONG * B, int da, int db, LONG p ) { int dq,dr,k,j,m; LONG t,inv; ULONG z[2]; if( db<0 ) { printf("division by zero\n"); exit(1); } if( da=0; k-- ) { z[0] = 0ll; z[1] = 0ll; m = min32s(dr,k); j = max32s(0,k-dq); //for( j=max32s(0,k-dq); j<=m; j++ ) { t -= ((LONG) B[j])*A[k-j+db]; t += (t>>63) & M; } while( j=p ) z[1] -= p; zfma(z,B[j],A[k-j+db]); j++; } if( j==m ) zfma(z,B[j],A[k-j+db]); if( z[1]>=p ) z[1] -= p; zmod(z,p); t = A[k] - z[0]; t += (t>>63) & p; if( k>=db && inv!=1 ) t = mul64s(t,inv,p); A[k] = t; } while( dr>=0 && A[dr]==0 ) dr--; return( dr ); } /* make polynomial in A monic */ void monic64s( LONG *A, int d, LONG p ) { int i; LONG inv; if( d<0 || A[d]==1 ) return; inv = modinv64s(A[d],p); for( i=0; i0 && da-db==1 ) { // normal case u = modinv64s(D[db],p); a = mulrec64(C[da],u,P); // a = mul64s(C[da],u,p); b = mulrec64(a,D[db-1],P); // b = mul64s(a,D[db-1],p); b = mulrec64(u,sub64s(C[da-1],b,p),P); // quotient = a x + b // b = mul64s(u,sub64s(C[da-1],b,p),p); // quotient = a x + b dr = polsubmulP(C,D,a,b,da,db,p,P); // C = C - (a x + b) D // dr = polsubmul(C,D,a,b,da,db,p); // C = C - (a x + b) D if( dr>=db ) printf("failure\n"); } else dr = poldiv64s(C,D,da,db,p); if( dr<0 ) { /* D|C so gcd(A,B)=D */ if( D!=A ) polcopy64s(D,db,A); monic64s( A, db, p ); return db; } R = C; C = D; D = R; da = db; db = dr; //printf("da=%d db=%d\n",da,db); } } /* C(x) := A(x)^n mod B(x) mod p; 0<=deg(A)=db ) da = poldiv64s(A,B,da,db,p); // reduce A mod B first for( k=0; n>0; k++ ) { b[k]=n&1; n=n/2; } polcopy64s(A,da,C); dc = da; k--; while( k>0 ) { k--; // Main step: compute C := C^2 mod B in Zp[x] //dc = polmul64s(C,C,R,dc,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = polsqr64s(C,R,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = poldiv64s(R,B,dc,db,p); polcopy64s(R,dc,C); //printf("deg(C) = %d; C = ",dc); polprint64s(C,dc); if( b[k]==1 ) { //printf(" b[%d]=%d \n", k, b[k] ); dc = polmul64s(A,C,R,da,dc,p); //printf("deg(R) = %d; R = ",dc); polprint64s(R,dc); dc = poldiv64s(R,B,dc,db,p); polcopy64s(R,dc,C); //printf("deg(C) = %d; C = ",dc); polprint64s(C,dc); } } return dc; } // Input f in Zp[x] of degree d > 0, a known product of d linear factors. // Output roots of f in R. // The input array f is destroyed. // W is a scratch array of size at least 3*d void polsplit64s( LONG *f, int d, LONG *R, LONG *W, LONG p ) { int da,dg; LONG alpha, A[2]; if( d==1 ) { alpha = p-f[0]; R[0] = alpha; return; } alpha = rand64s(p); A[1] = 1; A[0] = alpha; da = polpowmod64s( A, (p-1)/2, f, 1, d, W, W+d, p ); if( da==0 ) return polsplit64s(f,d,R,W,p); // alpha is unlucky, try again W[0] = add64s(W[0],1,p); // W = (x+alpha)^((p-1)/2) + 1 mod f polcopy64s( f, d, W+d ); dg = polgcd64s( W, W+d, da, d, p ); // g = gcd( W, f ) in W if( dg==0 ) return polsplit64s(f,d,R,W,p); // g = 1 ==> alpha is unlucky, try again poldiv64s(f,W,d,dg,p); // compute quotient q = f/g destroying f polcopy64s(W,dg-1,f); // f = [ g mod x^dg followed by q ] polsplit64s(f+dg,d-dg,R,W,p); f[dg] = 1; polsplit64s(f,dg,R+d-dg,W,p); return; } int polroots64s( LONG * f, int d, LONG * R, LONG *W, LONG p ) { int i, da, dg; LONG A[2]; extern ULONG seed,mult; clock_t st,et; printf("roots: deg(f)=%d\n",d); // printf("f := "); polprint64s(f,d); for( i=0; i0 ) { R[0]=0; return( 1 + polroots64s(f+i,d-i,R+1,W,p) ); } if( f[d]!=1 ) monic64s(f,d,p); A[1] = 1; A[0] = 0; st = clock(); da = polpowmod64s( A, p-1, f, 1, d, W, W+d, p ); // W = x^(p-1) mod f et = clock(); printf("Roots: powmod: x^(p-1) mod f = x^%d + ... time = %10d ms\n", da, (et-st)/1000 ); //printf("da = %d, a := ",da); polprint64s(W,da); if( da==0 && W[0]==1 ) dg = d; // f is all linear factors else { W[0] = sub64s(W[0],1,p); dg = polgcd64s( f, W, d, da, p ); } // f = gcd(f,W-1) //printf("g := "); polprint64s(f,dg); printf("Roots: def(f)=%d #roots=%d\n",d,dg); if( dg==0 ) return 0; seed = 1; mult = 6364136223846793003ll; st = clock(); polsplit64s( f, dg, R, W, p ); et = clock(); printf("Roots: split time=%10d ms\n", (et-st)/1000 ); return dg; // number of roots in R } int BerlekampMassey64s( LONG *a, int N, LONG *L, LONG *W, LONG p ) { // Input sequence a = [a1,a2,a3,...,aN] // Output polynomial Lambda(x) is written to L // Uses the half extended Euclidean algorithm int i,m,n,dr,dq,dr0,dr1,dv0,dv1,dt; LONG *r,*q,*r0,*r1,*v0,*v1,*t,u,A,b; //recint P; while( N>0 && a[N-1]==0 ) N--; // ignore leading zeroes n = N/2; N = 2*n; if( N==0 ) return -1; m = N-1; // W is space for r0 = x^N and r1 of degree m and v0 and v1 of degree at most n r0 = W; r1 = r0+N+1; v0 = r1+N; v1 = v0+n+1; vecfill64s(0,r0,N); r0[N] = 1; dr0 = N; // r0 = x^(2*n) for(i=0; i=0 && r1[dr1]==0; dr1--); // r1 = sum(a[m-i]*x^i,i=0..m) if( dr1==-1 ) return -1; dv0 = -1; // v0 = 0 v1[0] = 1; dv1 = 0; // v1 = 1 //P = recip1(p); while( n <= dr1 ) { if( dr1>0 && dr0-dr1==1 ) { // normal case u = modinv64s(r1[dr1],p); A = mul64s(r0[dr0],u,p); b = mul64s(A,r1[dr1-1],p); b = mul64s(u,sub64s(r0[dr0-1],b,p),p); // quotient q = A x + b //dr = polsubmulP(r0,r1,A,b,dr0,dr1,p,P); // r0 = r0 - (A x + b) r1 dr = polsubmul(r0,r1,A,b,dr0,dr1,p); // r0 = r0 - (A x + b) r1 // dt = polsubmulP(v0,v1,A,b,dv0,dv1,p,P); // v0 = v0 - (A x + b) v1 dt = polsubmul(v0,v1,A,b,dv0,dv1,p); // v0 = v0 - (A x + b) v1 } else { dr = poldiv64s(r0,r1,dr0,dr1,p); q = r0+dr1; dq = dr0-dr1; // q = quo(r0,r1) dt = polmul64s(q,v1,L,dq,dv1,p); dt = polsub64s(v0,L,v0,dv0,dt,p); } r = r0; r0 = r1; r1 = r; dr0 = dr1; dr1 = dr; // r0,r1 = r1,rem(r0,r1) t = v0; v0 = v1; v1 = t; dv0 = dv1; dv1 = dt; // v0,v1 = v1,v0 - q*v1 //printf("r0 = "); polprint64s(r0,dr0); //printf("r1 = "); polprint64s(r1,dr1); //printf("v0 = "); polprint64s(v0,dv0); //printf("v1 = "); polprint64s(v1,dv1); } if( dv1>=0 ) { polcopy64s(v1,dv1,L); monic64s(L,dv1,p); } return dv1; } void polLambda64s( LONG *R, int n, LONG *L, LONG *W, LONG p ) { // L must be length n+1 int m; //if( n==0 ) { printf("n=0\n"); } if( n==1 ) { L[0] = neg64s(R[0],p); L[1] = 1; return; } m = n/2; polLambda64s( R, m, W, L, p ); polLambda64s( R+m, n-m, W+m+1, L, p ); //for( i=0; i