// Compile with // gcc -O3 -shared VSolve3.so arrayutil.o polyalg4.o VSolve3.c -fPIC #include #include #define LONG long long int #define ULONG unsigned long long int #include #define UINT64 unsigned long long typedef struct { UINT64 s; /* shift */ UINT64 v; /* reciprocal of d */ UINT64 d0; /* divisor shifted up */ UINT64 d1; } recint; recint recip1(UINT64 p); UINT64 mulrec64(UINT64 a, UINT64 b, recint v); /******************************************************************************************/ /* Zp utilities */ /******************************************************************************************/ #define add64s VSadd64s #define sub64s VSsub64s #define neg64s VSneg64s #define mul64s VSmul64s LONG add64s(LONG a, LONG b, LONG p) { LONG t; t = (a-p)+b; t += (t>>63) & p; return t; } LONG sub64s(LONG a, LONG b, LONG p) { LONG t; t = a-b; t += (t>>63) & p; return t; } LONG neg64s(LONG a, LONG p) { return (a==0) ? 0 : p-a; } LONG mul64s(LONG a, LONG b, LONG p) { LONG q, r; __asm__ __volatile__( \ " mulq %%rdx \n\t" \ " divq %4 \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "rm"(p)); return r; } LONG modinv64s(LONG u, LONG p); LONG powmod64s(LONG u, LONG n, LONG p); void polprint64s( LONG *A, int d, LONG p ); int polmul64s( LONG * A, LONG * B, LONG * C, int da, int db, LONG p); int poldiv64s( LONG * A, LONG * B, int da, int db, LONG p ); LONG poleval64s(LONG *a, int d, LONG x, LONG p); //LONG *array( int n ); //LONG *array( int n ) { LONG *A; A = malloc(sizeof(LONG)*n); return A; } LONG poldiv164s( LONG *f, int d, LONG alpha, LONG *Q, LONG p, recint P ) { // Q = f/(x-alpha) return the remainder. Q must be of size d+1 int i; LONG r; Q[d] = f[d]; for( i=d; i>0; i-- ) { Q[i-1] = add64s(f[i-1],mulrec64(Q[i],alpha,P),p); } r = Q[0]; for( i=0; i=0; i-- ) M[i+1] = M[i]; M[0] = 0; for( i=0; i<=d; i++ ) M[i] = add64s(M[i],mulrec64(M[i+1],alpha,P),p); return; } void VandermondeSolve64s( LONG *m, LONG *y, int n, LONG *a, LONG *M, LONG *Q, int shift, LONG p ) { // m,y,a are dimension n, M is dimension n+1 // // m = [m1,m2,...,mn] and v = [v1,v2,...,vn] // Solve V a = v for a where // // [ 1 1 ... 1 ] [ a1 ] [ y1 ] // [ m1 m2 ... mn ] [ a2 ] [ y2 ] // V = [ m1^2 m2^2 ... mn^2 ] [ a3 ] = [ y3 ] // [ ... ... ... ... ] [ ] [ ] // [ m1^(n-1) m2^(n-1) ... mn^(n-1) ] [ an ] [ yn ] // // Let M = (x-m[1])(x-m[2])*...*(x-m[n]) // Let Qj(x) = L(x)/(x-m[j]) = q1 + q2 x + ... + qn x^(n-1). // Let u = Qj(m[j]) and v = [q1,q2,...,qn=1] be the array of coefficients of Qj(x). // Then the solution a[j] = v dot y / u for 1 <= j <= n. int i,j; LONG u,s,A[2];//*Q; clock_t st,et; recint P; P = recip1(p); // compute M = (x-m[0])(x-m[1])...(x-m[n-1]) st = clock(); M[0] = 1; for( i=0; i