/* Bivariate Hensel lift - cubic cost */ /* Garrett Paluck, 2021 */ //V33 #include #include #include #include #include #define DEBUG 0 #define M_INT long long int #define LONG long long int #define ULONG unsigned long long int #define ULONG unsigned long long int #define UINT32 unsigned int #define UINT64 unsigned long long typedef enum{false, true} bool; #define WORDSIZE 64 typedef unsigned long M_UINT; /* [x0:x1] := a*b */ #define MUL211(x0,x1,a,b) __asm__(\ " mulq %3 \n\t" \ : "=a"(x0), "=d"(x1) : "0"(a), "r"(b) : "cc") /* x0 := [x0:x1] / a */ /* x1 := [x0:x1] % a */ #define DIV21H(x0,x1,a) __asm__(\ " divq %2 \n\t" \ : "=a"(x0), "=d"(x1) : "r"(a), "0"(x0), "1"(x1) : "cc") typedef struct { UINT64 s; /* shift */ UINT64 v; /* reciprocal of d */ UINT64 d0; /* divisor shifted up */ UINT64 d1; } recint; recint recip1(UINT64 p); ULONG mulrec64(ULONG a, ULONG b, recint v); /* z += a1:a0 */ #define zadd(z,a0,a1) __asm__(\ " addq %4, %0 \n\t" \ " adcq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z = a*b */ #define zmul(z,a,b) __asm__(\ " mulq %%rdx \n\t" \ : "=a"(z[0]), "=d"(z[1]) : "a"(a), "d"(b)) /* z += a*b */ #define zfma(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " addq %%rax, %0 \n\t" \ " adcq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z -= a*b */ #define zfms(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " subq %%rax, %0 \n\t" \ " sbbq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z = z % p safe */ #define zmod(z,p) __asm__(\ " divq %4 \n\t" \ " xorq %0, %0 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1] < p ? z[1] : z[1] % p), "r"(p)) ULONG seed, mult; LONG rand64s(LONG p); int max(int a, int b); LONG * array(int n) { return (LONG *) malloc(n*sizeof(LONG)); } int * arrayI(int n) { return (int *) malloc(n*sizeof(int)); } inline int max32s(int a, int b) { if(a>b) return a; else return b; } inline int min32s(int a, int b) { if(a> 32; seed = mult*seed; y = seed >> 32; x = (x<<31) | y; x = x % p; return(x);} inline LONG add64s(LONG a, LONG b, LONG p) { LONG t; t = (a-p)+b; t += (t>>63) & p; return t; } inline void add264s(LONG a, LONG b, LONG *c, LONG p) {LONG t; t = (a-p)+b; t += (t>>63) & p; *c=t;} inline LONG sub64s(LONG a, LONG b, LONG p) { LONG t; t = a-b; t += (t>>63) & p; return t; } inline LONG mul64s(LONG a, LONG b, LONG p) { LONG q, r; __asm__ __volatile__( \ " mulq %%rdx \n\t" \ " divq %4 \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "rm"(p)); return r; } LONG modinv64s( LONG c, LONG p ); LONG log2N(LONG n) { LONG i,j; i=0;j=1; while(j=p){z[1] -=p;} zfma(z,A[i],B[i]);i++;if(z[1]>=p){z[1] -=p;} } while(i<=d){ zfma(z,A[i],B[i]);i++;if(z[1]>=p){z[1] -=p;} } zmod(z,p); return z[0]; } ULONG fastPolyMult(LONG *poly1,LONG *poly2, LONG *T, int deg1, int deg2, LONG p){ //local variables int k,m,min,max; ULONG z[2]; //clean T //cleanArray64s(T,deg1+deg2); for(k=0;k<=deg1+deg2;k++){ min = max32s(0,k-deg2); max = min32s(k,deg1); z[0] = 0; z[1] = 0; m = min; while(m<=max-3){ zfma(z,poly1[m],poly2[k-m]);m++; zfma(z,poly1[m],poly2[k-m]);m++; zfma(z,poly1[m],poly2[k-m]);m++; } while(m<=max){ zfma(z,poly1[m],poly2[k-m]);m++; } zmod(z,p); T[k] = add64s(z[0],T[k],p); } } //perform the Extended Euclidean Algorithm int MultiEEA(LONG *U, int d, int n, LONG *M, LONG *W, LONG p){ //size of U = n*(d+1) //size of M = ... //size of W = 2*((n-1)*d+1) + d+1 //local variables int i,j,du,dm,ds,dt,dg,dmCalc,mPos,point; LONG *u,*m,*mCalc,*g,*Wptr,t,alpha2; //clean array cleanArray64s(W,4*(n*d+1) + (d+1)); //set initial array pointers mCalc = W; g = mCalc + (n-1)*d+1; //define initial position in the M array mPos = 0; for(i=n-1;i>1;i--){ mPos = mPos + i*d+1; } mCalc[0] = 1; dmCalc = 0; //Main loop u = U + (n-1)*(d+1); for (i=n;i>=2;i--){ //calculate mcalc*U_i dmCalc = polmul64s(mCalc,u,mCalc,dmCalc,d,p); //add new mcalc to M polcopy64s(mCalc,dmCalc,M+mPos); mPos = mPos - ((n-i+2)*d + 1); //update u pointer u -= d+1; } //make sure gcd(u_i,M_i) = 1 for all M u = g + (n-1)*d+1; Wptr = u + d+1; dmCalc = (n-1)*d; mPos = 0; for(i=0;i0) return -1; dmCalc -= d; mPos += (n-i-1)*d+1; } return 0; } void addPol(LONG *A, int dz, LONG p); void altDiv2(LONG *A,int dx, int dz, LONG alpha, LONG *W, LONG p); void generateGcdexS(LONG *U, int n, int du, LONG *M, int dx, LONG *S, int *sDeg, LONG *W, LONG p){ //local variables int i,j,k,du2,dm,dg,ds,wTemp,dt,mPos; LONG *uTemp,*mTemp,*g,*wPtr; cleanArray64s(W,4*n*(du+1)); uTemp = W; mTemp = uTemp + du+1; g = mTemp + (n-1)*du+1; wPtr = g + n*du+1; mPos = 0; //Calculate the sigma term and subtract it from C for (i=1;i2 case //**************************** //set degree totals p1 = &Polys[0]; p2 = &Polys[Degs[0]+1]; p3 = &Coeffs[0]; //initial multiplications if(d <= accDeg[1]){ MIN = max32s(0,d-Degs[1]); MAX = min32s(d,Degs[0]); k = MIN; while(k<=MAX-3){ zfma(z,p1[k],p2[d-k]);k++; zfma(z,p1[k],p2[d-k]);k++; zfma(z,p1[k],p2[d-k]);k++; } while(k<=MAX){ zfma(z,p1[k],p2[d-k]);k++; } zmod(z,p); p3[d] = z[0]; } p1 = p2+(Degs[1]+1); for (j=1; j<=n-3; j++){ //make sure i need to do current calculations if(d <= accDeg[j+1]){ //continued polynomial multiplication up to degree d MIN = max32s(0,d-accDeg[j]); MAX = min32s(d,Degs[j+1]); k = MIN; z[0] = 0; z[1] = 0; while(k<=MAX-3){ zfma(z,p1[k],p3[d-k]); k++; zfma(z,p1[k],p3[d-k]); k++; zfma(z,p1[k],p3[d-k]); k++; } while(k<=MAX){ zfma(z,p1[k],p3[d-k]); k++; } zmod(z,p); p3[dz+1+d] = z[0]; } //update pointers p1 += Degs[j+1]+1; p3 += dz+1; } //Calculate final coefficient MIN = max32s(0,d-accDeg[n-2]); MAX = min32s(d,Degs[n-1]); z[0] = 0; z[1] = 0; j = MIN; while(j<=MAX-3){ zfma(z,p1[j],p3[d-j]);j++; zfma(z,p1[j],p3[d-j]);j++; zfma(z,p1[j],p3[d-j]);j++; } while(j<=MAX){ zfma(z,p1[j],p3[d-j]);j++; } } else{ if(n==2){ MIN = max32s(0,d-Degs[0]); MAX = min32s(d,Degs[0]); k = MIN; p1 = &Polys[dz+1]; while(k<=MAX-2){ zfma(z,Polys[k],p1[d-k]);k++;//if(z[1]>=p){z[1] -= p;} zfma(z,Polys[k],p1[d-k]);k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 2; } while(k<=MAX){ zfma(z,Polys[k],p1[d-k]);k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 1; } zmod(z,p); return z[0]; } //**************************** //n>2 case //**************************** //set degree totals p1 = &Polys[(n-2)*(dz+1)]; p2 = &Polys[(n-1)*(dz+1)]; p3 = &Coeffs[(n-2)*(dz+1)]; //initial multiplications if(d <= accDeg[n-2]){ MIN = max32s(0,d-Degs[n-1]); MAX = min32s(d,Degs[n-2]); k = MIN; while(k<=MAX-2){ zfma(z,p1[k],p2[d-k]);k++;//if(z[1]>=p){z[1] -= p;} zfma(z,p1[k],p2[d-k]);k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 2; } while(k<=MAX){ zfma(z,p1[k],p2[d-k]);k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 1; } zmod(z,p); p3[d-(dz+1)] = z[0]; } p1 = p1-(dz+1); p3 = p3-(dz+1); for (j=n-3; j>=1; j--){ //make sure i need to do current calculations if(d <= accDeg[j]){ //continued polynomial multiplication up to degree d MIN = max32s(0,d-accDeg[j+1]); MAX = min32s(d,Degs[j]); k = MIN; z[0] = 0; z[1] = 0; while(k<=MAX-2){ zfma(z,p1[k],p3[d-k]); k++;//if(z[1]>=p){z[1] -= p;} zfma(z,p1[k],p3[d-k]); k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 2; } while(k<=MAX){ zfma(z,p1[k],p3[d-k]); k++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 1; } zmod(z,p); p3[d-(dz+1)] = z[0]; } p1 = p1-(dz+1); p3 = p3-(dz+1); } //Calculate final coefficient MIN = max32s(0,d-accDeg[1]); MAX = min32s(d,Degs[1]); z[0] = 0; z[1] = 0; j = MIN; while(j<=MAX-2){ zfma(z,Polys[j],Coeffs[d-j]);j++;//if(z[1]>=p){z[1] -= p;} zfma(z,Polys[j],Coeffs[d-j]);j++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 2; } while(j<=MAX){ zfma(z,Polys[j],Coeffs[d-j]);j++;if(z[1]>=p){z[1] -= p;} *numMuls = *numMuls + 1; } } zmod(z,p); //printf("final solution: %d\n",z[0]); return z[0]; } LONG getCoeff3(LONG * Polys, LONG * Coeffs, LONG * coeffDegs, int n, int dz, int d, LONG p, LONG * shift1, LONG * shift2, LONG * dshift, int *Degs, LONG *W, LONG iter, clock_t *s1, clock_t *s2){ //denote local variables LONG i,j,k,m,t,z,y,s; LONG *temp, *tempDegs, *p1, *p2, *p3, *p4, *p5; LONG MIN,MAX; ULONG Z[2]; clock_t T1,T2; //update calculation p1 = Coeffs; p2 = Polys; for(j=0; j<(n/2); j++){ if(d <= coeffDegs[j]){ //printf("got here and j=%d\n",j); MIN = max32s(0,d-Degs[2*j+1]); MAX = min32s(d,Degs[2*j]); Z[0] = 0; Z[1] = 0; p3 = &p2[Degs[2*j]+1]; i=MIN; while(i <= MAX-3){ zfma(Z,p2[i],p3[d-i]);i++; zfma(Z,p2[i],p3[d-i]);i++; zfma(Z,p2[i],p3[d-i]);i++; } while(i <= MAX){ zfma(Z,p2[i],p3[d-i]);i++; } zmod(Z,p); p1[shift1[j]+d] = Z[0]; } p2 += coeffDegs[j] + 2; } if(n%2==1){p1[shift1[n/2]+k] = p2[k];} //shift + update coeffcalc main loop p2 = p1; p1 += dz+n; p3 = shift1; p5 = coeffDegs; t = ceil2(n); //update calculation //T1 = clock(); while(t > 1){ for(j=0; j<(t/2); j++){ if(d <= p5[n+j]){ MIN = max32s(0,d-p5[2*j+1]); MAX = min32s(d,p5[2*j]); Z[0] = 0; Z[1] = 0; p4 = &p2[p5[2*j]+1]; i = MIN; while(i <= MAX-3){ zfma(Z,p2[i],p4[d-i]); i++; zfma(Z,p2[i],p4[d-i]); i++; zfma(Z,p2[i],p4[d-i]); i++; } while(i <= MAX){ zfma(Z,p2[i],p4[d-i]); i++; } zmod(Z,p); p1[p3[n+j]+d] = Z[0]; } p2 += p5[n+j] + 2; }if(t%2==1){p1[p3[n+(t/2)]+d] = p2[d]; } p2 = p1; p1 += dz+n; p3 += n; p5+=n; t=ceil2(t); } return p2[d]; } void createProblem(LONG *A, LONG *F0, int n, int dx, int dz, int dxA, int dzA, LONG alpha, LONG p); void LagInterpSetup(LONG *A,LONG *xPoints,int dx, LONG *W, LONG p){ //local variables LONG t,a,b,*p1,*p2; int i,j,k,m,dm,point1,point2,size; ULONG z[2]; size = (dx+1)/2+1; cleanArray64s(W,2*dx+6); p1 = &W[dx+2]; p2 = &W[dx+4]; //calculate master polynomial W[1] = 1; p1[1] = 1; for(i=1;i<=dx;i++){ p1[0] = p-xPoints[i]; polmul64s(W,p1,W,i,1,p); } //calculate Lagrange polynomials for(i=0;i<=dx;i++){ //copy the master polynomial to spare space for(j=0;j<=dx+1;j++){p2[j] = W[j]; } p1[0] = p-xPoints[i]; //perform division dm = poldiv64s(p2,p1,dx+1,1,p); //move polynomial to A for(j=0;j<=dx;j++){ A[i*(dx+1)+j] = p2[j+1]; } } //calculate inverse and multiply every term by it p1 = A; for(i=0;i<=dx;i++){ //calculate denom a = 1; b = xPoints[i]; for (j=0;j<=dx;j++){ if (i!=j){ a = mul64s(a,sub64s(b,xPoints[j],p),p); } } a = modinv64s(a,p); //multiply every term by a for(j=0;j<=dx;j++){ if(p1[j]!=0){ p1[j] = mul64s(p1[j],a,p); } } p1 = p1 + dx+1; } //transpose matrix (efficiently) cleanArray64s(W,2*(size)*(size-1) + 1); if(dx%2 ==0){ //single entry, W[0] = A[0]; //even case p1 = &W[1]; //firstRow for(i=2,k=0;i<=dx+1;i+=2,k++){ p1[k*size] = A[i]; } //each column (even elements) for(i=2,k=1;i<=dx+1;i+=2,k++){ for(j=2,m=0;j<=dx+1;j+=2,m++){ p1[m*(size)+k] = A[(i-1)*(dx+1) + j]; } } //odd p1 = p1 + size*(size-1); //firstRow for(i=1,k=0;i<=dx+1;i+=2,k++){ p1[k*size] = A[i]; } //each column (even elements) for(i=2,k=1;i<=dx+1;i+=2,k++){ for(j=1,m=0;j<=dx+1;j+=2,m++){ p1[m*(size)+k] = A[(i-1)*(dx+1) + j]; } } //move elements back to A for(i=0;i<2*size*(size-1)+1;i++){ A[i] = W[i]; } } else{ //transpose matrix A for(i=0;i<=dx;i++){ for(j=i+1;j<=dx;j++){ t = A[i*(dx+1)+j]; A[i*(dx+1)+j] = A[j*(dx+1)+i]; A[j*(dx+1)+i] = t; } } } return; } //perform Lagrangian interpolation. The Lagrangian polynomials are stored in LagPolys //and the interpolation points are stored in yPoints. //The resulting polynomial is stored in Delta //The size of W is dx+1 void LagInterpEval(LONG *LagPolys,LONG *yPoints,LONG *Delta, int dx, LONG *W, LONG p, LONG *nummuls){ //Lagpolys - holds the Lagrangian polynomials used for lagrange interpolation - matrix has size (dx+1)^2 //yPoints - array of length dx+1 - contains the y points for Lagrange interpolation (the x points are implied to be 0,1,-1,2,-2,... //Delta - the interpolated polynomial of degree at most dx //dx - degree of the main polynomial A in x //W - additional space needed - size = dx+1 //p - prime //local variables int i,j,k,size,point; LONG *p1,*V; ULONG z[2]; if(dx%2==0){ size = (dx+1)/2 + 1; V = W; cleanArray64s(V,size); V[0] = yPoints[0]; //solve first solution (alpha = 0) Delta[0] = mul64s(LagPolys[0],yPoints[0],p); *nummuls += 1; //******************************************************************* //solve Lagrange interpolation (even case) //******************************************************************* //build vector for(i=1,j=1;i=1;j--){ if(shift1[j] != shift2[j]){ for(k=degsYprev[j]; k>=0; k--){ p1[shift1[j]+k] = p1[shift2[j]+k]; p1[shift2[j]+k] = 0; } } } /* printf("after Eval shift\n"); for(j=0;j=p){z1[1] -=p;} zfma(z2,p1[k+1],p2[m]); if(z2[1]>=p){z2[1] -=p;} } zmod(z1,p); e = z1[0]; zmod(z2,p); o = mul64s(z2[0],i,p); p3[ shift1[j] + degsY[j] ] = add64s(e,o,p); p4[ shift1[j] + degsY[j] ] = sub64s(e,o,p); p1 = p1 + dxdz; } p2 = p2 + du+1; } } return; } void CoeffCalcShifts(LONG* CoeffCalc, LONG* Evals, LONG* shift1, LONG* shift2, int* maxDeg, LONG* coeffDegs, LONG n, LONG d, LONG dx, LONG dz, LONG p){ LONG i,j,k,t,X; LONG *p1, *p2, *p3, *p4, *p5; //calculate shift positions shift1[0]=0; shift2[0]=0; for(i=1;i<=n/2;i++){ shift1[i] = shift1[i-1] + maxDeg[2*(i-1)] + maxDeg[2*(i-1)+1] + 1; shift2[i] = shift2[i-1] + coeffDegs[i-1] + 1; //coeffDegs[i-1] = z; } t = ceil2(n); p1 = shift1 + n; p2 = shift2 + n; p3 = shift1; p4 = coeffDegs + n; while(t>1){ p1[0] = 0; p2[0] = 0; for(i=1;i<=t/2;i++){ p1[i] = p1[i-1] + p3[2*(i-1)+2] - p3[2*(i-1)] - 1; p2[i] = p2[i-1] + p4[i-1] + 1; } p1 += n; p2 += n; p3 += n; p4 += n; t = ceil2(t); } //perform shift //perform initial shift and calculation for(i=0;i<=dx;i++){ p1 = &CoeffCalc[i*log2N(n)*(dz+n)]; p2 = &Evals[i*(dz+n)]; //shift if(n%2==1){ //if there's an odd # of elements for(k=maxDeg[n-1];k>=0;k--){ p1[shift1[n/2]+k] = p1[shift2[n/2] + k]; if(shift1[n/2] != shift2[n/2]){p1[shift2[n/2] + k] = 0;} } } t = (n/2) - 1; for(j=t;j>=1;j--){ for(k=maxDeg[2*j]+maxDeg[2*j+1];k>=0;k--){ p1[shift1[j]+k] = p1[shift2[j]+k]; if(shift1[j] != shift2[j]) {p1[shift2[j]+k] = 0;} } } p3 = shift1; p4 = shift2; p5 = coeffDegs; t = ceil2(n); while(t>1){ p1 += dz+n; p3 += n; p4 += n; //preform standard shift if(t%2==1){ //if there's an odd # of elements if(p3[t/2] != p4[t/2]){ for(k=p5[n+(t/2)-1];k>=0;k--){ p1[p3[t/2]+k] = p1[p4[t/2] + k]; p1[p4[t/2] + k] = 0; } } } for(j=(t/2)-1;j>=1;j--){ if(p3[j] != p4[j]){ for(k=p5[n+j];k>=0;k--){ p1[p3[j]+k] = p1[p4[j]+k]; p1[p4[j]+k]=0; } } } t = ceil2(t); } } return; } //This preforms hensel lifting on n factors to factor polynomial A int HenselLiftCubic(LONG *A, int dx, int dz, LONG *F0, int n, int du, LONG *F, LONG alpha, LONG *TempSpace, LONG p ){ /* A[x][z]- polynomial you wish to factor dx - degree bound on variable x of A dz - degree bound on variable z of A F0 - set of linear factors n - number of initial factors du - degree bound of initial factors (could be extended to an array later) F - array that will store the final polynomials. must be of size n*(dx+1)*(dz+1) W - working array for calculations. Must be at least size (tbd) alpha - LONG integer p - prime (duh) */ //local variables int i,j,k,l,maxSizeA,ndz,flag,*maxDeg, *maxDegPrev,*sDeg, *accDegs, *curXDeg, sumDeg, InterpFlag; LONG t,t2,z,*M,*E,*p1,*p2,*p3,*p4,*p5,*TEMP1,*Coeffs, *LagInterpPolys; LONG *DioSVal, numDioMuls,numCoeffExtractMul,numEvalMuls,numInterpMuls, * coeffDegs, *dshift, *shift1, *shift2, *g, *fac, *defac; LONG *CoeffCalc,*Evals,*ck,*evalPoints,*interpPoints,*Delta,*EvalPointsMul, *tempDegs,*W, wTemp; LONG temp,temp2,temp3; recint P; clock_t T1,T2,T3,T4,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15; //set time to 0 s1=0;s2=0;s3=0;s4=0;s5=0;s6=0;s7=0;s8=0;s9=0;s10=0;s11=0;s12=0;s13=0;s14=0;s15=0; //initial size calculations maxSizeA = (dx+1)*(dz+1); ndz = n*(dz+1); numEvalMuls = 0; numDioMuls = 0; numCoeffExtractMul = 0; numInterpMuls = 0; InterpFlag = 0; P = recip1(p); //declare arrays W = TempSpace; maxDeg = arrayI(n); //this is in y maxDegPrev = arrayI(n); accDegs = arrayI(n); //accumulative sum of degrees in y (for coeffcalc) sDeg = arrayI(n); //Degree of each s variable (for Diophantine Equation) curXDeg = arrayI(n); //Degree of x for each factor (efficiency) M = W; W += (n-1)*n*du/2 + n - 1; //M polynomials (diophantine) E = W; W += maxSizeA; //The error at each iteration TEMP1 = W; W += maxSizeA; //Temporary storage evalPoints = W; W += dx+1; interpPoints = W; W += dx+1; LagInterpPolys = W; W += (dx+1)*(dx+1); Delta = W; W += dx+1; CoeffCalc = W; W += (dx+1)*log2N(n)*(dz+n); Evals = W; W += (dx+1)*(dz+n); EvalPointsMul = W; W += (du+1)*((dx/2)+1); DioSVal = W; W += (n-1)*(dx+1); coeffDegs = W; W += log2N(n)*n; tempDegs = W; W += log2N(n)*n; dshift = W; W += n; shift1 = W; W += n*(log2N(n)+1); shift2 = W; W += n*(log2N(n)+1); g = W; W += dz+1; fac = W; W += dz+1; defac = W; W += dz+1; //move contents of F0 to F //Calculate exact degree bound of factors p1 = F; p2 = F0; for(i=0;i1){ for(i=0;i<=t/2;i++){ p1[i]=i; p2[i]=i; } p1 += n; p2 += n; t=ceil2(t); } //Generate Lagrange Polynomials for Interpolation in main loop LagInterpSetup(LagInterpPolys,evalPoints,dx,W,p); //calculate evaluation point constants p1 = EvalPointsMul; for(i=1;i<=dx/2;i++){ t = 1; p1[0] = t; t2 = i*i; for(j=1;j<=du;j++){ t = mul64s(t,t2,p); p1[j] = t; } p1 = p1 + du+1; } //Recover multiple of U for future calls to diophantine equation j = MultiEEA(F0,du,n,M,W,p); //if the EEA failed, return 'FAIL'(add last) if(j==-1){ printf("The input factors are not coprime"); return -1; } //Generate the S values from Gcdex generateGcdexS(F0,n,du,M,dx,DioSVal,sDeg,W,p); // declare initial Error for(i=0;i1){ for(j=0;j1){ for(j=0;j1){ for(j=0;j=k){ TEMP1[j] = mul64s(TEMP1[2*j],p2[p4[2*j+1]],p); } else {TEMP1[j]=0;} if(p5[2*j+1]>=k){TEMP1[j] = add64s(TEMP1[j],mul64s(TEMP1[2*j+1],p2[p4[2*j]],p),p); } p2[(dz+n)+ p4[n+j]+k] = add64s(p2[(dz+n)+ p4[n+j]+k],TEMP1[j],p); p5[n+j] = p5[2*j]+p5[2*j+1]; } if(t%2==1){p2[(dz+n) + p4[n+(t/2)] + k] = add64s(p2[(dz+n) + p4[n+(t/2)] + k],TEMP1[t-1],p); TEMP1[(t/2)] = TEMP1[t-1]; p5[n+(t/2)] = p5[t-1]; } p2 += dz+n; p3 += n; p4 += n; p5 += n; t = ceil2(t); //if(i==1){for(j=0;j=0; k-- ) { for ( i=k; i<=d-1; i++ ) a[i] = add64s( a[i],mul64s(b,a[i+1],p),p ); } return d; } void ExpandF( LONG *F, int r, LONG dx, LONG dy, LONG alpha, LONG p) { // F is the answer of BHL (HenselLiftCubic), where F[rr][(dx+1)*j+i] = coeff( coeff(f[rr],y-5,j), x, i) // The input alpha is p - alpha[j] in CMSHL Maple code // Return expanded f onto F LONG *Temp=array(dy+1), i, j, k, d; int rr; for ( rr=1; rr<=r; rr++ ) { for ( i=0; i=0 ) { if ( Temp[k] == 0 ) k--; else break; } d = k; TaylorShift2( Temp, d, alpha, p ); for ( j=0; j