# Copyright: Tian Chen and Michael Monagan, 2024

CCodeDir := "./";

Det64s:= define_external('det64s',
  AA::ARRAY(1..nn,1..nn,integer[8],order=C_order), 
  nn::integer[4], 
  pp::integer[8], 
  LIB=cat(CCodeDir,"detinterp4.so"), 
  RETURN::integer[8]):
Det64s := subsop(1=(AA,nn,pp),eval(Det64s)):

DetBareissSymm := define_external('detBareissSymmetric', 
  bb::ARRAY(0..nn,datatype=integer[8]),
  nn::integer[4],
  pp::integer[8],
  LIB=cat(CCodeDir,"detinterp4.so"),
  RETURN::integer[8]):
DetBareissSym := subsop(1=(bb,nn,pp),eval(DetBareissSymm)):

evallistlist := define_external('evalLIST64s',
    LL::integer[8],
    XX::integer[8],
    aa::ARRAY(1..nn,integer[8]),
    AA::ARRAY(1..nn,1..nn,integer[8]),
    pp::integer[8],
    RETURN::integer[8],
    LIB=cat(CCodeDir,"gcd8.so") ): 
evallistlist := subsop(1=(LL,XX,aa,AA,pp),eval(evallistlist)):

NewtonInv := define_external('NewtonInverses', 
  aa::ARRAY(0..nn,datatype=integer[8]), #size n, indexed from 0 to n-1
  nn::integer[8],                       #size n, deg n-1
  II::ARRAY(0..nn,datatype=integer[8]), #size n, indexed from 0 to n-1
  pp::integer[8],
  LIB=cat(CCodeDir, "detinterp4.so") ):

Interp2var := define_external('interp2var2',
  vii::integer[4], vjj::integer[4],     # 1 <= xi < xj <= N (Maple index)
  dii::integer[8], djj::integer[8], dmm::integer[8], # dmax = max(di,dj)
  XX::ARRAY(0..nn,datatype=integer[8]), # size dmax+1, where dmax=max(di,dj) 
  II::ARRAY(0..nn,datatype=integer[8]), # size dmax+1
  MM::ARRAY(0..didj,datatype=integer[8]), # size (dj+1)*(di+1)
  pp::integer[8],
  LIB=cat(CCodeDir,"detinterp4.so") ):

getmapleID := define_external('getMapleID',
   aa::integer[8],
   LIB=cat(CCodeDir, "gcd5.so"),
   RETURN::integer[4]):

getsupport := define_external('getsupport64s',
  ff::integer[8],
  nf::integer[4],
  ALPHA::ARRAY(1..nn,integer[8]),
  nn::integer[4],
  xx::integer[4],
  dx::integer[4],
  TT::ARRAY(0..nn,integer[4]),
  CC::ARRAY(1..nn,integer[8]),
  MM::ARRAY(1..nn,integer[8]),
  pp::integer[8],
  LIB=cat(CCodeDir,"gcd5.so") ):

evalnext := define_external('evalnext64s',
  cc::ARRAY(1..nn,integer[8]), 
  yy::ARRAY(1..nn,integer[8]), 
  nn::integer[4], 
  pp::integer[8],
  LIB=cat(CCodeDir,"gcd5.so") ):

evaladd := define_external('evaladd64s',
  cc::ARRAY(1..nn,integer[8]), 
  nn::integer[4], 
  TT::ARRAY(1..nn,integer[4]), 
  dx::integer[4],
  rr::ARRAY(0..dx,integer[8]), 
  pp::integer[8],
  RETURN::integer[4],
  LIB=cat(CCodeDir,"gcd5.so") ):

allocateSpace := define_external('allocateSpaceCubic',
  n::integer[4],
  dx::integer[8],
  dz::integer[8],
  du::integer[8],
  LIB=cat(CCodeDir, "hensel2.so"),
  RETURN::integer[8]):

BHL_C := define_external('HenselLiftCubic',
  AA::ARRAY(0..dxdz,integer[8]), dx::integer[4], dz::integer[4],
  DDX::ARRAY(1..nn,0..dy,integer[8],order=C_order), 
  DDY::ARRAY(1..nn,integer[8],order=C_order),
  F0::ARRAY(1..nn,0..du,integer[8],order=C_order), nn::integer[4], du::integer[4], 
  FF::ARRAY(1..nn,0..dxdz,integer[8],order=C_order), alpha::integer[8],
  WW::ARRAY(0..dW,integer[8],order=C_order),
  pp::integer[8], 
  LIB=cat(CCodeDir,"hensel2.so"),
  RETURN::integer[4]):

VSolve := define_external('VandermondeSolve64s',
  mm::ARRAY(1..nn,datatype=integer[8]), 
  yy::ARRAY(1..nn,datatype=integer[8]), 
  nn::integer[4],
  aa::ARRAY(1..nn,datatype=integer[8]),
  MM::ARRAY(0..nn,datatype=integer[8]),
  qq::ARRAY(0..nn,datatype=integer[8]), 
  shift::integer[4], p::integer[8], 
  LIB=cat(CCodeDir, "VSolve4.so") ):

pointer := proc(f) local MaxInt := 2^63-1; addressof(f)-4*MaxInt end:

# This version evaluates into the matrix A
EVALMOD1 := proc(AL::list,X::list,alpha::Array,AA::Array,p::integer)
    evallistlist(pointer(AL),pointer(X),alpha,AA,p);
end:

(*
BB_Maple_MPoly := proc( alpha::Array, p::prime ) global a_global,CNT;
    local X := indets(a_global), N := nops(X), i; CNT += 1; 
    Eval(a_global,[seq(X[i]=alpha[i],i=1..N)]) mod p;
end:

BB_Maple := proc( X::list, alpha::Array, p::prime ) global AM,CNT,tBBeval,tBBdet;
   local N := nops(X),A_eval,i,st,et,d; CNT += 1; 
   st := time(); 
   A_eval := Eval( AM, [seq(X[i]=alpha[i],i=1..N)]) mod p; 
   et := time() - st; tBBeval += et; 
   st := time();
   d := Det(A_eval) mod p;
   et := time() - st; tBBdet += et; 
   return d;
end; 

GE := proc(A::Matrix,p::prime)
local n,m,inv,mu,i,j,k,det;
   n,m := op(1,A);
   if n<>m then error "matrix must be square" fi;
   det := 1;
   for k to n do
       i := k;
       while i<=n and A[i,k]=0 do i := i+1; od;
       if i>n then return 0 fi;
       if i>k then # interchange row i and k
          for j from k to m do A[i,j],A[k,j] := A[k,j],A[i,j] od;
          det := -det;
       fi;
       det := det*A[k,k] mod p;
       inv := 1/A[k,k] mod p;
       for i from k+1 to n do
           if A[i,k]=0 then next fi;
           mu := A[i,k]*inv mod p;
           for j from k+1 to n do
               A[i,j] := A[i,j]-mu*A[k,j] mod p;
           od;
           A[i,k] := 0;
       od;
   od;
   det;
end;

BB_Maple2 := proc( X::list, alpha::Array, p::prime ) global AM,CNT,tBBeval,tBBdet;
   local N := nops(X),A_eval,i,st,et,d; CNT += 1; 
   st := time(); 
   A_eval := Eval( AM, [seq(X[i]=alpha[i],i=1..N)]) mod p; 
   et := time() - st; tBBeval += et; 
   st := time();
   d := GE(A_eval,p);
   et := time() - st; tBBdet += et; 
   return d;
end; 

BB_Maple3 := proc( X::list, alpha::Array, p::prime ) global AM,CNT,tBBeval,tBBdet;
   local N := nops(X),A_eval,i,st,et,d; CNT += 1; 
   st := time(); 
   A_eval := Eval( AM, [seq(X[i]=alpha[i],i=1..N)]) mod p; 
   et := time() - st; tBBeval += et; 
   st := time();
   d := LinearAlgebra:-Modular:-Determinant(p,A_eval) mod p;
   et := time() - st; tBBdet += et; 
   return d;
end;

BB := proc( X::list, alpha::Array, p::prime ) global AA,AL,n, CNT,tBBeval,tBBdet;
    # Global variables: AA is a matrix to store evaluations of polynomial entries of A; 
    # AL is a list of polynomial entries in matrix A; n is the number of cols (rows) of A. 
    # Input: X=[x1,...,xN], list of variables; alpha=[alpha11,...,alphaN], an evaluation point; 
    #        p is a prime; 
    # Ouput: Det(A(alpha)) mod p
    local d, st, et;  CNT += 1; 
    st := time(); EVALMOD1( AL, X, alpha, AA, p ); et := time()-st; tBBeval += et;
    st := time(); d := Det64s(AA, n, p); et := time()-st; tBBdet += et; 
    return d;
end:
*)

B_chrem := proc( B::procedure, alpha::Array, p::prime )
   local p0,d,M,d0,i,p1,d1; 
#printf("p=%d,alpha=%a\n",p,alpha); 
   p0 := p; d := B(alpha,p0); 
   M := p0; d0 := mods(d,M); 
   for i to 1000 do 
     p1 := nextprime(p0); 
     d1 := B(alpha,p1); 
     d := chrem([d0,d1],[M,p1]); 
     M := M*p1; d := mods(d,M);
     if d = d0 then break; 
     else d0 := d; p0 := p1; i++; 
     fi;
   od;
   printf("%d prime(s) used to interplate content\n", i);
   if i > 1000 then printf("integer content too large to interpolate\n"); return 'FAIL'; fi; 
   d;
end:

degB := proc( B::procedure, N::integer, j::posint, p::prime, tdeg::integer, LI::nonnegint ) 
    # Input: B is a modular black box of a in Z[x1,...,xN], N is the no.of variables, 
    #        j is the index of a chosen degree variable, p is a prime, tdeg is a total degree bound,
    #        LI = 0 or 1 for large integer coeffs.  
    # Output: deg(a,x_j) where a = det(A) (1<=j<=N) (w.h.p.) (with a chosen variable ordering)
    local randzp:=rand(p),h,k,beta,zeta,Z,b,hf,x;
    if j > N then printf("degB: index of the chosen degree variable out of range\n"); return 'FAIL'; fi;
    if LI = 0 then beta := Array(1..N,i->randzp(),datatype=integer[8]);
    else beta := Array(1..N,i->randzp());
    fi;  
    Z := Array(1..tdeg+2); b := Array(1..tdeg+2);
    h[0] := 0; k := 0; 
    for k to tdeg+2 do
        do zeta := randzp(); until not member(zeta,Z); Z[k] := zeta; 
        beta[j] := zeta; 
        b[k] := B( beta, p ); 
        h[k] := Interp(Z[1..k], b[1..k], x) mod p;
    until h[k] = h[k-1];
    hf := h[k];
    if hf = 0 then return -1; else return degree(hf,x); fi;
end:

unpack := proc(f) local g; #f = factor(a1) 
    if type(f,`+`) then [[f,1]];
    elif type(f,`^`) then return [[op(f)]];
    elif type(f,`*`) then [seq( op(unpack(g)), g in f )];
    else [[f,1]];
    fi;
end:

sqrfreefactors := proc(f) #f is a list returned from 'unpack'
    local k:=1,cont:=1,co,A,i,tt,t,ff,x,fp,Aa,r; 
    A[0] := 0; 
    for i to nops(f) do 
        tt := op(1,f[i]); t := whattype(tt);
        ff := [op(f[i])]; # factor with multiplicity as a list
        if t = `+` then x := indets(tt)[1]; 
	        fp := primpart(tt,x,'co'); #`factor`(3*x^2+3) outputs 3*x^2+3, the content '3' not separated.
	        if co > 1 then cont := cont*co; fi;
	        A[k] := [fp,ff[2]]; k++;
        elif t = `integer` then cont := cont*tt;   
        elif t = `indexed` or t = `symbol` then A[0] := ff; 
        fi;
    od;
    A[-1] := [cont,1]; 
    r := k-1; Aa := [r,Array(-1..r,[seq(A[i],i=-1..r)]),[seq(op(1,A[i]),i=1..r)]];
    return Aa; # r = no. of factors that needs Hensel lifting, Aa[3] - sqrfree factors
end:

#pack := proc(A::Array, r::integer) local i,f;
#    if A[-1] = 0 then f := mul(A[i][1]^(A[i][2]),i=0..r); return f; 
#    else f := mul(A[i][1]^(A[i][2]),i=-1..r); return f;  
#    fi;
#end:

pack_nocontent := proc( A::Array, r::integer ) local i,f; 
    if A[0] = 0 then f := mul(A[i][1]^(A[i][2]),i=1..r);  
    else f := mul(A[i][1]^(A[i][2]),i=0..r); 
    fi;
    return f;
end:

Interp1var := proc( B::procedure, X::list, alpha::Array, deg::list, N::integer, j::posint, p::prime ) 
    # Input: The black box B, X=[x_1,...,x_N], alpha=[alpha_1,...,alpha_N], an evaluation point; 
    #        deg is the list of degrees of all variables; N is the no.of variables; 
    #        j is the chosen variable index; p is a prime.
    # Output: Det(A(alpha_1,...,x_j,...,alpha_N)) mod p
    local dj, xx, i, F, beta; 
    if nops(X)<>N then 
        printf("Interp1var: no.of variables not equal to N\n"); return 'Interp1var_FAIL'; 
    fi;
    if j > N or j < 1 then 
        printf("Interp1var: chosen variable index out of range\n"); return 'Interp1var_FAIL'; 
    fi;
    dj := deg[j]; xx := Array(1..dj+1,i->i); 
    beta := Array(alpha); F := Array(1..dj+1); 
    for i to dj+1 do beta[j] := i; F[i] := B( beta, p ); od;
    return Interp( xx, F, X[j] ) mod p;
end:

Interp1varZ_chrem := proc( B::procedure, X::list, alpha::Array, deg::list, N::integer, j::posint, p::prime )
    local p0,a0,A,M,p1,a1,i;
    p0 := p; A := Interp1var( B, X, alpha, deg, N, j, p0 ); 
    M := p0; a0 := mods(A,M); 
    for i from 1 to 2000 do
       p1 := nextprime(p0);
       a1 := Interp1var( B, X, alpha, deg, N, j, p1 ); 
       A := chrem([a0,a1],[M,p1]); 
       M := M*p1; A := mods(A,M); 
       if A = a0 then break; 
       else a0 := A; p0 := p1; i++;
       fi;
    end;
    printf("%d prime(s) used to interplate a(%a)\n",i,X[j]); 
    if i > 2000 then 
        printf("Interp1var_chrem: coefficients too large to interpolate a(%a)\n",X[j]); 
        return 'Interp1varZ_chrem_FAIL';
    fi; 
    A; 
end:

Interp2var_Maple := proc( B::procedure, X::list, beta::Array, var::list, deg::list, N::integer, p::prime, singlepow::nonnegint, LI::nonnegint ) 
    # Input: A modular black box B, X=[x_1,...,x_N], beta=[beta_1,...,beta_N], 
    #        deg is the list of degrees of all variables, N is the number of variables, 
    #        var = [vi,vj], chosen variable indices, p is a prime
    # Ouput: a(beta_1,..,xi,..,xj,..,beta_N)) mod p
    local vi, vj, di, dj, xxi, xxj, i, j, k, alpha, F, g, sigma, st, et;
    # if nops(beta)<>N then printf("Interp_2var_Maple: incorrect no.of eval pts\n"); return 'FAIL'; fi;
    vi := var[1]; vj := var[2]; di := deg[vi]; dj := deg[vj]; 
    xxi := [seq(i,i=1..di+1)]; xxj := Array([seq(i,i=1..dj+1)]); 
    if LI = 0 then alpha := Array(beta,datatype=integer[8]);
    else alpha := Array(beta); 
    fi;
    F := Array(1..dj+1);
    if singlepow > 0 then 
        for i to di+1 do alpha[vi] := i;
            for j to dj+1 do alpha[vj] := j; 
                F[j] := B( alpha, p )/alpha[vi]^singlepow mod p; 
            od;
            g[i] := Interp( xxj, F, y ) mod p;
        od;
    else 
        for i to di+1 do alpha[vi] := i;
            for j to dj+1 do alpha[vj] := j; F[j] := B( alpha, p ); od;
            g[i] := Interp( xxj, F, y ) mod p;
        od;
    fi;
    for j from 0 to dj do
        sigma[j] := Interp( xxi, [seq(coeff(g[k],y,j),k=1..di+1)], X[vi] ) mod p;
    od;
    return Expand(add(sigma[k]*X[vj]^k, k=0..dj)) mod p; 
end:

SquareFreeImage := proc( B::procedure, X::list, alpha::Array, var::list, deg::list, p::integer, singlepow::nonnegint , LI::nonnegint)
local vi,vj,x,y,dx,dy,N,b,Be,G,C,i,k,RNG,dg,XX,Y,beta,abeta,S;
  # Input:  A modular black box B of a, X=[X_1,...,X_N], alpha=[alpha_1,...,alpha_N], an evaluation point,
  #         var=[vi,vj], indices of two chosen variables, deg=[di,dj]=[deg(a,x_vi),deg(a,x_vj)], 
  #         p is a prime, singlepow = 0 or > 0, LI = 0 or 1 for large integer coeffs. 
  # Output: sqf(a(x_vi,x_vj))|[x_1=alpha_1,...,x_N=alpha_N],except x_vi,x_vj, lc(.) is monic in x_vj. 
  vi := var[1]; vj := var[2]; x := X[vi]; y := X[vj]; dx := deg[1]; dy := deg[2]; N := nops(X); 
  if LI = 0 then b := Array(1..N,alpha,datatype=integer[8]); 
  else b := Array(1..N,alpha); 
  fi;
  Be := Array(0..dy); G := Array(0..dy); C := Array(0..dy);
  XX := Array(0..dx); Y := Array(0..dx);
  RNG := rand(p):
  i := 0;
  while i<=dy do
    do beta := RNG(); until not member(beta,Be); # rules out 0 note
    Be[i] := beta;
    if singlepow > 0 then 
        for k from 0 to dx do XX[k] := k+1; b[vi]:=k+1; b[vj]:=beta; Y[k] := B(b,p)/b[vi]^singlepow mod p; od;
    else 
        for k from 0 to dx do XX[k] := k+1; b[vi]:=k+1; b[vj]:=beta; Y[k] := B(b,p); od; 
    fi;
    abeta := Interp(XX,Y,x) mod p; # in Zp[x]
    G[i] := Gcd(abeta,diff(abeta,x),'acof') mod p; # in Zp[x]
    C[i] := acof; # in Zp[x]
    if i=0 then dg := degree(G[i],x); i := 1; next; fi;
    # Check if y=beta is a bad evaluations or y=beta is an unlucky evaluation
    if degree(G[i],x)<>dg then i := 0; else i := i+1; fi;
  od;
  S := expand( Interp( Be, C, y ) mod p );
  S := Primpart(S,x,'C') mod p; # S is monic in plex(x,y) note
  #C,S;
end:

GetEvaluations := proc( B::procedure, beta::Array, var::list, deg::list, xx::Array, MM::Array, p::prime, singlepow::nonnegint )
    # Input: A modular black box B of a, beta=[beta_1,...,beta_N], an evaluation point,
    #        var=[vi,vj], two chosen variable indices, deg=[di,dj]=[deg(a,x_vi),deg(a,x_vj)], 
    #        xx is an Array of eval pts, indexed from 0 to dmax=max(di,dj), p is a prime, singlepow = 0 or > 0.
    # Output: Evaluations stored onto MM = Array(0..(di+1)*(dj+1)-1,datatype=integer[8])
    local alpha,i,j,vi:=var[1],vj:=var[2],di:=deg[1],dj:=deg[2]; 
    alpha := Array(beta, datatype=integer[8]); 
	if singlepow > 0 then 
        for i from 0 to di do alpha[vi] := xx[i]; 
            for j from 0 to dj do alpha[vj] := xx[j];
                MM[(di+1)*j+i] := B( alpha, p )/alpha[vi]^singlepow mod p; 
            od;
        od;
	else 
	    for i from 0 to di do alpha[vi] := xx[i]; 
                for j from 0 to dj do alpha[vj] := xx[j]; 
	            MM[(di+1)*j+i] := B( alpha, p ); 
	        od;
            od;
	fi;
    return;
end:  

nextimage := proc(Ca,Ya,na,Ta,da,Aa,x,p)
local dega,f,i;
    evalnext(Ca,Ya,na,p);
    dega := evaladd(Ca,na,Ta,da,Aa,p);
    f := add(Aa[i]*x^i,i=0..dega);
end:

support := proc(f, X::list) local m; 
    coeffs(f, X, 'm'); [m]; 
end:
MonCoffs := proc(f, X::list) local m; 
    return [coeffs(f, X, 'm')], [m];
end:
Tcof := proc(T, r::integer, i::integer, rr::integer, LI::nonnegint) local ca;
  if LI = 0 then 
    if r = 1 then return Array(op(op(op(op(op(T)[2]))[2])[i])[2],datatype=integer[8]); fi;
    ca := Array(op(op(op(op(op(T)[2])[rr])[2])[i])[2],datatype=integer[8]);
  else 
    if r = 1 then return Array(op(op(op(op(op(T)[2]))[2])[i])[2]); fi;
    ca := Array(op(op(op(op(op(T)[2])[rr])[2])[i])[2])
  fi;
  return ca;
end:
Tmon := proc(T, r::integer, i::integer, rr::integer)
    if r = 1 then return op(op(op(op(op(T)[2]))[2])[i])[1]; fi;
    op(op(op(op(op(T)[2])[rr])[2])[i])[1];
end:

MDP_Maple := proc( f0::list, ck::polynom, x, p::prime, M::list ) 
    # Input: f0, a list of r polynomials in Zp[x], ck is a polynomial in Zp[x], 
    #        p is a prime, M is a list (pre-computed polynomials) s.t. 
    #        M[r]=1,M[r-1]=f0[r],...,M[1]=f0[r]*f0[r-1]...f0[2].
    # Output: sigma, a list of r polynomials s.t.
    #        sigma[1]*f0[1]+...+sigma[r]*f0[r] = ck.
    local r:=nops(f0),MM,i,c,sigma,tau,s,t,q;
    if _params['M'] = NULL then MM[r] := 1;
        for i from r-1 by -1 to 1 do 
            MM[i] := Expand(MM[i+1]*f0[i+1]) mod p;
        end;
    else
        if r<>nops(M) then printf("MDP_Maple: sizes of f0 and M unequal\n"); 
		    return 'MDP_Maple_FAIL'; 
		fi;
        MM := M;
    fi;
    c[1] := ck;
    for i to r-1 do
  	    Gcdex(MM[i],f0[i],x,'s','t') mod p;
        sigma[i] := Rem(c[i]*s,f0[i],x,'q') mod p;
        tau[i] := Expand(c[i]*t+q*MM[i]) mod p;
        c[i+1] := tau[i];
    end;
    sigma[r]:=c[r];
    return [seq(sigma[i],i=1..r)];
end:

TaylorShift1 := proc(f,y,alpha::integer,p::prime,k::nonnegint) 
   local g,dy,A,i,r;
   if f=0 then return [] fi;
   g := f;
   if _params['k'] = NULL then 
   	  dy := degree(f,y);
   	  A := Array(0..dy);
       for i from 0 to dy do
           g := Quo(g,y-alpha,y,'r') mod p;
           A[i] := r;
       od;
       return convert(A,list);
    fi;
    for i from 0 to k do g := Quo(g,y-alpha,y,'r') mod p; od;
    return r;
end:

BHL_Maple := proc( A::polynom, F0::list, X::list, alpha::integer, p::prime ) 
    # Input: A in Zp[x,y], F0 is a list of r polynomials in Zp[x], s.t.
    #        A(y=alpha) = Lambda*product(F0[rho]) mod p, where Lambda is some constant, 
    #        X=[x,y], alpha is an evaluation point for y, p is a prime
    # Output: F, a list of r polynomials in Zp[x,y] s.t. 
    #        A = Lambda*product(F[rho]) mod p and F[rho](y=alpha) = F0[rho] for rho=1..r.
    #        Or FAIL;
    local r,x,y,NvarA,ga,gaa,a,f0,df0,dy,Mmdp,MMDP,MM,M,rr,gam,dga,ac,sigma,i,k,fprod,Dk,deltak,ck,Tf,dfSum,fbar,f,F,Fp,LCFe,eta,Fpp; 
    x := X[1]; y := X[2]; r := nops(F0); NvarA := nops(indets(A));
    if NvarA = 1 then return F0; fi; 
    if r = 1 then  
        Fp := Primpart(A,x) mod p;
        LCFe := Eval(lcoeff(Fp,x),y=alpha) mod p;
        eta := lcoeff(op(F0),x)/LCFe mod p; 
        return [eta*Fp] mod p;
    fi;
    ga := lcoeff(A, x); gaa := Eval(ga,y=alpha) mod p;
    a := Expand(ga^(r-1)*A) mod p;
    f0 := [seq(`mod`(gaa*F0[rr]/lcoeff(F0[rr],x),p), rr=1..r)];
    df0 := map(degree,f0,x); dy := degree(a,y); 
    Mmdp[r] := 1;  
    for i from r-1 by -1 to 1 do 
        Mmdp[i] := Expand(Mmdp[i+1]*f0[i+1]) mod p;
    end;
    MMDP := [seq(Mmdp[rr],rr=1..r)]; # Create a list for MDP as the last input argument
    MM := Expand(Mmdp[1]*f0[1]) mod p; # MM = product(f0[rr],rr=1..r)
    for rr to r do f[rr] := f0[rr]; 
    	    M[rr] := Quo(MM,f0[rr],x) mod p;
    od; 
    gam := TaylorShift1(ga,y,alpha,p); # A list of gamma_k's, gamma_k = coeff(ga,(y-alpha)^k)
    dga := degree(ga,y);
    ac := TaylorShift1(a,y,alpha,p);
    sigma := Array(1..r,0..dy);
    for k to dy do #printf("k = %d\n",k);
        fprod := mul(f[rr],rr=1..r) mod p; # Quintic algorithm
        Dk := TaylorShift1(fprod,y,alpha,p,k); 
        if k <= dga then 
            for rr to r do Tf[rr] := gam[k+1]*x^df0[rr];
                deltak := add(Tf[i]*M[i],i=1..r); 
            od;
        else deltak := 0;
        fi; 
        ck := Expand(ac[k+1]-Dk-deltak) mod p;
        dfSum := 0; for rr to r do dfSum := dfSum + degree(f[rr],y); od;
        if dfSum = dy and ck <> 0 then return 'BHL_Maple_FAIL(1)'; fi;
        if ck <> 0 then 
            fbar := MDP_Maple(f0,ck,x,p,MMDP); 
            for rr to r do 
            	if k <= dga then sigma[rr,k] := fbar[rr] + Tf[rr]; 
        	     else sigma[rr,k] := fbar[rr];
        	     fi;
        	     f[rr] := f[rr] + sigma[rr,k]*(y-alpha)^k;
        	  od;
        fi;  
    od;
    dfSum := 0; for rr to r do dfSum := dfSum + degree(f[rr],y); od; 
    if dfSum <> dy then return 'BHL_Maple_FAIL(2)'; fi;
    for rr to r do F[rr] := sigma[rr,k-1]; sigma[rr,0] := f0[rr]; od; 
    for i from k-1 by -1 to 1 do 
        for rr to r do F[rr] := F[rr]*(y-alpha)+sigma[rr,i-1]; od;
    od;
    for rr to r do F[rr] := Expand(F[rr]) mod p; od; 
    #printf("a-mul(F[rr])=%a",Expand(a-mul(F[rr],rr=1..r)) mod p);
    if ck <> 0 and Expand(a-mul(F[rr],rr=1..r)) mod p <> 0 then return 'BHL_Maple_FAIL(3)';
    else 
        for rr to r do 
    	    Fp[rr] := Primpart(F[rr], x) mod p;
            LCFe[rr] := Eval(lcoeff(Fp[rr],x),y=alpha) mod p; 
            eta[rr] := lcoeff(F0[rr],x)/LCFe[rr] mod p;
            Fpp[rr] := eta[rr]*Fp[rr] mod p;
        od;
        return [seq(Fpp[rr],rr=1..r)];
    fi;
end:

#KnuthTayCoeff := proc( a, n::integer, y0::integer, X::list, p::prime ) 
#    # Input: a = a0+a1*y+...+an*y^n in Zp[x,y], n = deg(a,y), X=[x,y], p is prime
#    # Ouput: Taylor coefficients [v0,...vn] in a = v0+v1*(y-y0)+...vn*(y-y0)^n
#    local y, u, v, mo, md, Y0, i, j, k; 
#    y := X[2]; u := coeffs( a, y, 'mo' ); md := map(degree, [mo], y); 
#    Y0 := Array(1..n+1); Y0[1] := 1; Y0[2] := y0; 
#    for i from 3 to n+1 do Y0[i] := Y0[i-1]*y0 mod p; od;
#    k := 1; v := Array(1..n+1); # Array of polynomials
#    for j in md do v[j+1] := u[k]*Y0[j+1] mod p; k := k+1; od; 
#    for k to n do 
#        for j from n by -1 to k do v[j] := v[j]+v[j+1] mod p; od;
#    od;
#    for j from 2 to n+1 do v[j] := v[j]/Y0[j] mod p; od; 
#    return v; 
#end:

#BHL_2factors_Maple := proc( A, F1, X::list, alpha::integer, p::prime ) # This program works for 2 factor only
#    # Input: A in F[x,y], F1=[F0,G0] in F[x], X=[x,y], alpha is an evaluation point for y, p is prime
#    # Output: [F,G] s.t. A = F*G mod p and F(x,alpha) = F0, G(x,alpha) = G0; Otherwise, FAIL;
#    local x,y,a,F0,G0,f0,g0,ga,s,t,df0,dg0,f,g,Df0,Dg0,D10t,dx,dy,k,i,j,ac,gam,D0seq,xj,fp,gp,df,dg,D0,D10,D11,D12,D2,c,F,G,Fp,LCFp,LCf1,eta,Ftrue,LCFe,r; 
#    x := X[1]; y := X[2]; F0:=F1[1]; G0:=F1[2]; r:=2;
#    ga := lcoeff(A, x); 
#    a := Expand(ga*A) mod p; 
#    f0 := Eval(ga*F0/lcoeff(F0, x), y=alpha) mod p; 
#    g0 := Eval(ga*G0/lcoeff(G0, x), y=alpha) mod p; 
#    Gcdex(f0, g0, x, 's', 't') mod p; 
#    df0 := degree(f0, x); dg0 := degree(g0, x); 
#    f[0] := f0-(lcoeff(f0, x)-ga)*x^df0; g[0] := g0-(lcoeff(g0, x)-ga)*x^dg0; 
#    Df0 := f0-lcoeff(f0, x)*x^df0; Dg0 := g0-lcoeff(g0, x)*x^dg0; 
#    D10t := Expand(Df0*x^dg0+Dg0*x^df0) mod p; 
#    dx := degree(a, x); dy := degree(a, y); 
#    for k from 0 to dy do gam[k] := coeftayl(ga, y=alpha, k) od; 
#    ac := KnuthTayCoeff(a, dy, alpha, X, p); 
#    xj := Array([seq(j, j=0..dx-1)]); 
#    fp := Array(1..dy); gp := Array(1..dy); 
#    df := 0; dg := 0; 
#    for k to dy while c <> 0 do 
#        D0seq := [seq(add(fp[i][j]*gp[k-i][j], i=max(1, k-dg)..min(k-1, df)), j=1..dx)] mod p; 
#        D0 := Interp(xj, D0seq, x) mod p; 
#        D10 := gam[k]*D10t; 
#        D11 := x^dg0*add(gam[i]*f[k-i], i=1..k-1) mod p; 
#        D12 := x^df0*add(gam[i]*g[k-i], i=1..k-1) mod p; 
#        D2 := x^(df0+dg0)*add(gam[i]*gam[k-i], i=0..k) mod p; 
#        c := Expand(ac[k+1]-D0-D10-D11-D12-D2) mod p;
#        g[k] := Rem(s*c, g0, x, 'q') mod p; 
#        f[k] := Quo(c-g[k]*f0, g0, x) mod p; 
#        fp[k] := [seq(Eval(f[k], x=j) mod p, j=0..dx-1)]; 
#        gp[k] := [seq(Eval(g[k], x=j) mod p, j=0..dx-1)]; 
#        if f[k] <> 0 then df := k; fi; 
#        if g[k] <> 0 then dg := k; fi;
#    od; 
#    F := f[k-1]; G := g[k-1]; 
#    for i from k by -1 to 1 do F := F*(y-alpha)+f[i-1]; G := G*(y-alpha)+g[i-1]; od; 
#    F := Expand(F) mod p; G := Expand(G) mod p; 
#    if c = 0 then Fp := [Primpart(F, x) mod p, Primpart(G, x) mod p]; 
#        LCFp := map(lcoeff,Fp,x); LCFe := Eval(LCFp,y=alpha) mod p; 
#        LCf1 := map(lcoeff,F1,x);
#        eta := [seq(`mod`(LCf1[i]/LCFe[i],p),i=1..r)];
#        Ftrue := [seq(`mod`(eta[i]*Fp[i],p),i=1..r)];
#        return Ftrue;
#    else return 'FAIL';
#    fi;
#end:

VandermondeSolve3_Maple := proc(m::{Array,Vector,list}, v0::{Array,Vector,list}, t::integer, shift::integer, p::prime) 
    local v, i, j, M, x, a, q, r, s, Q, temp; 
    # Input: m is the list of monomial evaluations, t is size of the vector, shift = 0 or 1
    # Output: solution a of the (shifted) t by t Vandermonde system Va = v0  
    v := v0[1..t]; 
    M := modp1(ConvertIn(1, x), p); 
    for i to t do 
        temp := modp1(ConvertIn(x-m[i], x), p); 
        M := modp1(:-Multiply(temp, M), p); 
    od; 
    a := Array(1..t); 
    for j to t do 
        Q := modp1(Quo(M, ConvertIn(x-m[j], x)), p); 
        r := `mod`(1/modp1(Eval(Q, m[j]), p), p); 
        Q := modp1(ConvertOut(Q), p); 
        s := `mod`(add(v[i]*Q[i], i = 1..t), p); 
        a[j] := `mod`(r*s, p); 
    od;
    if shift = 1 then for j to t do a[j] := `mod`(a[j]/m[j], p); od; fi;
    return a; 
end:

Initialf2matrix := proc( f0::list, x, F0::Array )
    # Input: f0 is a list of factors in Zp[x] s.t. a(y=alpha) = mul(f0[i],i=1..r) mod p, X=[x,y]
    # Ouput: Matrix F0 s.t. row rr is the array of coefficients of f0[rr] for rr=1..r
    local rr,i,r:=nops(f0),df0:=map(degree,f0,x);
    for rr to r do 
        for i from 0 to df0[rr] do F0[rr,i] := coeff(f0[rr],x,i); od; 
    od;
    return;
end:

Bivarpoly2matrix := proc( a, X::list, dx::integer, dy::integer, M::Array ) 
    # Input: a in Zp[x,y], X=[x,y], dx=deg(a,x), dy=deg(a,y)
    # Output: M with A[(dx+1)*j+i] = coeff of x^j*y^i in a, i=0..dy, j=0..dx
    local cof,mon,dmx,dmy,k,x:=X[1],y:=X[2];
    cof,mon := MonCoffs(a,X);
    dmx := map(degree,mon,x); dmy := map(degree,mon,y);
    for k to nops(mon) do M[(dx+1)*dmy[k]+dmx[k]] := cof[k]; od;
    return; 
end:

clearArray := proc( A::Array, index1::integer, index2::integer ) local i;
    for i from index1 to index2 do A[i] := 0; od;
    return; 
end:

clearArray2d := proc( A::Array, indexi1::integer, indexi2::integer, indexj1::integer, indexj2::integer ) local i,j; 
    # Clear a 2-d Array A = Array(indexi1..indexi2,indexj1..indexj2)
    for i from indexi1 to indexi2 do 
        for j from indexj1 to indexj2 do A[i,j] := 0; od;
    od;
    return; 
end:

CMBBSHL_stepj := proc( B::procedure, fjm1::list, j::integer, X::list, alpha::Array, deg::list, Sdf1::integer, N::posint, p::prime, singlepow::nonnegint,VarPerm::nonnegint,sqfinterp::nonnegint,MapleCode::list, LI::nonnegint )
    # Input: A modular black box B of a, a list of sqr-free factors fjm1 s.t. 
    #   aj(x_j=alpha_j) = prod(fjm1[i],i=1..r) mod p, j is the H.L. variable index,
    #   X=[x_1,...,x_N] (with a chosen variable ordering), alpha=[alpha_1,...,alpha_N] (eval.pt), 
    #   deg=[deg(a,x_j),j=1..N], Sdf1 is the sum of degree of fjm1 in x1, no.of variables N, a prime p, 
    #   singlepow = dd>0 if there is a single term x_1^dd not Hensel lifted, else singlepow = 0,
    #   VarPerm = 0 or > 0 (when there is variable permutation), 
    #   sqfinterp = 0 or > 0 (if SquareFreeImage is called instead).
    #   MapleCode = 1 for Maple Code or = 0 for C code for each subroutine.
    # Output: fj = [fj_1,...,fj_r], a list of factors of aj in [x_1,...,x_j] and 
    #   fj_i(x_j=alpha_j) = fjm1_i for i=1..r. Or FAIL.
    local MapleMaxint,r,ntf,rr,F,sigma,nsigma,degfx1,df1,M,s,S,k,test,beta,betaA,Y,ss,univar,FA,TA,CA,MA,vars,nvars,CAlist,MAlist,Xm,betaA2,betaAm,betaAm2,varperm,x1id,b,dmax,d1,dj,d1s,xx,II,MM,dx,dy,MMsqf,du,fA,f0A,sizeW,Wtemp,DDX,DDY,a,g,Asf,lcAs,nvarsAsf,f0,i,ii,jj,f,nf,Mon,Cof,mon,cof,Tf,TT,TCOF,SS,SA,mu,dx1,id,sizeVS,Mtemp,Qtemp,C,termsf,Termsf,qo,fY;
    local BBxns,BBxne,st0:=time(),tt2:=0,tt3:=0,tt4:=0,st,et,sqftime:=0,bbtime:=0,interptime:=0,vstime:=0,tsqf,tbb,tinterp,tvs,bbevaltime:=tBBeval,bbdettime:=tBBdet;
    MapleMaxint:=2^63-1; r:=nops(fjm1); ntf:=map(nops,fjm1);  
    #printf("fjm1=%a\n",fjm1);
    for rr to r do
        F[rr] := addressof(fjm1[rr])-4*MapleMaxint; 
        sigma[rr] := [coeffs(fjm1[rr], X[1], 'monfx1')]; 
	nsigma[rr] := nops(sigma[rr]);
        degfx1[rr] := map(degree, [monfx1], X[1]); 
	df1[rr] := max(degfx1[rr]);   
        M[rr] := map(support, sigma[rr], X[2..j-1]); 
        s[rr] := map(nops, M[rr]);
    od;

    # Choose an evaluation point beta
    for k to 20 do test := 1;  
        beta := [seq(rand(p)(),i=1..j)]; Y := [seq(X[i]=beta[i], i=2..j-1)];
        for rr to r do S[rr] := map( Eval, M[rr], Y ) mod p;
            for i to nsigma[rr] do 
	        if nops({op(S[rr][i])}) < s[rr][i] then test := 0; next k; fi; 
	    od;
        od; 
    until test = 1; 
    if k > 20 then printf("CMBBSHL step %d: monomial evals not unique, p likely too small\n", j); 
        return 'FAIL(1)'; 
    fi;
    ss := max([seq(s[rr],rr=1..r)]); printf("CMBBSHL step %d: s = %a\n",j,ss);
    if LI = 0 then betaA := Array(1..j-1,beta,datatype=integer[8]); # convert [beta_1,beta_{j-1}] into an array betaA    
    else betaA := Array(1..j-1,beta); 
    fi; 
    univar := Array(1..r); # indicator if a factor is univariate
  if MapleCode[2] = 0 then	
    # Set up for evaluations of fjm1
    for rr to r do 
        CA[rr] := Array(1..ntf[rr],datatype=integer[8]); 
	    MA[rr] := Array(1..ntf[rr],datatype=integer[8]);
	    TA[rr] := Array(0..df1[rr],datatype=integer[4]);
	    FA[rr] := Array(0..df1[rr],datatype=integer[8]); 
            vars := indets(fjm1[rr]); nvars := nops(vars); 
        #printf("F[%d]=%a\n",rr,F[rr]);
        if getmapleID(F[rr])<>17 then 
            if nvars = 1 then univar[rr] := 1; k := 1; 
		for i in degfx1[rr] do TA[rr][i] := 1; FA[rr][i] := sigma[rr][k]; k++: od; 
                for i to ntf[rr] do MA[rr][i] := 1; od;
                if df1[rr] = 1 then # fjm1[rr] is univariate and is linear
                    for i to ntf[rr] do CA[rr][i] := sigma[rr][i]; od; 
                else # fjm1[rr] is univariate but non-linear, i.e. df1[rr] > 1
                    for i to ntf[rr] do CA[rr][i] := sigma[rr][ntf[rr]-i+1]; od;
                fi;
                f0[rr] := fjm1[rr];
	     else # fjm1[rr] is linear but not univariate 
		k := 1; 
                for i in degfx1[rr] do 
                    TA[rr][i] := s[rr][k]; 
                    FA[rr][i] := Eval(sigma[rr][k],Y) mod p; 
		    k++; 
                od;
                CAlist := map(coeffs,sigma[rr],X[2..j-1]); 
		MAlist := map(op,S[rr]);
                for i to ntf[rr] do 
	            CA[rr][i] := CAlist[i]; 
		    MA[rr][i] := MAlist[i]; 
                od;   
	     fi;
	else
            if nvars < j-1 then 
	        betaAm := Array(1..nvars,datatype=integer[8]); k := 1;
	        Xm := Array(1..nvars);
                for i to j-1 do 
	            if member(X[i],vars) then 
	                betaAm[k] := beta[i]; Xm[k] := X[i]; k++; 
                    fi; 
		od;
		Xm := convert(Xm,list);
	        if VarPerm > 0 then betaAm2 := Array(1..nvars,datatype=integer[8]);
		    varperm := sort(Xm[1..nvars],output=permutation); 
                    x1id := ListTools:-Search(1,varperm); 
	            for i to nvars do betaAm2[i] := betaAm[varperm[i]]; od;
                    getsupport(F[rr],ntf[rr],betaAm2,nvars,x1id,df1[rr],TA[rr],CA[rr],MA[rr],p);
	        else getsupport(F[rr],ntf[rr],betaAm,nvars,1,df1[rr],TA[rr],CA[rr],MA[rr],p);
		fi;
             else
		if VarPerm > 0 then betaA2 := Array(1..j-1,datatype=integer[8]);
	            varperm := sort(X[1..j-1],output=permutation);
		    x1id := ListTools:-Search(1,varperm);
	            for i to j-1 do betaA2[i] := betaA[varperm[i]]; od;
                    getsupport(F[rr],ntf[rr],betaA2,j-1,x1id,df1[rr],TA[rr],CA[rr],MA[rr],p);
	        else getsupport(F[rr],ntf[rr],betaA,j-1,1,df1[rr],TA[rr],CA[rr],MA[rr],p);
	        fi;
            fi;
        fi; 
    od;
  fi;	 
    if LI = 0 then b := Array(1..N,i->1,datatype=integer[8]);
    else b := Array(1..N,i->1);
    fi;
    for i from 1 to j do b[i] := beta[i]*b[i] mod p; od;
    for i from j+1 to N do b[i] := alpha[i]; od;
    d1 := deg[1]; dj := deg[j]; 
    if singlepow > 0 then d1s := d1 - singlepow; else d1s := d1; fi;
    
    if j=N then BBxns := CNT; fi;
    # Compute a sqr-free image of a 
    if sqfinterp = 1 then #printf("H.L.step %d, initial sqrfree image interp completed\n",j);
        tsqf := time():
        Asf := SquareFreeImage(B,X,b,[1,j],[d1s,dj],p,singlepow, LI);
        sqftime += time() - tsqf; 
        #printf("H.L.step %d: initial sqf comp time = %f\n",j,sqftime);
    else #printf("H.L.step %d, initial bivariate interp completed\n",j);
        if MapleCode[1] = 1 then 
            a := Interp2var_Maple( B, X, b, [1,j], deg, N, p, singlepow, LI ); 
        else 
            dmax := max(d1s,dj); 
            xx := Array(0..dmax, i->i+1, datatype=integer[8]); # Avoid 0 for the case singlepow > 0 
            II := Array(0..dmax, datatype=integer[8]); 
            NewtonInv( xx, dmax+1, II, p );
            MM := Array(0..(d1s+1)*(dj+1)-1,datatype=integer[8]);
            GetEvaluations( B, b, [1,j], [d1s,dj], xx, MM, p, singlepow ); 
            Interp2var( 1, j, d1s, dj, dmax, xx, II, MM, p );
            a := Expand(add(add(MM[(d1s+1)*jj+ii]*X[1]^ii,ii=0..d1s)*X[j]^jj,jj=0..dj)) mod p;    
            clearArray(MM,0,(d1s+1)*(dj+1)-1);
        fi; 	
        if degree(a,X[1])<>d1s then 
            printf("Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[j],X[1]); return 'FAIL(2)'; 
        elif degree(a,X[j])<>dj then 
	    printf("Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[j],X[j]); return 'FAIL(2)'; 
        fi;
        g := Gcd(a,diff(a,X[1]),'Asf') mod p; 
        lcAs := lcoeff(lcoeff(Asf,X[1]),X[j]); 
        Asf := Asf/lcAs mod p;
    fi;
    if degree(Asf,X[1])<>Sdf1 then 
        printf("Wrong degree of sqf(a(%a,%a)) mod p in %a\n",X[1],X[j],X[1]); return 'FAIL(3)'; 
    fi;    
    dx := degree(Asf,X[1]); dy := degree(Asf,X[j]); #printf("dy=deg(Asf,%a)=%a\n",X[j],dy);
    nvarsAsf := nops([op(indets(Asf))]);
    
    # Memory allocation for BHL C code
    if MapleCode[3] = 0 and r > 1 and nvarsAsf > 1 then 
        MMsqf := Array(0..(dx+1)*(dy+1)-1,datatype=integer[8]); 
        du := max(seq(df1[rr],rr=1..r)); 
        f0A := Array(1..r,0..du,datatype=integer[8],order=C_order);
        fA := Array(1..r,0..2*(dx+1)*(dy+1)-1,datatype=integer[8],order=C_order);
        sizeW := allocateSpace(r,dx,dy,du);
        Wtemp := Array(1..sizeW,datatype=integer[8],order=C_order);
        DDX := Array(1..r,0..dy,datatype=integer[8],order=C_order);
        DDY := Array(1..r,datatype=integer[8],order=C_order);
    fi;

    Tf := Array(1..r); for rr to r do Tf[rr] := table(); od;
    if LI = 0 then b := Array(1..N,i->1,datatype=integer[8]);
    else b := Array(1..N,i->1);
    fi;
    for i from j+1 to N do b[i] := alpha[i]; od;
    
    # Main loop for k from 1 to ss 
    for k to ss do
        for i from 1 to j do b[i] := beta[i]*b[i] mod p; od;
        # Compute a sqr-free image a(x1,Yk,xj)
        if sqfinterp = 1 then 
            tsqf:=time(); 
	    Asf := SquareFreeImage(B,X,b,[1,j],[d1s,dj],p,singlepow,LI); 
            sqftime+=time()-tsqf;
        else 
            if MapleCode[1] = 1 then
                tinterp:=time();   
                a := Interp2var_Maple( B, X, b, [1,j], deg, N, p, singlepow, LI ); 
                interptime+=time()-tinterp;
            else  
                tbb:=time();     
                GetEvaluations( B, b, [1,j], [d1s,dj], xx, MM, p, singlepow );  
                bbtime+=time()-tbb;
                tinterp:=time();  
                Interp2var( 1, j, d1s, dj, dmax, xx, II, MM, p );       
                interptime+=time()-tinterp;
                a := Expand(add(add(MM[(d1s+1)*jj+ii]*X[1]^ii,ii=0..d1s)*X[j]^jj,jj=0..dj)) mod p;    
	        clearArray( MM,0,(d1s+1)*(dj+1)-1 );
            fi;
            if degree(a,X[1])<>d1s then 
                printf("Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[j],X[1]); return 'FAIL(2)'; 
 	    elif degree(a,X[j])<>dj then 
	        printf("Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[j],X[j]); return 'FAIL(2)'; 
	    fi;
            g := Gcd(a,diff(a,X[1]),'Asf') mod p; 
            lcAs := lcoeff(lcoeff(Asf,X[1]),X[j]); 
            Asf := Asf/lcAs mod p;    
        fi;
        if degree(Asf,X[1])<>Sdf1 then 
	        printf("Wrong degree of sqf(a(%a,%a)) mod p in %a\n",X[1],X[j],X[1]); return 'FAIL(3)'; 
	elif degree(Asf,X[j])<>dy then 
            printf("Wrong degree of sqf(a(%a,%a)) mod p in %a, likely an unlucky evaluation\n", X[1],X[j],X[j]);
            #printf("deg(Asf,%a)=%d,dy=%a\n",X[j],degree(Asf,X[j]),dy);
            return 'FAIL(3)';
        fi;			
	nvarsAsf := nops([op(indets(Asf))]);
        
        # Evaluate fjm1 at Yk = [x[2]=b[2]^k,...,x[j-1]=b[j-1]^k]
        st := time();
        if MapleCode[2] = 1 then 
            for rr to r do f0[rr] := Eval(fjm1[rr],[seq(X[ii]=b[ii],ii=2..j-1)]) mod p; od; 
        else 
            for rr to r do 
                if univar[rr] = 0 then 
                    f0[rr] := nextimage(CA[rr],MA[rr],ntf[rr],TA[rr],df1[rr],FA[rr],X[1],p);
                fi;
                for ii from 0 to df1[rr] do f0A[rr,ii] := FA[rr][ii]; od;
            od;  
        fi;
        et := time()-st; tt2 += et;

        for rr to r do 
            if degree(f0[rr],X[1]) < df1[rr] then printf("deg(f[%d],%a) too small\n",rr,X[1]); return 'FAIL(4)'; fi; 
        od; 
        for ii to r do
            for jj from ii+1 to r do
                if Gcd(f0[ii],f0[jj]) mod p <> 1 then 
		    printf("CMBBSHL step %d: f0's not relatively prime, unlukcy evaluation(s)\n", j); return 'FAIL(5)'; 
                fi;
            od;
        od; 
        
        # BHL 
        st := time();
	if MapleCode[3] = 1 or r = 1 or nvarsAsf = 1 then 
            f := BHL_Maple( Asf, [seq(f0[i],i=1..r)], [X[1],X[j]], alpha[j], p ); 
	else 
            Bivarpoly2matrix( Asf, [X[1],X[j]], dx, dy, MMsqf );
            BHL_C( MMsqf, dx, dy, DDX, DDY, f0A, r, du, fA, alpha[j], Wtemp, p ); 
            f := [seq(Expand(add(add(fA[rr][(dx+1)*jj+ii]*X[1]^ii,ii=0..DDX[rr,jj])*X[j]^jj,jj=0..DDY[rr])) mod p, rr=1..r)]; 
            clearArray2d( f0A,1,r,0,du ); clearArray2d( fA,1,r,0,2*(dx+1)*(dy+1)-1 );
            clearArray( MMsqf,0,(dx+1)*(dy+1)-1 ); clearArray( Wtemp,1,sizeW ); 
        fi;
        et := time()-st; tt3 += et;

        st := time();
        nf[k] := map( nops, f );
        for rr to r do TT := Tf[rr];
            Cof, Mon := MonCoffs( f[rr], [X[1],X[j]] );
            for i to nops(Mon) do
                mon := Mon[i]; cof := Cof[i]; 
                if not assigned(TT[mon]) then TT[mon] := Array(1..ss); TT[mon][k] := cof;
                else TT[mon][k] := cof;
                fi;
            od;
        od;
        et := time()-st; tt4 += et;
    od;

    # Vandermonde Solves
	if MapleCode[4] = 1 then 
	    for rr to r do SS := S[rr];
                mu[rr] := max( [seq(nf[k][rr],k=1..ss)] ); 
            for i to mu[rr] do 
                dx1 := degree(Tmon(Tf,r,i,rr),X[1]); 
		id := ListTools[SearchAll](dx1,degfx1[rr]);
		sizeVS := s[rr][id]; 
		TCOF := Tcof(Tf,r,i,rr,LI); 
		tvs:=time();
                C := VandermondeSolve3_Maple(SS[id], TCOF, sizeVS, 1, p);
		vstime+=time()-tvs;
                termsf[rr][i] := add(C[k]*M[rr][id][k], k=1..sizeVS);
            od;
            Termsf[rr] := Expand(add(termsf[rr][i]*Tmon(Tf,r,i,rr), i=1..mu[rr])) mod p; 
        od;
	else 
        Qtemp := Array(0..ss,datatype=integer[8]);
        for rr to r do SS := S[rr];
            mu[rr] := max( [seq(nf[k][rr],k=1..ss)] ); 
            for i to mu[rr] do
                dx1 := degree(Tmon(Tf,r,i,rr),X[1]); 
                id := ListTools[SearchAll](dx1,degfx1[rr]); 
                sizeVS := s[rr][id]; 
		TCOF := Tcof(Tf,r,i,rr,LI); 
                SA := Array(SS[id],datatype=integer[8]);
                Mtemp := Array(0..sizeVS, datatype=integer[8]); 
                C := Array(1..sizeVS, datatype=integer[8]); 
               # if j = N then printf( "H.L. x[%d]: rr = %d,i = %d, s[%d][%d] = %d\n", j, rr, i, rr, id, sizeVS ); fi;
                tvs:=time();
                VSolve( SA, TCOF, sizeVS, C, Mtemp, Qtemp, 1, p );
                vstime+=time()-tvs;
                termsf[rr][i] := add(C[k]*M[rr][id][k], k=1..sizeVS);
            od;
            Termsf[rr] := Expand(add(termsf[rr][i]*Tmon(Tf,r,i,rr), i=1..mu[rr])) mod p;
        od;
    fi; 
    if j=N then	
    #printf( "Timings: BBtot=%fs,Interp2var=%fs,sqfinterp=%fs,eval=%fs,BHL=%fs,table=%fs,VStime=%fs\n",bbtime,interptime,sqftime,tt2,tt3,tt4,vstime );
    fi;
    # Check if a = f_1...f_r by another evaluation point
    for i to j do b[i] := rand(p)(); od;  Y := [seq(X[i]=b[i],i=2..j)]; 
    for rr to r do fY[rr] := Eval(Termsf[rr],Y) mod p; od; 
    a := Interp1var( B, X, b, deg, N, 1, p );
    if degree(a,X[1])<>d1 then 
        printf("Division check: Wrong degree of a(%a) mod p\n", X[1]); return 'FAIL(6)'; 
    fi; 
    g := Gcd(a,diff(a,X[1]),'Asf') mod p; 
    if singlepow > 0 then Divide(Asf,X[1],'qo') mod p; Asf := qo; fi;   
    if degree(Asf,X[1])<>Sdf1 then 
        printf("Division check: Wrong degree of sqf(a(%a)) mod p\n", X[1]); return 'FAIL(7)'; 
    fi;
    for rr to r do 
        if Divide(Asf,fY[rr],'qo') mod p then Asf := qo; 
        else printf("Division check fails, possible failure of weak SHL assumption\n"); return 'FAIL(8)'; 
        fi; 
    od; 
    #if j = N then BBxne := CNT - BBxns; printf("Total #probes for H.L.x[%d] = %d\n", N,BBxne); fi; 
    bbevaltime := tBBeval - bbevaltime; bbdettime := tBBdet - bbdettime; 
    #if j = N then printf( "Timings: BBeval=%fs BBdet=%fs\n", bbevaltime,bbdettime ); fi;
    et := time() - st0; 
    #if j = N then printf( "Total time for H.L. step %d  = %fs (to recover %a)\n", j, et, X[j] ); fi;
    return [seq(Termsf[rr],rr=1..r)]; 
end:

CMBBSHL := proc( B::procedure, f1::list, NN::posint, X::list, alpha::Array, deg::list, Sdf1::integer, N::posint, p::prime, singlepow::nonnegint,VarPerm::nonnegint,sqfinterp::nonnegint,MapleCode::list, LI::nonnegint ) 
    local r,du,d1,dj,d1s,dmax,xx,II,MM,dx,dy,MMsqf,a,g,Asf,lcAs,nvarsAsf,f0A,fA,sizeW,Wtemp,DDX,DDY,f,rr,ii,jj,fnm1,st,et; 
    if NN = 2 then r := nops(f1); st := time(); 
    #printf("Before BHL: f1 := %a\n", f1);
        d1 := deg[1]; dj := deg[NN];
	if singlepow > 0 then d1s := d1 - singlepow; else d1s := d1; fi;
    
        if sqfinterp = 1 then #printf("Intial BHL: sqrfree image interp completed\n"); 
	    Asf := SquareFreeImage(B,X,alpha,[1,NN],[d1s,dj],p,singlepow,LI); 
        else #printf("Initial BHL: bivariate interp completed\n");
            if MapleCode[1] = 1 then 
                a := Interp2var_Maple( B, X, alpha, [1,NN], deg, N, p, singlepow, LI ); 
            else 
                dmax := max(d1s,dj); 
                xx := Array(0..dmax, i->i+1, datatype=integer[8]); # Avoid 0 for the case singlepow > 0 
                II := Array(0..dmax, datatype=integer[8]); 
                NewtonInv( xx, dmax+1, II, p );
	        MM := Array(0..(d1s+1)*(dj+1)-1,datatype=integer[8]);
	        GetEvaluations( B, alpha, [1,NN], [d1s,dj], xx, MM, p, singlepow );
                Interp2var( 1,NN,d1s,dj,dmax,xx,II,MM,p ); 		
                a := Expand(add(add(MM[(d1s+1)*jj+ii]*X[1]^ii,ii=0..d1s)*X[NN]^jj,jj=0..dj)) mod p;
                clearArray( MM,0,(d1s+1)*(dj+1)-1 );
            fi;
            #printf("d1 = %d, d1s=%d, X = %a, a = %a\n", d1,d1s, X, a);  
            if degree(a,X[1])<>d1s then 
	        printf("Initial BHL: Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[NN],X[1]); return 'FAIL(2)'; 
	    elif degree(a,X[NN])<>dj then 
                printf("Initial BHL: Wrong degree of a(%a,%a) mod p in %a\n",X[1],X[NN],X[NN]); return 'FAIL(2)'; 
	    fi;
            g := Gcd(a,diff(a,X[1]),'Asf') mod p;
            lcAs := lcoeff(lcoeff(Asf,X[1]),X[NN]); 
            Asf := Asf/lcAs mod p;     
            #printf("Before BHL: Asf = %a\n", Asf);
        fi;
        if degree(Asf,X[1])<>Sdf1 then 
	    printf("Wrong degree of sqf(a(%a,%a)) mod p in %a\n",X[1],X[NN],X[1]); return 'FAIL(3)'; 
	fi;    
        dx := degree(Asf,X[1]); dy := degree(Asf,X[NN]); 
        nvarsAsf := nops([op(indets(Asf))]); 
		
        if MapleCode[3] = 1 or r = 1 or nvarsAsf = 1 then 
            f := BHL_Maple( Asf, f1, [X[1],X[NN]], alpha[NN], p ); 
            et := time()-st; #printf("Total time for initial BHL = %fs (to recover %a)\n",et,X[NN]);
            #printf("After BHL_Maple, f = %a\n",f);
            return f;
 	else 
            MMsqf := Array(0..(dx+1)*(dy+1)-1,datatype=integer[8]);
            Bivarpoly2matrix( Asf, [X[1],X[NN]], dx, dy, MMsqf );
            du := max(map(degree,f1,X[1]));
            f0A := Array(1..r,0..du,datatype=integer[8],order=C_order);
            Initialf2matrix( f1, X[1], f0A );
            fA := Array(1..r,0..2*(dx+1)*(dy+1)-1,datatype=integer[8],order=C_order);
            sizeW := allocateSpace(r,dx,dy,du);
            Wtemp := Array(1..sizeW,datatype=integer[8],order=C_order); 		
            DDX := Array(1..r,0..dy,datatype=integer[8],order=C_order);
            DDY := Array(1..r,datatype=integer[8],order=C_order);
            BHL_C( MMsqf, dx, dy, DDX, DDY, f0A, r, du, fA, alpha[NN], Wtemp, p ); 
            f := [seq(Expand(add(add(fA[rr][(dx+1)*jj+ii]*X[1]^ii,ii=0..DDX[rr,jj])*X[NN]^jj,jj=0..DDY[rr])) mod p, rr=1..r)];
            #printf("After BHL_C, f = %a\n",f);
            clearArray( Wtemp,1,sizeW ); clearArray2d( f0A,1,r,0,du ); clearArray2d( fA,1,r,0,2*(dx+1)*(dy+1)-1 );  
            clearArray( MMsqf,0,(dx+1)*(dy+1)-1 );
            et := time()-st; #printf("Total time for initial BHL = %fs (to recover %a)\n",et,X[NN]);
            return f; 
        fi;
    fi; 
    fnm1 := CMBBSHL( B, f1, NN-1, X, alpha, deg, Sdf1, N, p, singlepow, VarPerm, sqfinterp, MapleCode, LI );  
    return CMBBSHL_stepj( B, fnm1, NN, X, alpha, deg, Sdf1, N, p, singlepow, VarPerm, sqfinterp, MapleCode, LI );
end:

CMBBSHLcont := proc( B::procedure, X::list, alpha::Array, deg::list, p::prime, VarPerm::nonnegint, ContFlag::nonnegint, sqfinterp::nonnegint, MapleCode::list, LI::nonnegint )
    local MakeBF,BBF,alpha0,MakeCont,BBC,alphaC,i,a1,a1f,a1flist,a1fArray,singlepow,r,f1,Sdf1,rr,fN,dn,nfN,ic,pow,fNlist,fNA,degFpp,j,degC,F_pp,co,et,st:=time();
    N := nops(X); printf("CMBBSHLcont: N = %d\n",N);
    if N = 0 then 
        co := B_chrem( B, alpha, p ); 
        et := time()-st; printf("Total time for CMBBSHLcont at N=%d = %fs\n",N,et); 
        printf("N = %d, integer content = %a\n", N, co);
        return co; 
    fi; 
    if deg[1] = 0 then 
        MakeCont := proc( B::procedure, alpha1, p::prime ) 
            proc( alpha::Array, p::prime ) 
                local i, alphaF, na:=numelems(alpha);
                if LI = 0 then alphaF := Array(1..na+1,datatype=integer[8]);
                else alphaF := Array(1..na+1);
                fi;
                alphaF[1] := alpha1; 
                for i to na do alphaF[i+1] := alpha[i]; od; 
                B( alphaF, p ); 
            end; 
        end;
        alpha0 := rand(p)();
        BBC := MakeCont( B, alpha0, p );
        if LI = 0 then alphaC:= Array(1..N-1,datatype=integer[8]): 
        else alphaC:= Array(1..N-1):
        fi; 
	for i to N-1 do alphaC[i] := alpha[i+1]; od:
        if ContFlag = 0 then 
            co := B_chrem( BBC, alphaC, p );
            et := time()-st; printf("Total time for CMBBSHLcont at N=%d (deg(a,%a)=0, ContFlag=0) = %fs\n",N,X[1],et);
            return co; 
        fi;
        et := time()-st; printf("Total time for CMBBSHLcont at N=%d (deg(a,%a)=0, MakeCont only) = %fs\n",N,X[1],et);
        co := CMBBSHLcont( BBC, X[2..N], alphaC, deg[2..N], p, VarPerm, ContFlag, sqfinterp, MapleCode, LI );
        printf("N = %d, content (in the variable %a) = %a\n", N,X[1],co);
        return co; 
    else 
        a1 := Interp1varZ_chrem( B, X, alpha, deg, N, 1, p ); #printf("a(%a) = %a\n",X[1],a1);
        if N = 1 then return factor(a1); fi;  
        a1f := unpack(factor(a1)): printf("N = %d, factors of a(%a) = %a\n", N, X[1],a1f);
        a1flist := sqrfreefactors(a1f);
        a1fArray := a1flist[2];   
        if a1fArray[0] <> 0 then singlepow := a1fArray[0][2]; else singlepow := 0; fi;
        r := a1flist[1]; 
        if r = 0 then fNA := a1fArray; 
            F_pp := pack_nocontent(a1fArray,r); 
	else 
	    f1 := a1flist[3];  f1 := map( modp,f1,p ); #printf("f1 = %a\n", f1);
            Sdf1 := 0; for rr to r do Sdf1 += degree(f1[rr],X[1]); od;         
            fN := CMBBSHL(B, f1, N, X, alpha, deg, Sdf1, N, p, singlepow, VarPerm, sqfinterp, MapleCode, LI); 
            printf("fN = %a\n",fN);      
            fN := iratrecon(fN, p);  
            dn := denom(fN); 
	    fN := numer(fN); nfN := map(nops, fN); 
	    ic := a1fArray[-1][1]: # integer coefficient in a1f  
	    for i to r do pow := a1fArray[i][2];
		fNlist[i] := [fN[i],pow]; ic := ic/(dn[i]^pow);   
	    od:
	    fNlist[-1] := [ic,1]: # updated integer coefficient 
            fNlist[0] := a1fArray[0]:  
	    fNA := Array(-1..r,[seq(fNlist[i],i=-1..r)]);  
	    F_pp := pack_nocontent(fNA,r);
        fi;
        et := time()-st; printf("Total time for CMBBSHLcont at N=%d (Computing F_pp) = %fs\n",N,et);
        if ContFlag = 0 then printf("F_pp = %a\n", F_pp); return F_pp; fi; 
        
        #MakeCont := proc( B::procedure, F::polynom, X::list, p::prime )
        #    local alpha1 := rand(p)(); 
        #    proc( Y::list, alpha::Array, p::prime ) 
        #        local Feval,i,nY:=nops(Y),alphaF;
        #        if LI = 0 then alphaF := Array(1..nY+1,datatype=integer[8]);
        #  	 else alphaF := Array(1..nY+1);
        #        fi; 
        #        alphaF[1] := alpha1; for i to nY do alphaF[i+1] := alpha[i]; od; 
        #        Feval := Eval( F, [X[1]=alpha1,seq(Y[i]=alpha[i],i=1..nY)] ) mod p; 
        #        if Feval = 0 then return 'FAIL_Feval_0'; fi;
        #        B( [X[1],op(Y)], alphaF, p )/Feval mod p; 
        #    end;
        #end;
        (*  
        MakeBF := proc( F::polynom,X::list ) local N:=nops(X); 
            proc( alpha::Array, p::prime ) local i; 
                Eval(F,[seq(X[i]=alpha[i],i=1..N)]) mod p;
            end;
        end;
        BBF := MakeBF( F_pp, X );
        *)
          
        MakeBF := proc( FA::Array, r::nonnegint, X::list ) local N := nops(X); 
	    proc( alpha::Array, p::prime )
	        local tmult:=1,i,t,tmon,tpow,teval,ii;
		for i from 0 to r do t := FA[i];
		    if i = 0 then if t = 0 then next i; fi; fi; 
                    tmon := t[1]; tpow := t[2];
                    teval := Eval( t[1], [seq(X[ii]=alpha[ii],ii=1..N)] ) mod p; 
                    teval := teval^tpow mod p; 
                    tmult := tmult*teval mod p;
                od; 
                tmult;
            end;
        end:
        BBF := MakeBF( fNA, r, X ); 
       
        MakeCont := proc( B::procedure, BF::procedure, alpha1, p::prime )
            proc( alpha::Array, p::prime ) 
                local na := numelems(alpha),alphaNew,g;
                if LI = 0 then alphaNew := Array(1..na+1,datatype=integer[8]);
                else alphaNew := Array(1..na+1);
                fi;  
                alphaNew[1] := alpha1;
                for i to na do alphaNew[i+1]:=alpha[i]; od;
                g := BF( alphaNew, p ); 
                if g = 0 then return FAIL; fi; 
                B( alphaNew, p )/g mod p;
            end;
        end;     
        alpha0 := rand(p)();
        BBC := MakeCont( B, BBF, alpha0, p );
        
        degFpp := [seq(degree(F_pp,X[j]),j=1..N)];
        degC := deg - degFpp;
        if LI = 0 then alphaC := Array(1..N-1,datatype=integer[8]): 
        else alphaC := Array(1..N-1):
        fi; 
	for i to N-1 do alphaC[i] := alpha[i+1]; od:
        
        co := CMBBSHLcont( BBC, X[2..N], alphaC, degC[2..N], p, VarPerm, ContFlag, sqfinterp, MapleCode, LI );
        printf("N = %d, f_pp = %a, content (in the variable %a) = %a\n", N,F_pp,X[1],co);
        return co*F_pp; 
    fi;
end:
