10 #ifndef MULT_CLOVER_QXS_INCLUDED 
   11 #define MULT_CLOVER_QXS_INCLUDED 
   19                                        int *Nsize, 
int *do_comm)
 
   25   int Nstv = Nxv * Nyv * Nz * Nt;
 
   26   int Nst  = Nstv * 
VLEN;
 
   28   svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
 
   29   svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
 
   30   set_predicate_xp(pg1_xp, pg2_xp);
 
   31   set_predicate_xm(pg1_xm, pg2_xm);
 
   32   set_predicate_yp(pg1_yp, pg2_yp);
 
   33   set_predicate_ym(pg1_ym, pg2_ym);
 
   36   int Nxyz = Nxv * Nyv * Nz;
 
   39   set_threadtask(ith, nth, is, ns, Nstv);
 
   41   for (
int site = is; site < ns; ++site) {
 
   43     int iyzt = site / Nxv;
 
   48     int ixy  = ix + Nxv * iy;
 
   49     int ixyz = ixy + Nxy * iz;
 
   59       int    nei = ix + 1 + Nxv * iyzt;
 
   60       mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN * 
NDF * site],
 
   62     } 
else if (do_comm[0] == 0) {  
 
   64       int    nei = 0 + Nxv * iyzt;
 
   65       mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN * 
NDF * site],
 
   71       int    nei = ix - 1 + Nxv * iyzt;
 
   72       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
   75     } 
else if (do_comm[0] == 0) {   
 
   77       int    nei = Nxv - 1 + Nxv * iyzt;
 
   78       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
   84       int    iy2 = (iy + 1) % Nyv;
 
   85       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
   87       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
   90     } 
else if (do_comm[1] == 0) {  
 
   91       int    iy2 = (iy + 1) % Nyv;
 
   92       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
   94       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
  100       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  101       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  103       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
  106     } 
else if (do_comm[1] == 0) {  
 
  107       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  108       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  110       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
  115     if ((iz < Nz - 1) || (do_comm[2] == 0)) {
 
  116       int    iz2 = (iz + 1) % Nz;
 
  117       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
  122     if ((iz > 0) || (do_comm[2] == 0)) {
 
  123       int    iz2 = (iz - 1 + Nz) % Nz;
 
  124       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
  129     if ((it < Nt - 1) || (do_comm[3] == 0)) {
 
  130       int    it2 = (it + 1) % Nt;
 
  131       int    nei = ixyz + Nxyz * it2;
 
  133       mult_wilson_tpb_dirac(v2v, &u[
VLEN * 
NDF * site],
 
  137     if ((it > 0) || (do_comm[3] == 0)) {
 
  138       int    it2 = (it - 1 + Nt) % Nt;
 
  139       int    nei = ixyz + Nxyz * it2;
 
  141       mult_wilson_tmb_dirac(v2v, &u[
VLEN * 
NDF * nei],
 
  145     mult_clover_csw_aypx(&v2[
VLEN * 
NVCD * site], -kappa, &v2v[0],
 
  156   int *Nsize, 
int *do_comm)
 
  162   int Nstv = Nxv * Nyv * Nz * Nt;
 
  163   int Nst  = Nstv * 
VLEN;
 
  165   svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
 
  166   svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
 
  167   set_predicate_xp(pg1_xp, pg2_xp);
 
  168   set_predicate_xm(pg1_xm, pg2_xm);
 
  169   set_predicate_yp(pg1_yp, pg2_yp);
 
  170   set_predicate_ym(pg1_ym, pg2_ym);
 
  173   int Nxyz = Nxv * Nyv * Nz;
 
  175   int ith, nth, is, ns;
 
  176   set_threadtask(ith, nth, is, ns, Nstv);
 
  178   for (
int site = is; site < ns; ++site) {
 
  180     int iyzt = site / Nxv;
 
  182     int izt  = site / Nxy;
 
  185     int ixy  = ix + Nxv * iy;
 
  186     int ixyz = ixy + Nxy * iz;
 
  189     clear_vec(v2v, 
NVCD);
 
  194       int    nei = ix + 1 + Nxv * iyzt;
 
  195       mult_wilson_xpb(pg1_xp, pg2_xp, &v2v[0].v[0], &u[
VLEN * 
NDF * site],
 
  197     } 
else if (do_comm[0] == 0) {  
 
  199       int    nei = 0 + Nxv * iyzt;
 
  200       mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN * 
NDF * site],
 
  206       int    nei = ix - 1 + Nxv * iyzt;
 
  207       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
  210     } 
else if (do_comm[0] == 0) {   
 
  212       int    nei = Nxv - 1 + Nxv * iyzt;
 
  213       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
  219       int    iy2 = (iy + 1) % Nyv;
 
  220       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  222       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
  225     } 
else if (do_comm[1] == 0) {  
 
  226       int    iy2 = (iy + 1) % Nyv;
 
  227       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  229       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
  235       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  236       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  238       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
  241     } 
else if (do_comm[1] == 0) {  
 
  242       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  243       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  245       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
  250     if ((iz < Nz - 1) || (do_comm[2] == 0)) {
 
  251       int    iz2 = (iz + 1) % Nz;
 
  252       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
  257     if ((iz > 0) || (do_comm[2] == 0)) {
 
  258       int    iz2 = (iz - 1 + Nz) % Nz;
 
  259       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
  264     if ((it < Nt - 1) || (do_comm[3] == 0)) {
 
  265       int    it2 = (it + 1) % Nt;
 
  266       int    nei = ixyz + Nxyz * it2;
 
  268       mult_wilson_tpb_dirac(v2v, &u[
VLEN * 
NDF * site],
 
  272     if ((it > 0) || (do_comm[3] == 0)) {
 
  273       int    it2 = (it - 1 + Nt) % Nt;
 
  274       int    nei = ixyz + Nxyz * it2;
 
  276       mult_wilson_tmb_dirac(v2v, &u[
VLEN * 
NDF * nei],
 
  280     mult_clover_csw_aypx_chrot(
 
  281       &v2[
VLEN * 
NVCD * site], -kappa, &v2v[0].v[0],
 
  296   int Nstv = Nxv * Nyv * Nz * Nt;
 
  297   int Nst  = Nstv * 
VLEN;
 
  299   int ith, nth, is, ns;
 
  300   set_threadtask(ith, nth, is, ns, Nstv);
 
  302   for (
int site = is; site < ns; ++site) {
 
  303     mult_cswinv_chrot(&v2[
VLEN * 
NVCD * site],