10 #ifndef MULT_WILSON_QXS_INCLUDED 
   11 #define MULT_WILSON_QXS_INCLUDED 
   // NOTE(review): this listing is a sampled excerpt. The function name, its
   // leading parameters, the opening brace and many body lines are not visible,
   // so the comments below describe only the statements that are shown.
   // Visible tail of the parameter list: a real_t kappa and three int pointers
   // (bc, Nsize, do_comm).
   18                                        real_t kappa, 
int *bc, 
int *Nsize, 
int *do_comm)
 
   // Nstv is the product of the four extents Nxv, Nyv, Nz and Nt.
   24   int Nstv = Nxv * Nyv * Nz * Nt;
 
   // Nst scales Nstv by VLEN.
   25   int Nst  = Nstv * 
VLEN;
 
   // One pair of SVE predicates per direction (+x, -x, +y, -y); each pair is
   // handed to the matching set_predicate_* helper below.
   27   svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
 
   28   svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
 
   29   set_predicate_xp(pg1_xp, pg2_xp);
 
   30   set_predicate_xm(pg1_xm, pg2_xm);
 
   31   set_predicate_yp(pg1_yp, pg2_yp);
 
   32   set_predicate_ym(pg1_ym, pg2_ym);
 
   // Product of the x, y and z extents; used as the stride of the t index below.
   37   int Nxyz = Nxv * Nyv * Nz;
 
   // The site loop below runs over [is, ns); presumably set_threadtask fills
   // is/ns with this thread's share of the Nstv sites -- TODO confirm.
   40   set_threadtask(ith, nth, is, ns, Nstv);
 
   42   for (
int site = is; site < ns; ++site) {
 
   // Combined (y, z, t) index of this site: site with the x index divided out.
   44     int iyzt = site / Nxv;
 
   // Combined indices: ixy has stride Nxv for y, ixyz adds z with stride Nxy.
   49     int ixy  = ix + Nxv * iy;
 
   50     int ixyz = ixy + Nxy * iz;
 
   // +x direction: neighbour index with the x index advanced by one at the same
   // iyzt. (The condition guarding this branch is not visible in this excerpt.)
   60       int    nei = ix + 1 + Nxv * iyzt;
 
   // The +x kernel is given both +x predicates, v2v, and u at the current site.
   61       mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN * 
NDF * site],
 
   // Alternative branch when do_comm[0] == 0: the neighbour is taken at x
   // index 0 of the same iyzt.
   63     } 
else if (do_comm[0] == 0) {  
 
   65       int    nei = 0 + Nxv * iyzt;
 
   66       mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN * 
NDF * site],
 
   // -x direction: neighbour index with the x index reduced by one.
   72       int    nei = ix - 1 + Nxv * iyzt;
 
   73       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
   // Alternative branch when do_comm[0] == 0: the neighbour is taken at x
   // index Nxv - 1 of the same iyzt.
   76     } 
else if (do_comm[0] == 0) {   
 
   78       int    nei = Nxv - 1 + Nxv * iyzt;
 
   79       mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
 
   // +y direction: iy2 is iy + 1 wrapped modulo Nyv; nei rebuilds the site
   // index from ix, iy2 and izt.
   85       int    iy2 = (iy + 1) % Nyv;
 
   86       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
   88       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
   // Branch for do_comm[1] == 0. The visible iy2/nei expressions are identical
   // to the branch above; the remaining call arguments are not shown.
   91     } 
else if (do_comm[1] == 0) {  
 
   92       int    iy2 = (iy + 1) % Nyv;
 
   93       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
   95       mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
 
   // -y direction: iy2 is iy - 1 wrapped modulo Nyv (Nyv is added first so the
   // operand of % is non-negative for iy >= 0).
  101       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  102       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  104       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
   // Branch for do_comm[1] == 0; same visible iy2/nei expressions as above.
  107     } 
else if (do_comm[1] == 0) {  
 
  108       int    iy2 = (iy - 1 + Nyv) % Nyv;
 
  109       int    nei = ix + Nxv * (iy2 + Nyv * izt);
 
  111       mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
 
   // +z direction: executed unless the site is on the last z slice while
   // do_comm[2] is non-zero.
  116     if ((iz < Nz - 1) || (do_comm[2] == 0)) {
 
  117       int    iz2 = (iz + 1) % Nz;
 
  118       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
   // -z direction: executed unless the site is on the first z slice while
   // do_comm[2] is non-zero.
  123     if ((iz > 0) || (do_comm[2] == 0)) {
 
  124       int    iz2 = (iz - 1 + Nz) % Nz;
 
  125       int    nei = ixy + Nxy * (iz2 + Nz * it);
 
   // +t direction: executed unless the site is on the last t slice while
   // do_comm[3] is non-zero.
  130     if ((it < Nt - 1) || (do_comm[3] == 0)) {
 
  131       int    it2 = (it + 1) % Nt;
 
  132       int    nei = ixyz + Nxyz * it2;
 
   // The +t kernel is passed u at the current site.
  134       mult_wilson_tpb_dirac(v2v, &u[
VLEN * 
NDF * site],
 
   // -t direction: executed unless the site is on the first t slice while
   // do_comm[3] is non-zero.
  138     if ((it > 0) || (do_comm[3] == 0)) {
 
  139       int    it2 = (it - 1 + Nt) % Nt;
 
  140       int    nei = ixyz + Nxyz * it2;
 
   // The -t kernel is passed u at the neighbour site (nei), not at site.
  142       mult_wilson_tmb_dirac(v2v, &u[
VLEN * 
NDF * nei],
 
   // Final call for this site: destination is v2 at site; the inputs are
   // -kappa, v2v and v1 at site. Presumably this stores
   // (-kappa) * v2v + v1 -- TODO confirm against mult_wilson_aypx_save.
  146     mult_wilson_aypx_save(&v2[
VLEN * 
NVCD * site],
 
  147                           -kappa, v2v, &v1[
VLEN * 
NVCD * site]);
 
  // NOTE(review): this listing is a sampled excerpt. The function name, its
  // leading parameters, the opening brace and many body lines (including the
  // definitions of ix, iy, iz, it and ibf in several places) are not visible.
  // Comments describe only what the shown statements do.
  // Visible tail of the parameter list: three int pointers.
  159   int *bc, 
int *Nsize, 
int *do_comm)
 
  // Nstv is the product of the four extents; Nst scales it by VLEN.
  165   int Nstv = Nxv * Nyv * Nz * Nt;
 
  166   int Nst  = Nstv * 
VLEN;
 
  // Product of the x, y and z extents; used in the t section below.
  169   int Nxyz = Nxv * Nyv * Nz;
 
  // One pair of SVE predicates per direction (+x, -x, +y, -y), each passed to
  // the matching set_predicate_* helper.
  171   svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
 
  172   svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
 
  173   set_predicate_xp(pg1_xp, pg2_xp);
 
  174   set_predicate_xm(pg1_xm, pg2_xm);
 
  175   set_predicate_yp(pg1_yp, pg2_yp);
 
  176   set_predicate_ym(pg1_ym, pg2_ym);
 
  // svidx_xp / svidx_xm are passed to the set_index_* helpers (their
  // declarations are not visible here).
  178   set_index_xp(svidx_xp);
 
  179   set_index_xm(svidx_xm);
 
  // x-direction section: entered only when do_comm[0] > 0.
  181   if (do_comm[0] > 0) {
 
  // The loop below runs over combined (y, z, t) indices; Nyzt is their count.
  185     int Nyzt = Nyv * Nz * Nt;
 
  // Presumably set_threadtask fills is/ns with this thread's share of Nyzt
  // -- TODO confirm.
  187     int ith, nth, is, ns;
 
  188     set_threadtask(ith, nth, is, ns, Nyzt);
 
  190     for (
int iyzt = is; iyzt < ns; ++iyzt) {
 
  193         int site = ix + Nxv * iyzt;
 
  // NOTE(review): set_index_xm(svidx_xm) was already called before this
  // section; it is invoked again here.
  195         set_index_xm(svidx_xm);
 
  // Note the pairing: the xp1 kernel receives the xm predicate and index
  // (pg2_xm, svidx_xm), a location in buf_xp, and v1 at this site.
  196         mult_wilson_xp1(pg2_xm, svidx_xm,
 
  197                         &buf_xp[ibf], &v1[
VLEN * 
NVCD * site]);
 
  201         int site = ix + Nxv * iyzt;
 
  203         set_index_xp(svidx_xp);
 
  // The xm1 kernel receives the xp predicate and index (pg2_xp, svidx_xp), a
  // location in buf_xm, and u at this site; further arguments are not shown.
  204         mult_wilson_xm1(pg2_xp, svidx_xp,
 
  205                         &buf_xm[ibf], &u[
VLEN * 
NDF * site],
 
  // y-direction section: entered only when do_comm[1] > 0.
  211   if (do_comm[1] > 0) {
 
  // The loop below runs over combined (x, z, t) indices; Nxzt is their count.
  215     int Nxzt = Nxv * Nz * Nt;
 
  217     int ith, nth, is, ns;
 
  218     set_threadtask(ith, nth, is, ns, Nxzt);
 
  220     for (
int ixzt = is; ixzt < ns; ++ixzt) {
 
  // Combined (z, t) index: ixzt with the x index divided out.
  222       int izt = ixzt / Nxv;
 
  225         int site = ix + Nxv * (iy + Nyv * izt);
 
  // The yp1 kernel receives the ym predicate pg2_ym, a location in buf_yp,
  // and v1 at this site.
  227         mult_wilson_yp1(pg2_ym,
 
  228                         &buf_yp[ibf], &v1[
VLEN * 
NVCD * site]);
 
  232         int site = ix + Nxv * (iy + Nyv * izt);
 
  // The ym1 kernel receives the yp predicate pg2_yp, a location in buf_ym,
  // and u at this site; further arguments are not shown.
  234         mult_wilson_ym1(pg2_yp,
 
  235                         &buf_ym[ibf], &u[
VLEN * 
NDF * site],
 
  // z-direction section: entered only when do_comm[2] > 0.
  241   if (do_comm[2] > 0) {
 
  // The loop below runs over combined (x, y, t) indices; Nxyt is their count.
  245     int Nxyt = Nxv * Nyv * Nt;
 
  247     int ith, nth, is, ns;
 
  248     set_threadtask(ith, nth, is, ns, Nxyt);
 
  250     for (
int ixyt = is; ixyt < ns; ++ixyt) {
 
  // Combined (x, y) part of the loop index.
  251       int ixy = ixyt % Nxy;
 
  255         int site = ixy + Nxy * (iz + Nz * it);
 
  // Buffer offset: VLEN * NVC * ND2 entries per (ixy, it) slot.
  256         int ibf  = 
VLEN * 
NVC * 
ND2 * (ixy + Nxy * it);
 
  257         mult_wilson_zp1(&buf_zp[ibf], &v1[
VLEN * 
NVCD * site]);
 
  261         int site = ixy + Nxy * (iz + Nz * it);
 
  262         int ibf  = 
VLEN * 
NVC * 
ND2 * (ixy + Nxy * it);
 
  // The zm1 kernel additionally receives u at this site; further arguments
  // are not shown.
  263         mult_wilson_zm1(&buf_zm[ibf], &u[
VLEN * 
NDF * site],
 
  // t-direction section: entered only when do_comm[3] > 0.
  269   if (do_comm[3] > 0) {
 
  // The loops below run over combined (x, y, z) indices, Nxyz in total.
  273     int ith, nth, is, ns;
 
  274     set_threadtask(ith, nth, is, ns, Nxyz);
 
  277       for (
int ixyz = is; ixyz < ns; ++ixyz) {
 
  278         int site = ixyz + Nxyz * it;
 
  // buf_tp is indexed with VLEN * NVC * ND2 entries per ixyz.
  279         mult_wilson_tp1_dirac(&buf_tp[
VLEN * 
NVC * 
ND2 * ixyz],
 
  285       for (
int ixyz = is; ixyz < ns; ++ixyz) {
 
  286         int site = ixyz + Nxyz * it;
 
  // buf_tm uses the same per-ixyz layout as buf_tp above.
  287         mult_wilson_tm1_dirac(&buf_tm[
VLEN * 
NVC * 
ND2 * ixyz],
 
  // NOTE(review): this listing is a sampled excerpt. The function name, its
  // leading parameters, the opening brace and many body lines (including the
  // definitions of ix, iy, iz, it and several ibf values) are not visible.
  // Comments describe only what the shown statements do.
  // Visible tail of the parameter list: a real_t kappa and three int pointers.
  301                                     real_t kappa, 
int *bc, 
int *Nsize, 
int *do_comm)
 
  // Nstv is the product of the four extents; Nst scales it by VLEN.
  307   int Nstv = Nxv * Nyv * Nz * Nt;
 
  308   int Nst  = Nstv * 
VLEN;
 
  311   int Nxyz = Nxv * Nyv * Nz;
 
  // One pair of SVE predicates per direction (+x, -x, +y, -y), each passed to
  // the matching set_predicate_* helper.
  313   svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
 
  314   svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
 
  315   set_predicate_xp(pg1_xp, pg2_xp);
 
  316   set_predicate_xm(pg1_xm, pg2_xm);
 
  317   set_predicate_yp(pg1_yp, pg2_yp);
 
  318   set_predicate_ym(pg1_ym, pg2_ym);
 
  320   set_index_xp(svidx_xp);
 
  321   set_index_xm(svidx_xm);
 
  // The site loop below runs over [is, ns); presumably set_threadtask fills
  // is/ns with this thread's share of the Nstv sites -- TODO confirm.
  323   int ith, nth, is, ns;
 
  324   set_threadtask(ith, nth, is, ns, Nstv);
 
  326   for (
int site = is; site < ns; ++site) {
 
  // Combined indices derived from site: iyzt divides out x (Nxv), izt divides
  // out the (x, y) plane (Nxy).
  328     int iyzt = site / Nxv;
 
  330     int izt  = site / Nxy;
 
  333     int ixy  = ix + Nxv * iy;
 
  334     int ixyz = ixy + Nxy * iz;
 
  // clear_vec is applied to v2v with length argument NVCD before the
  // direction blocks below; presumably it zeroes v2v -- TODO confirm.
  337     clear_vec(v2v, 
NVCD);
 
  // +x: only for sites on the last x index, and only when do_comm[0] > 0.
  340     if ((ix == Nxv - 1) && (do_comm[0] > 0)) {
 
  // NOTE(review): set_index_xp(svidx_xp) was already called before the loop;
  // it is invoked again here.
  343       set_index_xp(svidx_xp);
 
  // The kernel is given v1 at this site and a location in buf_xp (ibf is
  // defined on a line not shown).
  344       mult_wilson_xp2(pg1_xp, pg2_xp, svidx_xp,
 
  346                       &v1[
VLEN * 
NVCD * site], &buf_xp[ibf]);
 
  // -x: only for sites on x index 0, and only when do_comm[0] > 0.
  350     if ((ix == 0) && (do_comm[0] > 0)) {
 
  353       set_index_xm(svidx_xm);
 
  354       mult_wilson_xm2(pg1_xm, pg2_xm, svidx_xm,
 
  356                       &v1[
VLEN * 
NVCD * site], &buf_xm[ibf]);
 
  // +y: only for sites on the last y index, and only when do_comm[1] > 0.
  360     if ((iy == Nyv - 1) && (do_comm[1] > 0)) {
 
  363       mult_wilson_yp2(pg1_yp, pg2_yp,
 
  365                       &v1[
VLEN * 
NVCD * site], &buf_yp[ibf]);
 
  // -y: only for sites on y index 0, and only when do_comm[1] > 0.
  369     if ((iy == 0) && (do_comm[1] > 0)) {
 
  372       mult_wilson_ym2(pg1_ym, pg2_ym,
 
  374                       &v1[
VLEN * 
NVCD * site], &buf_ym[ibf]);
 
  // +z: only for sites on the last z index, and only when do_comm[2] > 0.
  378     if ((iz == Nz - 1) && (do_comm[2] > 0)) {
 
  // Buffer offset: VLEN * NVC * ND2 entries per (ixy, it) slot.
  379       int    ibf = 
VLEN * 
NVC * 
ND2 * (ixy + Nxy * it);
 
  // The +z kernel takes v2v, u at this site, and the buf_zp slot.
  381       mult_wilson_zp2(v2v, &u[
VLEN * 
NDF * site], &buf_zp[ibf]);
 
  // -z: only for sites on z index 0, and only when do_comm[2] > 0.
  385     if ((iz == 0) && (do_comm[2] > 0)) {
 
  386       int ibf = 
VLEN * 
NVC * 
ND2 * (ixy + Nxy * it);
 
  // The -z kernel takes only v2v and the buf_zm slot (no u argument).
  387       mult_wilson_zm2(v2v, &buf_zm[ibf]);
 
  // +t: only for sites on the last t index, and only when do_comm[3] > 0.
  391     if ((it == Nt - 1) && (do_comm[3] > 0)) {
 
  // The +t kernel takes v2v and u at this site; further arguments not shown.
  393       mult_wilson_tp2_dirac(v2v, &u[
VLEN * 
NDF * site],
 
  // -t: only for sites on t index 0, and only when do_comm[3] > 0.
  398     if ((it == 0) && (do_comm[3] > 0)) {
 
  // The -t kernel takes v2v and the buf_tm slot for this ixyz.
  399       mult_wilson_tm2_dirac(v2v, &buf_tm[
VLEN * 
NVC * 
ND2 * ixyz]);
 
  // Both the destination and the final argument point at v2 for this site, so
  // the current contents of v2 are an input to this call. Presumably this
  // adds (-kappa) * v2v to v2 in place -- TODO confirm against
  // mult_wilson_aypx_save. (The condition enclosing this call is not shown.)
  404       mult_wilson_aypx_save(&v2[
VLEN * 
NVCD * site],
 
  405                             -kappa, v2v, &v2[
VLEN * 
NVCD * site]);
 
  // NOTE(review): this listing is a sampled excerpt. The enclosing function's
  // signature and the definitions of pg, vv1 and vv2 are not visible; comments
  // describe only the statements shown.
  // Nstv is the product of the four extents Nxv, Nyv, Nz and Nt.
  418   int Nstv = Nxv * Nyv * Nz * Nt;
 
  // The site loop below runs over [is, ns); presumably set_threadtask fills
  // is/ns with this thread's share of the Nstv sites -- TODO confirm.
  422   int ith, nth, is, ns;
 
  423   set_threadtask(ith, nth, is, ns, Nstv);
 
  425   for (
int site = is; site < ns; ++site) {
 
  // Inner loop over ic in [0, NC).
  428     for (
int ic = 0; ic < 
NC; ++ic) {
 
  // For each ic, mult_gm5_dirac_vec is called with predicate pg and the
  // vv2 / vv1 locations at the same offset, VLEN * 2 * ND * ic.
  429       mult_gm5_dirac_vec(pg, &vv2[
VLEN * 2 * 
ND * ic],
 
  430                          &vv1[
VLEN * 2 * 
ND * ic]);