10 #ifndef MULT_WILSON_QXS_INCLUDED
11 #define MULT_WILSON_QXS_INCLUDED
18 real_t kappa,
int *bc,
int *Nsize,
int *do_comm)
24 int Nstv = Nxv * Nyv * Nz * Nt;
25 int Nst = Nstv *
VLEN;
27 svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
28 svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
29 set_predicate_xp(pg1_xp, pg2_xp);
30 set_predicate_xm(pg1_xm, pg2_xm);
31 set_predicate_yp(pg1_yp, pg2_yp);
32 set_predicate_ym(pg1_ym, pg2_ym);
37 int Nxyz = Nxv * Nyv * Nz;
40 set_threadtask(ith, nth, is, ns, Nstv);
42 for (
int site = is; site < ns; ++site) {
44 int iyzt = site / Nxv;
49 int ixy = ix + Nxv * iy;
50 int ixyz = ixy + Nxy * iz;
60 int nei = ix + 1 + Nxv * iyzt;
61 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
63 }
else if (do_comm[0] == 0) {
65 int nei = 0 + Nxv * iyzt;
66 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
72 int nei = ix - 1 + Nxv * iyzt;
73 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
76 }
else if (do_comm[0] == 0) {
78 int nei = Nxv - 1 + Nxv * iyzt;
79 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
85 int iy2 = (iy + 1) % Nyv;
86 int nei = ix + Nxv * (iy2 + Nyv * izt);
88 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
91 }
else if (do_comm[1] == 0) {
92 int iy2 = (iy + 1) % Nyv;
93 int nei = ix + Nxv * (iy2 + Nyv * izt);
95 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
101 int iy2 = (iy - 1 + Nyv) % Nyv;
102 int nei = ix + Nxv * (iy2 + Nyv * izt);
104 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
107 }
else if (do_comm[1] == 0) {
108 int iy2 = (iy - 1 + Nyv) % Nyv;
109 int nei = ix + Nxv * (iy2 + Nyv * izt);
111 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
116 if ((iz < Nz - 1) || (do_comm[2] == 0)) {
117 int iz2 = (iz + 1) % Nz;
118 int nei = ixy + Nxy * (iz2 + Nz * it);
123 if ((iz > 0) || (do_comm[2] == 0)) {
124 int iz2 = (iz - 1 + Nz) % Nz;
125 int nei = ixy + Nxy * (iz2 + Nz * it);
130 if ((it < Nt - 1) || (do_comm[3] == 0)) {
131 int it2 = (it + 1) % Nt;
132 int nei = ixyz + Nxyz * it2;
134 mult_wilson_tpb_dirac(v2v, &u[
VLEN *
NDF * site],
138 if ((it > 0) || (do_comm[3] == 0)) {
139 int it2 = (it - 1 + Nt) % Nt;
140 int nei = ixyz + Nxyz * it2;
142 mult_wilson_tmb_dirac(v2v, &u[
VLEN *
NDF * nei],
146 mult_wilson_aypx_save(&v2[
VLEN *
NVCD * site],
147 -kappa, v2v, &v1[
VLEN *
NVCD * site]);
159 int *bc,
int *Nsize,
int *do_comm)
165 int Nstv = Nxv * Nyv * Nz * Nt;
166 int Nst = Nstv *
VLEN;
169 int Nxyz = Nxv * Nyv * Nz;
171 svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
172 svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
173 set_predicate_xp(pg1_xp, pg2_xp);
174 set_predicate_xm(pg1_xm, pg2_xm);
175 set_predicate_yp(pg1_yp, pg2_yp);
176 set_predicate_ym(pg1_ym, pg2_ym);
178 set_index_xp(svidx_xp);
179 set_index_xm(svidx_xm);
181 if (do_comm[0] > 0) {
185 int Nyzt = Nyv * Nz * Nt;
187 int ith, nth, is, ns;
188 set_threadtask(ith, nth, is, ns, Nyzt);
190 for (
int iyzt = is; iyzt < ns; ++iyzt) {
193 int site = ix + Nxv * iyzt;
195 set_index_xm(svidx_xm);
196 mult_wilson_xp1(pg2_xm, svidx_xm,
197 &buf_xp[ibf], &v1[
VLEN *
NVCD * site]);
201 int site = ix + Nxv * iyzt;
203 set_index_xp(svidx_xp);
204 mult_wilson_xm1(pg2_xp, svidx_xp,
205 &buf_xm[ibf], &u[
VLEN *
NDF * site],
211 if (do_comm[1] > 0) {
215 int Nxzt = Nxv * Nz * Nt;
217 int ith, nth, is, ns;
218 set_threadtask(ith, nth, is, ns, Nxzt);
220 for (
int ixzt = is; ixzt < ns; ++ixzt) {
222 int izt = ixzt / Nxv;
225 int site = ix + Nxv * (iy + Nyv * izt);
227 mult_wilson_yp1(pg2_ym,
228 &buf_yp[ibf], &v1[
VLEN *
NVCD * site]);
232 int site = ix + Nxv * (iy + Nyv * izt);
234 mult_wilson_ym1(pg2_yp,
235 &buf_ym[ibf], &u[
VLEN *
NDF * site],
241 if (do_comm[2] > 0) {
245 int Nxyt = Nxv * Nyv * Nt;
247 int ith, nth, is, ns;
248 set_threadtask(ith, nth, is, ns, Nxyt);
250 for (
int ixyt = is; ixyt < ns; ++ixyt) {
251 int ixy = ixyt % Nxy;
255 int site = ixy + Nxy * (iz + Nz * it);
256 int ibf =
VLEN *
NVC *
ND2 * (ixy + Nxy * it);
257 mult_wilson_zp1(&buf_zp[ibf], &v1[
VLEN *
NVCD * site]);
261 int site = ixy + Nxy * (iz + Nz * it);
262 int ibf =
VLEN *
NVC *
ND2 * (ixy + Nxy * it);
263 mult_wilson_zm1(&buf_zm[ibf], &u[
VLEN *
NDF * site],
269 if (do_comm[3] > 0) {
273 int ith, nth, is, ns;
274 set_threadtask(ith, nth, is, ns, Nxyz);
277 for (
int ixyz = is; ixyz < ns; ++ixyz) {
278 int site = ixyz + Nxyz * it;
279 mult_wilson_tp1_dirac(&buf_tp[
VLEN *
NVC *
ND2 * ixyz],
285 for (
int ixyz = is; ixyz < ns; ++ixyz) {
286 int site = ixyz + Nxyz * it;
287 mult_wilson_tm1_dirac(&buf_tm[
VLEN *
NVC *
ND2 * ixyz],
301 real_t kappa,
int *bc,
int *Nsize,
int *do_comm)
307 int Nstv = Nxv * Nyv * Nz * Nt;
308 int Nst = Nstv *
VLEN;
311 int Nxyz = Nxv * Nyv * Nz;
313 svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
314 svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
315 set_predicate_xp(pg1_xp, pg2_xp);
316 set_predicate_xm(pg1_xm, pg2_xm);
317 set_predicate_yp(pg1_yp, pg2_yp);
318 set_predicate_ym(pg1_ym, pg2_ym);
320 set_index_xp(svidx_xp);
321 set_index_xm(svidx_xm);
323 int ith, nth, is, ns;
324 set_threadtask(ith, nth, is, ns, Nstv);
326 for (
int site = is; site < ns; ++site) {
328 int iyzt = site / Nxv;
330 int izt = site / Nxy;
333 int ixy = ix + Nxv * iy;
334 int ixyz = ixy + Nxy * iz;
337 clear_vec(v2v,
NVCD);
340 if ((ix == Nxv - 1) && (do_comm[0] > 0)) {
343 set_index_xp(svidx_xp);
344 mult_wilson_xp2(pg1_xp, pg2_xp, svidx_xp,
346 &v1[
VLEN *
NVCD * site], &buf_xp[ibf]);
350 if ((ix == 0) && (do_comm[0] > 0)) {
353 set_index_xm(svidx_xm);
354 mult_wilson_xm2(pg1_xm, pg2_xm, svidx_xm,
356 &v1[
VLEN *
NVCD * site], &buf_xm[ibf]);
360 if ((iy == Nyv - 1) && (do_comm[1] > 0)) {
363 mult_wilson_yp2(pg1_yp, pg2_yp,
365 &v1[
VLEN *
NVCD * site], &buf_yp[ibf]);
369 if ((iy == 0) && (do_comm[1] > 0)) {
372 mult_wilson_ym2(pg1_ym, pg2_ym,
374 &v1[
VLEN *
NVCD * site], &buf_ym[ibf]);
378 if ((iz == Nz - 1) && (do_comm[2] > 0)) {
379 int ibf =
VLEN *
NVC *
ND2 * (ixy + Nxy * it);
381 mult_wilson_zp2(v2v, &u[
VLEN *
NDF * site], &buf_zp[ibf]);
385 if ((iz == 0) && (do_comm[2] > 0)) {
386 int ibf =
VLEN *
NVC *
ND2 * (ixy + Nxy * it);
387 mult_wilson_zm2(v2v, &buf_zm[ibf]);
391 if ((it == Nt - 1) && (do_comm[3] > 0)) {
393 mult_wilson_tp2_dirac(v2v, &u[
VLEN *
NDF * site],
398 if ((it == 0) && (do_comm[3] > 0)) {
399 mult_wilson_tm2_dirac(v2v, &buf_tm[
VLEN *
NVC *
ND2 * ixyz]);
404 mult_wilson_aypx_save(&v2[
VLEN *
NVCD * site],
405 -kappa, v2v, &v2[
VLEN *
NVCD * site]);
418 int Nstv = Nxv * Nyv * Nz * Nt;
422 int ith, nth, is, ns;
423 set_threadtask(ith, nth, is, ns, Nstv);
425 for (
int site = is; site < ns; ++site) {
428 for (
int ic = 0; ic <
NC; ++ic) {
429 mult_gm5_dirac_vec(pg, &vv2[
VLEN * 2 *
ND * ic],
430 &vv1[
VLEN * 2 *
ND * ic]);