Bridge++  Ver. 2.0.2
mult_Domainwall_5din_qxs-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_DOMAINWALL_5DIN_QXS_INCLUDED
11 #define MULT_DOMAINWALL_5DIN_QXS_INCLUDED
12 
13 #include "mult_common_th-inc.h"
14 
15 //====================================================================
17  real_t *vp, real_t *yp, real_t *wp,
18  real_t mq, real_t M0, int Ns, int *bc,
19  real_t *b, real_t *c,
20  int *Nsize, int *do_comm)
21 {
22  int Nxv = Nsize[0];
23  int Nyv = Nsize[1];
24  int Nz = Nsize[2];
25  int Nt = Nsize[3];
26  int Nstv = Nxv * Nyv * Nz * Nt;
27  int Nst = Nstv * VLEN;
28 
29  int Nin4 = VLEN * NVCD;
30  int Nin5 = Nin4 * Ns;
31 
32  int ith, nth, site0, site1;
33  set_threadtask(ith, nth, site0, site1, Nstv);
34 
35  for (int site = site0; site < site1; ++site) {
36  real_t *vp2 = &vp[Nin5 * site];
37  real_t *wp2 = &wp[Nin5 * site];
38  real_t *yp2 = &yp[Nin5 * site];
39 
40  for (int is = 0; is < Ns; ++is) {
41  svbool_t pg = set_predicate();
42 
43  for (int ic = 0; ic < NC; ++ic) {
44  svreal_t vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i;
45 
46  int is_up = (is + 1) % Ns;
47  real_t Fup = 0.5;
48  if (is == Ns - 1) Fup = -0.5 * mq;
49  set_aPm5_dirac_vec(vt1r, vt1i, vt2r, vt2i,
50  vt3r, vt3i, vt4r, vt4i,
51  Fup, wp2, is_up, ic);
52 
53  int is_dn = (is - 1 + Ns) % Ns;
54  real_t Fdn = 0.5;
55  if (is == 0) Fdn = -0.5 * mq;
56  add_aPp5_dirac_vec(vt1r, vt1i, vt2r, vt2i,
57  vt3r, vt3i, vt4r, vt4i,
58  Fdn, wp2, is_dn, ic);
59 
60  svreal_t xt;
61  real_t FF1 = b[is] * (4.0 - M0) + 1.0;
62  real_t FF2 = c[is] * (4.0 - M0) - 1.0;
63  int offset = 2 * ND * ic + NVCD * is;
64  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt1r, 0 + offset);
65  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt1i, 1 + offset);
66  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt2r, 2 + offset);
67  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt2i, 3 + offset);
68  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt3r, 4 + offset);
69  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt3i, 5 + offset);
70  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt4r, 6 + offset);
71  dw_5dir_axpy(pg, vp2, yp2, wp2, FF1, FF2, b[is], c[is], vt4i, 7 + offset);
72  }
73  }
74  }
75 }
76 
77 
78 //====================================================================
80  real_t *vp, real_t *yp, real_t *wp,
81  real_t mq, real_t M0, int Ns, int *bc,
82  real_t *b, real_t *c,
83  int *Nsize, int *do_comm)
84 {
85  int Nxv = Nsize[0];
86  int Nyv = Nsize[1];
87  int Nz = Nsize[2];
88  int Nt = Nsize[3];
89  int Nstv = Nxv * Nyv * Nz * Nt;
90  int Nst = Nstv * VLEN;
91 
92  int Nin4 = VLEN * NVCD;
93  int Nin5 = Nin4 * Ns;
94 
95  int ith, nth, site0, site1;
96  set_threadtask(ith, nth, site0, site1, Nstv);
97 
98  for (int site = site0; site < site1; ++site) {
99  real_t *vp2 = &vp[Nin5 * site];
100  real_t *wp2 = &wp[Nin5 * site];
101  real_t *yp2 = &yp[Nin5 * site];
102 
103  Vsimd_t vL[NVCD], yL[NVCD], xL[NVCD];
104 
105  real_t FF1, FF2;
106 
107  for (int is = 0; is < Ns; ++is) {
108  svbool_t pg = set_predicate();
109 
110  for (int ic = 0; ic < NC; ++ic) {
111  svreal_t vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i;
112  FF1 = b[is] * (4.0 - M0) + 1.0;
113  real_t a1 = -0.5 * b[is];
114  int index = 2 * ND * ic + NVCD * is;
115  // dw_5dir_dag(pg, vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i,
116  // wp2, yp2, FF1, a1, index);
117  // note: origianl operation multiply gm5 here.
118  dw_5dir_dag(pg, vt3r, vt3i, vt4r, vt4i, vt1r, vt1i, vt2r, vt2i,
119  wp2, yp2, -FF1, -a1, index);
120 
121  int is_up = (is + 1) % Ns;
122  FF2 = c[is_up] * (4.0 - M0) - 1.0;
123  real_t aup = -0.5 * c[is_up];
124  int index_up = 2 * ND * ic + NVCD * is_up;
125  svreal_t xt1r, xt1i, xt2r, xt2i, xt3r, xt3i, xt4r, xt4i;
126  dw_5dir_dag(pg, xt1r, xt1i, xt2r, xt2i, xt3r, xt3i, xt4r, xt4i,
127  wp2, yp2, FF2, aup, index_up);
128 
129  real_t Fup = 0.5;
130  if (is == Ns - 1) Fup = -0.5 * mq;
131  add_aPp5_dirac_vec(pg, vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i,
132  Fup, xt1r, xt1i, xt2r, xt2i, xt3r, xt3i, xt4r, xt4i);
133 
134  int is_dn = (is - 1 + Ns) % Ns;
135  FF2 = c[is_dn] * (4.0 - M0) - 1.0;
136  real_t adn = -0.5 * c[is_dn];
137  int index_dn = 2 * ND * ic + NVCD * is_dn;
138  dw_5dir_dag(pg, xt1r, xt1i, xt2r, xt2i, xt3r, xt3i, xt4r, xt4i,
139  wp2, yp2, FF2, adn, index_dn);
140 
141  real_t Fdn = 0.5;
142  if (is == 0) Fdn = -0.5 * mq;
143  add_aPm5_dirac_vec(pg, vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i,
144  -Fdn, xt1r, xt1i, xt2r, xt2i, xt3r, xt3i, xt4r, xt4i);
145 
146  int offset = 2 * ND * ic + NVCD * is;
147  save_vec(pg, &vp2[VLEN * (0 + offset)], vt1r);
148  save_vec(pg, &vp2[VLEN * (1 + offset)], vt1i);
149  save_vec(pg, &vp2[VLEN * (2 + offset)], vt2r);
150  save_vec(pg, &vp2[VLEN * (3 + offset)], vt2i);
151  save_vec(pg, &vp2[VLEN * (4 + offset)], vt3r);
152  save_vec(pg, &vp2[VLEN * (5 + offset)], vt3i);
153  save_vec(pg, &vp2[VLEN * (6 + offset)], vt4r);
154  save_vec(pg, &vp2[VLEN * (7 + offset)], vt4i);
155  }
156  }
157  }
158 }
159 
160 
161 //====================================================================
163  int Ns, int *Nsize)
164 {
165  int Nxv = Nsize[0];
166  int Nyv = Nsize[1];
167  int Nz = Nsize[2];
168  int Nt = Nsize[3];
169  int Nstv = Nxv * Nyv * Nz * Nt;
170 
171  int Nin4 = VLEN * NVCD;
172  int Nin5 = Nin4 * Ns;
173 
174  int ith, nth, site0, site1;
175  set_threadtask(ith, nth, site0, site1, Nstv);
176 
177  for (int site = site0; site < site1; ++site) {
178  real_t *vp2 = &vp[Nin5 * site];
179  real_t *wp2 = &wp[Nin5 * site];
180 
181  svbool_t pg = set_predicate();
182  for (int is = 0; is < Ns; ++is) {
183  for (int ic = 0; ic < NC; ++ic) {
184  svreal_t vt1r, vt1i, vt2r, vt2i, vt3r, vt3i, vt4r, vt4i;
185  int index = 2 * ND * ic + NVCD * is;
186  load_vec(pg, vt1r, &wp2[VLEN * (4 + index)]);
187  load_vec(pg, vt1i, &wp2[VLEN * (5 + index)]);
188  load_vec(pg, vt2r, &wp2[VLEN * (6 + index)]);
189  load_vec(pg, vt2i, &wp2[VLEN * (7 + index)]);
190  load_vec(pg, vt3r, &wp2[VLEN * (0 + index)]);
191  load_vec(pg, vt3i, &wp2[VLEN * (1 + index)]);
192  load_vec(pg, vt4r, &wp2[VLEN * (2 + index)]);
193  load_vec(pg, vt4i, &wp2[VLEN * (3 + index)]);
194 
195  flip_sign(pg, vt1r); // vt1r := -vt1r
196  flip_sign(pg, vt1i);
197  flip_sign(pg, vt2r);
198  flip_sign(pg, vt2i);
199  flip_sign(pg, vt3r);
200  flip_sign(pg, vt3i);
201  flip_sign(pg, vt4r);
202  flip_sign(pg, vt4i);
203  save_vec(pg, &vp2[VLEN * (0 + index)], vt1r);
204  save_vec(pg, &vp2[VLEN * (1 + index)], vt1i);
205  save_vec(pg, &vp2[VLEN * (2 + index)], vt2r);
206  save_vec(pg, &vp2[VLEN * (3 + index)], vt2i);
207  save_vec(pg, &vp2[VLEN * (4 + index)], vt3r);
208  save_vec(pg, &vp2[VLEN * (5 + index)], vt3i);
209  save_vec(pg, &vp2[VLEN * (6 + index)], vt4r);
210  save_vec(pg, &vp2[VLEN * (7 + index)], vt4i);
211  }
212  }
213  }
214 }
215 
216 
217 //====================================================================
218 void BridgeQXS::mult_domainwall_5din_clear(real_t *vp, int Ns, int *Nsize)
219 {
220  int Nxv = Nsize[0];
221  int Nyv = Nsize[1];
222  int Nz = Nsize[2];
223  int Nt = Nsize[3];
224  int Nstv = Nxv * Nyv * Nz * Nt;
225 
226  int Nin4 = VLEN * NVCD;
227  int Nin5 = Nin4 * Ns;
228 
229  int ith, nth, site0, site1;
230  set_threadtask(ith, nth, site0, site1, Nstv);
231 
232  Vsimd_t yL[NVCD];
233  clear_vec(yL, NVCD);
234 
235  for (int site = site0; site < site1; ++site) {
236  real_t *vp2 = &vp[Nin5 * site];
237 
238  for (int is = 0; is < Ns; ++is) {
239  save_vec(&vp2[Nin4 * is], yL, NVCD);
240  }
241  }
242 }
243 
244 
245 //====================================================================
247  real_t *vp, real_t *up, real_t *wp,
248  real_t mq, real_t M0, int Ns, int *bc,
249  real_t *b, real_t *c,
250  int *Nsize, int *do_comm)
251 {
252  int Nxv = Nsize[0];
253  int Nyv = Nsize[1];
254  int Nz = Nsize[2];
255  int Nt = Nsize[3];
256  int Nstv = Nxv * Nyv * Nz * Nt;
257  int Nst = Nstv * VLEN;
258 
259  int Nin4 = VLEN * NVCD;
260  int Nin5 = Nin4 * Ns;
261 
262  int NvU = NDF * Nst;
263 
264  int Nxy = Nxv * Nyv;
265  int Nxyz = Nxv * Nyv * Nz;
266 
267  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
268  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
269  set_predicate_xp(pg1_xp, pg2_xp);
270  set_predicate_xm(pg1_xm, pg2_xm);
271  set_predicate_yp(pg1_yp, pg2_yp);
272  set_predicate_ym(pg1_ym, pg2_ym);
273 
274  int ith, nth, site0, site1;
275  set_threadtask(ith, nth, site0, site1, Nstv);
276 
277  real_t bufL[Nin5];
278 
279  for (int site = site0; site < site1; ++site) {
280  int ix = site % Nxv;
281  int iyzt = site / Nxv;
282  int ixy = site % Nxy;
283  int iy = iyzt % Nyv;
284  int izt = site / Nxy;
285  int iz = izt % Nz;
286  int it = izt / Nz;
287  int ixyz = site % Nxyz;
288 
289  int idir, nei;
290 
291  real_t *wp2 = &wp[Nin5 * site];
292  real_t *vp2 = &vp[Nin5 * site];
293 
294  real_t z4[VLEN * NVCD];
295  Vsimd_t vL[NVCD * Ns];
296  Vsimd_t uL[NDF];
297 
298  load_vec(vL, vp2, NVCD * Ns);
299 
300  idir = 0;
301 
302  if ((ix < Nxv - 1) || (do_comm[idir] == 0)) {
303  int ix2 = (ix + 1) % Nxv;
304  nei = ix2 + Nxv * iyzt;
305  real_t *wpn = &wp[Nin5 * nei];
306  real_t *u = &up[VLEN * NDF * site + NvU * idir];
307  for (int is = 0; is < Ns; ++is) {
308  mult_wilson_xpb(pg1_xp, pg2_xp, &vL[NVCD * is].v[0], u,
309  &wp2[Nin4 * is], &wpn[Nin4 * is]);
310  }
311  }
312 
313  if ((ix > 0) || (do_comm[idir] == 0)) {
314  int ix2 = (ix - 1 + Nxv) % Nxv;
315  nei = ix2 + Nxv * iyzt;
316  real_t *wpn = &wp[Nin5 * nei];
317  real_t *ux = &up[VLEN * NDF * site + NvU * idir];
318  real_t *un = &up[VLEN * NDF * nei + NvU * idir];
319  for (int is = 0; is < Ns; ++is) {
320  mult_wilson_xmb(pg1_xm, pg2_xm, &vL[NVCD * is], ux, un,
321  &wp2[Nin4 * is], &wpn[Nin4 * is]);
322  }
323  }
324 
325  idir = 1;
326 
327  if ((iy < Nyv - 1) || (do_comm[idir] == 0)) {
328  int iy2 = (iy + 1) % Nyv;
329  nei = ix + Nxv * (iy2 + Nyv * izt);
330  real_t *wpn = &wp[Nin5 * nei];
331  real_t *u = &up[VLEN * NDF * site + NvU * idir];
332  for (int is = 0; is < Ns; ++is) {
333  mult_wilson_ypb(pg1_yp, pg2_yp, &vL[NVCD * is].v[0], u,
334  &wp2[Nin4 * is], &wpn[Nin4 * is]);
335  }
336  }
337 
338  if ((iy > 0) || (do_comm[idir] == 0)) {
339  int iy2 = (iy - 1 + Nyv) % Nyv;
340  nei = ix + Nxv * (iy2 + Nyv * izt);
341  real_t *wpn = &wp[Nin5 * nei];
342  real_t *ux = &up[VLEN * NDF * site + NvU * idir];
343  real_t *un = &up[VLEN * NDF * nei + NvU * idir];
344  for (int is = 0; is < Ns; ++is) {
345  mult_wilson_ymb(pg1_ym, pg2_ym, &vL[NVCD * is].v[0], ux, un,
346  &wp2[Nin4 * is], &wpn[Nin4 * is]);
347  }
348  }
349 
350  idir = 2;
351 
352  if ((iz < Nz - 1) || (do_comm[idir] == 0)) {
353  int iz2 = (iz + 1) % Nz;
354  nei = ixy + Nxy * (iz2 + Nz * it);
355  real_t *wpn = &wp[Nin5 * nei];
356  real_t *u = &up[VLEN * NDF * site + NvU * idir];
357  for (int is = 0; is < Ns; ++is) {
358  mult_wilson_zpb(&vL[NVCD * is].v[0], u, &wpn[Nin4 * is]);
359  }
360  }
361 
362  if ((iz > 0) || (do_comm[idir] == 0)) {
363  int iz2 = (iz - 1 + Nz) % Nz;
364  nei = ixy + Nxy * (iz2 + Nz * it);
365  real_t *wpn = &wp[Nin5 * nei];
366  real_t *u = &up[VLEN * NDF * nei + NvU * idir];
367  for (int is = 0; is < Ns; ++is) {
368  mult_wilson_zmb(&vL[NVCD * is].v[0], u, &wpn[Nin4 * is]);
369  }
370  }
371 
372  idir = 3;
373 
374  if ((it < Nt - 1) || (do_comm[idir] == 0)) {
375  int it2 = (it + 1) % Nt;
376  nei = ixyz + Nxyz * it2;
377  real_t *wpn = &wp[Nin5 * nei];
378  real_t *u = &up[VLEN * NDF * site + NvU * idir];
379  for (int is = 0; is < Ns; ++is) {
380  mult_wilson_tpb_dirac(&vL[NVCD * is].v[0], u, &wpn[Nin4 * is]);
381  }
382  }
383 
384  if ((it > 0) || (do_comm[idir] == 0)) {
385  int it2 = (it - 1 + Nt) % Nt;
386  nei = ixyz + Nxyz * it2;
387  real_t *wpn = &wp[Nin5 * nei];
388  real_t *u = &up[VLEN * NDF * nei + NvU * idir];
389  for (int is = 0; is < Ns; ++is) {
390  mult_wilson_tmb_dirac(&vL[NVCD * is].v[0], u, &wpn[Nin4 * is]);
391  }
392  }
393 
394  save_vec(vp2, vL, NVCD * Ns);
395  }
396 }
397 
398 
399 //====================================================================
401  real_t *buf1_xp, real_t *buf1_xm,
402  real_t *buf1_yp, real_t *buf1_ym,
403  real_t *buf1_zp, real_t *buf1_zm,
404  real_t *buf1_tp, real_t *buf1_tm,
405  real_t *up, real_t *wp,
406  real_t mq, real_t M0, int Ns, int *bc,
407  int *Nsize, int *do_comm)
408 {
409  int Nxv = Nsize[0];
410  int Nyv = Nsize[1];
411  int Nz = Nsize[2];
412  int Nt = Nsize[3];
413  int Nstv = Nxv * Nyv * Nz * Nt;
414  int Nst = Nstv * VLEN;
415  int NvU = NDF * Nst;
416 
417  int Nin4 = VLEN * NVCD;
418  int Nin5 = Nin4 * Ns;
419  int Nin4H = VLEN * NVC * ND2;
420  int Nin5H = Nin4H * Ns;
421 
422  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
423  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
424  set_predicate_xp(pg1_xp, pg2_xp);
425  set_predicate_xm(pg1_xm, pg2_xm);
426  set_predicate_yp(pg1_yp, pg2_yp);
427  set_predicate_ym(pg1_ym, pg2_ym);
428  svint_t svidx_xp, svidx_xm;
429  set_index_xp(svidx_xp);
430  set_index_xm(svidx_xm);
431 
432  if (do_comm[0] == 1) {
433  int idir = 0;
434 
435  int Nyzt = Nyv * Nz * Nt;
436 
437  int ith, nth, site0, site1;
438  set_threadtask(ith, nth, site0, site1, Nyzt);
439 
440  for (int iyzt = site0; iyzt < site1; ++iyzt) {
441  {
442  int ix = 0;
443  int site = ix + Nxv * iyzt;
444  real_t *wp2 = &wp[Nin5 * site];
445  int ibf = VLENY * NVC * ND2 * Ns * iyzt;
446  set_index_xm(svidx_xm);
447  for (int is = 0; is < Ns; ++is) {
448  mult_wilson_xp1(pg2_xm, svidx_xm,
449  &buf1_xp[ibf + VLENY * NVC * ND2 * is], &wp2[Nin4 * is]);
450  }
451  }
452 
453  {
454  int ix = Nxv - 1;
455  int site = ix + Nxv * iyzt;
456  real_t *wp2 = &wp[Nin5 * site];
457  int ibf = VLENY * NVC * ND2 * Ns * iyzt;
458  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
459  set_index_xp(svidx_xp);
460  for (int is = 0; is < Ns; ++is) {
461  mult_wilson_xm1(pg2_xp, svidx_xp,
462  &buf1_xm[ibf + VLENY * NVC * ND2 * is], u2, &wp2[Nin4 * is]);
463  }
464  }
465  }
466  } // do_comm[0]
467 
468  if (do_comm[1] == 1) {
469  int idir = 1;
470  int Nxzt = Nxv * Nz * Nt;
471 
472  int ith, nth, site0, site1;
473  set_threadtask(ith, nth, site0, site1, Nxzt);
474 
475  for (int ixzt = site0; ixzt < site1; ++ixzt) {
476  int ix = ixzt % Nxv;
477  int izt = ixzt / Nxv;
478 
479  {
480  int iy = 0;
481  int site = ix + Nxv * (iy + Nyv * izt);
482  real_t *wp2 = &wp[Nin5 * site];
483 
484  int ibf = VLENX * NVC * ND2 * Ns * (ix + Nxv * izt);
485  for (int is = 0; is < Ns; ++is) {
486  mult_wilson_yp1(pg2_ym,
487  &buf1_yp[ibf + VLENX * NVC * ND2 * is], &wp2[Nin4 * is]);
488  }
489  }
490 
491  {
492  int iy = Nyv - 1;
493  int site = ix + Nxv * (iy + Nyv * izt);
494  real_t *wp2 = &wp[Nin5 * site];
495 
496  int ibf = VLENX * NVC * ND2 * Ns * (ix + Nxv * izt);
497  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
498  for (int is = 0; is < Ns; ++is) {
499  mult_wilson_ym1(pg2_yp,
500  &buf1_ym[ibf + VLENX * NVC * ND2 * is], u2, &wp2[Nin4 * is]);
501  }
502  }
503  }
504  } // do_comm[1]
505 
506 
507  if (do_comm[2] == 1) {
508  int idir = 2;
509  int Nxy = Nxv * Nyv;
510  int Nxyt = Nxv * Nyv * Nt;
511 
512  int ith, nth, site0, site1;
513  set_threadtask(ith, nth, site0, site1, Nxyt);
514 
515  for (int ixyt = site0; ixyt < site1; ++ixyt) {
516  int ixy = ixyt % Nxy;
517  int it = ixyt / Nxy;
518 
519  {
520  int iz = 0;
521  int site = ixy + Nxy * (iz + Nz * it);
522  real_t *wp2 = &wp[Nin5 * site];
523  int ibf = Nin5H * (ixy + Nxy * it);
524  for (int is = 0; is < Ns; ++is) {
525  mult_wilson_zp1(&buf1_zp[ibf + Nin4H * is], &wp2[Nin4 * is]);
526  }
527  }
528  {
529  int iz = Nz - 1;
530  int site = ixy + Nxy * (iz + Nz * it);
531  real_t *wp2 = &wp[Nin5 * site];
532  int ibf = Nin5H * (ixy + Nxy * it);
533  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
534  for (int is = 0; is < Ns; ++is) {
535  mult_wilson_zm1(&buf1_zm[ibf + Nin4H * is], u2, &wp2[Nin4 * is]);
536  }
537  }
538  }
539  }
540 
541  if (do_comm[3] == 1) {
542  int idir = 3;
543  int Nxyz = Nxv * Nyv * Nz;
544 
545  int ith, nth, site0, site1;
546  set_threadtask(ith, nth, site0, site1, Nxyz);
547 
548  for (int ixyz = site0; ixyz < site1; ++ixyz) {
549  {
550  int it = 0;
551  int site = ixyz + Nxyz * it;
552  real_t *wp2 = &wp[Nin5 * site];
553  int ibf = Nin5H * ixyz;
554  for (int is = 0; is < Ns; ++is) {
555  mult_wilson_tp1_dirac(&buf1_tp[ibf + Nin4H * is], &wp2[Nin4 * is]);
556  }
557  }
558 
559  {
560  int it = Nt - 1;
561  int site = ixyz + Nxyz * it;
562  real_t *wp2 = &wp[Nin5 * site];
563  int ibf = Nin5H * ixyz;
564  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
565  for (int is = 0; is < Ns; ++is) {
566  mult_wilson_tm1_dirac(&buf1_tm[ibf + Nin4H * is], u2, &wp2[Nin4 * is]);
567  }
568  }
569  }
570  }
571 }
572 
573 
574 //====================================================================
576  real_t *vp, real_t *up, real_t *wp,
577  real_t *buf2_xp, real_t *buf2_xm,
578  real_t *buf2_yp, real_t *buf2_ym,
579  real_t *buf2_zp, real_t *buf2_zm,
580  real_t *buf2_tp, real_t *buf2_tm,
581  real_t mq, real_t M0, int Ns, int *bc,
582  int *Nsize, int *do_comm)
583 
584 
585 {
586  int Nxv = Nsize[0];
587  int Nyv = Nsize[1];
588  int Nz = Nsize[2];
589  int Nt = Nsize[3];
590  int Nstv = Nxv * Nyv * Nz * Nt;
591  int Nst = Nstv * VLEN;
592 
593  int Nin4 = VLEN * NVCD;
594  int Nin5 = Nin4 * Ns;
595  int Nin4H = VLEN * NVC * ND2;
596  int Nin5H = Nin4H * Ns;
597  int NvU = NDF * Nst;
598 
599  int Nxy = Nxv * Nyv;
600  int Nxyz = Nxv * Nyv * Nz;
601 
602  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
603  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
604  set_predicate_xp(pg1_xp, pg2_xp);
605  set_predicate_xm(pg1_xm, pg2_xm);
606  set_predicate_yp(pg1_yp, pg2_yp);
607  set_predicate_ym(pg1_ym, pg2_ym);
608  svint_t svidx_xp, svidx_xm;
609  set_index_xp(svidx_xp);
610  set_index_xm(svidx_xm);
611 
612  int ith, nth, site0, site1;
613  set_threadtask(ith, nth, site0, site1, Nstv);
614 
615  real_t bufL[Nin5];
616 
617  for (int site = site0; site < site1; ++site) {
618  int ix = site % Nxv;
619  int iyzt = site / Nxv;
620  int ixy = site % Nxy;
621  int iy = iyzt % Nyv;
622  int izt = site / Nxy;
623  int iz = izt % Nz;
624  int it = izt / Nz;
625  int ixyz = site % Nxyz;
626 
627  int idir, nei;
628 
629  real_t *wp2 = &wp[Nin5 * site];
630  real_t *vp2 = &vp[Nin5 * site];
631 
632  real_t z4[VLEN * NVCD];
633 
634  Vsimd_t vL[NVCD * Ns];
635 
636  clear_vec(vL, NVCD * Ns);
637 
638  int opr_any = 0;
639 
640  idir = 0;
641  if (do_comm[idir] == 1) {
642  if (ix == Nxv - 1) {
643  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
644  int ibf = VLENY * NVC * ND2 * Ns * iyzt;
645  for (int is = 0; is < Ns; ++is) {
646  set_index_xp(svidx_xp);
647  mult_wilson_xp2(pg1_xp, pg2_xp, svidx_xp,
648  &vL[NVCD * is].v[0], u2,
649  &wp2[Nin4 * is], &buf2_xp[ibf + VLENY * NVC * ND2 * is]);
650  }
651  ++opr_any;
652  }
653 
654  if (ix == 0) {
655  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
656  int ibf = VLENY * NVC * ND2 * Ns * iyzt;
657  set_index_xm(svidx_xm);
658  for (int is = 0; is < Ns; ++is) {
659  mult_wilson_xm2(pg1_xm, pg2_xm, svidx_xm,
660  &vL[NVCD * is].v[0], u2,
661  &wp2[Nin4 * is], &buf2_xm[ibf + VLENY * NVC * ND2 * is]);
662  }
663  ++opr_any;
664  }
665  }
666 
667  idir = 1;
668  if (do_comm[idir] == 1) {
669  if (iy == Nyv - 1) {
670  int ibf = VLENX * NVC * ND2 * Ns * (ix + Nxv * izt);
671  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
672  for (int is = 0; is < Ns; ++is) {
673  mult_wilson_yp2(pg1_yp, pg2_yp,
674  &vL[NVCD * is].v[0], u2,
675  &wp2[Nin4 * is], &buf2_yp[ibf + VLENX * NVC * ND2 * is]);
676  }
677  ++opr_any;
678  }
679 
680  if (iy == 0) {
681  int ibf = VLENX * NVC * ND2 * Ns * (ix + Nxv * izt);
682  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
683  for (int is = 0; is < Ns; ++is) {
684  mult_wilson_ym2(pg1_ym, pg2_ym,
685  &vL[NVCD * is].v[0], u2,
686  &wp2[Nin4 * is], &buf2_ym[ibf + VLENX * NVC * ND2 * is]);
687  }
688  ++opr_any;
689  }
690  }
691 
692  idir = 2;
693  if (do_comm[idir] == 1) {
694  if (iz == Nz - 1) {
695  int ibf = Nin5H * (ixy + Nxy * it);
696  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
697  for (int is = 0; is < Ns; ++is) {
698  mult_wilson_zp2(&vL[NVCD * is].v[0], u2, &buf2_zp[ibf + Nin4H * is]);
699  }
700  ++opr_any;
701  }
702 
703  if (iz == 0) {
704  int ibf = Nin5H * (ixy + Nxy * it);
705  for (int is = 0; is < Ns; ++is) {
706  mult_wilson_zm2(&vL[NVCD * is].v[0], &buf2_zm[ibf + Nin4H * is]);
707  }
708  ++opr_any;
709  }
710  }
711 
712  idir = 3;
713  if (do_comm[idir] == 1) {
714  if (it == Nt - 1) {
715  int ibf = Nin5H * ixyz;
716  real_t *u2 = &up[VLEN * NDF * site + NvU * idir];
717  for (int is = 0; is < Ns; ++is) {
718  mult_wilson_tp2_dirac(&vL[NVCD * is].v[0], u2, &buf2_tp[ibf + Nin4H * is]);
719  }
720  ++opr_any;
721  }
722 
723  if (it == 0) {
724  int ibf = Nin5H * ixyz;
725  for (int is = 0; is < Ns; ++is) {
726  mult_wilson_tm2_dirac(&vL[NVCD * is].v[0], &buf2_tm[ibf + Nin4H * is]);
727  }
728  ++opr_any;
729  }
730  }
731 
732  // if(opr_any > 0) save_vec(vp2, vL, NVCD * Ns);
733 
734  if (opr_any > 0) {
735  svbool_t pg = set_predicate();
736  for (int ivs = 0; ivs < NVCD * Ns; ivs += 4) {
737  svreal_t vt1, vt2, vt3, vt4;
738  svreal_t yt1, yt2, yt3, yt4;
739  load_vec(pg, yt1, &vp2[VLEN * (ivs)]);
740  load_vec(pg, yt2, &vp2[VLEN * (ivs + 1)]);
741  load_vec(pg, yt3, &vp2[VLEN * (ivs + 2)]);
742  load_vec(pg, yt4, &vp2[VLEN * (ivs + 3)]);
743  load_vec(pg, vt1, &(vL[ivs].v[0]));
744  load_vec(pg, vt2, &(vL[ivs + 1].v[0]));
745  load_vec(pg, vt3, &(vL[ivs + 2].v[0]));
746  load_vec(pg, vt4, &(vL[ivs + 3].v[0]));
747 
748  add_vec(pg, yt1, vt1);
749  add_vec(pg, yt2, vt2);
750  add_vec(pg, yt3, vt3);
751  add_vec(pg, yt4, vt4);
752  save_vec(pg, &vp2[VLEN * (ivs)], yt1);
753  save_vec(pg, &vp2[VLEN * (ivs + 1)], yt2);
754  save_vec(pg, &vp2[VLEN * (ivs + 2)], yt3);
755  save_vec(pg, &vp2[VLEN * (ivs + 3)], yt4);
756  }
757  }
758  }
759 }
760 
761 
762 //====================================================================
764  real_t *__restrict vp,
765  real_t *__restrict wp,
766  int Ns, int *Nsize,
767  real_t *e, real_t *dpinv, real_t *dm)
768 {
770  e, dpinv, dm);
771 }
772 
773 
774 //====================================================================
776  real_t *__restrict vp,
777  real_t *__restrict wp,
778  int Ns, int *Nsize,
779  real_t *f, real_t *dpinv, real_t *dm)
780 {
782  f, dpinv, dm);
783 }
784 
785 
786 //====================================================================
788  real_t *__restrict vp,
789  real_t *__restrict wp,
790  int Ns, int *Nsize,
791  real_t *e, real_t *dpinv, real_t *dm)
792 {
794  e, dpinv, dm);
795 }
796 
797 
798 //====================================================================
800  real_t *__restrict vp,
801  real_t *__restrict wp,
802  int Ns, int *Nsize,
803  real_t *f, real_t *dpinv, real_t *dm)
804 {
806  f, dpinv, dm);
807 }
808 
809 
810 #endif
811 //============================================================END=====
BridgeQXS::mult_domainwall_5din_hopb_dirac
void mult_domainwall_5din_hopb_dirac(double *vp, double *up, double *wp, double mq, double M0, int Ns, int *bc, double *b, double *c, int *Nsize, int *do_comm)
Definition: mult_Domainwall_5din_qxs-inc.h:246
BridgeQXS::mult_domainwall_5din_hop2_dirac
void mult_domainwall_5din_hop2_dirac(double *vp, double *up, double *wp, double *buf2_xp, double *buf2_xm, double *buf2_yp, double *buf2_ym, double *buf2_zp, double *buf2_zm, double *buf2_tp, double *buf2_tm, double mq, double M0, int Ns, int *bc, int *Nsize, int *do_comm)
Definition: mult_Domainwall_5din_qxs-inc.h:575
BridgeQXS::mult_domainwall_5din_Udag_inv_dirac
void mult_domainwall_5din_Udag_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *f, real_t *dpinv, real_t *dm)
BridgeQXS::mult_domainwall_5din_5dir_dirac
void mult_domainwall_5din_5dir_dirac(double *vp, double *yp, double *wp, double mq, double M0, int Ns, int *bc, double *b, double *c, int *Nsize, int *do_comm)
Definition: mult_Domainwall_5din_qxs-inc.h:16
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Vsimd_t
Definition: vsimd_double-inc.h:13
BridgeQXS::mult_domainwall_5din_L_inv_dirac
void mult_domainwall_5din_L_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *e, real_t *dpinv, real_t *dm)
Isimd_t
Definition: vsimd_double-inc.h:20
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
BridgeQXS::mult_domainwall_5din_eo_Ldag_inv_dirac
void mult_domainwall_5din_eo_Ldag_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *e, real_t *dpinv, real_t *dm)
BridgeQXS::mult_domainwall_5din_clear
void mult_domainwall_5din_clear(double *vp, int Ns, int *Nsize)
Definition: mult_Domainwall_5din_qxs-inc.h:218
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
BridgeQXS::mult_domainwall_5din_U_inv_dirac
void mult_domainwall_5din_U_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *f, real_t *dpinv, real_t *dm)
BridgeQXS::mult_domainwall_5din_hop1_dirac
void mult_domainwall_5din_hop1_dirac(double *buf1_xp, double *buf1_xm, double *buf1_yp, double *buf1_ym, double *buf1_zp, double *buf1_zm, double *buf1_tp, double *buf1_tm, double *up, double *wp, double mq, double M0, int Ns, int *bc, int *Nsize, int *do_comm)
Definition: mult_Domainwall_5din_qxs-inc.h:400
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
BridgeQXS::mult_domainwall_5din_eo_Udag_inv_dirac
void mult_domainwall_5din_eo_Udag_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *f, real_t *dpinv, real_t *dm)
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
BridgeQXS::mult_domainwall_5din_eo_U_inv_dirac
void mult_domainwall_5din_eo_U_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *f, real_t *dpinv, real_t *dm)
BridgeQXS::mult_domainwall_5din_mult_gm5_dirac
void mult_domainwall_5din_mult_gm5_dirac(double *vp, double *wp, int Ns, int *Nsize)
Definition: mult_Domainwall_5din_qxs-inc.h:162
svbool_t
Definition: vsimd_double-inc.h:30
BridgeQXS::mult_domainwall_5din_eo_L_inv_dirac
void mult_domainwall_5din_eo_L_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *e, real_t *dpinv, real_t *dm)
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
BridgeQXS::mult_domainwall_5din_Ldag_inv_dirac
void mult_domainwall_5din_Ldag_inv_dirac(real_t *vp, real_t *wp, int Ns, int *Nsize, real_t *e, real_t *dpinv, real_t *dm)
ND2
#define ND2
Definition: define_params_SU3.h:18
BridgeQXS::mult_domainwall_5din_5dirdag_dirac
void mult_domainwall_5din_5dirdag_dirac(double *vp, double *yp, double *wp, double mq, double M0, int Ns, int *bc, double *b, double *c, int *Nsize, int *do_comm)
Definition: mult_Domainwall_5din_qxs-inc.h:79