Bridge++  Ver. 2.0.2
mult_Wilson_eo_qxs-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_WILSON_EO_QXS_INCLUDED
11 #define MULT_WILSON_EO_QXS_INCLUDED
12 
13 #include "mult_common_th-inc.h"
14 
15 //#define IMPLE 0 // 0: original, 1: Nitadori-san's
16 #define IMPLE 1 // 0: original, 1: Nitadori-san's
17 //#define IMPLE 2 // setting of predicate/index only once
18 
19 //====================================================================
21  real_t *v1, real_t *xp,
22  real_t kappa, int *bc,
23  int *Nsize, int *do_comm,
24  int *Leo, const int ieo,
25  const int iflag)
26 {
27  int Nx2v = Nsize[0];
28  int Ny = Nsize[1];
29  int Nz = Nsize[2];
30  int Nt = Nsize[3];
31 
32  int Nst2v = Nx2v * Ny * Nz * Nt;
33  int Nst2 = Nst2v * VLEN;
34 
35  int Nxy2 = Nx2v * Ny;
36  int Nxyz2 = Nx2v * Ny * Nz;
37 
38  svbool_t pg1e_xp, pg2e_xp, pg3e_xp, pg1e_xm, pg2e_xm, pg3e_xm;
39  svbool_t pg1o_xp, pg2o_xp, pg3o_xp, pg1o_xm, pg2o_xm, pg3o_xm;
40  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
41 
42 #if IMPLE == 0
43  // original
44  set_predicate_xp_eo(pg1e_xp, pg2e_xp, pg3e_xp, 0);
45  set_predicate_xp_eo(pg1o_xp, pg2o_xp, pg3o_xp, 1);
46  set_predicate_xm_eo(pg1e_xm, pg2e_xm, pg3e_xm, 0);
47  set_predicate_xm_eo(pg1o_xm, pg2o_xm, pg3o_xm, 1);
48  set_predicate_yp(pg1_yp, pg2_yp);
49  set_predicate_ym(pg1_ym, pg2_ym);
50 #else
51  // nitadori-san
52  svuint_t idx1e_xp, idx1o_xp, idx1e_xm, idx1o_xm;
53  svuint_t idx1_yp, idx1_ym;
54  set_idx_predicate_xp_eo(pg1e_xp, idx1e_xp, 0);
55  set_idx_predicate_xp_eo(pg1o_xp, idx1o_xp, 1);
56  set_idx_predicate_xm_eo(pg1e_xm, idx1e_xm, 0);
57  set_idx_predicate_xm_eo(pg1o_xm, idx1o_xm, 1);
58  set_idx_predicate_yp(pg1_yp, idx1_yp);
59  set_idx_predicate_ym(pg1_ym, idx1_ym);
60 #endif
61 
62  int ith, nth, is, ns;
63  set_threadtask(ith, nth, is, ns, Nst2v);
64 
65  for (int site = is; site < ns; ++site) {
66  int ix = site % Nx2v;
67  int iyzt = site / Nx2v;
68  int iy = iyzt % Ny;
69  int izt = site / Nxy2;
70  int iz = izt % Nz;
71  int it = izt / Nz;
72  int ixy = ix + Nx2v * iy;
73  int ixyz = ixy + Nxy2 * iz;
74  int jeo = (ieo + Leo[VLENY * iyzt]) % 2;
75 
76  Vsimd_t v2v[NVCD];
77  clear_vec(v2v, NVCD);
78 
79  real_t zL[VLEN * NVCD], uL[VLEN * NDF];
80 
81  if ((ix < Nx2v - 1) || (do_comm[0] == 0)) {
82  int nei = ix + 1 + Nx2v * iyzt;
83  if (ix == Nx2v - 1) nei = 0 + Nx2v * iyzt;
84  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 0)];
85 
86  if (jeo == 0) {
87 #if IMPLE == 0
88  // original
89  mult_wilson_eo_xpb(pg1e_xp, pg2e_xp, pg3e_xp,
90  v2v, &u[VLEN * NDF * site],
91  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
92 #else
93  // nitadori-san
94 #if IMPLE < 2
95  set_idx_predicate_xp_eo(pg1e_xp, idx1e_xp, 0);
96 #endif
97  mult_wilson_eo_xpb(pg1e_xp, idx1e_xp,
98  v2v, &u[VLEN * NDF * site],
99  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
100 #endif
101  } else {
102 #if IMPLE == 0
103  // original
104  mult_wilson_eo_xpb(pg1o_xp, pg2o_xp, pg3o_xp,
105  v2v, &u[VLEN * NDF * site],
106  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
107 #else
108  // nitadori-san
109 #if IMPLE < 2
110  set_idx_predicate_xp_eo(pg1o_xp, idx1o_xp, 1);
111 #endif
112  mult_wilson_eo_xpb(pg1o_xp, idx1o_xp,
113  v2v, &u[VLEN * NDF * site],
114  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
115 #endif
116  }
117  }
118 
119  if ((ix > 0) || (do_comm[0] == 0)) {
120  int nei = ix - 1 + Nx2v * iyzt;
121  if (ix == 0) nei = Nx2v - 1 + Nx2v * iyzt;
122  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 0)];
123  if (jeo == 0) {
124 #if IMPLE == 0
125  // original
126  mult_wilson_eo_xmb(pg1e_xm, pg2e_xm, pg3e_xm, v2v,
127  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
128  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
129 #else
130  // nitadori-san
131 #if IMPLE < 2
132  set_idx_predicate_xm_eo(pg1e_xm, idx1e_xm, 0);
133 #endif
134  mult_wilson_eo_xmb(pg1e_xm, idx1e_xm, v2v,
135  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
136  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
137 #endif
138  } else {
139 #if IMPLE == 0
140  // original
141  mult_wilson_eo_xmb(pg1o_xm, pg2o_xm, pg3o_xm, v2v,
142  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
143  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
144 #else
145  // nitadori-san
146 #if IMPLE < 2
147  set_idx_predicate_xm_eo(pg1o_xm, idx1o_xm, 1);
148 #endif
149  mult_wilson_eo_xmb(pg1o_xm, idx1o_xm, v2v,
150  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
151  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
152 #endif
153  }
154  }
155 
156  if ((iy < Ny - 1) || (do_comm[1] == 0)) {
157  int iy2 = (iy + 1) % Ny;
158  int nei = ix + Nx2v * (iy2 + Ny * izt);
159  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 1)];
160 #if IMPLE == 0
161  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
162  &u[VLEN * NDF * site],
163  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
164 #else
165 #if IMPLE < 2
166  set_idx_predicate_yp(pg1_yp, idx1_yp);
167 #endif
168  mult_wilson_ypb(pg1_yp, idx1_yp, v2v,
169  &u[VLEN * NDF * site],
170  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
171 #endif
172  }
173 
174  if ((iy > 0) || (do_comm[1] == 0)) {
175  int iy2 = (iy - 1 + Ny) % Ny;
176  int nei = ix + Nx2v * (iy2 + Ny * izt);
177  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 1)];
178 #if IMPLE == 0
179  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
180  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
181  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
182 #else
183 #if IMPLE < 2
184  set_idx_predicate_ym(pg1_ym, idx1_ym);
185 #endif
186  mult_wilson_ymb(pg1_ym, idx1_ym, v2v,
187  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
188  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
189 #endif
190  }
191 
192  if ((iz < Nz - 1) || (do_comm[2] == 0)) {
193  int iz2 = (iz + 1) % Nz;
194  int nei = ixy + Nxy2 * (iz2 + Nz * it);
195  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 2)];
196  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
197  }
198 
199  if ((iz > 0) || (do_comm[2] == 0)) {
200  int iz2 = (iz - 1 + Nz) % Nz;
201  int nei = ixy + Nxy2 * (iz2 + Nz * it);
202  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 2)];
203  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
204  }
205 
206  if ((it < Nt - 1) || (do_comm[3] == 0)) {
207  int it2 = (it + 1) % Nt;
208  int nei = ixyz + Nxyz2 * it2;
209  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 3)];
210  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
211  }
212 
213  if ((it > 0) || (do_comm[3] == 0)) {
214  int it2 = (it - 1 + Nt) % Nt;
215  int nei = ixyz + Nxyz2 * it2;
216  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 3)];
217  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
218  }
219 
220  svbool_t pg = set_predicate();
221  if (iflag == 0) {
222  real_t *vv2 = &v2[VLEN * NVCD * site];
223  for (int i = 0; i < NVCD; ++i) {
224  svreal_t v2t;
225  load_vec(pg, v2t, &v2v[i].v[0]);
226  scal_vec(pg, v2t, -kappa);
227  save_vec(pg, &vv2[VLEN * i], v2t);
228  }
229  } else {
230  mult_wilson_aypx_save(&v2[VLEN * NVCD * site],
231  kappa, v2v, &xp[VLEN * NVCD * site]);
232  }
233  }
234 }
235 
236 
237 //====================================================================
239  real_t *buf_yp, real_t *buf_ym,
240  real_t *buf_zp, real_t *buf_zm,
241  real_t *buf_tp, real_t *buf_tm,
242  real_t *up, real_t *v1, int *bc,
243  int *Nsize, int *do_comm, int *Leo,
244  const int ieo, const int iflag)
245 {
246  int Nx2v = Nsize[0];
247  int Ny = Nsize[1];
248  int Nz = Nsize[2];
249  int Nt = Nsize[3];
250 
251  int Nst2v = Nx2v * Ny * Nz * Nt;
252  int Nst2 = Nst2v * VLEN;
253 
254  int Nxy2 = Nx2v * Ny;
255  int Nxyz2 = Nx2v * Ny * Nz;
256 
257  svbool_t pg1e_xp, pg2e_xp, pg3e_xp, pg1e_xm, pg2e_xm, pg3e_xm;
258  svbool_t pg1o_xp, pg2o_xp, pg3o_xp, pg1o_xm, pg2o_xm, pg3o_xm;
259  set_predicate_xp_eo(pg1e_xp, pg2e_xp, pg3e_xp, 0);
260  set_predicate_xp_eo(pg1o_xp, pg2o_xp, pg3o_xp, 1);
261  set_predicate_xm_eo(pg1e_xm, pg2e_xm, pg3e_xm, 0);
262  set_predicate_xm_eo(pg1o_xm, pg2o_xm, pg3o_xm, 1);
263  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
264  set_predicate_yp(pg1_yp, pg2_yp);
265  set_predicate_ym(pg1_ym, pg2_ym);
266 #if IMPLE == 0
267  svint_t svidx_xp, svidx_xm;
268 #else
269  svuint_t svidx_xp, svidx_xm;
270 #endif
271  set_index_xp_eo(svidx_xp);
272  set_index_xm_eo(svidx_xm);
273 
274  int Nskipx = (VLENY + 1) / 2;
275 
276 
277  if (do_comm[0] == 1) {
278  int idir = 0;
279 
280  int Nyzt = Ny * Nz * Nt;
281 
282  int ith, nth, is, ns;
283  set_threadtask(ith, nth, is, ns, Nyzt);
284 
285  for (int iyzt = is; iyzt < ns; ++iyzt) {
286  int jeo = (ieo + Leo[VLENY * iyzt]) % 2;
287  {
288  int ix = 0;
289  int site = ix + Nx2v * iyzt;
290  int ibf_up = Nskipx * NVC * ND2 * iyzt;
291  if (jeo == 0) {
292 #if VLENY > 1
293 #if IMPLE == 0
294  set_index_xm_eo(svidx_xm);
295  mult_wilson_eo_xp1(pg2o_xm, svidx_xm,
296  &buf_xp[ibf_up], &v1[VLEN * NVCD * site]);
297 #else
298  mult_wilson_eo_xp1(pg2o_xm,
299  &buf_xp[ibf_up], &v1[VLEN * NVCD * site]);
300 #endif
301 #endif
302  } else {
303  if (VLENY == 1) ibf_up = Nskipx * NVC * ND2 * (iyzt / 2);
304 #if IMPLE == 0
305  set_index_xm_eo(svidx_xm);
306  mult_wilson_eo_xp1(pg2e_xm, svidx_xm,
307  &buf_xp[ibf_up], &v1[VLEN * NVCD * site]);
308 #else
309  mult_wilson_eo_xp1(pg2e_xm,
310  &buf_xp[ibf_up], &v1[VLEN * NVCD * site]);
311 #endif
312  }
313  }
314 
315  {
316  int ix = Nx2v - 1;
317  int site = ix + Nx2v * iyzt;
318  int ibf_dn = Nskipx * NVC * ND2 * iyzt;
319  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * idir)];
320  if (jeo == 0) {
321  if (VLENY == 1) ibf_dn = Nskipx * NVC * ND2 * (iyzt / 2);
322 #if IMPLE == 0
323  set_index_xp_eo(svidx_xp);
324  mult_wilson_eo_xm1(pg2o_xp, svidx_xp, &buf_xm[ibf_dn],
325  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
326 #else
327  mult_wilson_eo_xm1(pg2o_xp, &buf_xm[ibf_dn],
328  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
329 #endif
330  } else {
331 #if VLENY > 1
332 #if IMPLE == 0
333  set_index_xp_eo(svidx_xp);
334  mult_wilson_eo_xm1(pg2e_xp, svidx_xp, &buf_xm[ibf_dn],
335  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
336 #else
337  mult_wilson_eo_xm1(pg2e_xp, &buf_xm[ibf_dn],
338  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
339 #endif
340 #endif
341  }
342  }
343  }
344  }
345 
346  if (do_comm[1] > 0) {
347  int idir = 1;
348 
349  int Nxzt = Nx2v * Nz * Nt;
350 
351  int ith, nth, is, ns;
352  set_threadtask(ith, nth, is, ns, Nxzt);
353 
354  for (int ixzt = is; ixzt < ns; ++ixzt) {
355  int ix = ixzt % Nx2v;
356  int izt = ixzt / Nx2v;
357  {
358  int iy = 0;
359  int site = ix + Nx2v * (iy + Ny * izt);
360  int ibf = VLENX * NVC * ND2 * (ix + Nx2v * izt);
361  mult_wilson_yp1(pg2_ym,
362  &buf_yp[ibf], &v1[VLEN * NVCD * site]);
363  }
364  {
365  int iy = Ny - 1;
366  int site = ix + Nx2v * (iy + Ny * izt);
367  real_t *u = &up[NDF * Nst2 * ((1 - ieo) + 2 * idir)];
368  int ibf = VLENX * NVC * ND2 * (ix + Nx2v * izt);
369  mult_wilson_ym1(pg2_yp,
370  &buf_ym[ibf], &u[VLEN * NDF * site],
371  &v1[VLEN * NVCD * site]);
372  }
373  }
374  }
375 
376  if (do_comm[2] > 0) {
377  int idir = 2;
378  int Nxyt2 = Nxy2 * Nt;
379 
380  int ith, nth, is, ns;
381  set_threadtask(ith, nth, is, ns, Nxyt2);
382 
383  for (int ixyt = is; ixyt < ns; ++ixyt) {
384  int ixy = ixyt % Nxy2;
385  int it = ixyt / Nxy2;
386  {
387  int iz = 0;
388  int site = ixy + Nxy2 * (iz + Nz * it);
389  int ibf = VLEN * NVC * ND2 * (ixy + Nxy2 * it);
390  mult_wilson_zp1(&buf_zp[ibf], &v1[VLEN * NVCD * site]);
391  }
392  {
393  int iz = Nz - 1;
394  int site = ixy + Nxy2 * (iz + Nz * it);
395  int ibf = VLEN * NVC * ND2 * (ixy + Nxy2 * it);
396  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * idir)];
397  mult_wilson_zm1(&buf_zm[ibf], &u[VLEN * NDF * site],
398  &v1[VLEN * NVCD * site]);
399  }
400  }
401  }
402 
403  if (do_comm[3] > 0) {
404  int idir = 3;
405 
406  int ith, nth, is, ns;
407  set_threadtask(ith, nth, is, ns, Nxyz2);
408 
409  for (int ixyz = is; ixyz < ns; ++ixyz) {
410  {
411  int it = 0;
412  int site = ixyz + Nxyz2 * it;
413  mult_wilson_tp1_dirac(&buf_tp[VLEN * NVC * ND2 * ixyz],
414  &v1[VLEN * NVCD * site]);
415  }
416  {
417  int it = Nt - 1;
418  int site = ixyz + Nxyz2 * it;
419  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * idir)];
420  mult_wilson_tm1_dirac(&buf_tm[VLEN * NVC * ND2 * ixyz],
421  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
422  }
423  }
424  }
425 }
426 
427 
428 //====================================================================
430  real_t *xp,
431  real_t *buf_xp, real_t *buf_xm,
432  real_t *buf_yp, real_t *buf_ym,
433  real_t *buf_zp, real_t *buf_zm,
434  real_t *buf_tp, real_t *buf_tm,
435  real_t kappa, int *bc,
436  int *Nsize, int *do_comm, int *Leo,
437  const int ieo, const int iflag)
438 {
439  int Nx2v = Nsize[0];
440  int Ny = Nsize[1];
441  int Nz = Nsize[2];
442  int Nt = Nsize[3];
443 
444  int Nst2v = Nx2v * Ny * Nz * Nt;
445  int Nst2 = Nst2v * VLEN;
446 
447  int Nxy2 = Nx2v * Ny;
448  int Nxyz2 = Nx2v * Ny * Nz;
449 
450  svbool_t pg1e_xp, pg2e_xp, pg3e_xp, pg1e_xm, pg2e_xm, pg3e_xm;
451  svbool_t pg1o_xp, pg2o_xp, pg3o_xp, pg1o_xm, pg2o_xm, pg3o_xm;
452  set_predicate_xp_eo(pg1e_xp, pg2e_xp, pg3e_xp, 0);
453  set_predicate_xp_eo(pg1o_xp, pg2o_xp, pg3o_xp, 1);
454  set_predicate_xm_eo(pg1e_xm, pg2e_xm, pg3e_xm, 0);
455  set_predicate_xm_eo(pg1o_xm, pg2o_xm, pg3o_xm, 1);
456  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
457  set_predicate_yp(pg1_yp, pg2_yp);
458  set_predicate_ym(pg1_ym, pg2_ym);
459 #if IMPLE == 0
460  svint_t svidx_xp, svidx_xm;
461 #else
462  svuint_t svidx_xp, svidx_xm;
463 #endif
464  set_index_xp_eo(svidx_xp);
465  set_index_xm_eo(svidx_xm);
466  svbool_t pg = set_predicate();
467 
468  real_t kappa_eo = kappa;
469  if (iflag == 0) {
470  kappa_eo = -kappa;
471  }
472 
473  int Nskipx = (VLENY + 1) / 2;
474 
475  int ith, nth, is, ns;
476  set_threadtask(ith, nth, is, ns, Nst2v);
477 
478  for (int site = is; site < ns; ++site) {
479  int ix = site % Nx2v;
480  int iyzt = site / Nx2v;
481  int iy = iyzt % Ny;
482  int izt = site / Nxy2;
483  int iz = izt % Nz;
484  int it = izt / Nz;
485  int ixy = ix + Nx2v * iy;
486  int ixyz = ixy + Nxy2 * iz;
487  int jeo = (ieo + Leo[VLENY * iyzt]) % 2;
488 
489  Vsimd_t v2v[NVCD];
490  clear_vec(v2v, NVCD);
491 
492  real_t zL[VLEN * NVCD], uL[VLEN * NDF];
493 
494  if ((ix == Nx2v - 1) && (do_comm[0] > 0)) {
495  int ibf_up = Nskipx * NVC * ND2 * iyzt;
496  if (VLENY == 1) ibf_up = Nskipx * NVC * ND2 * (iyzt / 2);
497  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 0)];
498  if (jeo == 0) {
499  set_index_xp_eo(svidx_xp);
500  mult_wilson_eo_xp2(pg1e_xp, pg2e_xp, pg3e_xp, svidx_xp,
501  v2v, &u[VLEN * NDF * site],
502  &v1[VLEN * NVCD * site], &buf_xp[ibf_up]);
503  } else {
504  set_index_xp_eo(svidx_xp);
505  mult_wilson_eo_xp2(pg1o_xp, pg2o_xp, pg3o_xp, svidx_xp,
506  v2v, &u[VLEN * NDF * site],
507  &v1[VLEN * NVCD * site], &buf_xp[ibf_up]);
508  }
509  }
510 
511  if ((ix == 0) && (do_comm[0] > 0)) {
512  int ibf_dn = Nskipx * NVC * ND2 * iyzt;
513  if (VLENY == 1) ibf_dn = Nskipx * NVC * ND2 * (iyzt / 2);
514  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 0)];
515  if (jeo == 0) {
516  set_index_xm_eo(svidx_xm);
517  mult_wilson_eo_xm2(pg1e_xm, pg2e_xm, pg3e_xm, svidx_xm,
518  v2v, &u[VLEN * NDF * site],
519  &v1[VLEN * NVCD * site], &buf_xm[ibf_dn]);
520  } else {
521  set_index_xm_eo(svidx_xm);
522  mult_wilson_eo_xm2(pg1o_xm, pg2o_xm, pg3o_xm, svidx_xm,
523  v2v, &u[VLEN * NDF * site],
524  &v1[VLEN * NVCD * site], &buf_xm[ibf_dn]);
525  }
526  }
527 
528  if ((iy == Ny - 1) && (do_comm[1] > 0)) {
529  int ibf = VLENX * NVC * ND2 * (ix + Nx2v * izt);
530  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 1)];
531  mult_wilson_yp2(pg1_yp, pg2_yp,
532  v2v, &u[VLEN * NDF * site],
533  &v1[VLEN * NVCD * site], &buf_yp[ibf]);
534  }
535 
536  if ((iy == 0) && (do_comm[1] > 0)) {
537  int ibf = VLENX * NVC * ND2 * (ix + Nx2v * izt);
538  real_t *u = &up[NDF * Nst2 * (1 - ieo + 2 * 1)];
539  mult_wilson_ym2(pg1_ym, pg2_ym,
540  v2v, &u[VLEN * NDF * site],
541  &v1[VLEN * NVCD * site], &buf_ym[ibf]);
542  }
543 
544  if ((iz == Nz - 1) && (do_comm[2] > 0)) {
545  int ibf = VLEN * NVC * ND2 * (ixy + Nxy2 * it);
546  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 2)];
547  mult_wilson_zp2(v2v, &u[VLEN * NDF * site], &buf_zp[ibf]);
548  }
549 
550  if ((iz == 0) && (do_comm[2] > 0)) {
551  int ibf = VLEN * NVC * ND2 * (ixy + Nxy2 * it);
552  mult_wilson_zm2(v2v, &buf_zm[ibf]);
553  }
554 
555  if ((it == Nt - 1) && (do_comm[3] > 0)) {
556  real_t *u = &up[NDF * Nst2 * (ieo + 2 * 3)];
557  mult_wilson_tp2_dirac(v2v, &u[VLEN * NDF * site],
558  &buf_tp[VLEN * NVC * ND2 * ixyz]);
559  }
560 
561  if ((it == 0) && (do_comm[3] > 0)) {
562  mult_wilson_tm2_dirac(v2v, &buf_tm[VLEN * NVC * ND2 * ixyz]);
563  }
564 
565 
566  real_t *ww2 = &v2v[0].v[0];
567  real_t *vv2 = &v2[VLEN * NVCD * site];
568  for (int i = 0; i < NVCD; i += 8) {
569  svreal_t wt1, wt2, wt3, wt4;
570  svreal_t vt1, vt2, vt3, vt4;
571  svreal_t wt5, wt6, wt7, wt8;
572  svreal_t vt5, vt6, vt7, vt8;
573  load_vec(pg, vt1, &vv2[VLEN * (i)]);
574  load_vec(pg, vt2, &vv2[VLEN * (i + 1)]);
575  load_vec(pg, vt3, &vv2[VLEN * (i + 2)]);
576  load_vec(pg, vt4, &vv2[VLEN * (i + 3)]);
577 
578  load_vec(pg, wt1, &ww2[VLEN * (i)]);
579  load_vec(pg, wt2, &ww2[VLEN * (i + 1)]);
580  load_vec(pg, wt3, &ww2[VLEN * (i + 2)]);
581  load_vec(pg, wt4, &ww2[VLEN * (i + 3)]);
582 
583  axpy_vec(pg, vt1, kappa_eo, wt1);
584  axpy_vec(pg, vt2, kappa_eo, wt2);
585  axpy_vec(pg, vt3, kappa_eo, wt3);
586 
587  load_vec(pg, vt5, &vv2[VLEN * (i + 4)]);
588  load_vec(pg, vt6, &vv2[VLEN * (i + 5)]);
589  load_vec(pg, vt7, &vv2[VLEN * (i + 6)]);
590  load_vec(pg, vt8, &vv2[VLEN * (i + 7)]);
591 
592  load_vec(pg, wt5, &ww2[VLEN * (i + 4)]);
593  load_vec(pg, wt6, &ww2[VLEN * (i + 5)]);
594  load_vec(pg, wt7, &ww2[VLEN * (i + 6)]);
595  load_vec(pg, wt8, &ww2[VLEN * (i + 7)]);
596 
597  axpy_vec(pg, vt4, kappa_eo, wt4);
598  axpy_vec(pg, vt5, kappa_eo, wt5);
599  axpy_vec(pg, vt6, kappa_eo, wt6);
600  axpy_vec(pg, vt7, kappa_eo, wt7);
601  axpy_vec(pg, vt8, kappa_eo, wt8);
602 
603  save_vec(pg, &vv2[VLEN * (i)], vt1);
604  save_vec(pg, &vv2[VLEN * (i + 1)], vt2);
605  save_vec(pg, &vv2[VLEN * (i + 2)], vt3);
606  save_vec(pg, &vv2[VLEN * (i + 3)], vt4);
607  save_vec(pg, &vv2[VLEN * (i + 4)], vt5);
608  save_vec(pg, &vv2[VLEN * (i + 5)], vt6);
609  save_vec(pg, &vv2[VLEN * (i + 6)], vt7);
610  save_vec(pg, &vv2[VLEN * (i + 7)], vt8);
611  }
612  }
613 }
614 
615 
616 #endif
617 //============================================================END=====
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Vsimd_t
Definition: vsimd_double-inc.h:13
Isimd_t
Definition: vsimd_double-inc.h:20
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
Vsimd_t::v
double v[VLEND]
Definition: vsimd_double-inc.h:15
BridgeQXS::mult_wilson_eo_bulk_dirac
void mult_wilson_eo_bulk_dirac(double *v2, double *up, double *v1, double *xp, double kappa, int *bc, int *Nsize, int *do_comm, int *Leo, const int ieo, const int iflag)
Definition: mult_Wilson_eo_qxs-inc.h:20
BridgeQXS::mult_wilson_eo_2_dirac
void mult_wilson_eo_2_dirac(double *v2, double *up, double *v1, double *xp, double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double kappa, int *bc, int *Nsize, int *do_comm, int *Leo, const int ieo, const int iflag)
Definition: mult_Wilson_eo_qxs-inc.h:429
Usimd_t
Definition: vsimd_double-inc.h:25
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
svbool_t
Definition: vsimd_double-inc.h:30
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
ND2
#define ND2
Definition: define_params_SU3.h:18
BridgeQXS::mult_wilson_eo_1_dirac
void mult_wilson_eo_1_dirac(double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double *up, double *v1, int *bc, int *Nsize, int *do_comm, int *Leo, const int ieo, const int iflag)
Definition: mult_Wilson_eo_qxs-inc.h:238