Bridge++  Ver. 2.0.2
mult_Wilson_qxs-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_WILSON_QXS_INCLUDED
11 #define MULT_WILSON_QXS_INCLUDED
12 
13 #include "mult_common_th-inc.h"
14 //#include "mult_Wilson_qxs_parts-inc.h"
15 
16 //====================================================================
18  real_t kappa, int *bc, int *Nsize, int *do_comm)
19 {
20  int Nxv = Nsize[0];
21  int Nyv = Nsize[1];
22  int Nz = Nsize[2];
23  int Nt = Nsize[3];
24  int Nstv = Nxv * Nyv * Nz * Nt;
25  int Nst = Nstv * VLEN;
26 
27  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
28  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
29  set_predicate_xp(pg1_xp, pg2_xp);
30  set_predicate_xm(pg1_xm, pg2_xm);
31  set_predicate_yp(pg1_yp, pg2_yp);
32  set_predicate_ym(pg1_ym, pg2_ym);
33 
34  Vsimd_t v2v[NVCD];
35 
36  int Nxy = Nxv * Nyv;
37  int Nxyz = Nxv * Nyv * Nz;
38 
39  int ith, nth, is, ns;
40  set_threadtask(ith, nth, is, ns, Nstv);
41 
42  for (int site = is; site < ns; ++site) {
43  int ix = site % Nxv;
44  int iyzt = site / Nxv;
45  int iy = iyzt % Nyv;
46  int izt = site / Nxy;
47  int iz = izt % Nz;
48  int it = izt / Nz;
49  int ixy = ix + Nxv * iy;
50  int ixyz = ixy + Nxy * iz;
51 
52  Vsimd_t v2v[NVCD];
53  clear_vec(v2v, NVCD);
54 
55  real_t zL[VLEN * NVCD];
56  real_t uL[VLEN * NDF];
57 
58  if (ix < Nxv - 1) {
59  real_t *u = &up[NDF * Nst * 0];
60  int nei = ix + 1 + Nxv * iyzt;
61  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
62  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
63  } else if (do_comm[0] == 0) { // ix = Nxv-1
64  real_t *u = &up[NDF * Nst * 0];
65  int nei = 0 + Nxv * iyzt;
66  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
67  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
68  }
69 
70  if (ix > 0) {
71  real_t *u = &up[NDF * Nst * 0];
72  int nei = ix - 1 + Nxv * iyzt;
73  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
74  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
75  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
76  } else if (do_comm[0] == 0) { // ix = 0
77  real_t *u = &up[NDF * Nst * 0];
78  int nei = Nxv - 1 + Nxv * iyzt;
79  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
80  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
81  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
82  }
83 
84  if (iy < Nyv - 1) {
85  int iy2 = (iy + 1) % Nyv;
86  int nei = ix + Nxv * (iy2 + Nyv * izt);
87  real_t *u = &up[NDF * Nst * 1];
88  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
89  &u[VLEN * NDF * site],
90  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
91  } else if (do_comm[1] == 0) { // iy = Nyv-1
92  int iy2 = (iy + 1) % Nyv;
93  int nei = ix + Nxv * (iy2 + Nyv * izt);
94  real_t *u = &up[NDF * Nst * 1];
95  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
96  &u[VLEN * NDF * site],
97  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
98  }
99 
100  if (iy > 0) {
101  int iy2 = (iy - 1 + Nyv) % Nyv;
102  int nei = ix + Nxv * (iy2 + Nyv * izt);
103  real_t *u = &up[NDF * Nst * 1];
104  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
105  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
106  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
107  } else if (do_comm[1] == 0) { // iy = 0
108  int iy2 = (iy - 1 + Nyv) % Nyv;
109  int nei = ix + Nxv * (iy2 + Nyv * izt);
110  real_t *u = &up[NDF * Nst * 1];
111  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
112  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
113  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
114  }
115 
116  if ((iz < Nz - 1) || (do_comm[2] == 0)) {
117  int iz2 = (iz + 1) % Nz;
118  int nei = ixy + Nxy * (iz2 + Nz * it);
119  real_t *u = &up[NDF * Nst * 2];
120  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
121  }
122 
123  if ((iz > 0) || (do_comm[2] == 0)) {
124  int iz2 = (iz - 1 + Nz) % Nz;
125  int nei = ixy + Nxy * (iz2 + Nz * it);
126  real_t *u = &up[NDF * Nst * 2];
127  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
128  }
129 
130  if ((it < Nt - 1) || (do_comm[3] == 0)) {
131  int it2 = (it + 1) % Nt;
132  int nei = ixyz + Nxyz * it2;
133  real_t *u = &up[NDF * Nst * 3];
134  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
135  &v1[VLEN * NVCD * nei]);
136  }
137 
138  if ((it > 0) || (do_comm[3] == 0)) {
139  int it2 = (it - 1 + Nt) % Nt;
140  int nei = ixyz + Nxyz * it2;
141  real_t *u = &up[NDF * Nst * 3];
142  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
143  &v1[VLEN * NVCD * nei]);
144  }
145 
146  mult_wilson_aypx_save(&v2[VLEN * NVCD * site],
147  -kappa, v2v, &v1[VLEN * NVCD * site]);
148  }
149 }
150 
151 
152 //====================================================================
154  real_t *buf_xp, real_t *buf_xm,
155  real_t *buf_yp, real_t *buf_ym,
156  real_t *buf_zp, real_t *buf_zm,
157  real_t *buf_tp, real_t *buf_tm,
158  real_t *up, real_t *v1,
159  int *bc, int *Nsize, int *do_comm)
160 {
161  int Nxv = Nsize[0];
162  int Nyv = Nsize[1];
163  int Nz = Nsize[2];
164  int Nt = Nsize[3];
165  int Nstv = Nxv * Nyv * Nz * Nt;
166  int Nst = Nstv * VLEN;
167 
168  int Nxy = Nxv * Nyv;
169  int Nxyz = Nxv * Nyv * Nz;
170 
171  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
172  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
173  set_predicate_xp(pg1_xp, pg2_xp);
174  set_predicate_xm(pg1_xm, pg2_xm);
175  set_predicate_yp(pg1_yp, pg2_yp);
176  set_predicate_ym(pg1_ym, pg2_ym);
177  svint_t svidx_xp, svidx_xm;
178  set_index_xp(svidx_xp);
179  set_index_xm(svidx_xm);
180 
181  if (do_comm[0] > 0) {
182  int idir = 0;
183  real_t *u = &up[NDF * Nst * idir];
184 
185  int Nyzt = Nyv * Nz * Nt;
186 
187  int ith, nth, is, ns;
188  set_threadtask(ith, nth, is, ns, Nyzt);
189 
190  for (int iyzt = is; iyzt < ns; ++iyzt) {
191  {
192  int ix = 0;
193  int site = ix + Nxv * iyzt;
194  int ibf = VLENY * NVC * ND2 * iyzt;
195  set_index_xm(svidx_xm);
196  mult_wilson_xp1(pg2_xm, svidx_xm,
197  &buf_xp[ibf], &v1[VLEN * NVCD * site]);
198  }
199  {
200  int ix = Nxv - 1;
201  int site = ix + Nxv * iyzt;
202  int ibf = VLENY * NVC * ND2 * iyzt;
203  set_index_xp(svidx_xp);
204  mult_wilson_xm1(pg2_xp, svidx_xp,
205  &buf_xm[ibf], &u[VLEN * NDF * site],
206  &v1[VLEN * NVCD * site]);
207  }
208  }
209  }
210 
211  if (do_comm[1] > 0) {
212  int idir = 1;
213  real_t *u = &up[NDF * Nst * idir];
214 
215  int Nxzt = Nxv * Nz * Nt;
216 
217  int ith, nth, is, ns;
218  set_threadtask(ith, nth, is, ns, Nxzt);
219 
220  for (int ixzt = is; ixzt < ns; ++ixzt) {
221  int ix = ixzt % Nxv;
222  int izt = ixzt / Nxv;
223  {
224  int iy = 0;
225  int site = ix + Nxv * (iy + Nyv * izt);
226  int ibf = VLENX * NVC * ND2 * ixzt;
227  mult_wilson_yp1(pg2_ym,
228  &buf_yp[ibf], &v1[VLEN * NVCD * site]);
229  }
230  {
231  int iy = Nyv - 1;
232  int site = ix + Nxv * (iy + Nyv * izt);
233  int ibf = VLENX * NVC * ND2 * ixzt;
234  mult_wilson_ym1(pg2_yp,
235  &buf_ym[ibf], &u[VLEN * NDF * site],
236  &v1[VLEN * NVCD * site]);
237  }
238  }
239  }
240 
241  if (do_comm[2] > 0) {
242  int idir = 2;
243  real_t *u = &up[NDF * Nst * idir];
244 
245  int Nxyt = Nxv * Nyv * Nt;
246 
247  int ith, nth, is, ns;
248  set_threadtask(ith, nth, is, ns, Nxyt);
249 
250  for (int ixyt = is; ixyt < ns; ++ixyt) {
251  int ixy = ixyt % Nxy;
252  int it = ixyt / Nxy;
253  {
254  int iz = 0;
255  int site = ixy + Nxy * (iz + Nz * it);
256  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
257  mult_wilson_zp1(&buf_zp[ibf], &v1[VLEN * NVCD * site]);
258  }
259  {
260  int iz = Nz - 1;
261  int site = ixy + Nxy * (iz + Nz * it);
262  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
263  mult_wilson_zm1(&buf_zm[ibf], &u[VLEN * NDF * site],
264  &v1[VLEN * NVCD * site]);
265  }
266  }
267  }
268 
269  if (do_comm[3] > 0) {
270  int idir = 3;
271  real_t *u = &up[NDF * Nst * idir];
272 
273  int ith, nth, is, ns;
274  set_threadtask(ith, nth, is, ns, Nxyz);
275  {
276  int it = 0;
277  for (int ixyz = is; ixyz < ns; ++ixyz) {
278  int site = ixyz + Nxyz * it;
279  mult_wilson_tp1_dirac(&buf_tp[VLEN * NVC * ND2 * ixyz],
280  &v1[VLEN * NVCD * site]);
281  }
282  }
283  {
284  int it = Nt - 1;
285  for (int ixyz = is; ixyz < ns; ++ixyz) {
286  int site = ixyz + Nxyz * it;
287  mult_wilson_tm1_dirac(&buf_tm[VLEN * NVC * ND2 * ixyz],
288  &u[VLEN * NDF * site], &v1[VLEN * NVCD * site]);
289  }
290  }
291  }
292 }
293 
294 
295 //====================================================================
297  real_t *buf_xp, real_t *buf_xm,
298  real_t *buf_yp, real_t *buf_ym,
299  real_t *buf_zp, real_t *buf_zm,
300  real_t *buf_tp, real_t *buf_tm,
301  real_t kappa, int *bc, int *Nsize, int *do_comm)
302 {
303  int Nxv = Nsize[0];
304  int Nyv = Nsize[1];
305  int Nz = Nsize[2];
306  int Nt = Nsize[3];
307  int Nstv = Nxv * Nyv * Nz * Nt;
308  int Nst = Nstv * VLEN;
309 
310  int Nxy = Nxv * Nyv;
311  int Nxyz = Nxv * Nyv * Nz;
312 
313  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
314  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
315  set_predicate_xp(pg1_xp, pg2_xp);
316  set_predicate_xm(pg1_xm, pg2_xm);
317  set_predicate_yp(pg1_yp, pg2_yp);
318  set_predicate_ym(pg1_ym, pg2_ym);
319  svint_t svidx_xp, svidx_xm;
320  set_index_xp(svidx_xp);
321  set_index_xm(svidx_xm);
322 
323  int ith, nth, is, ns;
324  set_threadtask(ith, nth, is, ns, Nstv);
325 
326  for (int site = is; site < ns; ++site) {
327  int ix = site % Nxv;
328  int iyzt = site / Nxv;
329  int iy = iyzt % Nyv;
330  int izt = site / Nxy;
331  int iz = izt % Nz;
332  int it = izt / Nz;
333  int ixy = ix + Nxv * iy;
334  int ixyz = ixy + Nxy * iz;
335 
336  Vsimd_t v2v[NVCD];
337  clear_vec(v2v, NVCD);
338  int opr_any = 0;
339 
340  if ((ix == Nxv - 1) && (do_comm[0] > 0)) {
341  real_t *u = &up[NDF * Nst * 0];
342  int ibf = VLENY * NVC * ND2 * iyzt;
343  set_index_xp(svidx_xp);
344  mult_wilson_xp2(pg1_xp, pg2_xp, svidx_xp,
345  v2v, &u[VLEN * NDF * site],
346  &v1[VLEN * NVCD * site], &buf_xp[ibf]);
347  ++opr_any;
348  }
349 
350  if ((ix == 0) && (do_comm[0] > 0)) {
351  real_t *u = &up[NDF * Nst * 0];
352  int ibf = VLENY * NVC * ND2 * iyzt;
353  set_index_xm(svidx_xm);
354  mult_wilson_xm2(pg1_xm, pg2_xm, svidx_xm,
355  v2v, &u[VLEN * NDF * site],
356  &v1[VLEN * NVCD * site], &buf_xm[ibf]);
357  ++opr_any;
358  }
359 
360  if ((iy == Nyv - 1) && (do_comm[1] > 0)) {
361  real_t *u = &up[NDF * Nst * 1];
362  int ibf = VLENX * NVC * ND2 * (ix + Nxv * izt);
363  mult_wilson_yp2(pg1_yp, pg2_yp,
364  v2v, &u[VLEN * NDF * site],
365  &v1[VLEN * NVCD * site], &buf_yp[ibf]);
366  ++opr_any;
367  }
368 
369  if ((iy == 0) && (do_comm[1] > 0)) {
370  real_t *u = &up[NDF * Nst * 1];
371  int ibf = VLENX * NVC * ND2 * (ix + Nxv * izt);
372  mult_wilson_ym2(pg1_ym, pg2_ym,
373  v2v, &u[VLEN * NDF * site],
374  &v1[VLEN * NVCD * site], &buf_ym[ibf]);
375  ++opr_any;
376  }
377 
378  if ((iz == Nz - 1) && (do_comm[2] > 0)) {
379  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
380  real_t *u = &up[NDF * Nst * 2];
381  mult_wilson_zp2(v2v, &u[VLEN * NDF * site], &buf_zp[ibf]);
382  ++opr_any;
383  }
384 
385  if ((iz == 0) && (do_comm[2] > 0)) {
386  int ibf = VLEN * NVC * ND2 * (ixy + Nxy * it);
387  mult_wilson_zm2(v2v, &buf_zm[ibf]);
388  ++opr_any;
389  }
390 
391  if ((it == Nt - 1) && (do_comm[3] > 0)) {
392  real_t *u = &up[NDF * Nst * 3];
393  mult_wilson_tp2_dirac(v2v, &u[VLEN * NDF * site],
394  &buf_tp[VLEN * NVC * ND2 * ixyz]);
395  ++opr_any;
396  }
397 
398  if ((it == 0) && (do_comm[3] > 0)) {
399  mult_wilson_tm2_dirac(v2v, &buf_tm[VLEN * NVC * ND2 * ixyz]);
400  ++opr_any;
401  }
402 
403  if (opr_any > 0) {
404  mult_wilson_aypx_save(&v2[VLEN * NVCD * site],
405  -kappa, v2v, &v2[VLEN * NVCD * site]);
406  }
407  }
408 }
409 
410 
412  int *Nsize)
413 {
414  int Nxv = Nsize[0];
415  int Nyv = Nsize[1];
416  int Nz = Nsize[2];
417  int Nt = Nsize[3];
418  int Nstv = Nxv * Nyv * Nz * Nt;
419 
420  svbool_t pg = set_predicate();
421 
422  int ith, nth, is, ns;
423  set_threadtask(ith, nth, is, ns, Nstv);
424 
425  for (int site = is; site < ns; ++site) {
426  real_t *vv1 = v1 + VLEN * 2 * ND * NC * site;
427  real_t *vv2 = v2 + VLEN * 2 * ND * NC * site;
428  for (int ic = 0; ic < NC; ++ic) {
429  mult_gm5_dirac_vec(pg, &vv2[VLEN * 2 * ND * ic],
430  &vv1[VLEN * 2 * ND * ic]);
431  }
432  }
433 }
434 
435 
436 #endif
437 //============================================================END=====
BridgeQXS::mult_wilson_gm5_dirac
void mult_wilson_gm5_dirac(double *v2, double *v1, int *Nsize)
Definition: mult_Wilson_qxs-inc.h:411
BridgeQXS::mult_wilson_2_dirac
void mult_wilson_2_dirac(double *v2, double *up, double *v1, double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:296
BridgeQXS::mult_wilson_1_dirac
void mult_wilson_1_dirac(double *buf_xp, double *buf_xm, double *buf_yp, double *buf_ym, double *buf_zp, double *buf_zm, double *buf_tp, double *buf_tm, double *up, double *v1, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:153
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Vsimd_t
Definition: vsimd_double-inc.h:13
Isimd_t
Definition: vsimd_double-inc.h:20
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
BridgeQXS::mult_wilson_bulk_dirac
void mult_wilson_bulk_dirac(double *v2, double *up, double *v1, double kappa, int *bc, int *Nsize, int *do_comm)
Definition: mult_Wilson_qxs-inc.h:17
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
svbool_t
Definition: vsimd_double-inc.h:30
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
ND2
#define ND2
Definition: define_params_SU3.h:18