Bridge++  Ver. 2.0.2
mult_Clover_dd_qxs-inc.h
Go to the documentation of this file.
1 
9 #ifndef MULT_CLOVER_DD_QXS_INCLUDED
10 #define MULT_CLOVER_DD_QXS_INCLUDED
11 
12 #include "mult_common_th-inc.h"
13 
14 //====================================================================
16  real_t *ct, real_t *v1,
17  real_t kappa, int *bc,
18  int *Nsize, int *block_size,
19  int ieo)
20 {
21  // lattice size in units of SIMD vector
22  int Nxv = Nsize[0];
23  int Nyv = Nsize[1];
24  int Nz = Nsize[2];
25  int Nt = Nsize[3];
26  int Nstv = Nxv * Nyv * Nz * Nt;
27  int Nst = Nstv * VLEN;
28 
29  // size of block in units of SIMD vector
30  int Bxv = block_size[0];
31  int Byv = block_size[1];
32  int Bz = block_size[2];
33  int Bt = block_size[3];
34  int Bsize = Bxv * Byv * Bz * Bt;
35 
36  // numbers of blocks
37  int NBx = Nsize[0] / block_size[0];
38  int NBy = Nsize[1] / block_size[1];
39  int NBz = Nsize[2] / block_size[2];
40  int NBt = Nsize[3] / block_size[3];
41  int Nblock = NBx * NBy * NBz * NBt;
42 
43  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
44  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
45  set_predicate_xp(pg1_xp, pg2_xp);
46  set_predicate_xm(pg1_xm, pg2_xm);
47  set_predicate_yp(pg1_yp, pg2_yp);
48  set_predicate_ym(pg1_ym, pg2_ym);
49 
50  int ith, nth, is, ns;
51  set_threadtask(ith, nth, is, ns, Bsize);
52 
53  for (int block = 0; block < Nblock; ++block) {
54  int ibx = block % NBx;
55  int iby = (block / NBx) % NBy;
56  int ibz = (block / (NBx * NBy)) % NBz;
57  int ibt = block / (NBx * NBy * NBz);
58  int jeo = (ieo + ibx + iby + ibz + ibt) % 2;
59  // if(jeo == 1) continue;
60  if ((ieo > -1) && (jeo == 1)) continue;
61 
62  for (int bsite = is; bsite < ns; ++bsite) {
63  int kx = bsite % Bxv;
64  int ix = kx + Bxv * ibx;
65  int kyzt = bsite / Bxv;
66  int ky = kyzt % Byv;
67  int iy = ky + Byv * iby;
68  int kzt = kyzt / Byv;
69  int kz = kzt % Bz;
70  int iz = kz + Bz * ibz;
71  int kt = kzt / Bz;
72  int it = kt + Bt * ibt;
73  int site = ix + Nxv * (iy + Nyv * (iz + Nz * it));
74 
75  Vsimd_t v2v[NVCD];
76  clear_vec(v2v, NVCD);
77 
78  real_t zL[VLEN * NVCD];
79  real_t uL[VLEN * NDF];
80 
81  if (kx < Bxv - 1) {
82  real_t *u = &up[NDF * Nst * 0];
83  int nei = site + 1;
84  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
85  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
86  } else { // kx = Bxv-1
87  real_t *u = &up[NDF * Nst * 0];
88  int nei = site;
89  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
90  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
91  }
92 
93  if (kx > 0) {
94  real_t *u = &up[NDF * Nst * 0];
95  int nei = site - 1;
96  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
97  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
98  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
99  } else { // kx = 0
100  real_t *u = &up[NDF * Nst * 0];
101  int nei = site + Bxv - 1;
102  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
103  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
104  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
105  }
106 
107  if (ky < Byv - 1) {
108  int nei = site + Nxv;
109  real_t *u = &up[NDF * Nst * 1];
110  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
111  &u[VLEN * NDF * site],
112  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
113  } else { // ky = Byv-1
114  int nei = site;
115  real_t *u = &up[NDF * Nst * 1];
116  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
117  &u[VLEN * NDF * site],
118  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
119  }
120 
121  if (ky > 0) {
122  int nei = site - Nxv;
123  real_t *u = &up[NDF * Nst * 1];
124  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
125  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
126  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
127  } else { // ky = 0
128  int nei = site + Nxv * (Byv - 1);
129  real_t *u = &up[NDF * Nst * 1];
130  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
131  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
132  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
133  }
134 
135  if (kz < Bz - 1) {
136  int nei = site + Nxv * Nyv;
137  real_t *u = &up[NDF * Nst * 2];
138  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
139  }
140 
141  if (kz > 0) {
142  int nei = site - Nxv * Nyv;
143  real_t *u = &up[NDF * Nst * 2];
144  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
145  }
146 
147  if (kt < Bt - 1) {
148  int nei = site + Nxv * Nyv * Nz;
149  real_t *u = &up[NDF * Nst * 3];
150  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
151  &v1[VLEN * NVCD * nei]);
152  }
153 
154  if (kt > 0) {
155  int nei = site - Nxv * Nyv * Nz;
156  real_t *u = &up[NDF * Nst * 3];
157  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
158  &v1[VLEN * NVCD * nei]);
159  }
160 
161  mult_clover_csw_aypx(&v2[VLEN * NVCD * site], -kappa, v2v,
162  &ct[VLEN * NDF * 2 * ND * site], &v1[VLEN * NVCD * site]);
163  }
164  }
165 }
166 
167 
168 //====================================================================
170  real_t *v2, real_t *up,
171  real_t *ct, real_t *v1,
172  real_t kappa, int *bc,
173  int *Nsize, int *block_size,
174  int ieo)
175 {
176  // lattice size in units of SIMD vector
177  int Nxv = Nsize[0];
178  int Nyv = Nsize[1];
179  int Nz = Nsize[2];
180  int Nt = Nsize[3];
181  int Nstv = Nxv * Nyv * Nz * Nt;
182  int Nst = Nstv * VLEN;
183 
184  // size of block in units of SIMD vector
185  int Bxv = block_size[0];
186  int Byv = block_size[1];
187  int Bz = block_size[2];
188  int Bt = block_size[3];
189  int Bsize = Bxv * Byv * Bz * Bt;
190 
191  // numbers of blocks
192  int NBx = Nsize[0] / block_size[0];
193  int NBy = Nsize[1] / block_size[1];
194  int NBz = Nsize[2] / block_size[2];
195  int NBt = Nsize[3] / block_size[3];
196  int Nblock = NBx * NBy * NBz * NBt;
197 
198  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
199  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
200  set_predicate_xp(pg1_xp, pg2_xp);
201  set_predicate_xm(pg1_xm, pg2_xm);
202  set_predicate_yp(pg1_yp, pg2_yp);
203  set_predicate_ym(pg1_ym, pg2_ym);
204 
205  int ith, nth, is, ns;
206  set_threadtask(ith, nth, is, ns, Bsize);
207 
208  for (int block = 0; block < Nblock; ++block) {
209  int ibx = block % NBx;
210  int iby = (block / NBx) % NBy;
211  int ibz = (block / (NBx * NBy)) % NBz;
212  int ibt = block / (NBx * NBy * NBz);
213  int jeo = (ieo + ibx + iby + ibz + ibt) % 2;
214  // if(jeo == 1) continue;
215  if ((ieo > -1) && (jeo == 1)) continue;
216 
217  for (int bsite = is; bsite < ns; ++bsite) {
218  int kx = bsite % Bxv;
219  int ix = kx + Bxv * ibx;
220  int kyzt = bsite / Bxv;
221  int ky = kyzt % Byv;
222  int iy = ky + Byv * iby;
223  int kzt = kyzt / Byv;
224  int kz = kzt % Bz;
225  int iz = kz + Bz * ibz;
226  int kt = kzt / Bz;
227  int it = kt + Bt * ibt;
228  int site = ix + Nxv * (iy + Nyv * (iz + Nz * it));
229 
230  Vsimd_t v2v[NVCD];
231  clear_vec(v2v, NVCD);
232 
233  real_t zL[VLEN * NVCD];
234  real_t uL[VLEN * NDF];
235 
236  if (kx < Bxv - 1) {
237  real_t *u = &up[NDF * Nst * 0];
238  int nei = site + 1;
239  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
240  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
241  } else { // kx = Bxv-1
242  real_t *u = &up[NDF * Nst * 0];
243  int nei = site;
244  mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[VLEN * NDF * site],
245  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
246  }
247 
248  if (kx > 0) {
249  real_t *u = &up[NDF * Nst * 0];
250  int nei = site - 1;
251  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
252  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
253  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
254  } else { // kx = 0
255  real_t *u = &up[NDF * Nst * 0];
256  int nei = site + Bxv - 1;
257  mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
258  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
259  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
260  }
261 
262  if (ky < Byv - 1) {
263  int nei = site + Nxv;
264  real_t *u = &up[NDF * Nst * 1];
265  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
266  &u[VLEN * NDF * site],
267  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
268  } else { // ky = Byv-1
269  int nei = site;
270  real_t *u = &up[NDF * Nst * 1];
271  mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
272  &u[VLEN * NDF * site],
273  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
274  }
275 
276  if (ky > 0) {
277  int nei = site - Nxv;
278  real_t *u = &up[NDF * Nst * 1];
279  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
280  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
281  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
282  } else { // ky = 0
283  int nei = site + Nxv * (Byv - 1);
284  real_t *u = &up[NDF * Nst * 1];
285  mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
286  &u[VLEN * NDF * site], &u[VLEN * NDF * nei],
287  &v1[VLEN * NVCD * site], &v1[VLEN * NVCD * nei]);
288  }
289 
290  if (kz < Bz - 1) {
291  int nei = site + Nxv * Nyv;
292  real_t *u = &up[NDF * Nst * 2];
293  mult_wilson_zpb(v2v, &u[VLEN * NDF * site], &v1[VLEN * NVCD * nei]);
294  }
295 
296  if (kz > 0) {
297  int nei = site - Nxv * Nyv;
298  real_t *u = &up[NDF * Nst * 2];
299  mult_wilson_zmb(v2v, &u[VLEN * NDF * nei], &v1[VLEN * NVCD * nei]);
300  }
301 
302  if (kt < Bt - 1) {
303  int nei = site + Nxv * Nyv * Nz;
304  real_t *u = &up[NDF * Nst * 3];
305  mult_wilson_tpb_dirac(v2v, &u[VLEN * NDF * site],
306  &v1[VLEN * NVCD * nei]);
307  }
308 
309  if (kt > 0) {
310  int nei = site - Nxv * Nyv * Nz;
311  real_t *u = &up[NDF * Nst * 3];
312  mult_wilson_tmb_dirac(v2v, &u[VLEN * NDF * nei],
313  &v1[VLEN * NVCD * nei]);
314  }
315 
316  mult_clover_csw_aypx_chrot(
317  &v2[VLEN * NVCD * site], -kappa, &v2v[0].v[0],
318  &ct[VLEN * NDF * 2 * ND * site], &v1[VLEN * NVCD * site]);
319  }
320  }
321 }
322 
323 
324 //============================================================END=====
325 #endif
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Vsimd_t
Definition: vsimd_double-inc.h:13
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
BridgeQXS::mult_clover_dd_dirac_chrot
void mult_clover_dd_dirac_chrot(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *block_size, int ieo)
Definition: mult_Clover_dd_qxs-inc.h:169
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
svbool_t
Definition: vsimd_double-inc.h:30
BridgeQXS::mult_clover_dd_dirac
void mult_clover_dd_dirac(double *v2, double *up, double *ct, double *v1, double kappa, int *bc, int *Nsize, int *block_size, int ieo)
Definition: mult_Clover_dd_qxs-inc.h:15