Bridge++  Ver. 2.0.2
mult_Clover_coarse_qxs-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_COARSE_QXS_INCLUDED
11 #define MULT_COARSE_QXS_INCLUDED
12 
13 #include "mult_common_th-inc.h"
14 
15 
// Compile-time switches for the kernels in this file.
//
// RUN_DIAG: when defined, mult_coarse_b starts each output site vector with
// set_mult_u(out, v1, c0, Nc) (the "clover term"); when undefined the output
// site vector is zero-filled instead.
#define RUN_DIAG

// RUN_HOP_<dir>P / RUN_HOP_<dir>M: enable the forward / backward hop call
// for direction <dir> in mult_coarse_b and mult_coarse_2.
#define RUN_HOP_XP
#define RUN_HOP_XM

#define RUN_HOP_YP
#define RUN_HOP_YM

#define RUN_HOP_ZP
#define RUN_HOP_ZM

#define RUN_HOP_TP
#define RUN_HOP_TM
28 
29 
30 namespace BridgeQXS {
31  //====================================================================
32  void mult_coarse_1(real_t *buf1_xp, real_t *buf1_xm,
33  real_t *buf1_yp, real_t *buf1_ym,
34  real_t *buf1_zp, real_t *buf1_zm,
35  real_t *buf1_tp, real_t *buf1_tm,
36  real_t *u0, real_t *v1, const int *Nsize,
37  int ncol, const int *do_comm)
38  {
39  int ith, nth, is, ns;
40  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
41  int Nxv = Nsize[0];
42  int Nyv = Nsize[1];
43  int Nz = Nsize[2];
44  int Nt = Nsize[3];
45  int Nc = ncol;
46  int Nvc = 2 * ncol; // 2 for complex
47  int Nc2 = ncol * ncol;
48  int Ndf = 2 * Nc2; // 2 for complex
49 
50  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
51  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
52  set_predicate_xp(pg1_xp, pg2_xp);
53  set_predicate_xm(pg1_xm, pg2_xm);
54  set_predicate_yp(pg1_yp, pg2_yp);
55  set_predicate_ym(pg1_ym, pg2_ym);
56  svint_t svidx_xp, svidx_xm;
57  set_index_xp(svidx_xp);
58  set_index_xm(svidx_xm);
59 
60  int taskx = (do_comm[0] > 0) ? (Nyv * Nz * Nt) : 0;
61  int tasky = (do_comm[1] > 0) ? (Nxv * Nz * Nt) : 0;
62  int taskz = (do_comm[2] > 0) ? (Nxv * Nyv * Nt) : 0;
63  int taskt = (do_comm[3] > 0) ? (Nxv * Nyv * Nz) : 0;
64  int task_total = taskx + tasky + taskz + taskt;
65  set_threadtask(ith, nth, is, ns, task_total);
66 
67  int isx = is;
68  int nsx = (ns > taskx) ? taskx : ns;
69  is -= taskx;
70  ns -= taskx;
71  int isy = (is < 0) ? 0 : is;
72  int nsy = (ns > tasky) ? tasky : ns;
73  is -= tasky;
74  ns -= tasky;
75  int isz = (is < 0) ? 0 : is;
76  int nsz = (ns > taskz) ? taskz : ns;
77  is -= taskz;
78  ns -= taskz;
79  int ist = (is < 0) ? 0 : is;
80  int nst = (ns < 0) ? 0 : ns;
81 
82  for (int sitex = isx; sitex < nsx; ++sitex) {
83  int iyzt = sitex;
84  int ibf = VLENY * Nvc * iyzt;
85  int idir = 0;
86  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
87  {
88  int ix = 0;
89  int site = ix + Nxv * iyzt;
90  set_index_xm(svidx_xm);
91  mult_coarse_xp1(pg2_xm, svidx_xm,
92  &buf1_xp[ibf], &v1[VLEN * Nvc * site], Nc);
93  }
94  {
95  int ix = Nxv - 1;
96  int site = ix + Nxv * iyzt;
97  set_index_xp(svidx_xp);
98  mult_coarse_xm1(pg2_xp, svidx_xp,
99  &buf1_xm[ibf], &u[VLEN * Ndf * site],
100  &v1[VLEN * Nvc * site], Nc);
101  }
102  } // sitex
103 
104  for (int sitey = isy; sitey < nsy; sitey++) {
105  int ixzt = sitey;
106  int ix = sitey % Nxv;
107  int izt = sitey / Nxv;
108  int ibf = VLENX * Nvc * ixzt;
109  int idir = 1;
110  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
111  {
112  int iy = 0;
113  int site = ix + Nxv * iy + Nxv * Nyv * izt;
114  mult_coarse_yp1(pg2_ym,
115  &buf1_yp[ibf], &v1[VLEN * Nvc * site], Nc);
116  }
117  {
118  int iy = Nyv - 1;
119  int site = ix + Nxv * iy + Nxv * Nyv * izt;
120  mult_coarse_ym1(pg2_yp,
121  &buf1_ym[ibf], &u[VLEN * Ndf * site],
122  &v1[VLEN * Nvc * site], Nc);
123  }
124  } // sitey
125 
126  for (int sitez = isz; sitez < nsz; sitez++) {
127  int ixyt = sitez;
128  int ixy = sitez % (Nxv * Nyv);
129  int it = sitez / (Nxv * Nyv);
130  int idir = 2;
131  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
132  {
133  int iz = 0;
134  int site = ixy + Nxv * Nyv * (iz + Nz * it);
135  mult_coarse_zp1(&buf1_zp[VLEN * Nvc * ixyt], &v1[VLEN * Nvc * site], Nc);
136  }
137  {
138  int iz = Nz - 1;
139  int site = ixy + Nxv * Nyv * (iz + Nz * it);
140  mult_coarse_zm1(&buf1_zm[VLEN * Nvc * ixyt],
141  &u[VLEN * Ndf * site], &v1[VLEN * Nvc * site], Nc);
142  }
143  } // sitez
144 
145  for (int sitet = ist; sitet < nst; sitet++) {
146  int ixyz = sitet;
147  int idir = 3;
148  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
149  {
150  int it = 0;
151  int site = ixyz + Nxv * Nyv * Nz * it;
152  mult_coarse_tp1(&buf1_tp[VLEN * Nvc * ixyz], &v1[VLEN * Nvc * site], Nc);
153  }
154  {
155  int it = Nt - 1;
156  int site = ixyz + Nxv * Nyv * Nz * it;
157  mult_coarse_tm1(&buf1_tm[VLEN * Nvc * ixyz],
158  &u[VLEN * Ndf * site], &v1[VLEN * Nvc * site], Nc);
159  }
160  } // sitet
161  }
162 
163 
164 //====================================================================
166  real_t *u0, real_t *c0,
167  real_t *v1,
168  const int *Nsize, int ncol,
169  const int *do_comm, real_t *work)
170  {
171  int ith, nth, is, ns;
172  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
173  int Nxv = Nsize[0];
174  int Nyv = Nsize[1];
175  int Nz = Nsize[2];
176  int Nt = Nsize[3];
177  int Nc = ncol;
178  int Nvc = 2 * ncol; // 2 for complex
179  int Nc2 = ncol * ncol;
180  int Ndf = 2 * Nc2; // 2 for complex
181 
182  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
183  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
184  set_predicate_xp(pg1_xp, pg2_xp);
185  set_predicate_xm(pg1_xm, pg2_xm);
186  set_predicate_yp(pg1_yp, pg2_yp);
187  set_predicate_ym(pg1_ym, pg2_ym);
188 
189  int nv = VLEN * Nvc;
190  int nv2 = VLEN * Ndf;
191  set_threadtask(ith, nth, is, ns, Nstv);
192 
193  for (int site = is; site < ns; ++site) {
194  real_t *out = &v2[nv * site];
195 
196  // clover term
197 #ifdef RUN_DIAG
198  set_mult_u(out, &v1[nv * site],
199  &c0[nv2 * site], Nc);
200 #else
201  for (int i = 0; i < nv; i++) {
202  out[i] = 0.0;
203  }
204 #endif
205  int ix = site % Nxv;
206  int iyzt = site / Nxv;
207  { // mult_xpb, mult_xmb
208  int idir = 0;
209  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
210 
211 #ifdef RUN_HOP_XP
212  if ((ix < Nxv - 1) || (do_comm[0] == 0)) {
213  int nei = (ix + 1) + Nxv * iyzt;
214  if (ix == Nxv - 1) nei = 0 + Nxv * iyzt;
215  mult_coarse_xpb(pg1_xp, pg2_xp, out,
216  &u[nv2 * site],
217  &v1[nv * site], &v1[nv * nei], Nc, work);
218  }
219 #endif
220 
221 #ifdef RUN_HOP_XM
222  if ((ix > 0) || (do_comm[0] == 0)) {
223  int ix2 = (ix - 1 + Nxv) % Nxv;
224  int nei = ix2 + Nxv * iyzt;
225  mult_coarse_xmb(pg1_xm, pg2_xm, out,
226  &u[nv2 * site], &u[nv2 * nei],
227  &v1[nv * site], &v1[nv * nei],
228  Nc, work);
229  }
230 #endif
231  } // mult_xpb, mult_xmb, done
232 
233  int iy = iyzt % Nyv;
234  int izt = iyzt / Nyv;
235  { // mult_ypb, mult_ymb
236  int idir = 1;
237  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
238 #ifdef RUN_HOP_YP
239  if ((iy < Nyv - 1) || (do_comm[1] == 0)) {
240  int iy2 = (iy + 1) % Nyv;
241  int nei = ix + Nxv * (iy2 + Nyv * izt);
242  mult_coarse_ypb(pg1_yp, pg2_yp, out,
243  &u[nv2 * site],
244  &v1[nv * site], &v1[nv * nei],
245  Nc, work);
246  }
247 #endif
248 #ifdef RUN_HOP_YM
249  if ((iy != 0) || (do_comm[idir] == 0)) {
250  int iy2 = (iy - 1 + Nyv) % Nyv;
251  int nei = ix + Nxv * (iy2 + Nyv * izt);
252  mult_coarse_ymb(pg1_ym, pg2_ym, out,
253  &u[nv2 * site], &u[nv2 * nei],
254  &v1[nv * site], &v1[nv * nei],
255  Nc, work);
256  }
257 #endif
258  } // mult_ypb, mult_ymb, done
259 
260  int ixy = ix + Nxv * iy;
261  int iz = izt % Nz;
262  int it = izt / Nz;
263  int Nxyv = Nxv * Nyv;
264  { // mult_zpb, mult_zmb
265  int idir = 2;
266  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
267 
268 #ifdef RUN_HOP_ZP
269  if ((iz != Nz - 1) || (do_comm[2] == 0)) {
270  int iz2 = (iz + 1) % Nz;
271  int nei = ixy + Nxyv * (iz2 + Nz * it);
272  mult_coarse_zpb(out,
273  &u[nv2 * site], &v1[nv * nei], Nc);
274  }
275 #endif
276 #ifdef RUN_HOP_ZM
277  if ((iz > 0) || (do_comm[2] == 0)) {
278  int iz2 = (iz - 1 + Nz) % Nz;
279  int nei = ixy + Nxyv * (iz2 + Nz * it);
280  mult_coarse_zmb(out,
281  &u[nv2 * nei], &v1[nv * nei], Nc);
282  }
283 #endif
284  } // mult_zpb, mult_zmb, done
285 
286  int Nxyzv = Nxyv * Nz;
287  int ixyz = site - it * Nxyzv;
288  { // mult_tpb, mult_tmb
289  int idir = 3;
290  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
291 
292 #ifdef RUN_HOP_TP
293  if ((it < Nt - 1) || (do_comm[3] == 0)) {
294  int it2 = (it + 1) % Nt;
295  int nei = ixyz + Nxyzv * it2;
296  mult_coarse_tpb(out,
297  &u[nv2 * site], &v1[nv * nei], Nc);
298  }
299 #endif
300 #ifdef RUN_HOP_TM
301  if ((it > 0) || (do_comm[3] == 0)) {
302  int it2 = (it - 1 + Nt) % Nt;
303  int nei = ixyz + Nxyzv * it2;
304  mult_coarse_tmb(out,
305  &u[nv2 * nei], &v1[nv * nei], Nc);
306  }
307 #endif
308  } // mult_tpb, mult_tmb, done
309  } // site
310  }
311 
312 
313 //====================================================================
314  void mult_coarse_2(real_t *v2, real_t *u0, real_t *v1,
315  real_t *buf2_xp, real_t *buf2_xm,
316  real_t *buf2_yp, real_t *buf2_ym,
317  real_t *buf2_zp, real_t *buf2_zm,
318  real_t *buf2_tp, real_t *buf2_tm,
319  const int *Nsize, int ncol, const int *do_comm,
320  real_t *work,
321  std::vector<int>& list)
322  {
323  int ith, nth, is, ns;
324  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
325  int Nxv = Nsize[0];
326  int Nyv = Nsize[1];
327  int Nz = Nsize[2];
328  int Nt = Nsize[3];
329  int Nc = ncol;
330  int Nvc = 2 * ncol; // 2 for complex
331  int Nc2 = ncol * ncol;
332  int Ndf = 2 * Nc2; // 2 for complex
333 
334  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
335  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
336  set_predicate_xp(pg1_xp, pg2_xp);
337  set_predicate_xm(pg1_xm, pg2_xm);
338  set_predicate_yp(pg1_yp, pg2_yp);
339  set_predicate_ym(pg1_ym, pg2_ym);
340  svint_t svidx_xp, svidx_xm;
341  set_index_xp(svidx_xp);
342  set_index_xm(svidx_xm);
343 
344  int nv = VLEN * Nvc;
345  int nv2 = VLEN * Ndf;
346 
347  for (int i = 0; i < list.size(); i++) {
348  int site = list[i];
349  real_t *out = &v2[nv * site];
350 
351  const int ix = site % Nxv;
352  const int iyzt = site / Nxv;
353 
354  if (do_comm[0] == 1) {
355  int idir = 0;
356  int ibf = VLENY * Nvc * iyzt;
357  real_t *u = u0 + nv2 * Nstv * idir;
358 #ifdef RUN_HOP_XP
359  if (ix == Nxv - 1) {
360  set_index_xp(svidx_xp);
361  mult_coarse_xp2(pg1_xp, pg2_xp, svidx_xp,
362  out, &u[nv2 * site],
363  &v1[nv * site], &buf2_xp[ibf], Nc, work);
364  }
365 #endif
366 #ifdef RUN_HOP_XM
367  if (ix == 0) {
368  set_index_xm(svidx_xm);
369  mult_coarse_xm2(pg1_xm, pg2_xm, svidx_xm,
370  out, &u[nv2 * site],
371  &v1[nv * site], &buf2_xm[ibf], Nc);
372  }
373 #endif
374  } // do_comm[0] == 1
375 
376 
377  const int iy = iyzt % Nyv;
378  const int izt = iyzt / Nyv;
379 
380  if (do_comm[1] == 1) {
381  int idir = 1;
382  int ixzt = ix + Nxv * izt;
383  int ibf = VLENX * Nvc * ixzt;
384  real_t *u = u0 + nv2 * Nstv * idir;
385 #ifdef RUN_HOP_YP
386  if (iy == Nyv - 1) {
387  mult_coarse_yp2(pg1_yp, pg2_yp,
388  out,
389  &u[nv2 * site],
390  &v1[nv * site], &buf2_yp[ibf], Nc, work);
391  }
392 #endif
393 #ifdef RUN_HOP_YM
394  if (iy == 0) {
395  mult_coarse_ym2(pg1_ym, pg2_ym,
396  out,
397  &u[nv2 * site],
398  &v1[nv * site], &buf2_ym[ibf], Nc);
399  }
400 #endif
401  } // do_comm[1] == 1
402 
403 
404  const int ixy = ix + Nxv * iy;
405  const int iz = izt % Nz;
406  const int it = izt / Nz;
407  const int Nxyv = Nxv * Nyv;
408 
409  if (do_comm[2] == 1) {
410  int idir = 2;
411  int ixyt = ixy + Nxyv * it;
412  real_t *u = u0 + nv2 * Nstv * idir;
413 #ifdef RUN_HOP_ZP
414  if (iz == Nz - 1) {
415  mult_coarse_zp2(out,
416  &u[nv2 * site], &buf2_zp[nv * ixyt], Nc);
417  }
418 #endif
419 #ifdef RUN_HOP_ZM
420  if (iz == 0) {
421  mult_coarse_zm2(out,
422  &buf2_zm[nv * ixyt], Nc);
423  }
424 #endif
425  } // do_comm[2] == 1
426 
427  if (do_comm[3] == 1) {
428  int idir = 3;
429  int ixyz = ixy + Nxyv * iz;
430  real_t *u = u0 + nv2 * Nstv * idir;
431 #ifdef RUN_HOP_TP
432  if (it == Nt - 1) {
433  mult_coarse_tp2(out,
434  &u[nv2 * site], &buf2_tp[nv * ixyz], Nc);
435  }
436 #endif
437 #ifdef RUN_HOP_TM
438  if (it == 0) {
439  mult_coarse_tm2(out,
440  &buf2_tm[nv * ixyz], Nc);
441  }
442 #endif
443  } // do_comm[3] == 1
444  } // site
445  }
446 }
447 
448 #endif
449 //============================================================END=====
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
BridgeQXS::mult_coarse_2
void mult_coarse_2(double *v2, double *u0, double *v1, double *buf2_xp, double *buf2_xm, double *buf2_yp, double *buf2_ym, double *buf2_zp, double *buf2_zm, double *buf2_tp, double *buf2_tm, const int *Nsize, int ncol, const int *do_comm, double *work, std::vector< int > &list)
Definition: mult_Clover_coarse_qxs-inc.h:314
Isimd_t
Definition: vsimd_double-inc.h:20
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
BridgeQXS::mult_coarse_1
void mult_coarse_1(double *buf1_xp, double *buf1_xm, double *buf1_yp, double *buf1_ym, double *buf1_zp, double *buf1_zm, double *buf1_tp, double *buf1_tm, double *u0, double *v1, const int *Nsize, int ncol, const int *do_comm)
Definition: mult_Clover_coarse_qxs-inc.h:32
BridgeQXS::mult_coarse_b
void mult_coarse_b(double *v2, double *u0, double *c0, double *v1, const int *Nsize, int ncol, const int *do_comm, double *work)
Definition: mult_Clover_coarse_qxs-inc.h:165
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
svbool_t
Definition: vsimd_double-inc.h:30
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
BridgeQXS
Definition: bridgeQXS_Clover.h:12