Bridge++  Ver. 2.0.2
mult_Clover_coarse_qxs_res-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_COARSE_QXS_INCLUDED
11 #define MULT_COARSE_QXS_INCLUDED
12 
13 #include "mult_common_th-inc.h"
14 
15 
// Compile-time switches for the coarse operator kernels in this file.
// RUN_DIAG: when defined, mult_coarse_b initialises each output site with
// set_mult_u (marked "clover term" there); otherwise the output site is zeroed.
16 #define RUN_DIAG
// RUN_HOP_<dir>P / RUN_HOP_<dir>M: each one guards the matching
// mult_coarse_<dir>pb / <dir>mb call in mult_coarse_b and the matching
// mult_coarse_<dir>p2 / <dir>m2 call in mult_coarse_2.
17 #define RUN_HOP_XP
18 #define RUN_HOP_XM
19 
20 #define RUN_HOP_YP
21 #define RUN_HOP_YM
22 
23 #define RUN_HOP_ZP
24 #define RUN_HOP_ZM
25 
26 #define RUN_HOP_TP
27 #define RUN_HOP_TM
28 
29 
30 namespace BridgeQXS {
31  //====================================================================
32  void mult_coarse_1(real_t *buf1_xp, real_t *buf1_xm,
33  real_t *buf1_yp, real_t *buf1_ym,
34  real_t *buf1_zp, real_t *buf1_zm,
35  real_t *buf1_tp, real_t *buf1_tm,
36  real_t *u0, real_t *v1, const int *Nsize,
37  int ncol, const int *do_comm)
38  {
39  int ith, nth, is, ns;
40  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
41  int Nxv = Nsize[0];
42  int Nyv = Nsize[1];
43  int Nz = Nsize[2];
44  int Nt = Nsize[3];
45  int Nc = ncol;
46  int Nvc = 2 * ncol; // 2 for complex
47  int Nc2 = ncol * ncol;
48  int Ndf = 2 * Nc2; // 2 for complex
49 
50 #ifdef USE_QXS_ACLE
51  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
52  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
53  set_predicate_xp(pg1_xp, pg2_xp);
54  set_predicate_xm(pg1_xm, pg2_xm);
55  set_predicate_yp(pg1_yp, pg2_yp);
56  set_predicate_ym(pg1_ym, pg2_ym);
57  svint_t svidx_xp, svidx_xm, svidx_yp, svidx_ym;
58  set_index_xp(svidx_xp);
59  set_index_xm(svidx_xm);
60  set_index_yp(svidx_yp);
61  set_index_ym(svidx_ym);
62 #endif
63 
64  int taskx = (do_comm[0] > 0) ? (Nyv * Nz * Nt) : 0;
65  int tasky = (do_comm[1] > 0) ? (Nxv * Nz * Nt) : 0;
66  int taskz = (do_comm[2] > 0) ? (Nxv * Nyv * Nt) : 0;
67  int taskt = (do_comm[3] > 0) ? (Nxv * Nyv * Nz) : 0;
68  int task_total = taskx + tasky + taskz + taskt;
69  set_threadtask(ith, nth, is, ns, task_total);
70 
71  int isx = is;
72  int nsx = (ns > taskx) ? taskx : ns;
73  is -= taskx;
74  ns -= taskx;
75  int isy = (is < 0) ? 0 : is;
76  int nsy = (ns > tasky) ? tasky : ns;
77  is -= tasky;
78  ns -= tasky;
79  int isz = (is < 0) ? 0 : is;
80  int nsz = (ns > taskz) ? taskz : ns;
81  is -= taskz;
82  ns -= taskz;
83  int ist = (is < 0) ? 0 : is;
84  int nst = (ns < 0) ? 0 : ns;
85 
86  for (int sitex = isx; sitex < nsx; ++sitex) {
87  int iyzt = sitex;
88  int ibf = VLENY * Nvc * iyzt;
89  int idir = 0;
90  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
91  {
92  int ix = 0;
93  int site = ix + Nxv * iyzt;
94 #ifdef USE_QXS_ACLE
95  mult_coarse_xp1(pg2_xm, svidx_xm,
96  &buf1_xp[ibf], &v1[VLEN * Nvc * site], Nc);
97 #else
98  mult_coarse_xp1(&buf1_xp[ibf], &v1[VLEN * Nvc * site], Nc);
99 #endif
100  }
101  {
102  int ix = Nxv - 1;
103  int site = ix + Nxv * iyzt;
104 #ifdef USE_QXS_ACLE
105  mult_coarse_xm1(pg2_xp, svidx_xp,
106  &buf1_xm[ibf], &u[VLEN * Ndf * site],
107  &v1[VLEN * Nvc * site], Nc);
108 #else
109  mult_coarse_xm1(&buf1_xm[ibf], &u[VLEN * Ndf * site],
110  &v1[VLEN * Nvc * site], Nc);
111 #endif
112  }
113  } // sitex
114 
115  for (int sitey = isy; sitey < nsy; sitey++) {
116  int ixzt = sitey;
117  int ix = sitey % Nxv;
118  int izt = sitey / Nxv;
119  int ibf = VLENX * Nvc * ixzt;
120  int idir = 1;
121  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
122  {
123  int iy = 0;
124  int site = ix + Nxv * iy + Nxv * Nyv * izt;
125 #ifdef USE_QXS_ACLE
126  mult_coarse_yp1(pg2_ym, svidx_ym,
127  &buf1_yp[ibf], &v1[VLEN * Nvc * site], Nc);
128 #else
129  mult_coarse_yp1(&buf1_yp[ibf], &v1[VLEN * Nvc * site], Nc);
130 #endif
131  }
132  {
133  int iy = Nyv - 1;
134  int site = ix + Nxv * iy + Nxv * Nyv * izt;
135 #ifdef USE_QXS_ACLE
136  mult_coarse_ym1(pg2_yp, svidx_yp,
137  &buf1_ym[ibf], &u[VLEN * Ndf * site],
138  &v1[VLEN * Nvc * site], Nc);
139 #else
140  mult_coarse_ym1(&buf1_ym[ibf], &u[VLEN * Ndf * site],
141  &v1[VLEN * Nvc * site], Nc);
142 #endif
143  }
144  } // sitey
145 
146  for (int sitez = isz; sitez < nsz; sitez++) {
147  int ixyt = sitez;
148  int ixy = sitez % (Nxv * Nyv);
149  int it = sitez / (Nxv * Nyv);
150  int idir = 2;
151  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
152  {
153  int iz = 0;
154  int site = ixy + Nxv * Nyv * (iz + Nz * it);
155  mult_coarse_zp1(&buf1_zp[VLEN * Nvc * ixyt], &v1[VLEN * Nvc * site], Nc);
156  }
157  {
158  int iz = Nz - 1;
159  int site = ixy + Nxv * Nyv * (iz + Nz * it);
160  mult_coarse_zm1(&buf1_zm[VLEN * Nvc * ixyt],
161  &u[VLEN * Ndf * site], &v1[VLEN * Nvc * site], Nc);
162  }
163  } // sitez
164 
165  for (int sitet = ist; sitet < nst; sitet++) {
166  int ixyz = sitet;
167  int idir = 3;
168  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
169  {
170  int it = 0;
171  int site = ixyz + Nxv * Nyv * Nz * it;
172  mult_coarse_tp1(&buf1_tp[VLEN * Nvc * ixyz], &v1[VLEN * Nvc * site], Nc);
173  }
174  {
175  int it = Nt - 1;
176  int site = ixyz + Nxv * Nyv * Nz * it;
177  mult_coarse_tm1(&buf1_tm[VLEN * Nvc * ixyz],
178  &u[VLEN * Ndf * site], &v1[VLEN * Nvc * site], Nc);
179  }
180  } // sitet
181  }
182 
183 
184 //====================================================================
185  void mult_coarse_b(real_t *v2,
186  real_t *u0, real_t *c0,
187  real_t *v1,
188  const int *Nsize, int ncol,
189  const int *do_comm, real_t *work)
190  {
191  int ith, nth, is, ns;
192  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
193  int Nxv = Nsize[0];
194  int Nyv = Nsize[1];
195  int Nz = Nsize[2];
196  int Nt = Nsize[3];
197  int Nc = ncol;
198  int Nvc = 2 * ncol; // 2 for complex
199  int Nc2 = ncol * ncol;
200  int Ndf = 2 * Nc2; // 2 for complex
201 
202 #ifdef USE_QXS_ACLE
203  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
204  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
205  set_predicate_xp(pg1_xp, pg2_xp);
206  set_predicate_xm(pg1_xm, pg2_xm);
207  set_predicate_yp(pg1_yp, pg2_yp);
208  set_predicate_ym(pg1_ym, pg2_ym);
209 #endif
210 
211 
212  int nv = VLEN * Nvc;
213  int nv2 = VLEN * Ndf;
214  set_threadtask(ith, nth, is, ns, Nstv);
215 
216  for (int site = is; site < ns; ++site) {
217  real_t *out = &v2[nv * site];
218 
219  // clover term
220 #ifdef RUN_DIAG
221  set_mult_u(out, &v1[nv * site],
222  &c0[nv2 * site], Nc);
223 #else
224  for (int i = 0; i < nv; i++) {
225  out[i] = 0.0;
226  }
227 #endif
228  int ix = site % Nxv;
229  int iyzt = site / Nxv;
230  { // mult_xpb, mult_xmb
231  int idir = 0;
232  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
233 
234 #ifdef RUN_HOP_XP
235  if ((ix < Nxv - 1) || (do_comm[0] == 0)) {
236  int nei = (ix + 1) + Nxv * iyzt;
237  if (ix == Nxv - 1) nei = 0 + Nxv * iyzt;
238 #ifdef USE_QXS_ACLE
239  mult_coarse_xpb(pg1_xp, pg2_xp, out,
240  &u[nv2 * site],
241  &v1[nv * site], &v1[nv * nei], Nc, work);
242 #else
243  mult_coarse_xpb(out,
244  &u[nv2 * site],
245  &v1[nv * site], &v1[nv * nei], Nc, work);
246 #endif
247  }
248 #endif
249 
250 #ifdef RUN_HOP_XM
251  if ((ix > 0) || (do_comm[0] == 0)) {
252  int ix2 = (ix - 1 + Nxv) % Nxv;
253  int nei = ix2 + Nxv * iyzt;
254 #ifdef USE_QXS_ACLE
255  mult_coarse_xmb(pg1_xm, pg2_xm, out,
256  &u[nv2 * site], &u[nv2 * nei],
257  &v1[nv * site], &v1[nv * nei],
258  Nc, work);
259 #else
260  mult_coarse_xmb(out,
261  &u[nv2 * site], &u[nv2 * nei],
262  &v1[nv * site], &v1[nv * nei],
263  Nc, work);
264 #endif
265  }
266 #endif
267  } // mult_xpb, mult_xmb, done
268 
269  int iy = iyzt % Nyv;
270  int izt = iyzt / Nyv;
271  { // mult_ypb, mult_ymb
272  int idir = 1;
273  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
274 #ifdef RUN_HOP_YP
275  if ((iy < Nyv - 1) || (do_comm[1] == 0)) {
276  int iy2 = (iy + 1) % Nyv;
277  int nei = ix + Nxv * (iy2 + Nyv * izt);
278 #ifdef USE_QXS_ACLE
279  mult_coarse_ypb(pg1_yp, pg2_yp, out,
280  &u[nv2 * site],
281  &v1[nv * site], &v1[nv * nei],
282  Nc, work);
283 #else
284  mult_coarse_ypb(out,
285  &u[nv2 * site],
286  &v1[nv * site], &v1[nv * nei],
287  Nc, work);
288 #endif
289  }
290 #endif
291 #ifdef RUN_HOP_YM
292  if ((iy != 0) || (do_comm[idir] == 0)) {
293  int iy2 = (iy - 1 + Nyv) % Nyv;
294  int nei = ix + Nxv * (iy2 + Nyv * izt);
295 #ifdef USE_QXS_ACLE
296  mult_coarse_ymb(pg1_ym, pg2_ym, out,
297  &u[nv2 * site], &u[nv2 * nei],
298  &v1[nv * site], &v1[nv * nei],
299  Nc, work);
300 #else
301  mult_coarse_ymb(out,
302  &u[nv2 * site], &u[nv2 * nei],
303  &v1[nv * site], &v1[nv * nei],
304  Nc, work);
305 #endif
306  }
307 #endif
308  } // mult_ypb, mult_ymb, done
309 
310  int ixy = ix + Nxv * iy;
311  int iz = izt % Nz;
312  int it = izt / Nz;
313  int Nxyv = Nxv * Nyv;
314  { // mult_zpb, mult_zmb
315  int idir = 2;
316  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
317 
318 #ifdef RUN_HOP_ZP
319  if ((iz != Nz - 1) || (do_comm[2] == 0)) {
320  int iz2 = (iz + 1) % Nz;
321  int nei = ixy + Nxyv * (iz2 + Nz * it);
322  mult_coarse_zpb(out,
323  &u[nv2 * site], &v1[nv * nei], Nc);
324  }
325 #endif
326 #ifdef RUN_HOP_ZM
327  if ((iz > 0) || (do_comm[2] == 0)) {
328  int iz2 = (iz - 1 + Nz) % Nz;
329  int nei = ixy + Nxyv * (iz2 + Nz * it);
330  mult_coarse_zmb(out,
331  &u[nv2 * nei], &v1[nv * nei], Nc);
332  }
333 #endif
334  } // mult_zpb, mult_zmb, done
335 
336  int Nxyzv = Nxyv * Nz;
337  int ixyz = site - it * Nxyzv;
338  { // mult_tpb, mult_tmb
339  int idir = 3;
340  real_t *u = u0 + VLEN * Ndf * Nstv * idir;
341 
342 #ifdef RUN_HOP_TP
343  if ((it < Nt - 1) || (do_comm[3] == 0)) {
344  int it2 = (it + 1) % Nt;
345  int nei = ixyz + Nxyzv * it2;
346  mult_coarse_tpb(out,
347  &u[nv2 * site], &v1[nv * nei], Nc);
348  }
349 #endif
350 #ifdef RUN_HOP_TM
351  if ((it > 0) || (do_comm[3] == 0)) {
352  int it2 = (it - 1 + Nt) % Nt;
353  int nei = ixyz + Nxyzv * it2;
354  mult_coarse_tmb(out,
355  &u[nv2 * nei], &v1[nv * nei], Nc);
356  }
357 #endif
358  } // mult_tpb, mult_tmb, done
359  } // site
360  }
361 
362 
363 //====================================================================
364  void mult_coarse_2(real_t *v2, real_t *u0, real_t *v1,
365  real_t *buf2_xp, real_t *buf2_xm,
366  real_t *buf2_yp, real_t *buf2_ym,
367  real_t *buf2_zp, real_t *buf2_zm,
368  real_t *buf2_tp, real_t *buf2_tm,
369  const int *Nsize, int ncol, const int *do_comm,
370  real_t *work,
371  std::vector<int>& list)
372  {
373  int ith, nth, is, ns;
374  int Nstv = Nsize[0] * Nsize[1] * Nsize[2] * Nsize[3];
375  int Nxv = Nsize[0];
376  int Nyv = Nsize[1];
377  int Nz = Nsize[2];
378  int Nt = Nsize[3];
379  int Nc = ncol;
380  int Nvc = 2 * ncol; // 2 for complex
381  int Nc2 = ncol * ncol;
382  int Ndf = 2 * Nc2; // 2 for complex
383 
384 #ifdef USE_QXS_ACLE
385  svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
386  svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
387  set_predicate_xp(pg1_xp, pg2_xp);
388  set_predicate_xm(pg1_xm, pg2_xm);
389  set_predicate_yp(pg1_yp, pg2_yp);
390  set_predicate_ym(pg1_ym, pg2_ym);
391  svint_t svidx_xp, svidx_xm, svidx_yp, svidx_ym;
392  set_index_xp(svidx_xp);
393  set_index_xm(svidx_xm);
394  set_index_yp(svidx_yp);
395  set_index_ym(svidx_ym);
396 #endif
397 
398  int nv = VLEN * Nvc;
399  int nv2 = VLEN * Ndf;
400 
401  for (int i = 0; i < list.size(); i++) {
402  int site = list[i];
403  real_t *out = &v2[nv * site];
404 
405  const int ix = site % Nxv;
406  const int iyzt = site / Nxv;
407 
408  if (do_comm[0] == 1) {
409  int idir = 0;
410  int ibf = VLENY * Nvc * iyzt;
411  real_t *u = u0 + nv2 * Nstv * idir;
412 #ifdef RUN_HOP_XP
413  if (ix == Nxv - 1) {
414 #ifdef USE_QXS_ACLE
415  mult_coarse_xp2(pg1_xp, pg2_xp, svidx_xp,
416  out, &u[nv2 * site],
417  &v1[nv * site], &buf2_xp[ibf], Nc, work);
418 #else
419  mult_coarse_xp2(out, &u[nv2 * site],
420  &v1[nv * site], &buf2_xp[ibf], Nc, work);
421 #endif
422  }
423 #endif
424 #ifdef RUN_HOP_XM
425  if (ix == 0) {
426 #ifdef USE_QXS_ACLE
427  mult_coarse_xm2(pg1_xm, pg2_xm, svidx_xm,
428  out, &u[nv2 * site],
429  &v1[nv * site], &buf2_xm[ibf], Nc);
430 #else
431  mult_coarse_xm2(out, &u[nv2 * site],
432  &v1[nv * site], &buf2_xm[ibf], Nc);
433 #endif
434  }
435 #endif
436  } // do_comm[0] == 1
437 
438 
439  const int iy = iyzt % Nyv;
440  const int izt = iyzt / Nyv;
441 
442  if (do_comm[1] == 1) {
443  int idir = 1;
444  int ixzt = ix + Nxv * izt;
445  int ibf = VLENX * Nvc * ixzt;
446  real_t *u = u0 + nv2 * Nstv * idir;
447 #ifdef RUN_HOP_YP
448  if (iy == Nyv - 1) {
449 #ifdef USE_QXS_ACLE
450  mult_coarse_yp2(pg1_yp, pg2_yp, svidx_yp,
451  out,
452  &u[nv2 * site],
453  &v1[nv * site], &buf2_yp[ibf], Nc, work);
454 #else
455  mult_coarse_yp2(out,
456  &u[nv2 * site],
457  &v1[nv * site], &buf2_yp[ibf], Nc, work);
458 #endif
459  }
460 #endif
461 #ifdef RUN_HOP_YM
462  if (iy == 0) {
463 #ifdef USE_QXS_ACLE
464  mult_coarse_ym2(pg1_ym, pg2_ym, svidx_ym,
465  out,
466  &u[nv2 * site],
467  &v1[nv * site], &buf2_ym[ibf], Nc);
468 #else
469  mult_coarse_ym2(out,
470  &u[nv2 * site],
471  &v1[nv * site], &buf2_ym[ibf], Nc);
472 #endif
473  }
474 #endif
475  } // do_comm[1] == 1
476 
477 
478  const int ixy = ix + Nxv * iy;
479  const int iz = izt % Nz;
480  const int it = izt / Nz;
481  const int Nxyv = Nxv * Nyv;
482 
483  if (do_comm[2] == 1) {
484  int idir = 2;
485  int ixyt = ixy + Nxyv * it;
486  real_t *u = u0 + nv2 * Nstv * idir;
487 #ifdef RUN_HOP_ZP
488  if (iz == Nz - 1) {
489  mult_coarse_zp2(out,
490  &u[nv2 * site], &buf2_zp[nv * ixyt], Nc);
491  }
492 #endif
493 #ifdef RUN_HOP_ZM
494  if (iz == 0) {
495  mult_coarse_zm2(out,
496  &buf2_zm[nv * ixyt], Nc);
497  }
498 #endif
499  } // do_comm[2] == 1
500 
501  if (do_comm[3] == 1) {
502  int idir = 3;
503  int ixyz = ixy + Nxyv * iz;
504  real_t *u = u0 + nv2 * Nstv * idir;
505 #ifdef RUN_HOP_TP
506  if (it == Nt - 1) {
507  mult_coarse_tp2(out,
508  &u[nv2 * site], &buf2_tp[nv * ixyz], Nc);
509  }
510 #endif
511 #ifdef RUN_HOP_TM
512  if (it == 0) {
513  mult_coarse_tm2(out,
514  &buf2_tm[nv * ixyz], Nc);
515  }
516 #endif
517  } // do_comm[3] == 1
518  } // site
519  }
520 }
521 
522 #endif
523 //============================================================END=====
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
BridgeQXS::mult_coarse_2
void mult_coarse_2(double *v2, double *u0, double *v1, double *buf2_xp, double *buf2_xm, double *buf2_yp, double *buf2_ym, double *buf2_zp, double *buf2_zm, double *buf2_tp, double *buf2_tm, const int *Nsize, int ncol, const int *do_comm, double *work, std::vector< int > &list)
Definition: mult_Clover_coarse_qxs-inc.h:314
Isimd_t
Definition: vsimd_double-inc.h:20
mult_common_th-inc.h
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
BridgeQXS::mult_coarse_1
void mult_coarse_1(double *buf1_xp, double *buf1_xm, double *buf1_yp, double *buf1_ym, double *buf1_zp, double *buf1_zm, double *buf1_tp, double *buf1_tm, double *u0, double *v1, const int *Nsize, int ncol, const int *do_comm)
Definition: mult_Clover_coarse_qxs-inc.h:32
BridgeQXS::mult_coarse_b
void mult_coarse_b(double *v2, double *u0, double *c0, double *v1, const int *Nsize, int ncol, const int *do_comm, double *work)
Definition: mult_Clover_coarse_qxs-inc.h:165
VLENY
#define VLENY
Definition: bridgeQXS_Clover_coarse_double.cpp:14
svbool_t
Definition: vsimd_double-inc.h:30
VLENX
#define VLENX
Definition: bridgeQXS_Clover_coarse_double.cpp:13
BridgeQXS
Definition: bridgeQXS_Clover.h:12