9 #ifndef MULT_CLOVER_DD_QXS_INCLUDED
10 #define MULT_CLOVER_DD_QXS_INCLUDED
18 int *Nsize,
int *block_size,
26 int Nstv = Nxv * Nyv * Nz * Nt;
27 int Nst = Nstv *
VLEN;
30 int Bxv = block_size[0];
31 int Byv = block_size[1];
32 int Bz = block_size[2];
33 int Bt = block_size[3];
34 int Bsize = Bxv * Byv * Bz * Bt;
37 int NBx = Nsize[0] / block_size[0];
38 int NBy = Nsize[1] / block_size[1];
39 int NBz = Nsize[2] / block_size[2];
40 int NBt = Nsize[3] / block_size[3];
41 int Nblock = NBx * NBy * NBz * NBt;
43 svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
44 svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
45 set_predicate_xp(pg1_xp, pg2_xp);
46 set_predicate_xm(pg1_xm, pg2_xm);
47 set_predicate_yp(pg1_yp, pg2_yp);
48 set_predicate_ym(pg1_ym, pg2_ym);
51 set_threadtask(ith, nth, is, ns, Bsize);
53 for (
int block = 0; block < Nblock; ++block) {
54 int ibx = block % NBx;
55 int iby = (block / NBx) % NBy;
56 int ibz = (block / (NBx * NBy)) % NBz;
57 int ibt = block / (NBx * NBy * NBz);
58 int jeo = (ieo + ibx + iby + ibz + ibt) % 2;
60 if ((ieo > -1) && (jeo == 1))
continue;
62 for (
int bsite = is; bsite < ns; ++bsite) {
64 int ix = kx + Bxv * ibx;
65 int kyzt = bsite / Bxv;
67 int iy = ky + Byv * iby;
70 int iz = kz + Bz * ibz;
72 int it = kt + Bt * ibt;
73 int site = ix + Nxv * (iy + Nyv * (iz + Nz * it));
84 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
89 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
96 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
101 int nei = site + Bxv - 1;
102 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
108 int nei = site + Nxv;
110 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
116 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
122 int nei = site - Nxv;
124 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
128 int nei = site + Nxv * (Byv - 1);
130 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
136 int nei = site + Nxv * Nyv;
142 int nei = site - Nxv * Nyv;
148 int nei = site + Nxv * Nyv * Nz;
150 mult_wilson_tpb_dirac(v2v, &u[
VLEN *
NDF * site],
155 int nei = site - Nxv * Nyv * Nz;
157 mult_wilson_tmb_dirac(v2v, &u[
VLEN *
NDF * nei],
161 mult_clover_csw_aypx(&v2[
VLEN *
NVCD * site], -kappa, v2v,
173 int *Nsize,
int *block_size,
181 int Nstv = Nxv * Nyv * Nz * Nt;
182 int Nst = Nstv *
VLEN;
185 int Bxv = block_size[0];
186 int Byv = block_size[1];
187 int Bz = block_size[2];
188 int Bt = block_size[3];
189 int Bsize = Bxv * Byv * Bz * Bt;
192 int NBx = Nsize[0] / block_size[0];
193 int NBy = Nsize[1] / block_size[1];
194 int NBz = Nsize[2] / block_size[2];
195 int NBt = Nsize[3] / block_size[3];
196 int Nblock = NBx * NBy * NBz * NBt;
198 svbool_t pg1_xp, pg2_xp, pg1_xm, pg2_xm;
199 svbool_t pg1_yp, pg2_yp, pg1_ym, pg2_ym;
200 set_predicate_xp(pg1_xp, pg2_xp);
201 set_predicate_xm(pg1_xm, pg2_xm);
202 set_predicate_yp(pg1_yp, pg2_yp);
203 set_predicate_ym(pg1_ym, pg2_ym);
205 int ith, nth, is, ns;
206 set_threadtask(ith, nth, is, ns, Bsize);
208 for (
int block = 0; block < Nblock; ++block) {
209 int ibx = block % NBx;
210 int iby = (block / NBx) % NBy;
211 int ibz = (block / (NBx * NBy)) % NBz;
212 int ibt = block / (NBx * NBy * NBz);
213 int jeo = (ieo + ibx + iby + ibz + ibt) % 2;
215 if ((ieo > -1) && (jeo == 1))
continue;
217 for (
int bsite = is; bsite < ns; ++bsite) {
218 int kx = bsite % Bxv;
219 int ix = kx + Bxv * ibx;
220 int kyzt = bsite / Bxv;
222 int iy = ky + Byv * iby;
223 int kzt = kyzt / Byv;
225 int iz = kz + Bz * ibz;
227 int it = kt + Bt * ibt;
228 int site = ix + Nxv * (iy + Nyv * (iz + Nz * it));
231 clear_vec(v2v,
NVCD);
239 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
244 mult_wilson_xpb(pg1_xp, pg2_xp, v2v, &u[
VLEN *
NDF * site],
251 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
256 int nei = site + Bxv - 1;
257 mult_wilson_xmb(pg1_xm, pg2_xm, v2v,
263 int nei = site + Nxv;
265 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
271 mult_wilson_ypb(pg1_yp, pg2_yp, v2v,
277 int nei = site - Nxv;
279 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
283 int nei = site + Nxv * (Byv - 1);
285 mult_wilson_ymb(pg1_ym, pg2_ym, v2v,
291 int nei = site + Nxv * Nyv;
297 int nei = site - Nxv * Nyv;
303 int nei = site + Nxv * Nyv * Nz;
305 mult_wilson_tpb_dirac(v2v, &u[
VLEN *
NDF * site],
310 int nei = site - Nxv * Nyv * Nz;
312 mult_wilson_tmb_dirac(v2v, &u[
VLEN *
NDF * nei],
316 mult_clover_csw_aypx_chrot(
317 &v2[
VLEN *
NVCD * site], -kappa, &v2v[0].v[0],