9 #ifndef MULT_WILSON_PARTS_QXS_H
10 #define MULT_WILSON_PARTS_QXS_H
// mult_wilson_xp1<REALTYPE>(buf, v1)
// NOTE(review): only part of this function is visible in this excerpt; the
// declaration of vt and any statement that touches buf are not shown.
// Visible step: load_vec1_x is called with vt, v1, 0 and NVCD.
26 template<
typename REALTYPE>
27 inline void mult_wilson_xp1(REALTYPE *buf, REALTYPE *v1)
31 load_vec1_x(vt, v1, 0,
NVCD);
// mult_wilson_xp2<REALTYPE>(v2, u, buf)
// NOTE(review): partial view; the declarations of vt1, vt2, ut, wt1, wt2 and
// the code that fills vt2 and ut are not shown here.
// Visible flow: vt1 is produced by shift_vec1_xbw from &buf[0] (with NVC),
// then a loop over ic in [0, NC) calls mult_uv twice and set_sp4_xp once.
37 template<
typename REALTYPE>
38 inline void mult_wilson_xp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
41 shift_vec1_xbw(vt1, &buf[0],
NVC);
48 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_xp for this iteration.
49 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; they differ only in input (vt1/vt2)
// and output (wt1/wt2).
50 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
51 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
52 set_sp4_xp(&v2[ic2], wt1, wt2);
// mult_wilson_xpb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations and the code that fills ut
// are not shown here.
// Visible flow: set_sp2_xp is called with vt1, vt2 and v1, then a loop over
// ic in [0, NC) calls mult_uv twice and set_sp4_xp once. Unlike
// mult_wilson_xp2, the inputs come from v1 directly rather than from a buffer.
58 template<
typename REALTYPE>
59 inline void mult_wilson_xpb(
Vsimd_t *v2,
60 REALTYPE *u, REALTYPE *v1)
63 set_sp2_xp(vt1, vt2, v1);
69 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_xp for this iteration.
70 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
71 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
72 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
73 set_sp4_xp(&v2[ic2], wt1, wt2);
// mult_wilson_xm1<REALTYPE>(buf, u, v1)
// NOTE(review): partial view; local declarations, the definition of ic2, the
// code that fills ut and the body of the second loop are not shown here.
// Visible flow: set_sp2_xm is called with vt1, vt2 and v1, then a loop over
// ic in [0, NC) calls mult_udagv twice; a second loop over the same range
// follows.
79 template<
typename REALTYPE>
80 inline void mult_wilson_xm1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
83 set_sp2_xm(vt1, vt2, v1);
89 for (
int ic = 0; ic <
NC; ++ic) {
// Outputs are written at &wt1[2 * ic] / &wt2[2 * ic], i.e. a different slot
// of wt1/wt2 on each iteration; both calls read &ut[ic2].
91 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
92 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// Second pass over ic; its body is outside this excerpt.
95 for (
int ic = 0; ic <
NC; ++ic) {
// mult_wilson_xm2<REALTYPE>(v2, buf)
// NOTE(review): partial view; the declarations of wt1 and wt2 are not shown.
// Visible flow: for each ic in [0, NC), wt1 and wt2 are produced by
// shift_vec1_xfw from two offsets into buf and passed to set_sp4_xm together
// with &v2[ic2].
103 template<
typename REALTYPE>
104 inline void mult_wilson_xm2(
Vsimd_t *v2, REALTYPE *buf)
107 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_xm for this iteration.
108 int ic2 =
ND * 2 * ic;
// wt1 reads buf at VLENY * (2 * ic); wt2 reads at the same offset shifted
// by VLENY * NVC. Both calls pass 2 as the last argument.
109 shift_vec1_xfw(wt1, &buf[
VLENY * (2 * ic)], 2);
110 shift_vec1_xfw(wt2, &buf[
VLENY * (2 * ic +
NVC)], 2);
111 set_sp4_xm(&v2[ic2], wt1, wt2);
// mult_wilson_xmb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations and the definition of ic2
// are not shown here.
// Visible flow: set_sp2_xm is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; then a loop over ic in [0, NC) calls mult_udagv twice
// and set_sp4_xm once.
117 template<
typename REALTYPE>
118 inline void mult_wilson_xmb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
121 set_sp2_xm(vt1, vt2, v1);
124 load_vec(ut, u,
NDF);
127 for (
int ic = 0; ic <
NC; ++ic) {
// ic3 is the offset into v2; ic2 (defined on a line not shown) indexes ut.
129 int ic3 =
ND * 2 * ic;
130 mult_udagv(wt1, &ut[ic2], vt1,
NC);
131 mult_udagv(wt2, &ut[ic2], vt2,
NC);
132 set_sp4_xm(&v2[ic3], wt1, wt2);
// mult_wilson_yp1<REALTYPE>(buf, v1)
// NOTE(review): partial view; the declaration of vt is not shown here.
// Visible flow: load_vec1_y is called with vt, v1, 0 and NVCD, then
// set_sp2_yp1 is called with buf and vt.
138 template<
typename REALTYPE>
139 inline void mult_wilson_yp1(REALTYPE *buf, REALTYPE *v1)
143 load_vec1_y(vt, v1, 0,
NVCD);
144 set_sp2_yp1(buf, vt);
// mult_wilson_yp2<REALTYPE>(v2, u, buf)
// NOTE(review): partial view; local declarations and the code that fills vt2
// are not shown here.
// Visible flow: vt1 is produced by shift_vec1_ybw from &buf[0] (with NVC);
// load_vec is called with ut, u and NDF; then a loop over ic in [0, NC) calls
// mult_uv twice and set_sp4_yp once.
149 template<
typename REALTYPE>
150 inline void mult_wilson_yp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
153 shift_vec1_ybw(vt1, &buf[0],
NVC);
157 load_vec(ut, u,
NDF);
160 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_yp for this iteration.
161 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
162 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
163 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
164 set_sp4_yp(&v2[ic2], wt1, wt2);
// mult_wilson_ypb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations are not shown here.
// Visible flow: set_sp2_yp is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; then a loop over ic in [0, NC) calls mult_uv twice and
// set_sp4_yp once.
170 template<
typename REALTYPE>
171 inline void mult_wilson_ypb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
174 set_sp2_yp(vt1, vt2, v1);
177 load_vec(ut, u,
NDF);
180 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_yp for this iteration.
181 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
182 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
183 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
184 set_sp4_yp(&v2[ic2], wt1, wt2);
// mult_wilson_ym1<REALTYPE>(buf, u, v1)
// NOTE(review): partial view; local declarations, the definition of ic2 and
// the body of the second loop are not shown here.
// Visible flow: set_sp2_ym is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; a loop over ic in [0, NC) calls mult_udagv twice; a
// second loop over the same range follows.
190 template<
typename REALTYPE>
191 inline void mult_wilson_ym1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
194 set_sp2_ym(vt1, vt2, v1);
197 load_vec(ut, u,
NDF);
200 for (
int ic = 0; ic <
NC; ++ic) {
// Outputs are written at &wt1[2 * ic] / &wt2[2 * ic]; both calls read
// &ut[ic2].
202 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
203 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// Second pass over ic; its body is outside this excerpt.
206 for (
int ic = 0; ic <
NC; ++ic) {
// mult_wilson_ym2<REALTYPE>(v2, buf)
// NOTE(review): partial view; the declarations of wt1 and wt2 are not shown.
// Visible flow: for each ic in [0, NC), wt1 and wt2 are produced by
// shift_vec1_yfw from two offsets into buf and passed to set_sp4_ym together
// with &v2[ic2].
214 template<
typename REALTYPE>
215 inline void mult_wilson_ym2(
Vsimd_t *v2, REALTYPE *buf)
218 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_ym for this iteration.
219 int ic2 =
ND * 2 * ic;
// Offsets into buf are scaled by VLENX here (the x-direction counterpart
// mult_wilson_xm2 scales by VLENY); wt2 reads VLENX * NVC past wt1.
220 shift_vec1_yfw(wt1, &buf[
VLENX * (2 * ic)], 2);
221 shift_vec1_yfw(wt2, &buf[
VLENX * (2 * ic +
NVC)], 2);
222 set_sp4_ym(&v2[ic2], wt1, wt2);
// mult_wilson_ymb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations and the definition of ic2
// are not shown here.
// Visible flow: set_sp2_ym is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; then a loop over ic in [0, NC) calls mult_udagv twice
// and set_sp4_ym once.
228 template<
typename REALTYPE>
229 inline void mult_wilson_ymb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
232 set_sp2_ym(vt1, vt2, v1);
235 load_vec(ut, u,
NDF);
238 for (
int ic = 0; ic <
NC; ++ic) {
// ic3 is the offset into v2; ic2 (defined on a line not shown) indexes ut.
240 int ic3 =
ND * 2 * ic;
241 mult_udagv(wt1, &ut[ic2], vt1,
NC);
242 mult_udagv(wt2, &ut[ic2], vt2,
NC);
243 set_sp4_ym(&v2[ic3], wt1, wt2);
// mult_wilson_zp1<REALTYPE>(buf, v1)
// NOTE(review): partial view; local declarations and whatever is done with
// vt2 are not shown here.
// Visible flow: set_sp2_zp is called with vt1, vt2 and v1, then save_vec is
// called with &buf[0], vt1 and NVC.
249 template<
typename REALTYPE>
250 inline void mult_wilson_zp1(REALTYPE *buf, REALTYPE *v1)
253 set_sp2_zp(vt1, vt2, v1);
255 save_vec(&buf[0], vt1,
NVC);
// mult_wilson_zp2<REALTYPE>(v2, u, buf)
// NOTE(review): partial view; local declarations and the code that fills vt2
// are not shown here.
// Visible flow: load_vec is called with vt1, &buf[0] and NVC, and again with
// ut, u and NDF; then a loop over ic in [0, NC) calls mult_uv twice and
// set_sp4_zp once.
261 template<
typename REALTYPE>
262 inline void mult_wilson_zp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
265 load_vec(vt1, &buf[0],
NVC);
269 load_vec(ut, u,
NDF);
272 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_zp for this iteration.
273 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
274 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
275 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
276 set_sp4_zp(&v2[ic2], wt1, wt2);
// mult_wilson_zpb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations are not shown here.
// Visible flow: set_sp2_zp is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; then a loop over ic in [0, NC) calls mult_uv twice and
// set_sp4_zp once.
282 template<
typename REALTYPE>
283 inline void mult_wilson_zpb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
286 set_sp2_zp(vt1, vt2, v1);
289 load_vec(ut, u,
NDF);
292 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_zp for this iteration.
293 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
294 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
295 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
296 set_sp4_zp(&v2[ic2], wt1, wt2);
// mult_wilson_zm1<REALTYPE>(buf, u, v1)
// NOTE(review): partial view; local declarations, the definition of ic2 and
// whatever is done with wt2 after the loop are not shown here.
// Visible flow: set_sp2_zm is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; a loop over ic in [0, NC) calls mult_udagv twice; and
// save_vec is called with &buf[0], wt1 and NVC.
302 template<
typename REALTYPE>
303 inline void mult_wilson_zm1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
306 set_sp2_zm(vt1, vt2, v1);
309 load_vec(ut, u,
NDF);
312 for (
int ic = 0; ic <
NC; ++ic) {
// Outputs are written at &wt1[2 * ic] / &wt2[2 * ic]; both calls read
// &ut[ic2].
314 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
315 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
318 save_vec(&buf[0], wt1,
NVC);
// mult_wilson_zm2<REALTYPE>(v2, buf)
// NOTE(review): partial view; the declarations of wt1 and wt2 are not shown.
// Visible flow: for each ic in [0, NC), wt1 and wt2 are read with load_vec
// from two offsets into buf and passed to set_sp4_zm together with &v2[ic2].
324 template<
typename REALTYPE>
325 inline void mult_wilson_zm2(
Vsimd_t *v2, REALTYPE *buf)
328 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_zm for this iteration.
329 int ic2 =
ND * 2 * ic;
// wt1 reads buf at VLEN * 2 * ic; wt2 reads VLEN * 2 * NC further on.
// Both calls pass 2 as the last argument.
330 load_vec(wt1, &buf[
VLEN * 2 * ic], 2);
331 load_vec(wt2, &buf[
VLEN * 2 * (ic +
NC)], 2);
332 set_sp4_zm(&v2[ic2], wt1, wt2);
// mult_wilson_zmb<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations and the definition of ic2
// are not shown here.
// Visible flow: set_sp2_zm is called with vt1, vt2 and v1; load_vec is called
// with ut, u and NDF; then a loop over ic in [0, NC) calls mult_udagv twice
// and set_sp4_zm once.
338 template<
typename REALTYPE>
339 inline void mult_wilson_zmb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
342 set_sp2_zm(vt1, vt2, v1);
345 load_vec(ut, u,
NDF);
348 for (
int ic = 0; ic <
NC; ++ic) {
// ic3 is the offset into v2; ic2 (defined on a line not shown) indexes ut.
350 int ic3 =
ND * 2 * ic;
351 mult_udagv(wt1, &ut[ic2], vt1,
NC);
352 mult_udagv(wt2, &ut[ic2], vt2,
NC);
353 set_sp4_zm(&v2[ic3], wt1, wt2);
// mult_wilson_tp1_dirac<REALTYPE>(buf, v1)
// NOTE(review): partial view; local declarations and whatever is done with
// vt2 are not shown here.
// Visible flow: set_sp2_tp_dirac is called with vt1, vt2 and v1, then
// save_vec is called with &buf[0], vt1 and NVC.
359 template<
typename REALTYPE>
360 inline void mult_wilson_tp1_dirac(REALTYPE *buf, REALTYPE *v1)
363 set_sp2_tp_dirac(vt1, vt2, v1);
365 save_vec(&buf[0], vt1,
NVC);
// mult_wilson_tp2_dirac<REALTYPE>(v2, u, buf)
// NOTE(review): partial view; local declarations and the code that fills vt2
// are not shown here.
// Visible flow: load_vec is called with vt1, &buf[0] and NVC, and again with
// ut, u and NDF; then a loop over ic in [0, NC) calls mult_uv twice and
// set_sp4_tp_dirac once.
371 template<
typename REALTYPE>
372 inline void mult_wilson_tp2_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
375 load_vec(vt1, &buf[0],
NVC);
379 load_vec(ut, u,
NDF);
382 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_tp_dirac for this iteration.
383 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
384 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
385 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
386 set_sp4_tp_dirac(&v2[ic2], wt1, wt2);
// mult_wilson_tpb_dirac<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations are not shown here.
// Visible flow: set_sp2_tp_dirac is called with vt1, vt2 and v1; load_vec is
// called with ut, u and NDF; then a loop over ic in [0, NC) calls mult_uv
// twice and set_sp4_tp_dirac once.
392 template<
typename REALTYPE>
393 inline void mult_wilson_tpb_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
396 set_sp2_tp_dirac(vt1, vt2, v1);
399 load_vec(ut, u,
NDF);
402 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_tp_dirac for this iteration.
403 int ic2 =
ND * 2 * ic;
// Both mult_uv calls read &ut[2 * ic]; outputs go to wt1 and wt2.
404 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
405 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
406 set_sp4_tp_dirac(&v2[ic2], wt1, wt2);
// mult_wilson_tm1_dirac<REALTYPE>(buf, u, v1)
// NOTE(review): partial view; local declarations, the definition of ic2 and
// whatever is done with wt2 after the loop are not shown here.
// Visible flow: set_sp2_tm_dirac is called with vt1, vt2 and v1; load_vec is
// called with ut, u and NDF; a loop over ic in [0, NC) calls mult_udagv
// twice; and save_vec is called with &buf[0], wt1 and NVC.
412 template<
typename REALTYPE>
413 inline void mult_wilson_tm1_dirac(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
416 set_sp2_tm_dirac(vt1, vt2, v1);
419 load_vec(ut, u,
NDF);
422 for (
int ic = 0; ic <
NC; ++ic) {
// Outputs are written at &wt1[2 * ic] / &wt2[2 * ic]; both calls read
// &ut[ic2].
424 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
425 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
428 save_vec(&buf[0], wt1,
NVC);
// mult_wilson_tm2_dirac<REALTYPE>(v2, buf)
// NOTE(review): partial view; the declarations of wt1 and wt2 are not shown.
// Visible flow: for each ic in [0, NC), wt1 and wt2 are read with load_vec
// from two offsets into buf and passed to set_sp4_tm_dirac together with
// &v2[ic2].
434 template<
typename REALTYPE>
435 inline void mult_wilson_tm2_dirac(
Vsimd_t *v2, REALTYPE *buf)
438 for (
int ic = 0; ic <
NC; ++ic) {
// ic2 is the offset into v2 passed to set_sp4_tm_dirac for this iteration.
439 int ic2 =
ND * 2 * ic;
// wt1 reads buf at VLEN * 2 * ic; wt2 reads VLEN * 2 * NC further on.
// Both calls pass 2 as the last argument.
440 load_vec(wt1, &buf[
VLEN * 2 * ic], 2);
441 load_vec(wt2, &buf[
VLEN * 2 * (ic +
NC)], 2);
442 set_sp4_tm_dirac(&v2[ic2], wt1, wt2);
// mult_wilson_tmb_dirac<REALTYPE>(v2, u, v1)
// NOTE(review): partial view; local declarations, the definition of ic2 and
// the end of the function are not shown here.
// Visible flow: set_sp2_tm_dirac is called with vt1, vt2 and v1; load_vec is
// called with ut, u and NDF; then a loop over ic in [0, NC) calls mult_udagv
// twice and set_sp4_tm_dirac once.
448 template<
typename REALTYPE>
449 inline void mult_wilson_tmb_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
452 set_sp2_tm_dirac(vt1, vt2, v1);
455 load_vec(ut, u,
NDF);
458 for (
int ic = 0; ic <
NC; ++ic) {
// ic3 is the offset into v2; ic2 (defined on a line not shown) indexes ut.
460 int ic3 =
ND * 2 * ic;
461 mult_udagv(wt1, &ut[ic2], vt1,
NC);
462 mult_udagv(wt2, &ut[ic2], vt2,
NC);
463 set_sp4_tm_dirac(&v2[ic3], wt1, wt2);