10 #ifndef MULT_WILSON_QXS_PARTS_H
11 #define MULT_WILSON_QXS_PARTS_H
// Setup check helper. Only the signature is visible in this excerpt, so the
// checks it performs cannot be described from here.
15 inline void check_setup()
// Stage 1 of the "xp" part of the Wilson multiplication (presumably the +x
// hopping term -- inferred from the name, TODO confirm).
//   buf : not referenced by the lines visible here; presumably the
//         destination buffer -- verify against the full source.
//   v1  : input array, read through load_vec1.
28 template<
typename REALTYPE>
29 inline void mult_wilson_xp1(REALTYPE *buf, REALTYPE *v1)
// Read from v1 into the temporary vt. The arguments 0 and NVCD are passed
// straight to load_vec1 (presumably a lane index and an element count --
// check load_vec1's definition).
33 load_vec1(vt, v1, 0,
NVCD);
// Stage 2 of the "xp" part: consumes buf and writes the result into v2.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u   : not referenced by the lines visible here; presumably the source
//         of ut (the load is not visible) -- TODO confirm.
//   buf : input buffer; two pieces are read, at offsets 0 and NVC.
39 template<
typename REALTYPE>
40 inline void mult_wilson_xp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
// Fetch the two buffer pieces through shift_vec1_bw into vt1 and vt2.
43 shift_vec1_bw(vt1, &buf[0],
NVC);
44 shift_vec1_bw(vt2, &buf[
NVC],
NVC);
// For each ic: apply mult_uv with &ut[2 * ic] to vt1 and vt2, then hand
// both results to set_sp4_xp, which receives the v2 slot at index ic2.
50 for (
int ic = 0; ic <
NC; ++ic) {
51 int ic2 =
ND * 2 * ic;
52 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
53 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
54 set_sp4_xp(&v2[ic2], wt1, wt2);
// "xpb" variant of the "xp" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk, non-communicating path --
// TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : not referenced by the lines visible here; presumably the source
//        of ut -- TODO confirm.
//   v1 : input array, passed to set_sp2_xp.
60 template<
typename REALTYPE>
61 inline void mult_wilson_xpb(
Vsimd_t *v2,
62 REALTYPE *u, REALTYPE *v1)
// set_sp2_xp receives vt1, vt2 and the input v1 (vt1/vt2 presumably outputs).
65 set_sp2_xp(vt1, vt2, v1);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then set_sp4_xp
// with the v2 slot at index ic2.
71 for (
int ic = 0; ic <
NC; ++ic) {
72 int ic2 =
ND * 2 * ic;
73 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
74 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
75 set_sp4_xp(&v2[ic2], wt1, wt2);
// Stage 1 of the "xm" part (presumably the -x hopping term -- TODO confirm):
// processes v1 with mult_udagv and stores into buf.
//   buf : output buffer, written through save_vec1.
//   u   : not referenced by the lines visible here; presumably the source
//         of ut -- TODO confirm.
//   v1  : input array, passed to set_sp2_xm.
81 template<
typename REALTYPE>
82 inline void mult_wilson_xm1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
85 set_sp2_xm(vt1, vt2, v1);
// For each ic: mult_udagv with &ut[ic2], writing to element 2 * ic of
// wt1 / wt2. NOTE(review): the definition of ic2 is not visible in this
// excerpt.
91 for (
int ic = 0; ic <
NC; ++ic) {
93 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
94 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// NOTE(review): as visible here, the save_vec1 call below does not depend on
// the loop index ic; confirm against the full source whether this loop is
// intended or whether lines are missing from the excerpt.
97 for (
int ic = 0; ic <
NC; ++ic) {
98 save_vec1(&buf[0], wt1,
VLEN - 1,
NVC);
// Stage 2 of the "xm" part: reads buf and writes into v2. No u parameter is
// taken and no multiplication call is visible in this stage.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   buf : input buffer, read at offsets 2 * ic and 2 * ic + NVC.
105 template<
typename REALTYPE>
106 inline void mult_wilson_xm2(
Vsimd_t *v2, REALTYPE *buf)
// For each ic: fetch two pieces from buf through shift_vec1_fw (trailing
// argument 2), then pass them to set_sp4_xm with the v2 slot at ic2.
109 for (
int ic = 0; ic <
NC; ++ic) {
110 int ic2 =
ND * 2 * ic;
111 shift_vec1_fw(wt1, &buf[2 * ic], 2);
112 shift_vec1_fw(wt2, &buf[2 * ic +
NVC], 2);
113 set_sp4_xm(&v2[ic2], wt1, wt2);
// "xmb" variant of the "xm" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk path -- TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_xm.
119 template<
typename REALTYPE>
120 inline void mult_wilson_xmb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
123 set_sp2_xm(vt1, vt2, v1);
// Load u into ut; NDF is passed as the last argument of load_vec.
126 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2] on vt1 and vt2, then set_sp4_xm
// with the v2 slot at index ic3. NOTE(review): the definition of ic2 is not
// visible in this excerpt.
129 for (
int ic = 0; ic <
NC; ++ic) {
131 int ic3 =
ND * 2 * ic;
132 mult_udagv(wt1, &ut[ic2], vt1,
NC);
133 mult_udagv(wt2, &ut[ic2], vt2,
NC);
134 set_sp4_xm(&v2[ic3], wt1, wt2);
// Stage 1 of the "yp" part (presumably the +y hopping term -- TODO confirm):
// processes v1 with set_sp2_yp and stores into buf.
//   buf : output buffer, written through save_vec.
//   v1  : input array, passed to set_sp2_yp.
140 template<
typename REALTYPE>
141 inline void mult_wilson_yp1(REALTYPE *buf, REALTYPE *v1)
144 set_sp2_yp(vt1, vt2, v1);
// Store vt1 at the start of buf. A matching store of vt2 is not visible in
// this excerpt -- verify against the full source.
146 save_vec(&buf[0], vt1,
NVC);
// Stage 2 of the "yp" part: reads buf, applies mult_uv, and writes into v2.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u   : input array loaded into ut.
//   buf : input buffer, read through load_vec.
152 template<
typename REALTYPE>
153 inline void mult_wilson_yp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
// Load vt1 from the start of buf. The load of vt2 is not visible in this
// excerpt -- verify against the full source.
156 load_vec(vt1, &buf[0],
NVC);
160 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then set_sp4_yp
// with the v2 slot at index ic2.
163 for (
int ic = 0; ic <
NC; ++ic) {
164 int ic2 =
ND * 2 * ic;
165 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
166 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
167 set_sp4_yp(&v2[ic2], wt1, wt2);
// "ypb" variant of the "yp" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk path -- TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_yp.
173 template<
typename REALTYPE>
174 inline void mult_wilson_ypb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
177 set_sp2_yp(vt1, vt2, v1);
180 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then set_sp4_yp
// with the v2 slot at index ic2.
183 for (
int ic = 0; ic <
NC; ++ic) {
184 int ic2 =
ND * 2 * ic;
185 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
186 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
187 set_sp4_yp(&v2[ic2], wt1, wt2);
// Stage 1 of the "ym" part (presumably the -y hopping term -- TODO confirm):
// processes v1 with mult_udagv and stores into buf.
//   buf : output buffer, written through save_vec.
//   u   : input array loaded into ut.
//   v1  : input array, passed to set_sp2_ym.
193 template<
typename REALTYPE>
194 inline void mult_wilson_ym1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
197 set_sp2_ym(vt1, vt2, v1);
200 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2], writing to element 2 * ic of
// wt1 / wt2. NOTE(review): the definition of ic2 is not visible in this
// excerpt.
203 for (
int ic = 0; ic <
NC; ++ic) {
205 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
206 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// Store wt1 at the start of buf. A matching store of wt2 is not visible in
// this excerpt -- verify against the full source.
209 save_vec(&buf[0], wt1,
NVC);
// Stage 2 of the "ym" part: reads buf and writes into v2. No u parameter is
// taken and no multiplication call is visible in this stage.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   buf : input buffer, read at VLEN * 2 * ic and VLEN * 2 * (ic + NC).
215 template<
typename REALTYPE>
216 inline void mult_wilson_ym2(
Vsimd_t *v2, REALTYPE *buf)
// For each ic: load wt1 and wt2 from buf (trailing argument 2), then pass
// them to set_sp4_ym with the v2 slot at index ic2.
219 for (
int ic = 0; ic <
NC; ++ic) {
220 int ic2 =
ND * 2 * ic;
221 load_vec(wt1, &buf[
VLEN * 2 * ic], 2);
222 load_vec(wt2, &buf[
VLEN * 2 * (ic +
NC)], 2);
223 set_sp4_ym(&v2[ic2], wt1, wt2);
// "ymb" variant of the "ym" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk path -- TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_ym.
229 template<
typename REALTYPE>
230 inline void mult_wilson_ymb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
233 set_sp2_ym(vt1, vt2, v1);
236 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2] on vt1 and vt2, then set_sp4_ym
// with the v2 slot at index ic3. NOTE(review): the definition of ic2 is not
// visible in this excerpt.
239 for (
int ic = 0; ic <
NC; ++ic) {
241 int ic3 =
ND * 2 * ic;
242 mult_udagv(wt1, &ut[ic2], vt1,
NC);
243 mult_udagv(wt2, &ut[ic2], vt2,
NC);
244 set_sp4_ym(&v2[ic3], wt1, wt2);
// Stage 1 of the "zp" part (presumably the +z hopping term -- TODO confirm):
// processes v1 with set_sp2_zp and stores into buf.
//   buf : output buffer, written through save_vec.
//   v1  : input array, passed to set_sp2_zp.
250 template<
typename REALTYPE>
251 inline void mult_wilson_zp1(REALTYPE *buf, REALTYPE *v1)
254 set_sp2_zp(vt1, vt2, v1);
// Store vt1 at the start of buf. A matching store of vt2 is not visible in
// this excerpt -- verify against the full source.
256 save_vec(&buf[0], vt1,
NVC);
// Stage 2 of the "zp" part: reads buf, applies mult_uv, and writes into v2.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u   : input array loaded into ut.
//   buf : input buffer, read through load_vec.
262 template<
typename REALTYPE>
263 inline void mult_wilson_zp2(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
// Load vt1 from the start of buf. The load of vt2 is not visible in this
// excerpt -- verify against the full source.
266 load_vec(vt1, &buf[0],
NVC);
270 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then set_sp4_zp
// with the v2 slot at index ic2.
273 for (
int ic = 0; ic <
NC; ++ic) {
274 int ic2 =
ND * 2 * ic;
275 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
276 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
277 set_sp4_zp(&v2[ic2], wt1, wt2);
// "zpb" variant of the "zp" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk path -- TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_zp.
283 template<
typename REALTYPE>
284 inline void mult_wilson_zpb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
287 set_sp2_zp(vt1, vt2, v1);
290 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then set_sp4_zp
// with the v2 slot at index ic2.
293 for (
int ic = 0; ic <
NC; ++ic) {
294 int ic2 =
ND * 2 * ic;
295 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
296 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
297 set_sp4_zp(&v2[ic2], wt1, wt2);
// Stage 1 of the "zm" part (presumably the -z hopping term -- TODO confirm):
// processes v1 with mult_udagv and stores into buf.
//   buf : output buffer, written through save_vec.
//   u   : input array loaded into ut.
//   v1  : input array, passed to set_sp2_zm.
303 template<
typename REALTYPE>
304 inline void mult_wilson_zm1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
307 set_sp2_zm(vt1, vt2, v1);
310 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2], writing to element 2 * ic of
// wt1 / wt2. NOTE(review): the definition of ic2 is not visible in this
// excerpt.
313 for (
int ic = 0; ic <
NC; ++ic) {
315 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
316 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// Store wt1 at the start of buf. A matching store of wt2 is not visible in
// this excerpt -- verify against the full source.
319 save_vec(&buf[0], wt1,
NVC);
// Stage 2 of the "zm" part: reads buf and writes into v2. No u parameter is
// taken and no multiplication call is visible in this stage.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   buf : input buffer, read at VLEN * 2 * ic and VLEN * 2 * (ic + NC).
325 template<
typename REALTYPE>
326 inline void mult_wilson_zm2(
Vsimd_t *v2, REALTYPE *buf)
// For each ic: load wt1 and wt2 from buf (trailing argument 2), then pass
// them to set_sp4_zm with the v2 slot at index ic2.
329 for (
int ic = 0; ic <
NC; ++ic) {
330 int ic2 =
ND * 2 * ic;
331 load_vec(wt1, &buf[
VLEN * 2 * ic], 2);
332 load_vec(wt2, &buf[
VLEN * 2 * (ic +
NC)], 2);
333 set_sp4_zm(&v2[ic2], wt1, wt2);
// "zmb" variant of the "zm" part: goes from v1 to v2 in one call, with no
// intermediate buf parameter (presumably the bulk path -- TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_zm.
339 template<
typename REALTYPE>
340 inline void mult_wilson_zmb(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
343 set_sp2_zm(vt1, vt2, v1);
346 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2] on vt1 and vt2, then set_sp4_zm
// with the v2 slot at index ic3. NOTE(review): the definition of ic2 is not
// visible in this excerpt.
349 for (
int ic = 0; ic <
NC; ++ic) {
351 int ic3 =
ND * 2 * ic;
352 mult_udagv(wt1, &ut[ic2], vt1,
NC);
353 mult_udagv(wt2, &ut[ic2], vt2,
NC);
354 set_sp4_zm(&v2[ic3], wt1, wt2);
// Stage 1 of the "tp" part, "_dirac" flavour (presumably the +t hopping term
// in the Dirac gamma-matrix representation -- inferred from the name, TODO
// confirm): processes v1 with set_sp2_tp_dirac and stores into buf.
//   buf : output buffer, written through save_vec.
//   v1  : input array, passed to set_sp2_tp_dirac.
360 template<
typename REALTYPE>
361 inline void mult_wilson_tp1_dirac(REALTYPE *buf, REALTYPE *v1)
364 set_sp2_tp_dirac(vt1, vt2, v1);
// Store vt1 at the start of buf. A matching store of vt2 is not visible in
// this excerpt -- verify against the full source.
366 save_vec(&buf[0], vt1,
NVC);
// Stage 2 of the "tp" part, "_dirac" flavour: reads buf, applies mult_uv,
// and writes into v2.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u   : input array loaded into ut.
//   buf : input buffer, read through load_vec.
372 template<
typename REALTYPE>
373 inline void mult_wilson_tp2_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
// Load vt1 from the start of buf. The load of vt2 is not visible in this
// excerpt -- verify against the full source.
376 load_vec(vt1, &buf[0],
NVC);
380 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then
// set_sp4_tp_dirac with the v2 slot at index ic2.
383 for (
int ic = 0; ic <
NC; ++ic) {
384 int ic2 =
ND * 2 * ic;
385 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
386 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
387 set_sp4_tp_dirac(&v2[ic2], wt1, wt2);
// "tpb" variant of the "tp" part, "_dirac" flavour: goes from v1 to v2 in
// one call, with no intermediate buf parameter (presumably the bulk path --
// TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_tp_dirac.
393 template<
typename REALTYPE>
394 inline void mult_wilson_tpb_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
397 set_sp2_tp_dirac(vt1, vt2, v1);
400 load_vec(ut, u,
NDF);
// For each ic: mult_uv with &ut[2 * ic] on vt1 and vt2, then
// set_sp4_tp_dirac with the v2 slot at index ic2.
403 for (
int ic = 0; ic <
NC; ++ic) {
404 int ic2 =
ND * 2 * ic;
405 mult_uv(wt1, &ut[2 * ic], vt1,
NC);
406 mult_uv(wt2, &ut[2 * ic], vt2,
NC);
407 set_sp4_tp_dirac(&v2[ic2], wt1, wt2);
// Stage 1 of the "tm" part, "_dirac" flavour (presumably the -t hopping term
// -- TODO confirm): processes v1 with mult_udagv and stores into buf.
//   buf : output buffer, written through save_vec.
//   u   : input array loaded into ut.
//   v1  : input array, passed to set_sp2_tm_dirac.
413 template<
typename REALTYPE>
414 inline void mult_wilson_tm1_dirac(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
417 set_sp2_tm_dirac(vt1, vt2, v1);
420 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2], writing to element 2 * ic of
// wt1 / wt2. NOTE(review): the definition of ic2 is not visible in this
// excerpt.
423 for (
int ic = 0; ic <
NC; ++ic) {
425 mult_udagv(&wt1[2 * ic], &ut[ic2], vt1,
NC);
426 mult_udagv(&wt2[2 * ic], &ut[ic2], vt2,
NC);
// Store wt1 at the start of buf. A matching store of wt2 is not visible in
// this excerpt -- verify against the full source.
429 save_vec(&buf[0], wt1,
NVC);
// Stage 2 of the "tm" part, "_dirac" flavour: reads buf and writes into v2.
// No u parameter is taken and no multiplication call is visible in this stage.
//   v2  : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   buf : input buffer, read at VLEN * 2 * ic and VLEN * 2 * (ic + NC).
435 template<
typename REALTYPE>
436 inline void mult_wilson_tm2_dirac(
Vsimd_t *v2, REALTYPE *buf)
// For each ic: load wt1 and wt2 from buf (trailing argument 2), then pass
// them to set_sp4_tm_dirac with the v2 slot at index ic2.
439 for (
int ic = 0; ic <
NC; ++ic) {
440 int ic2 =
ND * 2 * ic;
441 load_vec(wt1, &buf[
VLEN * 2 * ic], 2);
442 load_vec(wt2, &buf[
VLEN * 2 * (ic +
NC)], 2);
443 set_sp4_tm_dirac(&v2[ic2], wt1, wt2);
// "tmb" variant of the "tm" part, "_dirac" flavour: goes from v1 to v2 in
// one call, with no intermediate buf parameter (presumably the bulk path --
// TODO confirm).
//   v2 : output, written at offset ND * 2 * ic for each ic in [0, NC).
//   u  : input array loaded into ut.
//   v1 : input array, passed to set_sp2_tm_dirac.
449 template<
typename REALTYPE>
450 inline void mult_wilson_tmb_dirac(
Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
453 set_sp2_tm_dirac(vt1, vt2, v1);
456 load_vec(ut, u,
NDF);
// For each ic: mult_udagv with &ut[ic2] on vt1 and vt2, then
// set_sp4_tm_dirac with the v2 slot at index ic3. NOTE(review): the
// definition of ic2 is not visible in this excerpt.
459 for (
int ic = 0; ic <
NC; ++ic) {
461 int ic3 =
ND * 2 * ic;
462 mult_udagv(wt1, &ut[ic2], vt1,
NC);
463 mult_udagv(wt2, &ut[ic2], vt2,
NC);
464 set_sp4_tm_dirac(&v2[ic3], wt1, wt2);