23 #if defined USE_GROUP_SU3
24 #include "fopr_Wilson_impl_SU3.inc"
25 #elif defined USE_GROUP_SU2
26 #include "fopr_Wilson_impl_SU2.inc"
27 #elif defined USE_GROUP_SU_N
28 #include "fopr_Wilson_impl_SU_N.inc"
49 vout.
crucial(
m_vl,
"%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
59 vout.
crucial(
m_vl,
"%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
65 vout.
crucial(
m_vl,
"%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
78 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
79 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
80 int itask = ith_z + m_Ntask_z * ith_t;
88 if (ith_t == 0)
m_arg[itask].kt0 = 1;
89 if (ith_z == 0)
m_arg[itask].kz0 = 1;
90 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
91 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
95 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy2;
96 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy2;
104 double *w,
double fac)
106 int Nvcd = m_Nvc *
m_Nd;
107 int Nvxy = Nvcd * m_Nx2 * m_Ny;
109 int isite = m_arg[itask].isite;
110 double *wp = &w[Nvcd * isite];
112 for (
int it = 0; it < m_Mt; ++it) {
113 for (
int iz = 0; iz < m_Mz; ++iz) {
114 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
115 int iv = ivxy + Nvxy * (iz + m_Nz * it);
116 wp[iv] = fac * wp[iv];
127 int Nvcd = m_Nvc *
m_Nd;
128 int Nvxy = Nvcd * m_Nx2 * m_Ny;
130 int isite = m_arg[itask].isite;
131 double *wp = &v[Nvcd * isite];
133 for (
int it = 0; it < m_Mt; ++it) {
134 for (
int iz = 0; iz < m_Mz; ++iz) {
135 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
136 int iv = ivxy + Nvxy * (iz + m_Nz * it);
146 int itask,
double *vcp1,
const double *
v1,
int ieo)
148 int Nvc2 = 2 * m_Nvc;
149 int Nvcd = m_Nvc *
m_Nd;
150 int Nvcd2 = Nvcd / 2;
157 int isite = m_arg[itask].isite;
158 int isite_cp = m_arg[itask].isite_cpx;
159 int iyzt0 = isite / m_Nx2;
161 const double *w1 = &v1[Nvcd * isite];
162 double *w2 = &vcp1[Nvcd2 * isite_cp];
165 double bc2 = m_boundary2[idir];
170 for (
int it = 0; it < m_Mt; ++it) {
171 for (
int iz = 0; iz < m_Mz; ++iz) {
172 for (
int iy = 0; iy < m_Ny; ++iy) {
173 int iyzt = iy + m_Ny * (iz + m_Nz * it);
174 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
176 int is = ix + m_Nx2 * iyzt;
179 int ix1 = Nvc2 * ibf;
180 int ix2 = ix1 + m_Nvc;
182 for (
int ic = 0; ic <
m_Nc; ++ic) {
183 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
184 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
185 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
186 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
198 int itask,
double *
v2,
const double *vcp2,
int ieo)
200 int Nvc2 = 2 * m_Nvc;
201 int Nvcd = m_Nvc *
m_Nd;
202 int Nvcd2 = Nvcd / 2;
211 double wt1r, wt1i, wt2r, wt2i;
213 int isite = m_arg[itask].isite;
214 int isite_cp = m_arg[itask].isite_cpx;
215 int iyzt0 = isite / m_Nx2;
217 const double *w1 = &vcp2[Nvcd2 * isite_cp];
218 double *w2 = &v2[Nvcd * isite];
219 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
224 for (
int it = 0; it < m_Mt; ++it) {
225 for (
int iz = 0; iz < m_Mz; ++iz) {
226 for (
int iy = 0; iy < m_Ny; ++iy) {
227 int iyzt = iy + m_Ny * (iz + m_Nz * it);
228 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
231 int is = ix + m_Nx2 * iyzt;
234 int ix1 = Nvc2 * ibf;
235 int ix2 = ix1 + m_Nvc;
237 for (
int ic = 0; ic <
m_Nc; ++ic) {
238 int ic2 = ic * m_Nvc;
239 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
240 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
241 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
242 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
243 w2[2 * ic + id1 + iv] += wt1r;
244 w2[2 * ic + 1 + id1 + iv] += wt1i;
245 w2[2 * ic + id2 + iv] += wt2r;
246 w2[2 * ic + 1 + id2 + iv] += wt2i;
247 w2[2 * ic + id3 + iv] += wt2i;
248 w2[2 * ic + 1 + id3 + iv] += -wt2r;
249 w2[2 * ic + id4 + iv] += wt1i;
250 w2[2 * ic + 1 + id4 + iv] += -wt1r;
262 int itask,
double *
v2,
const double *
v1,
int ieo)
264 int Nvcd = m_Nvc *
m_Nd;
273 double vt1[m_Nvc], vt2[m_Nvc];
274 double wt1r, wt1i, wt2r, wt2i;
276 int isite = m_arg[itask].isite;
277 int iyzt0 = isite / m_Nx2;
279 const double *w1 = &v1[Nvcd * isite];
280 double *w2 = &v2[Nvcd * isite];
281 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
283 for (
int it = 0; it < m_Mt; ++it) {
284 for (
int iz = 0; iz < m_Mz; ++iz) {
285 for (
int iy = 0; iy < m_Ny; ++iy) {
286 int iyzt = iy + m_Ny * (iz + m_Nz * it);
287 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
288 for (
int ix = 0; ix < m_Nx2 - Leo; ++ix) {
289 int is = ix + m_Nx2 * iyzt;
291 int in = Nvcd * (is + Leo);
294 for (
int ic = 0; ic <
m_Nc; ++ic) {
295 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
296 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
297 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
298 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
301 for (
int ic = 0; ic <
m_Nc; ++ic) {
302 int ic2 = ic * m_Nvc;
304 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
305 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
306 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
307 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
309 w2[2 * ic + id1 + iv] += wt1r;
310 w2[2 * ic + 1 + id1 + iv] += wt1i;
311 w2[2 * ic + id2 + iv] += wt2r;
312 w2[2 * ic + 1 + id2 + iv] += wt2i;
313 w2[2 * ic + id3 + iv] += wt2i;
314 w2[2 * ic + 1 + id3 + iv] += -wt2r;
315 w2[2 * ic + id4 + iv] += wt1i;
316 w2[2 * ic + 1 + id4 + iv] += -wt1r;
327 int itask,
double *vcp1,
const double *
v1,
int ieo)
329 int Nvc2 = 2 * m_Nvc;
330 int Nvcd = m_Nvc *
m_Nd;
331 int Nvcd2 = Nvcd / 2;
340 int isite = m_arg[itask].isite;
341 int isite_cp = m_arg[itask].isite_cpx;
342 int iyzt0 = isite / m_Nx2;
344 const double *w1 = &v1[Nvcd * isite];
345 double *w2 = &vcp1[Nvcd2 * isite_cp];
346 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
348 double vt1[m_Nvc], vt2[m_Nvc];
353 for (
int it = 0; it < m_Mt; ++it) {
354 for (
int iz = 0; iz < m_Mz; ++iz) {
355 for (
int iy = 0; iy < m_Ny; ++iy) {
356 int iyzt = iy + m_Ny * (iz + m_Nz * it);
357 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
359 int is = ix + m_Nx2 * iyzt;
363 int ix1 = Nvc2 * ibf;
364 int ix2 = ix1 + m_Nvc;
366 for (
int ic = 0; ic <
m_Nc; ++ic) {
367 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
368 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
369 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
370 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
373 for (
int ic = 0; ic <
m_Nc; ++ic) {
375 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
376 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
377 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
378 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
390 int itask,
double *
v2,
const double *vcp2,
int ieo)
392 int Nvc2 = 2 * m_Nvc;
393 int Nvcd = m_Nvc *
m_Nd;
394 int Nvcd2 = Nvcd / 2;
402 double bc2 = m_boundary2[idir];
406 int isite = m_arg[itask].isite;
407 int isite_cp = m_arg[itask].isite_cpx;
408 int iyzt0 = isite / m_Nx2;
410 const double *w1 = &vcp2[Nvcd2 * isite_cp];
411 double *w2 = &v2[Nvcd * isite];
416 for (
int it = 0; it < m_Mt; ++it) {
417 for (
int iz = 0; iz < m_Mz; ++iz) {
418 for (
int iy = 0; iy < m_Ny; ++iy) {
419 int iyzt = iy + m_Ny * (iz + m_Nz * it);
420 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
422 int is = ix + m_Nx2 * iyzt;
425 int ix1 = Nvc2 * ibf;
426 int ix2 = ix1 + m_Nvc;
428 for (
int ic = 0; ic <
m_Nc; ++ic) {
430 int ici = 2 * ic + 1;
431 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
432 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
433 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
434 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
435 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
436 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
437 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
438 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
450 int itask,
double *
v2,
const double *
v1,
int ieo)
452 int Nvcd = m_Nvc *
m_Nd;
461 double vt1[m_Nvc], vt2[m_Nvc];
462 double wt1r, wt1i, wt2r, wt2i;
464 int isite = m_arg[itask].isite;
465 int iyzt0 = isite / m_Nx2;
467 const double *w1 = &v1[Nvcd * isite];
468 double *w2 = &v2[Nvcd * isite];
469 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
471 for (
int it = 0; it < m_Mt; ++it) {
472 for (
int iz = 0; iz < m_Mz; ++iz) {
473 for (
int iy = 0; iy < m_Ny; ++iy) {
474 int iyzt = iy + m_Ny * (iz + m_Nz * it);
475 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
477 for (
int ix = Meo; ix < m_Nx2; ++ix) {
478 int is = ix + m_Nx2 * iyzt;
480 int in = Nvcd * (is -
Meo);
481 int ig = m_Ndf * (is -
Meo);
483 for (
int ic = 0; ic <
m_Nc; ++ic) {
484 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
485 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
486 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
487 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
490 for (
int ic = 0; ic <
m_Nc; ++ic) {
493 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
494 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
495 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
496 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
498 w2[2 * ic + id1 + iv] += wt1r;
499 w2[2 * ic + 1 + id1 + iv] += wt1i;
500 w2[2 * ic + id2 + iv] += wt2r;
501 w2[2 * ic + 1 + id2 + iv] += wt2i;
502 w2[2 * ic + id3 + iv] += -wt2i;
503 w2[2 * ic + 1 + id3 + iv] += +wt2r;
504 w2[2 * ic + id4 + iv] += -wt1i;
505 w2[2 * ic + 1 + id4 + iv] += +wt1r;
516 int itask,
double *vcp1,
const double *
v1,
int ieo)
518 int Nvc2 = 2 * m_Nvc;
519 int Nvcd = m_Nvc *
m_Nd;
520 int Nvcd2 = Nvcd / 2;
527 int isite = m_arg[itask].isite;
528 int isite_cp = m_arg[itask].isite_cpy;
530 const double *w1 = &v1[Nvcd * isite];
531 double *w2 = &vcp1[Nvcd2 * isite_cp];
534 double bc2 = m_boundary2[idir];
538 for (
int it = 0; it < m_Mt; ++it) {
539 for (
int iz = 0; iz < m_Mz; ++iz) {
540 for (
int ix = 0; ix < m_Nx2; ++ix) {
541 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
542 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
544 int ix1 = Nvc2 * is2;
545 int ix2 = ix1 + m_Nvc;
547 for (
int ic = 0; ic <
m_Nc; ++ic) {
548 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
549 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
550 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
551 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
561 int itask,
double *
v2,
const double *vcp2,
int ieo)
563 int Nvc2 = 2 * m_Nvc;
564 int Nvcd = m_Nvc *
m_Nd;
565 int Nvcd2 = Nvcd / 2;
574 double wt1r, wt1i, wt2r, wt2i;
576 int isite = m_arg[itask].isite;
577 int isite_cp = m_arg[itask].isite_cpy;
579 const double *w1 = &vcp2[Nvcd2 * isite_cp];
580 double *w2 = &v2[Nvcd * isite];
581 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
585 for (
int it = 0; it < m_Mt; ++it) {
586 for (
int iz = 0; iz < m_Mz; ++iz) {
587 for (
int ix = 0; ix < m_Nx2; ++ix) {
588 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
589 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
592 int ix1 = Nvc2 * is2;
593 int ix2 = ix1 + m_Nvc;
595 for (
int ic = 0; ic <
m_Nc; ++ic) {
596 int ic2 = ic * m_Nvc;
598 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
599 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
600 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
601 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
603 w2[2 * ic + id1 + iv] += wt1r;
604 w2[2 * ic + 1 + id1 + iv] += wt1i;
605 w2[2 * ic + id2 + iv] += wt2r;
606 w2[2 * ic + 1 + id2 + iv] += wt2i;
607 w2[2 * ic + id3 + iv] += -wt2r;
608 w2[2 * ic + 1 + id3 + iv] += -wt2i;
609 w2[2 * ic + id4 + iv] += wt1r;
610 w2[2 * ic + 1 + id4 + iv] += wt1i;
620 int itask,
double *
v2,
const double *
v1,
int ieo)
622 int Nvcd = m_Nvc *
m_Nd;
631 double vt1[m_Nvc], vt2[m_Nvc];
632 double wt1r, wt1i, wt2r, wt2i;
634 int isite = m_arg[itask].isite;
636 const double *w1 = &v1[Nvcd * isite];
637 double *w2 = &v2[Nvcd * isite];
638 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
640 for (
int it = 0; it < m_Mt; ++it) {
641 for (
int iz = 0; iz < m_Mz; ++iz) {
642 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
643 for (
int ix = 0; ix < m_Nx2; ++ix) {
644 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
646 int in = Nvcd * (is + m_Nx2);
649 for (
int ic = 0; ic <
m_Nc; ++ic) {
650 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
651 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
652 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
653 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
656 for (
int ic = 0; ic <
m_Nc; ++ic) {
657 int ic2 = ic * m_Nvc;
659 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
660 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
661 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
662 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
664 w2[2 * ic + id1 + iv] += wt1r;
665 w2[2 * ic + 1 + id1 + iv] += wt1i;
666 w2[2 * ic + id2 + iv] += wt2r;
667 w2[2 * ic + 1 + id2 + iv] += wt2i;
668 w2[2 * ic + id3 + iv] += -wt2r;
669 w2[2 * ic + 1 + id3 + iv] += -wt2i;
670 w2[2 * ic + id4 + iv] += wt1r;
671 w2[2 * ic + 1 + id4 + iv] += wt1i;
682 int itask,
double *vcp1,
const double *
v1,
int ieo)
684 int Nvc2 = 2 * m_Nvc;
685 int Nvcd = m_Nvc *
m_Nd;
686 int Nvcd2 = Nvcd / 2;
695 int isite = m_arg[itask].isite;
696 int isite_cp = m_arg[itask].isite_cpy;
698 const double *w1 = &v1[Nvcd * isite];
699 double *w2 = &vcp1[Nvcd2 * isite_cp];
700 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
702 double vt1[m_Nvc], vt2[m_Nvc];
706 for (
int it = 0; it < m_Mt; ++it) {
707 for (
int iz = 0; iz < m_Mz; ++iz) {
708 for (
int ix = 0; ix < m_Nx2; ++ix) {
709 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
710 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
713 int ix1 = Nvc2 * is2;
714 int ix2 = ix1 + m_Nvc;
716 for (
int ic = 0; ic <
m_Nc; ++ic) {
717 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
718 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
719 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
720 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
723 for (
int ic = 0; ic <
m_Nc; ++ic) {
725 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
726 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
727 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
728 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
738 int itask,
double *
v2,
const double *vcp2,
int ieo)
740 int Nvc2 = 2 * m_Nvc;
741 int Nvcd = m_Nvc *
m_Nd;
742 int Nvcd2 = Nvcd / 2;
750 double bc2 = m_boundary2[idir];
754 int isite = m_arg[itask].isite;
755 int isite_cp = m_arg[itask].isite_cpy;
757 const double *w1 = &vcp2[Nvcd2 * isite_cp];
758 double *w2 = &v2[Nvcd * isite];
762 for (
int it = 0; it < m_Mt; ++it) {
763 for (
int iz = 0; iz < m_Mz; ++iz) {
764 for (
int ix = 0; ix < m_Nx2; ++ix) {
765 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
766 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
768 int ix1 = Nvc2 * is2;
769 int ix2 = ix1 + m_Nvc;
771 for (
int ic = 0; ic <
m_Nc; ++ic) {
773 int ici = 2 * ic + 1;
774 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
775 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
776 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
777 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
778 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
779 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
780 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
781 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
791 int itask,
double *
v2,
const double *
v1,
int ieo)
793 int Nvcd = m_Nvc *
m_Nd;
802 double vt1[m_Nvc], vt2[m_Nvc];
803 double wt1r, wt1i, wt2r, wt2i;
805 int isite = m_arg[itask].isite;
807 const double *w1 = &v1[Nvcd * isite];
808 double *w2 = &v2[Nvcd * isite];
809 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
811 for (
int it = 0; it < m_Mt; ++it) {
812 for (
int iz = 0; iz < m_Mz; ++iz) {
813 for (
int iy = 1; iy < m_Ny; ++iy) {
814 for (
int ix = 0; ix < m_Nx2; ++ix) {
815 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
817 int in = Nvcd * (is - m_Nx2);
818 int ig = m_Ndf * (is - m_Nx2);
820 for (
int ic = 0; ic <
m_Nc; ++ic) {
821 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
822 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
823 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
824 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
827 for (
int ic = 0; ic <
m_Nc; ++ic) {
829 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
830 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
831 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
832 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
834 w2[ic2 + id1 + iv] += wt1r;
835 w2[ic2 + 1 + id1 + iv] += wt1i;
836 w2[ic2 + id2 + iv] += wt2r;
837 w2[ic2 + 1 + id2 + iv] += wt2i;
838 w2[ic2 + id3 + iv] += wt2r;
839 w2[ic2 + 1 + id3 + iv] += wt2i;
840 w2[ic2 + id4 + iv] += -wt1r;
841 w2[ic2 + 1 + id4 + iv] += -wt1i;
852 int itask,
double *vcp1,
const double *
v1,
int ieo)
854 int Nvc2 = 2 * m_Nvc;
855 int Nvcd = m_Nvc *
m_Nd;
856 int Nvcd2 = Nvcd / 2;
863 int isite = m_arg[itask].isite;
864 int isite_cp = m_arg[itask].isite_cpz;
866 const double *w1 = &v1[Nvcd * isite];
867 double *w2 = &vcp1[Nvcd2 * isite_cp];
870 double bc2 = m_boundary2[idir];
872 if (m_arg[itask].kz0 == 1) {
873 int Nxy = m_Nx2 * m_Ny;
875 for (
int it = 0; it < m_Mt; ++it) {
876 for (
int ixy = 0; ixy < Nxy; ++ixy) {
877 int is = ixy + Nxy * (iz + m_Nz * it);
878 int is2 = ixy + Nxy * it;
881 int ix1 = Nvc2 * is2;
882 int ix2 = ix1 + m_Nvc;
884 for (
int ic = 0; ic <
m_Nc; ++ic) {
885 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
886 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
887 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
888 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
898 int itask,
double *
v2,
const double *vcp2,
int ieo)
900 int Nvc2 = 2 * m_Nvc;
901 int Nvcd = m_Nvc *
m_Nd;
902 int Nvcd2 = Nvcd / 2;
911 double wt1r, wt1i, wt2r, wt2i;
913 int isite = m_arg[itask].isite;
914 int isite_cp = m_arg[itask].isite_cpz;
916 const double *w1 = &vcp2[Nvcd2 * isite_cp];
917 double *w2 = &v2[Nvcd * isite];
918 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
920 if (m_arg[itask].kz1 == 1) {
921 int Nxy = m_Nx2 * m_Ny;
923 for (
int it = 0; it < m_Mt; ++it) {
924 for (
int ixy = 0; ixy < Nxy; ++ixy) {
925 int is = ixy + Nxy * (iz + m_Nz * it);
926 int is2 = ixy + Nxy * it;
929 int ix1 = Nvc2 * is2;
930 int ix2 = ix1 + m_Nvc;
932 for (
int ic = 0; ic <
m_Nc; ++ic) {
933 int ic2 = ic * m_Nvc;
935 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
936 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
937 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
938 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
940 w2[2 * ic + id1 + iv] += wt1r;
941 w2[2 * ic + 1 + id1 + iv] += wt1i;
942 w2[2 * ic + id2 + iv] += wt2r;
943 w2[2 * ic + 1 + id2 + iv] += wt2i;
944 w2[2 * ic + id3 + iv] += wt1i;
945 w2[2 * ic + 1 + id3 + iv] += -wt1r;
946 w2[2 * ic + id4 + iv] += -wt2i;
947 w2[2 * ic + 1 + id4 + iv] += wt2r;
957 int itask,
double *
v2,
const double *
v1,
int ieo)
959 int Nvcd = m_Nvc *
m_Nd;
968 double vt1[m_Nvc], vt2[m_Nvc];
969 double wt1r, wt1i, wt2r, wt2i;
971 int isite = m_arg[itask].isite;
973 const double *w1 = &v1[Nvcd * isite];
974 double *w2 = &v2[Nvcd * isite];
975 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
977 int kz1 = m_arg[itask].kz1;
978 int Nxy = m_Nx2 * m_Ny;
980 for (
int it = 0; it < m_Mt; ++it) {
981 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
982 for (
int ixy = 0; ixy < Nxy; ++ixy) {
983 int is = ixy + Nxy * (iz + m_Nz * it);
985 int in = Nvcd * (is + Nxy);
988 for (
int ic = 0; ic <
m_Nc; ++ic) {
989 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
990 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
991 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
992 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
995 for (
int ic = 0; ic <
m_Nc; ++ic) {
996 int ic2 = ic * m_Nvc;
998 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
999 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1000 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1001 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1003 w2[2 * ic + id1 + iv] += wt1r;
1004 w2[2 * ic + 1 + id1 + iv] += wt1i;
1005 w2[2 * ic + id2 + iv] += wt2r;
1006 w2[2 * ic + 1 + id2 + iv] += wt2i;
1007 w2[2 * ic + id3 + iv] += wt1i;
1008 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1009 w2[2 * ic + id4 + iv] += -wt2i;
1010 w2[2 * ic + 1 + id4 + iv] += wt2r;
1020 int itask,
double *vcp1,
const double *
v1,
int ieo)
1022 int Nvc2 = 2 * m_Nvc;
1023 int Nvcd = m_Nvc *
m_Nd;
1024 int Nvcd2 = Nvcd / 2;
1028 int id3 = m_Nvc * 2;
1029 int id4 = m_Nvc * 3;
1033 int isite = m_arg[itask].isite;
1034 int isite_cp = m_arg[itask].isite_cpz;
1036 const double *w1 = &v1[Nvcd * isite];
1037 double *w2 = &vcp1[Nvcd2 * isite_cp];
1038 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1040 double vt1[m_Nvc], vt2[m_Nvc];
1042 if (m_arg[itask].kz1 == 1) {
1043 int Nxy = m_Nx2 * m_Ny;
1045 for (
int it = 0; it < m_Mt; ++it) {
1046 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1047 int is = ixy + Nxy * (iz + m_Nz * it);
1048 int is2 = ixy + Nxy * it;
1050 int ig = m_Ndf * is;
1051 int ix1 = Nvc2 * is2;
1052 int ix2 = ix1 + m_Nvc;
1054 for (
int ic = 0; ic <
m_Nc; ++ic) {
1055 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1056 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1057 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1058 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1061 for (
int ic = 0; ic <
m_Nc; ++ic) {
1063 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1064 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1065 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1066 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1076 int itask,
double *
v2,
const double *vcp2,
int ieo)
1078 int Nvc2 = 2 * m_Nvc;
1079 int Nvcd = m_Nvc *
m_Nd;
1080 int Nvcd2 = Nvcd / 2;
1084 int id3 = m_Nvc * 2;
1085 int id4 = m_Nvc * 3;
1088 double bc2 = m_boundary2[idir];
1092 int isite = m_arg[itask].isite;
1093 int isite_cp = m_arg[itask].isite_cpz;
1095 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1096 double *w2 = &v2[Nvcd * isite];
1098 if (m_arg[itask].kz0 == 1) {
1099 int Nxy = m_Nx2 * m_Ny;
1102 for (
int it = 0; it < m_Mt; ++it) {
1103 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1104 int is = ixy + Nxy * (iz + m_Nz * it);
1105 int is2 = ixy + Nxy * it;
1107 int ix1 = Nvc2 * is2;
1108 int ix2 = ix1 + m_Nvc;
1110 for (
int ic = 0; ic <
m_Nc; ++ic) {
1112 int ici = 2 * ic + 1;
1113 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1114 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1115 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1116 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1117 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1118 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1119 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1120 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1130 int itask,
double *
v2,
const double *
v1,
int ieo)
1132 int Nvcd = m_Nvc *
m_Nd;
1136 int id3 = m_Nvc * 2;
1137 int id4 = m_Nvc * 3;
1141 double vt1[m_Nvc], vt2[m_Nvc];
1142 double wt1r, wt1i, wt2r, wt2i;
1144 int isite = m_arg[itask].isite;
1146 const double *w1 = &v1[Nvcd * isite];
1147 double *w2 = &v2[Nvcd * isite];
1148 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1150 int kz0 = m_arg[itask].kz0;
1151 int Nxy = m_Nx2 * m_Ny;
1153 for (
int it = 0; it < m_Mt; ++it) {
1154 for (
int iz = kz0; iz < m_Mz; ++iz) {
1155 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1156 int is = ixy + Nxy * (iz + m_Nz * it);
1158 int in = Nvcd * (is - Nxy);
1159 int ig = m_Ndf * (is - Nxy);
1161 for (
int ic = 0; ic <
m_Nc; ++ic) {
1162 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1163 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1164 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1165 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1168 for (
int ic = 0; ic <
m_Nc; ++ic) {
1170 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1171 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1172 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1173 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1175 w2[ic2 + id1 + iv] += wt1r;
1176 w2[ic2 + 1 + id1 + iv] += wt1i;
1177 w2[ic2 + id2 + iv] += wt2r;
1178 w2[ic2 + 1 + id2 + iv] += wt2i;
1179 w2[ic2 + id3 + iv] += -wt1i;
1180 w2[ic2 + 1 + id3 + iv] += wt1r;
1181 w2[ic2 + id4 + iv] += wt2i;
1182 w2[ic2 + 1 + id4 + iv] += -wt2r;
1192 int itask,
double *vcp1,
const double *
v1,
int ieo)
1194 int Nvc2 = 2 * m_Nvc;
1195 int Nvcd = m_Nvc *
m_Nd;
1196 int Nvcd2 = Nvcd / 2;
1200 int id3 = m_Nvc * 2;
1201 int id4 = m_Nvc * 3;
1203 int isite = m_arg[itask].isite;
1204 int isite_cp = m_arg[itask].isite_cpt;
1206 const double *w1 = &v1[Nvcd * isite];
1207 double *w2 = &vcp1[Nvcd2 * isite_cp];
1210 double bc2 = m_boundary2[idir];
1212 if (m_arg[itask].kt0 == 1) {
1213 int Nxy = m_Nx2 * m_Ny;
1215 for (
int iz = 0; iz < m_Mz; ++iz) {
1216 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1217 int is = ixy + Nxy * (iz + m_Nz * it);
1218 int is2 = ixy + Nxy * iz;
1221 int ix1 = Nvc2 * is2;
1222 int ix2 = ix1 + m_Nvc;
1224 for (
int ic = 0; ic <
m_Nc; ++ic) {
1225 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1226 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1227 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1228 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1238 int itask,
double *
v2,
const double *vcp2,
int ieo)
1240 int Nvc2 = 2 * m_Nvc;
1241 int Nvcd = m_Nvc *
m_Nd;
1242 int Nvcd2 = Nvcd / 2;
1246 int id3 = m_Nvc * 2;
1247 int id4 = m_Nvc * 3;
1251 double wt1r, wt1i, wt2r, wt2i;
1253 int isite = m_arg[itask].isite;
1254 int isite_cp = m_arg[itask].isite_cpt;
1256 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1257 double *w2 = &v2[Nvcd * isite];
1258 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
1260 if (m_arg[itask].kt1 == 1) {
1261 int Nxy = m_Nx2 * m_Ny;
1263 for (
int iz = 0; iz < m_Mz; ++iz) {
1264 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1265 int is = ixy + Nxy * (iz + m_Nz * it);
1266 int is2 = ixy + Nxy * iz;
1268 int ig = m_Ndf * is;
1269 int ix1 = Nvc2 * is2;
1270 int ix2 = ix1 + m_Nvc;
1272 for (
int ic = 0; ic <
m_Nc; ++ic) {
1273 int ic2 = ic * m_Nvc;
1275 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1276 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1277 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1278 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1280 w2[2 * ic + id3 + iv] += wt1r;
1281 w2[2 * ic + 1 + id3 + iv] += wt1i;
1282 w2[2 * ic + id4 + iv] += wt2r;
1283 w2[2 * ic + 1 + id4 + iv] += wt2i;
1293 int itask,
double *
v2,
const double *
v1,
int ieo)
1295 int Nvcd = m_Nvc *
m_Nd;
1299 int id3 = m_Nvc * 2;
1300 int id4 = m_Nvc * 3;
1304 double vt1[m_Nvc], vt2[m_Nvc];
1305 double wt1r, wt1i, wt2r, wt2i;
1307 int isite = m_arg[itask].isite;
1309 const double *w1 = &v1[Nvcd * isite];
1310 double *w2 = &v2[Nvcd * isite];
1311 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
1313 int kt1 = m_arg[itask].kt1;
1314 int Nxy = m_Nx2 * m_Ny;
1315 int Nxyz = Nxy * m_Nz;
1317 for (
int it = 0; it < m_Mt - kt1; ++it) {
1318 for (
int iz = 0; iz < m_Mz; ++iz) {
1319 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1320 int is = ixy + Nxy * (iz + m_Nz * it);
1322 int in = Nvcd * (is + Nxyz);
1323 int ig = m_Ndf * is;
1325 for (
int ic = 0; ic <
m_Nc; ++ic) {
1326 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1327 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1328 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1329 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1332 for (
int ic = 0; ic <
m_Nc; ++ic) {
1333 int ic2 = ic * m_Nvc;
1335 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1336 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1337 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1338 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1340 w2[2 * ic + id3 + iv] += wt1r;
1341 w2[2 * ic + 1 + id3 + iv] += wt1i;
1342 w2[2 * ic + id4 + iv] += wt2r;
1343 w2[2 * ic + 1 + id4 + iv] += wt2i;
1353 int itask,
double *vcp1,
const double *
v1,
int ieo)
1355 int Nvc2 = 2 * m_Nvc;
1356 int Nvcd = m_Nvc *
m_Nd;
1357 int Nvcd2 = Nvcd / 2;
1366 int isite = m_arg[itask].isite;
1367 int isite_cp = m_arg[itask].isite_cpt;
1369 const double *w1 = &v1[Nvcd * isite];
1370 double *w2 = &vcp1[Nvcd2 * isite_cp];
1371 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1373 double vt1[m_Nvc], vt2[m_Nvc];
1375 if (m_arg[itask].kt1 == 1) {
1376 int Nxy = m_Nx2 * m_Ny;
1378 for (
int iz = 0; iz < m_Mz; ++iz) {
1379 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1380 int is = ixy + Nxy * (iz + m_Nz * it);
1381 int is2 = ixy + Nxy * iz;
1383 int ig = m_Ndf * is;
1384 int ix1 = Nvc2 * is2;
1385 int ix2 = ix1 + m_Nvc;
1387 for (
int ic = 0; ic <
m_Nc; ++ic) {
1388 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1389 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1390 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1391 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1394 for (
int ic = 0; ic <
m_Nc; ++ic) {
1396 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1397 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1398 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1399 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1409 int itask,
double *
v2,
const double *vcp2,
int ieo)
1411 int Nvc2 = 2 * m_Nvc;
1412 int Nvcd = m_Nvc *
m_Nd;
1413 int Nvcd2 = Nvcd / 2;
1421 double bc2 = m_boundary2[idir];
1425 int isite = m_arg[itask].isite;
1426 int isite_cp = m_arg[itask].isite_cpt;
1428 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1429 double *w2 = &v2[Nvcd * isite];
1431 if (m_arg[itask].kt0 == 1) {
1432 int Nxy = m_Nx2 * m_Ny;
1434 for (
int iz = 0; iz < m_Mz; ++iz) {
1435 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1436 int is = ixy + Nxy * (iz + m_Nz * it);
1437 int is2 = ixy + Nxy * iz;
1439 int ix1 = Nvc2 * is2;
1440 int ix2 = ix1 + m_Nvc;
1442 for (
int ic = 0; ic <
m_Nc; ++ic) {
1444 int ici = 2 * ic + 1;
1445 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1446 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1447 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1448 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// Listing fragment of a per-task kernel with parameters (itask, v2, v1, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tmb_dirac_thread - confirm against the original file.
// Visible behaviour: for every site of the task block with it >= kt0, twice
// the id1 and id2 components of v1 at offset (is - Nxyz) are passed with the
// link data at the same offset to mult_udagv_r / mult_udagv_i, and the
// results are accumulated into the id1 and id2 components of v2.
// The declarations of idir, iv, ic2, id1 and id2 are elided here.
1458 int itask,
double *
v2,
const double *
v1,
int ieo)
1460 int Nvcd = m_Nvc *
m_Nd;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, which is a compiler extension in C++ - confirm.
1469 double vt1[m_Nvc], vt2[m_Nvc];
1470 double wt1r, wt1i, wt2r, wt2i;
1472 int isite = m_arg[itask].isite;
1474 const double *w1 = &v1[Nvcd * isite];
1475 double *w2 = &v2[Nvcd * isite];
// Link pointer: offset by the task start site, by the (1 - ieo) half volume
// and by idir full volumes, all in units of m_Ndf doubles.
1476 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
// kt0 is set to 1 in the task table setup for tasks with ith_t == 0.
1478 int kt0 = m_arg[itask].kt0;
1479 int Nxy = m_Nx2 * m_Ny;
1480 int Nxyz = Nxy * m_Nz;
// Starting at it = kt0 skips the first it slice for tasks with kt0 == 1.
1482 for (
int it = kt0; it < m_Mt; ++it) {
1483 for (
int iz = 0; iz < m_Mz; ++iz) {
1484 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1485 int is = ixy + Nxy * (iz + m_Nz * it);
// Source spinor and link are both read one Nxyz stride before site is.
1487 int in = Nvcd * (is - Nxyz);
1488 int ig = m_Ndf * (is - Nxyz);
// Gather 2.0 * (id1, id2 components) into the temporaries vt1, vt2.
1490 for (
int ic = 0; ic <
m_Nc; ++ic) {
1491 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1492 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1493 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1494 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1497 for (
int ic = 0; ic <
m_Nc; ++ic) {
// mult_udagv_r / mult_udagv_i come from the included group-specific .inc
// file; presumably real and imaginary parts of (U^dagger v) - confirm.
1499 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1500 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1501 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1502 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Accumulate into the id1 and id2 components only.
1504 w2[ic2 + id1 + iv] += wt1r;
1505 w2[ic2 + 1 + id1 + iv] += wt1i;
1506 w2[ic2 + id2 + iv] += wt2r;
1507 w2[ic2 + 1 + id2 + iv] += wt2i;
// Listing fragment of a per-task kernel with parameters (itask, vcp1, v1, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tp1_chiral_thread - confirm against the original file.
// Visible behaviour: for a task with kt0 == 1, the sums (id1 + id3) and
// (id2 + id4) of the v1 components are scaled by bc2 and stored (not
// accumulated) into the buffer vcp1, 2 * m_Nvc doubles per buffer site.
// ieo is not used in the visible lines.
// The declarations of idir, it, in, id1 and id2 are elided here.
1517 int itask,
double *vcp1,
const double *
v1,
int ieo)
1519 int Nvc2 = 2 * m_Nvc;
1520 int Nvcd = m_Nvc *
m_Nd;
1521 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1525 int id3 = m_Nvc * 2;
1526 int id4 = m_Nvc * 3;
// Per-task start offsets: isite into the full field, isite_cpt into vcp1.
1528 int isite = m_arg[itask].isite;
1529 int isite_cp = m_arg[itask].isite_cpt;
1531 const double *w1 = &v1[Nvcd * isite];
1532 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Factor read from m_boundary2 for direction idir.
1535 double bc2 = m_boundary2[idir];
// kt0 is set to 1 in the task table setup for tasks with ith_t == 0.
1537 if (m_arg[itask].kt0 == 1) {
1538 int Nxy = m_Nx2 * m_Ny;
1540 for (
int iz = 0; iz < m_Mz; ++iz) {
1541 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index relative to isite; is2: index inside the (xy, z) buffer slice.
1542 int is = ixy + Nxy * (iz + m_Nz * it);
1543 int is2 = ixy + Nxy * iz;
// ix1 / ix2: offsets of the two m_Nvc-sized halves of buffer site is2.
1546 int ix1 = Nvc2 * is2;
1547 int ix2 = ix1 + m_Nvc;
1549 for (
int ic = 0; ic <
m_Nc; ++ic) {
// Slots 2 * ic and 2 * ic + 1 are handled as a pair for each ic.
1550 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1551 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1552 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1553 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// Listing fragment of a per-task kernel with parameters (itask, v2, vcp2, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tp2_chiral_thread - confirm against the original file.
// Visible behaviour: for a task with kt1 == 1, each buffer half of vcp2 is
// passed with the link data at site is to mult_uv_r / mult_uv_i; the first
// result pair is added to the id1 and id3 components of v2 and the second
// pair to the id2 and id4 components.
// The declarations of idir, it, iv, id1 and id2 are elided here.
1563 int itask,
double *
v2,
const double *vcp2,
int ieo)
1565 int Nvc2 = 2 * m_Nvc;
1566 int Nvcd = m_Nvc *
m_Nd;
1567 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1571 int id3 = m_Nvc * 2;
1572 int id4 = m_Nvc * 3;
1576 double wt1r, wt1i, wt2r, wt2i;
// Per-task start offsets: isite into the full field, isite_cpt into vcp2.
1578 int isite = m_arg[itask].isite;
1579 int isite_cp = m_arg[itask].isite_cpt;
1581 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1582 double *w2 = &v2[Nvcd * isite];
// Link pointer: offset by the task start site, by the ieo half volume and by
// idir full volumes, all in units of m_Ndf doubles.
1583 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
// kt1 is set to 1 in the task table setup for tasks with ith_t == m_Ntask_t - 1.
1585 if (m_arg[itask].kt1 == 1) {
1586 int Nxy = m_Nx2 * m_Ny;
1588 for (
int iz = 0; iz < m_Mz; ++iz) {
1589 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index relative to isite; is2: index inside the (xy, z) buffer slice.
1590 int is = ixy + Nxy * (iz + m_Nz * it);
1591 int is2 = ixy + Nxy * iz;
1593 int ig = m_Ndf * is;
1594 int ix1 = Nvc2 * is2;
1595 int ix2 = ix1 + m_Nvc;
1597 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic2 steps through the link data in strides of m_Nvc doubles per ic.
1598 int ic2 = ic * m_Nvc;
// mult_uv_r / mult_uv_i come from the included group-specific .inc file;
// presumably real and imaginary parts of (U v) - confirm.
1600 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1601 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1602 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1603 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// The same values are added to the upper (id1, id2) and lower (id3, id4)
// component groups.
1605 w2[2 * ic + id1 + iv] += wt1r;
1606 w2[2 * ic + 1 + id1 + iv] += wt1i;
1607 w2[2 * ic + id2 + iv] += wt2r;
1608 w2[2 * ic + 1 + id2 + iv] += wt2i;
1609 w2[2 * ic + id3 + iv] += wt1r;
1610 w2[2 * ic + 1 + id3 + iv] += wt1i;
1611 w2[2 * ic + id4 + iv] += wt2r;
1612 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Listing fragment of a per-task kernel with parameters (itask, v2, v1, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tpb_chiral_thread - confirm against the original file.
// Visible behaviour: for every site of the task block with it < m_Mt - kt1,
// the sums (id1 + id3) and (id2 + id4) of v1 at offset (is + Nxyz) are passed
// with the link data at site is to mult_uv_r / mult_uv_i, and the results are
// accumulated into all four component groups of v2.
// The declarations of idir, iv, id1 and id2 are elided here.
1622 int itask,
double *
v2,
const double *
v1,
int ieo)
1624 int Nvcd = m_Nvc *
m_Nd;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1628 int id3 = m_Nvc * 2;
1629 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, which is a compiler extension in C++ - confirm.
1633 double vt1[m_Nvc], vt2[m_Nvc];
1634 double wt1r, wt1i, wt2r, wt2i;
1636 int isite = m_arg[itask].isite;
1638 const double *w1 = &v1[Nvcd * isite];
1639 double *w2 = &v2[Nvcd * isite];
// Link pointer: offset by the task start site, by the ieo half volume and by
// idir full volumes, all in units of m_Ndf doubles.
1640 const double *u = m_U->ptr(m_Ndf * (isite + ieo *
m_Nvol / 2 + idir *
m_Nvol));
// kt1 is set to 1 in the task table setup for tasks with ith_t == m_Ntask_t - 1.
1642 int kt1 = m_arg[itask].kt1;
1643 int Nxy = m_Nx2 * m_Ny;
1644 int Nxyz = Nxy * m_Nz;
// Stopping at m_Mt - kt1 skips the last it slice for tasks with kt1 == 1.
1646 for (
int it = 0; it < m_Mt - kt1; ++it) {
1647 for (
int iz = 0; iz < m_Mz; ++iz) {
1648 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1649 int is = ixy + Nxy * (iz + m_Nz * it);
// Source spinor is read one Nxyz stride after site is; the link at site is.
1651 int in = Nvcd * (is + Nxyz);
1652 int ig = m_Ndf * is;
// Gather the component sums into the temporaries vt1, vt2.
1654 for (
int ic = 0; ic <
m_Nc; ++ic) {
1655 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1656 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1657 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1658 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1661 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic2 steps through the link data in strides of m_Nvc doubles per ic.
1662 int ic2 = ic * m_Nvc;
// mult_uv_r / mult_uv_i come from the included group-specific .inc file;
// presumably real and imaginary parts of (U v) - confirm.
1664 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1665 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1666 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1667 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// The same values are added to the upper (id1, id2) and lower (id3, id4)
// component groups.
1669 w2[2 * ic + id1 + iv] += wt1r;
1670 w2[2 * ic + 1 + id1 + iv] += wt1i;
1671 w2[2 * ic + id2 + iv] += wt2r;
1672 w2[2 * ic + 1 + id2 + iv] += wt2i;
1673 w2[2 * ic + id3 + iv] += wt1r;
1674 w2[2 * ic + 1 + id3 + iv] += wt1i;
1675 w2[2 * ic + id4 + iv] += wt2r;
1676 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Listing fragment of a per-task kernel with parameters (itask, vcp1, v1, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tm1_chiral_thread - confirm against the original file.
// Visible behaviour: for a task with kt1 == 1, the differences (id1 - id3)
// and (id2 - id4) of the v1 components are passed with the link data at site
// is to mult_udagv_r / mult_udagv_i, and the results are stored (not
// accumulated) into the buffer vcp1, 2 * m_Nvc doubles per buffer site.
// No m_boundary2 factor is applied in the visible lines.
// The declarations of idir, it, in, icr, id1 and id2 are elided here.
1686 int itask,
double *vcp1,
const double *
v1,
int ieo)
1688 int Nvc2 = 2 * m_Nvc;
1689 int Nvcd = m_Nvc *
m_Nd;
1690 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1694 int id3 = m_Nvc * 2;
1695 int id4 = m_Nvc * 3;
// Per-task start offsets: isite into the full field, isite_cpt into vcp1.
1699 int isite = m_arg[itask].isite;
1700 int isite_cp = m_arg[itask].isite_cpt;
1702 const double *w1 = &v1[Nvcd * isite];
1703 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Link pointer: offset by the task start site, by the (1 - ieo) half volume
// and by idir full volumes, all in units of m_Ndf doubles.
1704 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, which is a compiler extension in C++ - confirm.
1706 double vt1[m_Nvc], vt2[m_Nvc];
// kt1 is set to 1 in the task table setup for tasks with ith_t == m_Ntask_t - 1.
1708 if (m_arg[itask].kt1 == 1) {
1709 int Nxy = m_Nx2 * m_Ny;
1711 for (
int iz = 0; iz < m_Mz; ++iz) {
1712 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index relative to isite; is2: index inside the (xy, z) buffer slice.
1713 int is = ixy + Nxy * (iz + m_Nz * it);
1714 int is2 = ixy + Nxy * iz;
1716 int ig = m_Ndf * is;
1717 int ix1 = Nvc2 * is2;
1718 int ix2 = ix1 + m_Nvc;
// Gather the component differences into the temporaries vt1, vt2.
1720 for (
int ic = 0; ic <
m_Nc; ++ic) {
1721 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1722 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1723 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1724 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1727 for (
int ic = 0; ic <
m_Nc; ++ic) {
// mult_udagv_r / mult_udagv_i come from the included group-specific .inc
// file; presumably real and imaginary parts of (U^dagger v) - confirm.
1729 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1730 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1731 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1732 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Listing fragment of a per-task kernel with parameters (itask, v2, vcp2, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tm2_chiral_thread - confirm against the original file.
// Visible behaviour: for a task with kt0 == 1, the buffer vcp2 scaled by bc2
// is added to the id1 and id2 components of v2 and subtracted from the id3
// and id4 components. ieo is not used in the visible lines.
// The declarations of idir, it, iv, icr, id1 and id2 are elided here.
1742 int itask,
double *
v2,
const double *vcp2,
int ieo)
1744 int Nvc2 = 2 * m_Nvc;
1745 int Nvcd = m_Nvc *
m_Nd;
1746 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1750 int id3 = m_Nvc * 2;
1751 int id4 = m_Nvc * 3;
// Factor read from m_boundary2 for direction idir.
1754 double bc2 = m_boundary2[idir];
// Per-task start offsets: isite into the full field, isite_cpt into vcp2.
1758 int isite = m_arg[itask].isite;
1759 int isite_cp = m_arg[itask].isite_cpt;
1761 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1762 double *w2 = &v2[Nvcd * isite];
// kt0 is set to 1 in the task table setup for tasks with ith_t == 0.
1764 if (m_arg[itask].kt0 == 1) {
1765 int Nxy = m_Nx2 * m_Ny;
1767 for (
int iz = 0; iz < m_Mz; ++iz) {
1768 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index relative to isite; is2: index inside the (xy, z) buffer slice.
1769 int is = ixy + Nxy * (iz + m_Nz * it);
1770 int is2 = ixy + Nxy * iz;
// ix1 / ix2: offsets of the two m_Nvc-sized halves of buffer site is2.
1772 int ix1 = Nvc2 * is2;
1773 int ix2 = ix1 + m_Nvc;
1775 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ici is the odd slot 2 * ic + 1; icr is declared in an elided line
// (presumably 2 * ic - confirm).
1777 int ici = 2 * ic + 1;
// Upper component groups: add the scaled buffer value.
1778 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1779 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1780 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1781 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// Lower component groups: subtract the same scaled buffer value.
1782 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1783 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1784 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1785 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// Listing fragment of a per-task kernel with parameters (itask, v2, v1, ieo).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably
// mult_tmb_chiral_thread - confirm against the original file.
// Visible behaviour: for every site of the task block with it >= kt0, the
// differences (id1 - id3) and (id2 - id4) of v1 at offset (is - Nxyz) are
// passed with the link data at the same offset to mult_udagv_r /
// mult_udagv_i; the results are added to the id1 and id2 components of v2
// and subtracted from the id3 and id4 components.
// The declarations of idir, iv, ic2, id1 and id2 are elided here.
1795 int itask,
double *
v2,
const double *
v1,
int ieo)
1797 int Nvcd = m_Nvc *
m_Nd;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1801 int id3 = m_Nvc * 2;
1802 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, which is a compiler extension in C++ - confirm.
1806 double vt1[m_Nvc], vt2[m_Nvc];
1807 double wt1r, wt1i, wt2r, wt2i;
1809 int isite = m_arg[itask].isite;
1811 const double *w1 = &v1[Nvcd * isite];
1812 double *w2 = &v2[Nvcd * isite];
// Link pointer: offset by the task start site, by the (1 - ieo) half volume
// and by idir full volumes, all in units of m_Ndf doubles.
1813 const double *u = m_U->ptr(m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
// kt0 is set to 1 in the task table setup for tasks with ith_t == 0.
1815 int kt0 = m_arg[itask].kt0;
1816 int Nxy = m_Nx2 * m_Ny;
1817 int Nxyz = Nxy * m_Nz;
// Starting at it = kt0 skips the first it slice for tasks with kt0 == 1.
1819 for (
int it = kt0; it < m_Mt; ++it) {
1820 for (
int iz = 0; iz < m_Mz; ++iz) {
1821 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1822 int is = ixy + Nxy * (iz + m_Nz * it);
// Source spinor and link are both read one Nxyz stride before site is.
1824 int in = Nvcd * (is - Nxyz);
1825 int ig = m_Ndf * (is - Nxyz);
// Gather the component differences into the temporaries vt1, vt2.
1827 for (
int ic = 0; ic <
m_Nc; ++ic) {
1828 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1829 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1830 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1831 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1834 for (
int ic = 0; ic <
m_Nc; ++ic) {
// mult_udagv_r / mult_udagv_i come from the included group-specific .inc
// file; presumably real and imaginary parts of (U^dagger v) - confirm.
1836 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1837 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1838 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1839 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Upper component groups: add the result.
1841 w2[ic2 + id1 + iv] += wt1r;
1842 w2[ic2 + 1 + id1 + iv] += wt1i;
1843 w2[ic2 + id2 + iv] += wt2r;
1844 w2[ic2 + 1 + id2 + iv] += wt2i;
// Lower component groups: subtract the same result.
1845 w2[ic2 + id3 + iv] -= wt1r;
1846 w2[ic2 + 1 + id3 + iv] -= wt1i;
1847 w2[ic2 + id4 + iv] -= wt2r;
1848 w2[ic2 + 1 + id4 + iv] -= wt2i;
// Listing fragment of a per-task kernel with parameters (itask, v2, v1).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably the
// out-of-place gm5_dirac_thread - confirm against the original file.
// Visible behaviour: for every site of the task block, v2 receives v1 with
// the component groups exchanged pairwise: id1 <-> id3 and id2 <-> id4.
// The declarations of id1 and id2 are elided here.
1858 int itask,
double *
v2,
const double *
v1)
1860 int Nvcd = m_Nvc *
m_Nd;
1861 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1865 int id3 = m_Nvc * 2;
1866 int id4 = m_Nvc * 3;
// Start site of this task; both fields are addressed relative to it.
1868 int isite = m_arg[itask].isite;
1870 const double *w1 = &v1[Nvcd * isite];
1871 double *w2 = &v2[Nvcd * isite];
// The task block spans m_Mt x m_Mz x Nxy sites; rows in it are m_Nz apart.
1873 for (
int it = 0; it < m_Mt; ++it) {
1874 for (
int iz = 0; iz < m_Mz; ++iz) {
1875 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset in doubles of the current site relative to the task start.
1876 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1877 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1878 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1879 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1880 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1881 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// Listing fragment of a per-task kernel with parameters (itask, v2, v1).
// The line carrying the function name is not part of this listing.
// NOTE(review): by position in the member list this is presumably the
// out-of-place gm5_chiral_thread - confirm against the original file.
// Visible behaviour: for every site of the task block, v2 receives the id1
// and id2 component groups of v1 unchanged and the id3 and id4 groups with
// the sign flipped.
// The declarations of id1 and id2 are elided here.
1891 int itask,
double *
v2,
const double *
v1)
1893 int Nvcd = m_Nvc *
m_Nd;
1894 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1898 int id3 = m_Nvc * 2;
1899 int id4 = m_Nvc * 3;
// Start site of this task; both fields are addressed relative to it.
1901 int isite = m_arg[itask].isite;
1903 const double *w1 = &v1[Nvcd * isite];
1904 double *w2 = &v2[Nvcd * isite];
// The task block spans m_Mt x m_Mz x Nxy sites; rows in it are m_Nz apart.
1906 for (
int it = 0; it < m_Mt; ++it) {
1907 for (
int iz = 0; iz < m_Mz; ++iz) {
1908 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset in doubles of the current site relative to the task start.
1909 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1910 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1911 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
1912 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
1913 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1914 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
// Listing fragment of a per-task kernel; the signature lines are not part of
// this listing (itask and a writable double pointer v1 are used below).
// NOTE(review): presumably the in-place overload of gm5_dirac_thread -
// confirm against the original file.
// Visible behaviour: for every site of the task block, the component groups
// of v1 are exchanged in place: id1 <-> id3 and id2 <-> id4.
// The declarations of id1 and id2 are elided here.
1926 int Nvcd = m_Nvc *
m_Nd;
1927 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1931 int id3 = m_Nvc * 2;
1932 int id4 = m_Nvc * 3;
// Start site of this task; the field is addressed relative to it.
1934 int isite = m_arg[itask].isite;
1935 double *w1 = &v1[Nvcd * isite];
// The task block spans m_Mt x m_Mz x Nxy sites; rows in it are m_Nz apart.
1937 for (
int it = 0; it < m_Mt; ++it) {
1938 for (
int iz = 0; iz < m_Mz; ++iz) {
1939 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset in doubles of the current site relative to the task start.
1940 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1941 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
// Save the upper values first so the in-place exchange does not lose them.
1942 double wt1 = w1[ivc + id1 + iv];
1943 double wt2 = w1[ivc + id2 + iv];
1944 w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
1945 w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
1946 w1[ivc + id3 + iv] = wt1;
1947 w1[ivc + id4 + iv] = wt2;
// Listing fragment of a per-task kernel; the signature lines are not part of
// this listing (itask and a writable double pointer v1 are used below).
// NOTE(review): presumably the in-place overload of gm5_chiral_thread -
// confirm against the original file.
// Visible behaviour: for every site of the task block, the id3 and id4
// component groups of v1 are negated in place; no other entries are written
// in the visible lines.
1959 int Nvcd = m_Nvc *
m_Nd;
1960 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth m_Nvc-sized component groups within a site.
1964 int id3 = m_Nvc * 2;
1965 int id4 = m_Nvc * 3;
// Start site of this task; the field is addressed relative to it.
1967 int isite = m_arg[itask].isite;
1968 double *w1 = &v1[Nvcd * isite];
// The task block spans m_Mt x m_Mz x Nxy sites; rows in it are m_Nz apart.
1970 for (
int it = 0; it < m_Mt; ++it) {
1971 for (
int iz = 0; iz < m_Mz; ++iz) {
1972 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: offset in doubles of the current site relative to the task start.
1973 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1974 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1975 w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1976 w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_xpb_thread(int, double *, const double *, int)
void mult_tpb_dirac_thread(int, double *, const double *, int)
void mult_tmb_chiral_thread(int, double *, const double *, int)
void mult_tp2_chiral_thread(int, double *, const double *, int)
void mult_ym1_thread(int, double *, const double *, int)
static const std::string class_name
void mult_ymb_thread(int, double *, const double *, int)
void mult_tmb_dirac_thread(int, double *, const double *, int)
void general(const char *format,...)
void mult_xm2_thread(int, double *, const double *, int)
void mult_zm2_thread(int, double *, const double *, int)
void mult_zm1_thread(int, double *, const double *, int)
void clear_thread(int, double *)
void mult_zp1_thread(int, double *, const double *, int)
void mult_tm2_chiral_thread(int, double *, const double *, int)
void mult_tp2_dirac_thread(int, double *, const double *, int)
void mult_ypb_thread(int, double *, const double *, int)
void mult_tm1_dirac_thread(int, double *, const double *, int)
void mult_tm2_dirac_thread(int, double *, const double *, int)
void mult_tp1_chiral_thread(int, double *, const double *, int)
void mult_yp1_thread(int, double *, const double *, int)
void mult_zmb_thread(int, double *, const double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_zpb_thread(int, double *, const double *, int)
void mult_xm1_thread(int, double *, const double *, int)
void mult_yp2_thread(int, double *, const double *, int)
void crucial(const char *format,...)
void mult_tm1_chiral_thread(int, double *, const double *, int)
std::vector< mult_arg > m_arg
void mult_xmb_thread(int, double *, const double *, int)
void gm5_dirac_thread(int, double *, const double *)
void Meo(Field &, const Field &, const int ieo)
even-odd operator: ieo=0: even <- odd, ieo=1: odd <- even
void mult_tp1_dirac_thread(int, double *, const double *, int)
Bridge::VerboseLevel m_vl
void mult_zp2_thread(int, double *, const double *, int)
void mult_tpb_chiral_thread(int, double *, const double *, int)
void mult_xp2_thread(int, double *, const double *, int)
void scal_thread(int, double *, double)
void mult_ym2_thread(int, double *, const double *, int)
void gm5_chiral_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *, int)