20 #if defined USE_GROUP_SU3
22 #elif defined USE_GROUP_SU2
24 #elif defined USE_GROUP_SU_N
// Three diagnostics emitted through vout.crucial with m_vl as first argument.
// NOTE(review): the conditions guarding each call and the remaining
// printf-style arguments are on lines missing from this excerpt.
// Message: Nz and Nt do not match Nthread.
46 vout.
crucial(
m_vl,
"Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
// Message: Mz and Ntask_z do not match Nz.
56 vout.
crucial(
m_vl,
"Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
// Message: Mt and Ntask_t do not match Nt.
62 vout.
crucial(
m_vl,
"Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
// Walks the m_Ntask_t x m_Ntask_z grid of tasks and fills per-task entries of
// m_arg. NOTE(review): lines between the loop headers and the assignments are
// missing from this excerpt (e.g. where Nxy2 is defined).
77 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
78 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
// Linear task index, with ith_z running fastest.
79 int itask = ith_z + m_Ntask_z * ith_t;
// Flags are set to 1 for tasks at the first (kt0 / kz0) or last (kt1 / kz1)
// index of the corresponding task axis.
87 if (ith_t == 0)
m_arg[itask].kt0 = 1;
88 if (ith_z == 0)
m_arg[itask].kz0 = 1;
89 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
90 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
// isite_cp_z is derived from the task's t index, isite_cp_t from its z index.
94 m_arg[itask].isite_cp_z = ith_t *
m_Mt * Nxy2;
95 m_arg[itask].isite_cp_t = ith_z *
m_Mz * Nxy2;
// Multiplies entries of w by fac in place over this task's m_Mt x m_Mz
// sub-block, which starts at element Nvcd * m_arg[itask].isite.
// NOTE(review): Nvcd and Nv_xy are defined on lines missing from this excerpt.
103 double *w,
const double fac)
108 const int isite =
m_arg[itask].isite;
110 double *wp = &w[Nvcd * isite];
113 for (
int it = 0; it <
m_Mt; ++it) {
114 for (
int iz = 0; iz <
m_Mz; ++iz) {
115 for (
int iv_xy = 0; iv_xy < Nv_xy; ++iv_xy) {
// The t stride uses m_Nz, while the iz loop bound is m_Mz.
116 int iv = iv_xy + Nv_xy * (iz +
m_Nz * it);
118 wp[iv] = fac * wp[iv];
// Traverses this task's m_Mt x m_Mz sub-block of v, starting at element
// Nvcd * m_arg[itask].isite.
// NOTE(review): the statement that uses wp[iv], and the definitions of Nvcd
// and Nv_xy, are on lines missing from this excerpt.
132 const int isite =
m_arg[itask].isite;
134 double *wp = &v[Nvcd * isite];
137 for (
int it = 0; it <
m_Mt; ++it) {
138 for (
int iz = 0; iz <
m_Mz; ++iz) {
139 for (
int iv_xy = 0; iv_xy < Nv_xy; ++iv_xy) {
// The t stride uses m_Nz, while the iz loop bound is m_Mz.
140 int iv = iv_xy + Nv_xy * (iz +
m_Nz * it);
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_x, with
// Nvcd2 = Nvcd / 2) from v1 over this task's m_Mt x m_Mz x m_Ny range,
// multiplying each stored value by bc2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, bc2, ix, ibf, in and ic_r; confirm
// against the complete file.
151 double *vcp1,
const double *v1,
const int ieo)
153 const int Nvc2 = 2 *
m_Nvc;
155 const int Nvcd2 = Nvcd / 2;
158 const int id2 =
m_Nvc;
159 const int id3 =
m_Nvc * 2;
160 const int id4 =
m_Nvc * 3;
166 const int isite =
m_arg[itask].isite;
167 const int isite_cp =
m_arg[itask].isite_cp_x;
168 const int iyzt0 = isite /
m_Nx2;
170 const double *w1 = &v1[Nvcd * isite];
171 double *w2 = &vcp1[Nvcd2 * isite_cp];
176 for (
int it = 0; it <
m_Mt; ++it) {
177 for (
int iz = 0; iz <
m_Mz; ++iz) {
178 for (
int iy = 0; iy <
m_Ny; ++iy) {
179 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
180 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
183 int is = ix +
m_Nx2 * iyzt;
185 int ix1 = Nvc2 * ibf;
186 int ix2 = ix1 +
m_Nvc;
188 for (
int ic = 0; ic <
m_Nc; ++ic) {
190 int ic_i = 2 * ic + 1;
// Presumably ic_r / ic_i are the real / imaginary slots for index ic; if so the
// stores form bc2 * (block id1 + i * block id4) and bc2 * (block id2 + i * block id3).
192 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id4 + in]);
193 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id4 + in]);
194 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_i + id3 + in]);
195 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_r + id3 + in]);
// Accumulates into v2 values computed from the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_x) at x index ix = m_Nx2 - 1, over this
// task's m_Mt x m_Mz x m_Ny range. Both buffer halves (offsets ix1 and ix2)
// are passed with u to mult_uv_r / mult_uv_i before being added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ibf, ig, iv and ic_r.
208 double *v2,
const double *vcp2,
const int ieo)
210 const int Nvc2 = 2 *
m_Nvc;
212 const int Nvcd2 = Nvcd / 2;
215 const int id2 =
m_Nvc;
216 const int id3 =
m_Nvc * 2;
217 const int id4 =
m_Nvc * 3;
220 const int ix =
m_Nx2 - 1;
222 const int isite =
m_arg[itask].isite;
223 const int isite_cp =
m_arg[itask].isite_cp_x;
224 const int iyzt0 = isite /
m_Nx2;
226 const double *w1 = &vcp2[Nvcd2 * isite_cp];
227 double *w2 = &v2[Nvcd * isite];
233 for (
int it = 0; it <
m_Mt; ++it) {
234 for (
int iz = 0; iz <
m_Mz; ++iz) {
235 for (
int iy = 0; iy <
m_Ny; ++iy) {
236 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
237 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
240 int is = ix +
m_Nx2 * iyzt;
243 int ix1 = Nvc2 * ibf;
244 int ix2 = ix1 +
m_Nvc;
246 for (
int ic = 0; ic <
m_Nc; ++ic) {
247 int ic2 = ic *
m_Nvc;
249 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
250 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
251 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
252 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
255 int ic_i = 2 * ic + 1;
// Blocks id1 / id2 receive wt1 / wt2 directly.
257 w2[ic_r + id1 + iv] += wt1r;
258 w2[ic_i + id1 + iv] += wt1i;
259 w2[ic_r + id2 + iv] += wt2r;
260 w2[ic_i + id2 + iv] += wt2i;
// Blocks id3 / id4 receive (wt2i, -wt2r) and (wt1i, -wt1r), i.e. -i * wt2 and
// -i * wt1 if ic_r / ic_i are real / imaginary slots (ic_r is not defined here).
262 w2[ic_r + id3 + iv] += wt2i;
263 w2[ic_i + id3 + iv] -= wt2r;
264 w2[ic_r + id4 + iv] += wt1i;
265 w2[ic_i + id4 + iv] -= wt1r;
// Accumulates into v2 a term read from v1 at site is + yzt_eo, inside this
// task's m_Mt x m_Mz x m_Ny range. Two temporaries vt1 / vt2 are built from
// the four blocks of the source site, passed with u to mult_uv_r / mult_uv_i,
// and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, iv, vt1, vt2 and ic_r.
278 double *v2,
const double *v1,
const int ieo)
283 const int id2 =
m_Nvc;
284 const int id3 =
m_Nvc * 2;
285 const int id4 =
m_Nvc * 3;
289 const int isite =
m_arg[itask].isite;
290 const int iyzt0 = isite /
m_Nx2;
292 const double *w1 = &v1[Nvcd * isite];
293 double *w2 = &v2[Nvcd * isite];
297 for (
int it = 0; it <
m_Mt; ++it) {
298 for (
int iz = 0; iz <
m_Mz; ++iz) {
299 for (
int iy = 0; iy <
m_Ny; ++iy) {
300 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
301 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
// The ix range is shortened by yzt_eo so that is + yzt_eo stays below m_Nx2
// within the row.
303 for (
int ix = 0; ix <
m_Nx2 - yzt_eo; ++ix) {
304 int is = ix +
m_Nx2 * iyzt;
306 int in = Nvcd * (is + yzt_eo);
311 for (
int ic = 0; ic <
m_Nc; ++ic) {
313 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 + i * block id4,
// vt2 = block id2 + i * block id3.
315 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id4 + in];
316 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id4 + in];
317 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id3 + in];
318 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id3 + in];
321 for (
int ic = 0; ic <
m_Nc; ++ic) {
322 int ic2 = ic *
m_Nvc;
324 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
325 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
326 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
327 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
330 int ic_i = 2 * ic + 1;
332 w2[ic_r + id1 + iv] += wt1r;
333 w2[ic_i + id1 + iv] += wt1i;
334 w2[ic_r + id2 + iv] += wt2r;
335 w2[ic_i + id2 + iv] += wt2i;
// Blocks id3 / id4 receive (wt2i, -wt2r) and (wt1i, -wt1r).
337 w2[ic_r + id3 + iv] += wt2i;
338 w2[ic_i + id3 + iv] -= wt2r;
339 w2[ic_r + id4 + iv] += wt1i;
340 w2[ic_i + id4 + iv] -= wt1r;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_x) from v1 at
// x index ix = m_Nx2 - 1 over this task's m_Mt x m_Mz x m_Ny range. The
// temporaries vt1 / vt2 are built from the four blocks of v1 and the stored
// values are the results of mult_udagv_r / mult_udagv_i applied with u.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ibf, ig, in, ic2, vt1, vt2 and ic_r.
351 double *vcp1,
const double *v1,
const int ieo)
353 const int Nvc2 = 2 *
m_Nvc;
355 const int Nvcd2 = Nvcd / 2;
358 const int id2 =
m_Nvc;
359 const int id3 =
m_Nvc * 2;
360 const int id4 =
m_Nvc * 3;
363 const int ix =
m_Nx2 - 1;
365 const int isite =
m_arg[itask].isite;
366 const int isite_cp =
m_arg[itask].isite_cp_x;
367 const int iyzt0 = isite /
m_Nx2;
369 const double *w1 = &v1[Nvcd * isite];
370 double *w2 = &vcp1[Nvcd2 * isite_cp];
376 for (
int it = 0; it <
m_Mt; ++it) {
377 for (
int iz = 0; iz <
m_Mz; ++iz) {
378 for (
int iy = 0; iy <
m_Ny; ++iy) {
379 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
380 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
383 int is = ix +
m_Nx2 * iyzt;
386 int ix1 = Nvc2 * ibf;
387 int ix2 = ix1 +
m_Nvc;
391 for (
int ic = 0; ic <
m_Nc; ++ic) {
393 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 - i * block id4,
// vt2 = block id2 - i * block id3.
395 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
396 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
397 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
398 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
401 for (
int ic = 0; ic <
m_Nc; ++ic) {
405 int ic_i = 2 * ic + 1;
407 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
408 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
409 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
410 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Accumulates into v2 the contents of the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_x), each term multiplied by bc2, over this
// task's m_Mt x m_Mz x m_Ny range.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, bc2, ix, ibf, iv and ic_r.
423 double *v2,
const double *vcp2,
const int ieo)
425 const int Nvc2 = 2 *
m_Nvc;
427 const int Nvcd2 = Nvcd / 2;
430 const int id2 =
m_Nvc;
431 const int id3 =
m_Nvc * 2;
432 const int id4 =
m_Nvc * 3;
438 const int isite =
m_arg[itask].isite;
439 const int isite_cp =
m_arg[itask].isite_cp_x;
440 const int iyzt0 = isite /
m_Nx2;
442 const double *w1 = &vcp2[Nvcd2 * isite_cp];
443 double *w2 = &v2[Nvcd * isite];
448 for (
int it = 0; it <
m_Mt; ++it) {
449 for (
int iz = 0; iz <
m_Mz; ++iz) {
450 for (
int iy = 0; iy <
m_Ny; ++iy) {
451 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
452 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
455 int is = ix +
m_Nx2 * iyzt;
458 int ix1 = Nvc2 * ibf;
459 int ix2 = ix1 +
m_Nvc;
461 for (
int ic = 0; ic <
m_Nc; ++ic) {
463 int ic_i = 2 * ic + 1;
// Blocks id1 / id2 receive the buffer halves at ix1 / ix2 directly.
465 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
466 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
467 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
468 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Blocks id3 / id4 receive the ix2 / ix1 halves with (re, im) -> (-im, re),
// i.e. multiplied by i if ic_r / ic_i are real / imaginary slots.
470 w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix2];
471 w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix2];
472 w2[ic_r + id4 + iv] -= bc2 * w1[ic_i + ix1];
473 w2[ic_i + id4 + iv] += bc2 * w1[ic_r + ix1];
// Accumulates into v2 a term read from v1 at site is - Meo (Meo = 1 - yzt_eo),
// inside this task's m_Mt x m_Mz x m_Ny range. Two temporaries vt1 / vt2 are
// built from the source site, passed with u to mult_udagv_r / mult_udagv_i,
// and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, iv, ic2, vt1, vt2 and ic_r.
486 double *v2,
const double *v1,
const int ieo)
491 const int id2 =
m_Nvc;
492 const int id3 =
m_Nvc * 2;
493 const int id4 =
m_Nvc * 3;
497 const int isite =
m_arg[itask].isite;
498 const int iyzt0 = isite /
m_Nx2;
500 const double *w1 = &v1[Nvcd * isite];
501 double *w2 = &v2[Nvcd * isite];
505 for (
int it = 0; it <
m_Mt; ++it) {
506 for (
int iz = 0; iz <
m_Mz; ++iz) {
507 for (
int iy = 0; iy <
m_Ny; ++iy) {
508 int iyzt = iy + m_Ny * (iz +
m_Nz * it);
// yzt_eo equals m_yzt_eo[...] when ieo == 0 and 1 - m_yzt_eo[...] when ieo == 1.
509 int yzt_eo = ieo + (1 - 2 * ieo) *
m_yzt_eo[iyzt0 + iyzt];
510 int Meo = 1 - yzt_eo;
// ix starts at Meo so that is - Meo does not fall below the start of the row.
512 for (
int ix = Meo; ix <
m_Nx2; ++ix) {
513 int is = ix + m_Nx2 * iyzt;
515 int in = Nvcd * (is -
Meo);
520 for (
int ic = 0; ic <
m_Nc; ++ic) {
522 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 - i * block id4,
// vt2 = block id2 - i * block id3.
524 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
525 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
526 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
527 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
530 for (
int ic = 0; ic <
m_Nc; ++ic) {
533 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
534 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
535 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
536 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
539 int ic_i = 2 * ic + 1;
541 w2[ic_r + id1 + iv] += wt1r;
542 w2[ic_i + id1 + iv] += wt1i;
543 w2[ic_r + id2 + iv] += wt2r;
544 w2[ic_i + id2 + iv] += wt2i;
// Blocks id3 / id4 receive (-wt2i, wt2r) and (-wt1i, wt1r).
546 w2[ic_r + id3 + iv] -= wt2i;
547 w2[ic_i + id3 + iv] += wt2r;
548 w2[ic_r + id4 + iv] -= wt1i;
549 w2[ic_i + id4 + iv] += wt1r;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_y) from v1
// over this task's m_Mt x m_Mz x m_Nx2 range, multiplying each stored value
// by bc2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, bc2, iy, in and ic_r.
560 double *vcp1,
const double *v1,
const int ieo)
562 const int Nvc2 = 2 *
m_Nvc;
564 const int Nvcd2 = Nvcd / 2;
567 const int id2 =
m_Nvc;
568 const int id3 =
m_Nvc * 2;
569 const int id4 =
m_Nvc * 3;
575 const int isite =
m_arg[itask].isite;
576 const int isite_cp =
m_arg[itask].isite_cp_y;
578 const double *w1 = &v1[Nvcd * isite];
579 double *w2 = &vcp1[Nvcd2 * isite_cp];
582 for (
int it = 0; it <
m_Mt; ++it) {
583 for (
int iz = 0; iz <
m_Mz; ++iz) {
584 for (
int ix = 0; ix <
m_Nx2; ++ix) {
585 int is = ix + m_Nx2 * (iy +
m_Ny * (iz +
m_Nz * it));
// Buffer index: no iy dimension, and the t stride is m_Mz rather than m_Nz.
586 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
588 int ix1 = Nvc2 * is2;
589 int ix2 = ix1 +
m_Nvc;
591 for (
int ic = 0; ic <
m_Nc; ++ic) {
593 int ic_i = 2 * ic + 1;
// Stores bc2 * (block id1 + block id4) and bc2 * (block id2 - block id3).
595 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id4 + in]);
596 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id4 + in]);
597 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_r + id3 + in]);
598 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_i + id3 + in]);
// Accumulates into v2 values computed from the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_y) at y index iy = m_Ny - 1, over this
// task's m_Mt x m_Mz x m_Nx2 range. Both buffer halves are passed with u to
// mult_uv_r / mult_uv_i before being added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, iv and ic_r.
608 double *v2,
const double *vcp2,
const int ieo)
610 const int Nvc2 = 2 *
m_Nvc;
612 const int Nvcd2 = Nvcd / 2;
615 const int id2 =
m_Nvc;
616 const int id3 =
m_Nvc * 2;
617 const int id4 =
m_Nvc * 3;
620 const int iy =
m_Ny - 1;
622 const int isite =
m_arg[itask].isite;
623 const int isite_cp =
m_arg[itask].isite_cp_y;
625 const double *w1 = &vcp2[Nvcd2 * isite_cp];
626 double *w2 = &v2[Nvcd * isite];
630 for (
int it = 0; it <
m_Mt; ++it) {
631 for (
int iz = 0; iz <
m_Mz; ++iz) {
632 for (
int ix = 0; ix <
m_Nx2; ++ix) {
633 int is = ix + m_Nx2 * (iy +
m_Ny * (iz +
m_Nz * it));
// Buffer index: no iy dimension, and the t stride is m_Mz rather than m_Nz.
634 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
637 int ix1 = Nvc2 * is2;
638 int ix2 = ix1 +
m_Nvc;
640 for (
int ic = 0; ic <
m_Nc; ++ic) {
641 int ic2 = ic *
m_Nvc;
643 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
644 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
645 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
646 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
649 int ic_i = 2 * ic + 1;
651 w2[ic_r + id1 + iv] += wt1r;
652 w2[ic_i + id1 + iv] += wt1i;
653 w2[ic_r + id2 + iv] += wt2r;
654 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives -wt2 and block id4 receives +wt1.
656 w2[ic_r + id3 + iv] -= wt2r;
657 w2[ic_i + id3 + iv] -= wt2i;
658 w2[ic_r + id4 + iv] += wt1r;
659 w2[ic_i + id4 + iv] += wt1i;
// Accumulates into v2 a term read from v1 at site is + m_Nx2 (one step in iy),
// for iy in [0, m_Ny - 1) inside this task's m_Mt x m_Mz range. Temporaries
// vt1 / vt2 are built from the source site, passed with u to
// mult_uv_r / mult_uv_i, and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, iv, vt1, vt2 and ic_r.
669 double *v2,
const double *v1,
const int ieo)
674 const int id2 =
m_Nvc;
675 const int id3 =
m_Nvc * 2;
676 const int id4 =
m_Nvc * 3;
680 const int isite =
m_arg[itask].isite;
682 const double *w1 = &v1[Nvcd * isite];
683 double *w2 = &v2[Nvcd * isite];
687 for (
int it = 0; it <
m_Mt; ++it) {
688 for (
int iz = 0; iz <
m_Mz; ++iz) {
// The last iy value is excluded, so is + m_Nx2 stays within the same (iz, it) slab.
689 for (
int iy = 0; iy <
m_Ny - 1; ++iy) {
690 for (
int ix = 0; ix <
m_Nx2; ++ix) {
691 int is = ix + m_Nx2 * (iy + m_Ny * (iz +
m_Nz * it));
693 int in = Nvcd * (is +
m_Nx2);
698 for (
int ic = 0; ic <
m_Nc; ++ic) {
700 int ic_i = 2 * ic + 1;
// vt1 = block id1 + block id4, vt2 = block id2 - block id3.
702 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id4 + in];
703 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id4 + in];
704 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id3 + in];
705 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id3 + in];
708 for (
int ic = 0; ic <
m_Nc; ++ic) {
709 int ic2 = ic *
m_Nvc;
711 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
712 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
713 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
714 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
717 int ic_i = 2 * ic + 1;
719 w2[ic_r + id1 + iv] += wt1r;
720 w2[ic_i + id1 + iv] += wt1i;
721 w2[ic_r + id2 + iv] += wt2r;
722 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives -wt2 and block id4 receives +wt1.
724 w2[ic_r + id3 + iv] -= wt2r;
725 w2[ic_i + id3 + iv] -= wt2i;
726 w2[ic_r + id4 + iv] += wt1r;
727 w2[ic_i + id4 + iv] += wt1i;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_y) from v1 at
// y index iy = m_Ny - 1 over this task's m_Mt x m_Mz x m_Nx2 range. The
// temporaries vt1 / vt2 are built from the four blocks of v1 and the stored
// values are the results of mult_udagv_r / mult_udagv_i applied with u.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, in, ic2, vt1, vt2 and ic_r.
738 double *vcp1,
const double *v1,
const int ieo)
740 const int Nvc2 = 2 *
m_Nvc;
742 const int Nvcd2 = Nvcd / 2;
745 const int id2 =
m_Nvc;
746 const int id3 =
m_Nvc * 2;
747 const int id4 =
m_Nvc * 3;
750 const int iy =
m_Ny - 1;
752 const int isite =
m_arg[itask].isite;
753 const int isite_cp =
m_arg[itask].isite_cp_y;
755 const double *w1 = &v1[Nvcd * isite];
756 double *w2 = &vcp1[Nvcd2 * isite_cp];
760 for (
int it = 0; it <
m_Mt; ++it) {
761 for (
int iz = 0; iz <
m_Mz; ++iz) {
762 for (
int ix = 0; ix <
m_Nx2; ++ix) {
763 int is = ix + m_Nx2 * (iy +
m_Ny * (iz +
m_Nz * it));
// Buffer index: no iy dimension, and the t stride is m_Mz rather than m_Nz.
764 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
767 int ix1 = Nvc2 * is2;
768 int ix2 = ix1 +
m_Nvc;
772 for (
int ic = 0; ic <
m_Nc; ++ic) {
774 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id4, vt2 = block id2 + block id3.
776 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
777 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
778 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
779 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
782 for (
int ic = 0; ic <
m_Nc; ++ic) {
786 int ic_i = 2 * ic + 1;
788 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
789 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
790 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
791 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Accumulates into v2 the contents of the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_y), each term multiplied by bc2, over this
// task's m_Mt x m_Mz x m_Nx2 range.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, bc2, iy, iv and ic_r.
801 double *v2,
const double *vcp2,
const int ieo)
803 const int Nvc2 = 2 *
m_Nvc;
805 const int Nvcd2 = Nvcd / 2;
808 const int id2 =
m_Nvc;
809 const int id3 =
m_Nvc * 2;
810 const int id4 =
m_Nvc * 3;
816 const int isite =
m_arg[itask].isite;
817 const int isite_cp =
m_arg[itask].isite_cp_y;
819 const double *w1 = &vcp2[Nvcd2 * isite_cp];
820 double *w2 = &v2[Nvcd * isite];
823 for (
int it = 0; it <
m_Mt; ++it) {
824 for (
int iz = 0; iz <
m_Mz; ++iz) {
825 for (
int ix = 0; ix <
m_Nx2; ++ix) {
826 int is = ix + m_Nx2 * (iy +
m_Ny * (iz +
m_Nz * it));
// Buffer index: no iy dimension, and the t stride is m_Mz rather than m_Nz.
827 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
829 int ix1 = Nvc2 * is2;
830 int ix2 = ix1 +
m_Nvc;
832 for (
int ic = 0; ic <
m_Nc; ++ic) {
834 int ic_i = 2 * ic + 1;
836 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
837 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
838 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
839 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Block id3 receives + the ix2 half and block id4 receives - the ix1 half.
841 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix2];
842 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix2];
843 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix1];
844 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix1];
// Accumulates into v2 a term read from v1 at site is - m_Nx2 (one step back in
// iy), for iy in [1, m_Ny) inside this task's m_Mt x m_Mz range. Temporaries
// vt1 / vt2 are built from the source site, passed with u to
// mult_udagv_r / mult_udagv_i, and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, id1, u, ig, iv, ic2, vt1, vt2 and ic_r.
854 double *v2,
const double *v1,
const int ieo)
859 const int id2 =
m_Nvc;
860 const int id3 =
m_Nvc * 2;
861 const int id4 =
m_Nvc * 3;
865 const int isite =
m_arg[itask].isite;
867 const double *w1 = &v1[Nvcd * isite];
868 double *w2 = &v2[Nvcd * isite];
872 for (
int it = 0; it <
m_Mt; ++it) {
873 for (
int iz = 0; iz <
m_Mz; ++iz) {
// iy starts at 1, so is - m_Nx2 stays within the same (iz, it) slab.
874 for (
int iy = 1; iy <
m_Ny; ++iy) {
875 for (
int ix = 0; ix <
m_Nx2; ++ix) {
876 int is = ix + m_Nx2 * (iy + m_Ny * (iz +
m_Nz * it));
878 int in = Nvcd * (is -
m_Nx2);
883 for (
int ic = 0; ic <
m_Nc; ++ic) {
885 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id4, vt2 = block id2 + block id3.
887 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
888 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
889 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
890 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
893 for (
int ic = 0; ic <
m_Nc; ++ic) {
896 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
897 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
898 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
899 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
902 int ic_i = 2 * ic + 1;
904 w2[ic_r + id1 + iv] += wt1r;
905 w2[ic_i + id1 + iv] += wt1i;
906 w2[ic_r + id2 + iv] += wt2r;
907 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives +wt2 and block id4 receives -wt1.
909 w2[ic_r + id3 + iv] += wt2r;
910 w2[ic_i + id3 + iv] += wt2i;
911 w2[ic_r + id4 + iv] -= wt1r;
912 w2[ic_i + id4 + iv] -= wt1i;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_z) from v1,
// multiplying each stored value by bc2. The work is done only when the task's
// kz0 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, bc2, iz, in and ic_r.
923 double *vcp1,
const double *v1,
const int ieo)
925 const int Nvc2 = 2 *
m_Nvc;
927 const int Nvcd2 = Nvcd / 2;
930 const int id2 =
m_Nvc;
931 const int id3 =
m_Nvc * 2;
932 const int id4 =
m_Nvc * 3;
937 const int isite =
m_arg[itask].isite;
938 const int isite_cp =
m_arg[itask].isite_cp_z;
940 const double *w1 = &v1[Nvcd * isite];
941 double *w2 = &vcp1[Nvcd2 * isite_cp];
944 if (
m_arg[itask].kz0 == 1) {
948 for (
int it = 0; it <
m_Mt; ++it) {
949 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
950 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no iz dimension.
951 int is2 = i_xy + Nxy * it;
953 int ix1 = Nvc2 * is2;
954 int ix2 = ix1 +
m_Nvc;
956 for (
int ic = 0; ic <
m_Nc; ++ic) {
958 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: stores bc2 * (block id1 + i * block id3)
// and bc2 * (block id2 - i * block id4).
960 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id3 + in]);
961 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id3 + in]);
962 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_i + id4 + in]);
963 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_r + id4 + in]);
// Accumulates into v2 values computed from the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_z) at z index iz = m_Mz - 1. The work is done
// only when the task's kz1 flag equals 1. Both buffer halves are passed with u
// (at offset m_Ndf * is) to mult_uv_r / mult_uv_i before being added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, iv and ic_r.
973 double *v2,
const double *vcp2,
const int ieo)
975 const int Nvc2 = 2 *
m_Nvc;
977 const int Nvcd2 = Nvcd / 2;
980 const int id2 =
m_Nvc;
981 const int id3 =
m_Nvc * 2;
982 const int id4 =
m_Nvc * 3;
986 const int isite =
m_arg[itask].isite;
987 const int isite_cp =
m_arg[itask].isite_cp_z;
989 const double *w1 = &vcp2[Nvcd2 * isite_cp];
990 double *w2 = &v2[Nvcd * isite];
994 if (
m_arg[itask].kz1 == 1) {
996 const int iz =
m_Mz - 1;
998 for (
int it = 0; it <
m_Mt; ++it) {
999 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1000 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no iz dimension.
1001 int is2 = i_xy + Nxy * it;
1003 int ig =
m_Ndf * is;
1004 int ix1 = Nvc2 * is2;
1005 int ix2 = ix1 +
m_Nvc;
1007 for (
int ic = 0; ic <
m_Nc; ++ic) {
1008 int ic2 = ic *
m_Nvc;
1010 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1011 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1012 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1013 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1016 int ic_i = 2 * ic + 1;
1018 w2[ic_r + id1 + iv] += wt1r;
1019 w2[ic_i + id1 + iv] += wt1i;
1020 w2[ic_r + id2 + iv] += wt2r;
1021 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives (wt1i, -wt1r) and block id4 receives (-wt2i, wt2r).
1023 w2[ic_r + id3 + iv] += wt1i;
1024 w2[ic_i + id3 + iv] -= wt1r;
1025 w2[ic_r + id4 + iv] -= wt2i;
1026 w2[ic_i + id4 + iv] += wt2r;
// Accumulates into v2 a term read from v1 at site is + Nxy (one step in iz),
// for iz in [0, m_Mz - kz1) inside this task's m_Mt range. Temporaries
// vt1 / vt2 are built from the source site, passed with u (at offset
// m_Ndf * is) to mult_uv_r / mult_uv_i, and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, iv, vt1, vt2 and ic_r.
1036 double *v2,
const double *v1,
const int ieo)
1042 const int id2 =
m_Nvc;
1043 const int id3 =
m_Nvc * 2;
1044 const int id4 =
m_Nvc * 3;
1048 const int isite =
m_arg[itask].isite;
1049 const int kz1 =
m_arg[itask].kz1;
1051 const double *w1 = &v1[Nvcd * isite];
1052 double *w2 = &v2[Nvcd * isite];
1056 for (
int it = 0; it <
m_Mt; ++it) {
// When kz1 == 1 the last iz value of the task is excluded from this loop.
1057 for (
int iz = 0; iz <
m_Mz - kz1; ++iz) {
1058 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1059 int is = i_xy + Nxy * (iz +
m_Nz * it);
1061 int in = Nvcd * (is + Nxy);
1062 int ig =
m_Ndf * is;
1066 for (
int ic = 0; ic <
m_Nc; ++ic) {
1068 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 + i * block id3,
// vt2 = block id2 - i * block id4.
1070 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id3 + in];
1071 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id3 + in];
1072 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id4 + in];
1073 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id4 + in];
1076 for (
int ic = 0; ic <
m_Nc; ++ic) {
1077 int ic2 = ic *
m_Nvc;
1079 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1080 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1081 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1082 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1085 int ic_i = 2 * ic + 1;
1087 w2[ic_r + id1 + iv] += wt1r;
1088 w2[ic_i + id1 + iv] += wt1i;
1089 w2[ic_r + id2 + iv] += wt2r;
1090 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives (wt1i, -wt1r) and block id4 receives (-wt2i, wt2r).
1092 w2[ic_r + id3 + iv] += wt1i;
1093 w2[ic_i + id3 + iv] -= wt1r;
1094 w2[ic_r + id4 + iv] -= wt2i;
1095 w2[ic_i + id4 + iv] += wt2r;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_z) from v1 at
// z index iz = m_Mz - 1. The work is done only when the task's kz1 flag
// equals 1. The temporaries vt1 / vt2 are built from the four blocks of v1 and
// the stored values are the results of mult_udagv_r / mult_udagv_i applied
// with u (at offset m_Ndf * is).
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, in, ic2, vt1, vt2 and ic_r.
1105 double *vcp1,
const double *v1,
const int ieo)
1107 const int Nvc2 = 2 *
m_Nvc;
1109 const int Nvcd2 = Nvcd / 2;
1112 const int id2 =
m_Nvc;
1113 const int id3 =
m_Nvc * 2;
1114 const int id4 =
m_Nvc * 3;
1118 const int isite =
m_arg[itask].isite;
1119 const int isite_cp =
m_arg[itask].isite_cp_z;
1121 const double *w1 = &v1[Nvcd * isite];
1122 double *w2 = &vcp1[Nvcd2 * isite_cp];
1126 if (
m_arg[itask].kz1 == 1) {
1128 const int iz =
m_Mz - 1;
1130 for (
int it = 0; it <
m_Mt; ++it) {
1131 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1132 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no iz dimension.
1133 int is2 = i_xy + Nxy * it;
1135 int ig =
m_Ndf * is;
1136 int ix1 = Nvc2 * is2;
1137 int ix2 = ix1 +
m_Nvc;
1141 for (
int ic = 0; ic <
m_Nc; ++ic) {
1143 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 - i * block id3,
// vt2 = block id2 + i * block id4.
1145 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1146 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1147 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1148 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1151 for (
int ic = 0; ic <
m_Nc; ++ic) {
1155 int ic_i = 2 * ic + 1;
1157 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1158 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1159 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1160 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Accumulates into v2 the contents of the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_z), each term multiplied by bc2. The work is
// done only when the task's kz0 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, bc2, iz, iv and ic_r.
1170 double *v2,
const double *vcp2,
const int ieo)
1172 const int Nvc2 = 2 *
m_Nvc;
1174 const int Nvcd2 = Nvcd / 2;
1177 const int id2 =
m_Nvc;
1178 const int id3 =
m_Nvc * 2;
1179 const int id4 =
m_Nvc * 3;
1184 const int isite =
m_arg[itask].isite;
1185 const int isite_cp =
m_arg[itask].isite_cp_z;
1187 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1188 double *w2 = &v2[Nvcd * isite];
1191 if (
m_arg[itask].kz0 == 1) {
1195 for (
int it = 0; it <
m_Mt; ++it) {
1196 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1197 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no iz dimension.
1198 int is2 = i_xy + Nxy * it;
1200 int ix1 = Nvc2 * is2;
1201 int ix2 = ix1 +
m_Nvc;
1203 for (
int ic = 0; ic <
m_Nc; ++ic) {
1205 int ic_i = 2 * ic + 1;
1207 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1208 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1209 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1210 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Block id3 receives the ix1 half as (-im, re); block id4 the ix2 half as (im, -re).
1212 w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix1];
1213 w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix1];
1214 w2[ic_r + id4 + iv] += bc2 * w1[ic_i + ix2];
1215 w2[ic_i + id4 + iv] -= bc2 * w1[ic_r + ix2];
// Accumulates into v2 a term read from v1 at site is - Nxy (one step back in
// iz), for iz in [kz0, m_Mz) inside this task's m_Mt range. Temporaries
// vt1 / vt2 are built from the source site, passed with u (at offset
// m_Ndf * (is - Nxy)) to mult_udagv_r / mult_udagv_i, and added to w2.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, iv, ic2, vt1, vt2 and ic_r.
1225 double *v2,
const double *v1,
const int ieo)
1231 const int id2 =
m_Nvc;
1232 const int id3 =
m_Nvc * 2;
1233 const int id4 =
m_Nvc * 3;
1237 const int isite =
m_arg[itask].isite;
1238 const int kz0 =
m_arg[itask].kz0;
1240 const double *w1 = &v1[Nvcd * isite];
1241 double *w2 = &v2[Nvcd * isite];
1245 for (
int it = 0; it <
m_Mt; ++it) {
// When kz0 == 1 the first iz value of the task is excluded from this loop.
1246 for (
int iz = kz0; iz <
m_Mz; ++iz) {
1247 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1248 int is = i_xy + Nxy * (iz +
m_Nz * it);
1250 int in = Nvcd * (is - Nxy);
1251 int ig =
m_Ndf * (is - Nxy);
1255 for (
int ic = 0; ic <
m_Nc; ++ic) {
1257 int ic_i = 2 * ic + 1;
// If ic_r / ic_i are real / imaginary slots: vt1 = block id1 - i * block id3,
// vt2 = block id2 + i * block id4.
1259 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1260 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1261 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1262 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1265 for (
int ic = 0; ic <
m_Nc; ++ic) {
1268 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1269 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1270 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1271 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1274 int ic_i = 2 * ic + 1;
1276 w2[ic_r + id1 + iv] += wt1r;
1277 w2[ic_i + id1 + iv] += wt1i;
1278 w2[ic_r + id2 + iv] += wt2r;
1279 w2[ic_i + id2 + iv] += wt2i;
// Block id3 receives (-wt1i, wt1r) and block id4 receives (wt2i, -wt2r).
1281 w2[ic_r + id3 + iv] -= wt1i;
1282 w2[ic_i + id3 + iv] += wt1r;
1283 w2[ic_r + id4 + iv] += wt2i;
1284 w2[ic_i + id4 + iv] -= wt2r;
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_t) with
// 2.0 * bc2 times blocks id3 and id4 of v1. The work is done only when the
// task's kt0 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, bc2, it, in and ic_r.
1294 double *vcp1,
const double *v1,
const int ieo)
1296 const int Nvc2 = 2 *
m_Nvc;
1298 const int Nvcd2 = Nvcd / 2;
1300 const int id3 =
m_Nvc * 2;
1301 const int id4 =
m_Nvc * 3;
1306 const int isite =
m_arg[itask].isite;
1307 const int isite_cp =
m_arg[itask].isite_cp_t;
1309 const double *w1 = &v1[Nvcd * isite];
1310 double *w2 = &vcp1[Nvcd2 * isite_cp];
1313 if (
m_arg[itask].kt0 == 1) {
1317 for (
int iz = 0; iz <
m_Mz; ++iz) {
1318 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1319 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1320 int is2 = i_xy + Nxy * iz;
1322 int ix1 = Nvc2 * is2;
1323 int ix2 = ix1 +
m_Nvc;
1325 for (
int ic = 0; ic <
m_Nc; ++ic) {
1327 int ic_i = 2 * ic + 1;
// Only blocks id3 and id4 of the source are read here.
1329 w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1330 w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1331 w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1332 w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
// Accumulates into blocks id3 and id4 of v2 the results of
// mult_uv_r / mult_uv_i applied with u (at offset m_Ndf * is) to the two halves
// of the buffer vcp2 (based at Nvcd2 * m_arg[itask].isite_cp_t), at t index
// it = m_Mt - 1. The work is done only when the task's kt1 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, u, iv and ic_r.
1342 double *v2,
const double *vcp2,
const int ieo)
1344 const int Nvc2 = 2 *
m_Nvc;
1346 const int Nvcd2 = Nvcd / 2;
1348 const int id3 =
m_Nvc * 2;
1349 const int id4 =
m_Nvc * 3;
1353 const int isite =
m_arg[itask].isite;
1354 const int isite_cp =
m_arg[itask].isite_cp_t;
1356 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1357 double *w2 = &v2[Nvcd * isite];
1361 if (
m_arg[itask].kt1 == 1) {
1363 const int it =
m_Mt - 1;
1365 for (
int iz = 0; iz <
m_Mz; ++iz) {
1366 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1367 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1368 int is2 = i_xy + Nxy * iz;
1370 int ig =
m_Ndf * is;
1371 int ix1 = Nvc2 * is2;
1372 int ix2 = ix1 +
m_Nvc;
1374 for (
int ic = 0; ic <
m_Nc; ++ic) {
1375 int ic2 = ic *
m_Nvc;
1378 int ic_i = 2 * ic + 1;
// Only blocks id3 and id4 of the destination are updated here.
1380 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1381 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1382 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1383 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// Accumulates into blocks id3 and id4 of v2 a term read from v1 at site
// is + Nxyz (one step in it, Nxyz = Nxy * m_Nz), for it in [0, m_Mt - kt1).
// The source blocks id3 / id4 are doubled into vt1 / vt2 and passed with u
// (at offset m_Ndf * is) to mult_uv_r / mult_uv_i.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, u, iv, vt1, vt2 and ic_r.
1393 double *v2,
const double *v1,
const int ieo)
1397 const int Nxyz = Nxy *
m_Nz;
1399 const int id3 =
m_Nvc * 2;
1400 const int id4 =
m_Nvc * 3;
1404 const int isite =
m_arg[itask].isite;
1405 const int kt1 =
m_arg[itask].kt1;
1407 const double *w1 = &v1[Nvcd * isite];
1408 double *w2 = &v2[Nvcd * isite];
// When kt1 == 1 the last it value of the task is excluded from this loop.
1412 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1413 for (
int iz = 0; iz <
m_Mz; ++iz) {
1414 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1415 int is = i_xy + Nxy * (iz + m_Nz * it);
1417 int in = Nvcd * (is + Nxyz);
1418 int ig =
m_Ndf * is;
1422 for (
int ic = 0; ic <
m_Nc; ++ic) {
1424 int ic_i = 2 * ic + 1;
1426 vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1427 vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1428 vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1429 vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1432 for (
int ic = 0; ic <
m_Nc; ++ic) {
1433 int ic2 = ic *
m_Nvc;
1436 int ic_i = 2 * ic + 1;
1438 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1439 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1440 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1441 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_t) from v1 at
// t index it = m_Mt - 1. The work is done only when the task's kt1 flag
// equals 1. Source blocks id1 / id2 are doubled into vt1 / vt2 and the stored
// values are the results of mult_udagv_r / mult_udagv_i applied with u
// (at offset m_Ndf * is).
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, in, ic2, vt1, vt2 and ic_r.
1451 double *vcp1,
const double *v1,
const int ieo)
1453 const int Nvc2 = 2 *
m_Nvc;
1455 const int Nvcd2 = Nvcd / 2;
1458 const int id2 =
m_Nvc;
1462 const int isite =
m_arg[itask].isite;
1463 const int isite_cp =
m_arg[itask].isite_cp_t;
1465 const double *w1 = &v1[Nvcd * isite];
1466 double *w2 = &vcp1[Nvcd2 * isite_cp];
1470 if (
m_arg[itask].kt1 == 1) {
1472 const int it =
m_Mt - 1;
1474 for (
int iz = 0; iz <
m_Mz; ++iz) {
1475 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1476 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1477 int is2 = i_xy + Nxy * iz;
1479 int ig =
m_Ndf * is;
1480 int ix1 = Nvc2 * is2;
1481 int ix2 = ix1 +
m_Nvc;
1485 for (
int ic = 0; ic <
m_Nc; ++ic) {
1487 int ic_i = 2 * ic + 1;
1489 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1490 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1491 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1492 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1495 for (
int ic = 0; ic <
m_Nc; ++ic) {
1499 int ic_i = 2 * ic + 1;
1501 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1502 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1503 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1504 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Accumulates into blocks id1 and id2 of v2 the two halves of the buffer vcp2
// (based at Nvcd2 * m_arg[itask].isite_cp_t), each multiplied by bc2. The work
// is done only when the task's kt0 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, bc2, it, iv and ic_r.
1514 double *v2,
const double *vcp2,
const int ieo)
1516 const int Nvc2 = 2 *
m_Nvc;
1518 const int Nvcd2 = Nvcd / 2;
1521 const int id2 =
m_Nvc;
1526 const int isite =
m_arg[itask].isite;
1527 const int isite_cp =
m_arg[itask].isite_cp_t;
1529 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1530 double *w2 = &v2[Nvcd * isite];
1533 if (
m_arg[itask].kt0 == 1) {
1537 for (
int iz = 0; iz <
m_Mz; ++iz) {
1538 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1539 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1540 int is2 = i_xy + Nxy * iz;
1542 int ix1 = Nvc2 * is2;
1543 int ix2 = ix1 +
m_Nvc;
1545 for (
int ic = 0; ic <
m_Nc; ++ic) {
1547 int ic_i = 2 * ic + 1;
// Only blocks id1 and id2 of the destination are updated here.
1549 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1550 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1551 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1552 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// Accumulates into blocks id1 and id2 of v2 a term read from v1 at site
// is - Nxyz (one step back in it, Nxyz = Nxy * m_Nz), for it in [kt0, m_Mt).
// The source blocks id1 / id2 are doubled into vt1 / vt2 and passed with u
// (at offset m_Ndf * (is - Nxyz)) to mult_udagv_r / mult_udagv_i.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, iv, ic2, vt1, vt2 and ic_r.
1562 double *v2,
const double *v1,
const int ieo)
1566 const int Nxyz = Nxy *
m_Nz;
1569 const int id2 =
m_Nvc;
1573 const int isite =
m_arg[itask].isite;
1574 const int kt0 =
m_arg[itask].kt0;
1576 const double *w1 = &v1[Nvcd * isite];
1577 double *w2 = &v2[Nvcd * isite];
// When kt0 == 1 the first it value of the task is excluded from this loop.
1581 for (
int it = kt0; it <
m_Mt; ++it) {
1582 for (
int iz = 0; iz <
m_Mz; ++iz) {
1583 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1584 int is = i_xy + Nxy * (iz + m_Nz * it);
1586 int in = Nvcd * (is - Nxyz);
1587 int ig =
m_Ndf * (is - Nxyz);
1591 for (
int ic = 0; ic <
m_Nc; ++ic) {
1593 int ic_i = 2 * ic + 1;
1595 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1596 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1597 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1598 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1601 for (
int ic = 0; ic <
m_Nc; ++ic) {
1605 int ic_i = 2 * ic + 1;
1607 w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1608 w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1609 w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1610 w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Fills the buffer vcp1 (based at Nvcd2 * m_arg[itask].isite_cp_t) with
// bc2 * (block id1 + block id3) and bc2 * (block id2 + block id4) of v1. The
// work is done only when the task's kt0 flag equals 1.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, bc2, it, in and ic_r.
1620 double *vcp1,
const double *v1,
const int ieo)
1622 const int Nvc2 = 2 *
m_Nvc;
1624 const int Nvcd2 = Nvcd / 2;
1627 const int id2 =
m_Nvc;
1628 const int id3 =
m_Nvc * 2;
1629 const int id4 =
m_Nvc * 3;
1634 const int isite =
m_arg[itask].isite;
1635 const int isite_cp =
m_arg[itask].isite_cp_t;
1637 const double *w1 = &v1[Nvcd * isite];
1638 double *w2 = &vcp1[Nvcd2 * isite_cp];
1641 if (
m_arg[itask].kt0 == 1) {
1645 for (
int iz = 0; iz <
m_Mz; ++iz) {
1646 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1647 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1648 int is2 = i_xy + Nxy * iz;
1650 int ix1 = Nvc2 * is2;
1651 int ix2 = ix1 +
m_Nvc;
1653 for (
int ic = 0; ic <
m_Nc; ++ic) {
1655 int ic_i = 2 * ic + 1;
1657 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1658 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1659 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1660 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
// Accumulates into v2 values computed from the buffer vcp2 (based at
// Nvcd2 * m_arg[itask].isite_cp_t) at t index it = m_Mt - 1. The work is done
// only when the task's kt1 flag equals 1. Both buffer halves are passed with u
// (at offset m_Ndf * is) to mult_uv_r / mult_uv_i; wt1 is added to blocks id1
// and id3, wt2 to blocks id2 and id4.
// NOTE(review): several original lines are missing from this excerpt,
// including the definitions of Nvcd, Nxy, id1, u, iv and ic_r.
1670 double *v2,
const double *vcp2,
const int ieo)
1672 const int Nvc2 = 2 *
m_Nvc;
1674 const int Nvcd2 = Nvcd / 2;
1677 const int id2 =
m_Nvc;
1678 const int id3 =
m_Nvc * 2;
1679 const int id4 =
m_Nvc * 3;
1683 const int isite =
m_arg[itask].isite;
1684 const int isite_cp =
m_arg[itask].isite_cp_t;
1686 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1687 double *w2 = &v2[Nvcd * isite];
1691 if (
m_arg[itask].kt1 == 1) {
1693 const int it =
m_Mt - 1;
1695 for (
int iz = 0; iz <
m_Mz; ++iz) {
1696 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1697 int is = i_xy + Nxy * (iz +
m_Nz * it);
// Buffer index has no it dimension.
1698 int is2 = i_xy + Nxy * iz;
1700 int ig =
m_Ndf * is;
1701 int ix1 = Nvc2 * is2;
1702 int ix2 = ix1 +
m_Nvc;
1704 for (
int ic = 0; ic <
m_Nc; ++ic) {
1705 int ic2 = ic *
m_Nvc;
1707 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1708 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1709 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1710 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1713 int ic_i = 2 * ic + 1;
1715 w2[ic_r + id1 + iv] += wt1r;
1716 w2[ic_i + id1 + iv] += wt1i;
1717 w2[ic_r + id2 + iv] += wt2r;
1718 w2[ic_i + id2 + iv] += wt2i;
1720 w2[ic_r + id3 + iv] += wt1r;
1721 w2[ic_i + id3 + iv] += wt1i;
1722 w2[ic_r + id4 + iv] += wt2r;
1723 w2[ic_i + id4 + iv] += wt2i;
// Per-task kernel fragment: for each site, combines blocks of v1 taken one
// Nxyz stride ahead, multiplies them by u at the current site and accumulates
// into v2.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably mult_tpb_chiral_thread -- confirm against the full source.
// id1, Nvcd, Nxy, u, iv, vt1, vt2 and ic_r are set on lines not shown here.
1733 double *v2,
const double *v1,
const int ieo)
1737 const int Nxyz = Nxy *
m_Nz;
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
1740 const int id2 =
m_Nvc;
1741 const int id3 =
m_Nvc * 2;
1742 const int id4 =
m_Nvc * 3;
1746 const int isite =
m_arg[itask].isite;
1747 const int kt1 =
m_arg[itask].kt1;
1749 const double *w1 = &v1[Nvcd * isite];
1750 double *w2 = &v2[Nvcd * isite];
// The upper bound on it is reduced by kt1 (read from m_arg[itask] above).
1754 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1755 for (
int iz = 0; iz <
m_Mz; ++iz) {
1756 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1757 int is = i_xy + Nxy * (iz + m_Nz * it);
// in: input offset of the site Nxyz further on; ig: offset into u for site is.
1759 int in = Nvcd * (is + Nxyz);
1760 int ig =
m_Ndf * is;
1764 for (
int ic = 0; ic <
m_Nc; ++ic) {
1766 int ic_i = 2 * ic + 1;
// vt1 = block id1 + block id3, vt2 = block id2 + block id4 of the input site.
1768 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1769 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1770 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1771 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
1774 for (
int ic = 0; ic <
m_Nc; ++ic) {
1775 int ic2 = ic *
m_Nvc;
1777 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1778 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1779 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1780 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1783 int ic_i = 2 * ic + 1;
// The same products are added to blocks id1 and id3, and to id2 and id4.
1785 w2[ic_r + id1 + iv] += wt1r;
1786 w2[ic_i + id1 + iv] += wt1i;
1787 w2[ic_r + id2 + iv] += wt2r;
1788 w2[ic_i + id2 + iv] += wt2i;
1790 w2[ic_r + id3 + iv] += wt1r;
1791 w2[ic_i + id3 + iv] += wt1i;
1792 w2[ic_r + id4 + iv] += wt2r;
1793 w2[ic_i + id4 + iv] += wt2i;
// Per-task kernel fragment: forms block differences of v1, applies
// mult_udagv_r / mult_udagv_i with u, and stores the result in the buffer vcp1.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably mult_tm1_chiral_thread -- confirm against the full source.
// vcp1 is written at stride Nvcd2 (= Nvcd / 2) from m_arg[itask].isite_cp_t.
// id1, Nvcd, Nxy, u, in, vt1, vt2 and ic_r are set on lines not shown here.
1803 double *vcp1,
const double *v1,
const int ieo)
1805 const int Nvc2 = 2 *
m_Nvc;
1807 const int Nvcd2 = Nvcd / 2;
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
1810 const int id2 =
m_Nvc;
1811 const int id3 =
m_Nvc * 2;
1812 const int id4 =
m_Nvc * 3;
1816 const int isite =
m_arg[itask].isite;
1817 const int isite_cp =
m_arg[itask].isite_cp_t;
1819 const double *w1 = &v1[Nvcd * isite];
1820 double *w2 = &vcp1[Nvcd2 * isite_cp];
// Only tasks whose kt1 flag is 1 do any work in the visible code.
1824 if (
m_arg[itask].kt1 == 1) {
// Work is restricted to the last local t index of the task.
1826 const int it =
m_Mt - 1;
1828 for (
int iz = 0; iz <
m_Mz; ++iz) {
1829 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1830 int is = i_xy + Nxy * (iz +
m_Nz * it);
1831 int is2 = i_xy + Nxy * iz;
// ig: offset into u for site is; ix1 / ix2: the two output blocks for is2.
1833 int ig =
m_Ndf * is;
1834 int ix1 = Nvc2 * is2;
1835 int ix2 = ix1 +
m_Nvc;
1839 for (
int ic = 0; ic <
m_Nc; ++ic) {
1841 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id3, vt2 = block id2 - block id4.
1843 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1844 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1845 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1846 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1849 for (
int ic = 0; ic <
m_Nc; ++ic) {
1853 int ic_i = 2 * ic + 1;
// Results overwrite (not accumulate into) the output buffer.
1855 w2[ic_r + ix1] = mult_udagv_r(&u[ic_r + ig], vt1, m_Nc);
1856 w2[ic_i + ix1] = mult_udagv_i(&u[ic_r + ig], vt1, m_Nc);
1857 w2[ic_r + ix2] = mult_udagv_r(&u[ic_r + ig], vt2, m_Nc);
1858 w2[ic_i + ix2] = mult_udagv_i(&u[ic_r + ig], vt2, m_Nc);
// Per-task kernel fragment: accumulates bc2-scaled data from the buffer vcp2
// into v2, with opposite signs for the first and second pairs of blocks.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably mult_tm2_chiral_thread -- confirm against the full source.
// vcp2 is read at stride Nvcd2 (= Nvcd / 2) from m_arg[itask].isite_cp_t.
// id1, Nvcd, Nxy, bc2, it, iv and ic_r are set on lines not shown here.
1868 double *v2,
const double *vcp2,
const int ieo)
1870 const int Nvc2 = 2 *
m_Nvc;
1872 const int Nvcd2 = Nvcd / 2;
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
1875 const int id2 =
m_Nvc;
1876 const int id3 =
m_Nvc * 2;
1877 const int id4 =
m_Nvc * 3;
1882 const int isite =
m_arg[itask].isite;
1883 const int isite_cp =
m_arg[itask].isite_cp_t;
1885 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1886 double *w2 = &v2[Nvcd * isite];
// Only tasks whose kt0 flag is 1 do any work in the visible code.
1889 if (
m_arg[itask].kt0 == 1) {
1893 for (
int iz = 0; iz <
m_Mz; ++iz) {
1894 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1895 int is = i_xy + Nxy * (iz +
m_Nz * it);
1896 int is2 = i_xy + Nxy * iz;
// ix1 / ix2: start of the two m_Nvc-sized input blocks for slice site is2.
1898 int ix1 = Nvc2 * is2;
1899 int ix2 = ix1 +
m_Nvc;
1901 for (
int ic = 0; ic <
m_Nc; ++ic) {
1903 int ic_i = 2 * ic + 1;
// Blocks id1 / id2 receive +bc2 * input ...
1905 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1906 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1907 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1908 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// ... while blocks id3 / id4 receive -bc2 * the same input.
1910 w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
1911 w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
1912 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
1913 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
// Per-task kernel fragment: for each site, forms block differences of v1 taken
// one Nxyz stride behind, applies mult_udagv_r / mult_udagv_i with u taken at
// that same earlier site, and accumulates into v2.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably mult_tmb_chiral_thread -- confirm against the full source.
// id1, Nvcd, Nxy, u, iv, ic2, vt1, vt2 and ic_r are set on lines not shown here.
1923 double *v2,
const double *v1,
const int ieo)
1927 const int Nxyz = Nxy *
m_Nz;
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
1930 const int id2 =
m_Nvc;
1931 const int id3 =
m_Nvc * 2;
1932 const int id4 =
m_Nvc * 3;
1936 const int isite =
m_arg[itask].isite;
1937 const int kt0 =
m_arg[itask].kt0;
1939 const double *w1 = &v1[Nvcd * isite];
1940 double *w2 = &v2[Nvcd * isite];
// The t loop starts at kt0 (read from m_arg[itask] above) rather than at 0.
1944 for (
int it = kt0; it <
m_Mt; ++it) {
1945 for (
int iz = 0; iz <
m_Mz; ++iz) {
1946 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
1947 int is = i_xy + Nxy * (iz + m_Nz * it);
// in / ig: input and u offsets of the site Nxyz before the current one.
1949 int in = Nvcd * (is - Nxyz);
1950 int ig =
m_Ndf * (is - Nxyz);
1954 for (
int ic = 0; ic <
m_Nc; ++ic) {
1956 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id3, vt2 = block id2 - block id4 of the input site.
1958 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1959 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1960 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1961 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
1964 for (
int ic = 0; ic <
m_Nc; ++ic) {
1967 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1968 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1969 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1970 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1973 int ic_i = 2 * ic + 1;
// Products are added to blocks id1 / id2 ...
1975 w2[ic_r + id1 + iv] += wt1r;
1976 w2[ic_i + id1 + iv] += wt1i;
1977 w2[ic_r + id2 + iv] += wt2r;
1978 w2[ic_i + id2 + iv] += wt2i;
// ... and subtracted from blocks id3 / id4.
1980 w2[ic_r + id3 + iv] -= wt1r;
1981 w2[ic_i + id3 + iv] -= wt1i;
1982 w2[ic_r + id4 + iv] -= wt2r;
1983 w2[ic_i + id4 + iv] -= wt2i;
// Per-task kernel fragment: copies v1 into v2 with the block pairs
// (id1, id3) and (id2, id4) exchanged at every site of the task's sub-volume.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably gm5_dirac_thread(itask, v2, v1) -- confirm against the full source.
// id1, Nvcd, Nxy and itask are set on lines not shown here.
1993 double *v2,
const double *v1)
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
1999 const int id2 =
m_Nvc;
2000 const int id3 =
m_Nvc * 2;
2001 const int id4 =
m_Nvc * 3;
2003 const int isite =
m_arg[itask].isite;
2005 const double *w1 = &v1[Nvcd * isite];
2006 double *w2 = &v2[Nvcd * isite];
2009 for (
int it = 0; it <
m_Mt; ++it) {
2010 for (
int iz = 0; iz <
m_Mz; ++iz) {
2011 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
// iv: offset of the first double of the current site.
2012 int iv = Nvcd * (i_xy + Nxy * (iz +
m_Nz * it));
2014 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
// Output blocks id1 / id2 take input blocks id3 / id4 ...
2015 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2016 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
// ... and output blocks id3 / id4 take input blocks id1 / id2.
2018 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2019 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// Per-task kernel fragment: copies v1 into v2, keeping blocks id1 / id2 and
// negating blocks id3 / id4 at every site of the task's sub-volume.
// NOTE(review): the line naming this routine is absent from this listing;
// presumably gm5_chiral_thread(itask, v2, v1) -- confirm against the full source.
// id1, Nvcd, Nxy and itask are set on lines not shown here.
2029 double *v2,
const double *v1)
// Offsets of the 2nd, 3rd and 4th blocks of m_Nvc doubles inside one site.
2035 const int id2 =
m_Nvc;
2036 const int id3 =
m_Nvc * 2;
2037 const int id4 =
m_Nvc * 3;
2039 const int isite =
m_arg[itask].isite;
2041 const double *w1 = &v1[Nvcd * isite];
2042 double *w2 = &v2[Nvcd * isite];
2045 for (
int it = 0; it <
m_Mt; ++it) {
2046 for (
int iz = 0; iz <
m_Mz; ++iz) {
2047 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
// iv: offset of the first double of the current site.
2048 int iv = Nvcd * (i_xy + Nxy * (iz +
m_Nz * it));
2050 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
// First two blocks are copied unchanged.
2051 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2052 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
// Last two blocks are copied with the sign flipped.
2054 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2055 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
// Per-task kernel fragment: exchanges the block pairs (id1, id3) and
// (id2, id4) of v1 in place at every site of the task's sub-volume.
// NOTE(review): the signature is absent from this listing; this looks like an
// in-place counterpart of the two-buffer block swap -- confirm the name
// against the full source.
// id1, Nvcd, Nxy, v1 and itask are set on lines not shown here.
2071 const int id2 =
m_Nvc;
2072 const int id3 =
m_Nvc * 2;
2073 const int id4 =
m_Nvc * 3;
2075 const int isite =
m_arg[itask].isite;
2077 double *w1 = &v1[Nvcd * isite];
2080 for (
int it = 0; it <
m_Mt; ++it) {
2081 for (
int iz = 0; iz <
m_Mz; ++iz) {
2082 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
// iv: offset of the first double of the current site.
2083 int iv = Nvcd * (i_xy + Nxy * (iz +
m_Nz * it));
2084 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
// Save the old id1 / id2 entries before they are overwritten.
2085 double wt1 = w1[ivc + id1 + iv];
2086 double wt2 = w1[ivc + id2 + iv];
2088 w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2089 w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
// Complete the swap from the saved values.
2091 w1[ivc + id3 + iv] = wt1;
2092 w1[ivc + id4 + iv] = wt2;
// Per-task kernel fragment: negates blocks id3 and id4 of v1 in place at every
// site of the task's sub-volume; the other blocks are not touched in the
// lines shown.
// NOTE(review): the signature is absent from this listing; this looks like an
// in-place counterpart of the two-buffer sign flip -- confirm the name
// against the full source.
// Nvcd, Nxy, v1 and itask are set on lines not shown here.
2107 const int id3 =
m_Nvc * 2;
2108 const int id4 =
m_Nvc * 3;
2110 const int isite =
m_arg[itask].isite;
2112 double *w1 = &v1[Nvcd * isite];
2115 for (
int it = 0; it <
m_Mt; ++it) {
2116 for (
int iz = 0; iz <
m_Mz; ++iz) {
2117 for (
int i_xy = 0; i_xy < Nxy; ++i_xy) {
// iv: offset of the first double of the current site.
2118 int iv = Nvcd * (i_xy + Nxy * (iz +
m_Nz * it));
2119 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
2120 w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2121 w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_zp2_thread(const int, double *, const double *, const int)
void mult_tp1_dirac_thread(const int, double *, const double *, const int)
void mult_zpb_thread(const int, double *, const double *, const int)
void mult_ymb_thread(const int, double *, const double *, const int)
const double * ptr(const int jin, const int site, const int jex) const
void gm5_chiral_thread(const int, double *, const double *)
void mult_zm2_thread(const int, double *, const double *, const int)
void clear_thread(const int, double *)
void general(const char *format,...)
void mult_xp2_thread(const int, double *, const double *, const int)
std::vector< double > m_boundary_each_node
Boundary condition for each node.
void mult_tmb_chiral_thread(const int, double *, const double *, const int)
void mult_zp1_thread(const int, double *, const double *, const int)
void mult_tp2_dirac_thread(const int, double *, const double *, const int)
std::vector< int > m_yzt_eo
void mult_xp1_thread(const int, double *, const double *, const int)
void mult_ym1_thread(const int, double *, const double *, const int)
void Meo(Field &, const Field &, const int ieo)
void gm5_dirac_thread(const int, double *, const double *)
void mult_xm1_thread(const int, double *, const double *, const int)
void mult_tpb_dirac_thread(const int, double *, const double *, const int)
void mult_tm2_dirac_thread(const int, double *, const double *, const int)
void mult_tp2_chiral_thread(const int, double *, const double *, const int)
void mult_yp1_thread(const int, double *, const double *, const int)
Bridge::VerboseLevel m_vl
void mult_zmb_thread(const int, double *, const double *, const int)
void mult_xm2_thread(const int, double *, const double *, const int)
Field_G * m_U
Dummy: points to m_Ueo.
void mult_tm1_dirac_thread(const int, double *, const double *, const int)
static int get_num_threads_available()
Returns the number of available threads (works outside of a parallel region).
void mult_tpb_chiral_thread(const int, double *, const double *, const int)
void mult_yp2_thread(const int, double *, const double *, const int)
void mult_tp1_chiral_thread(const int, double *, const double *, const int)
void crucial(const char *format,...)
std::vector< mult_arg > m_arg
void mult_tm2_chiral_thread(const int, double *, const double *, const int)
void mult_tm1_chiral_thread(const int, double *, const double *, const int)
void mult_xmb_thread(const int, double *, const double *, const int)
void mult_xpb_thread(const int, double *, const double *, const int)
void scal_thread(const int, double *, const double)
void mult_ym2_thread(const int, double *, const double *, const int)
static const std::string class_name
void mult_tmb_dirac_thread(const int, double *, const double *, const int)
void mult_ypb_thread(const int, double *, const double *, const int)
void mult_zm1_thread(const int, double *, const double *, const int)