19 #if defined USE_GROUP_SU3
21 #elif defined USE_GROUP_SU2
23 #elif defined USE_GROUP_SU_N
47 vout.
crucial(
m_vl,
"Error at %s: Nz = %d and Nt = %d do not match Nthread = %d\n",
57 vout.
crucial(
m_vl,
"Error at %s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
63 vout.
crucial(
m_vl,
"Error at %s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
94 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
95 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
96 int itask = ith_z + m_Ntask_z * ith_t;
100 m_arg[itask].kt0 = 0;
101 m_arg[itask].kt1 = 0;
102 m_arg[itask].kz0 = 0;
103 m_arg[itask].kz1 = 0;
104 if (ith_t == 0)
m_arg[itask].kt0 = 1;
105 if (ith_z == 0)
m_arg[itask].kz0 = 1;
106 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
107 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
111 m_arg[itask].isite_cp_z = ith_t *
m_Mt * Nxy;
112 m_arg[itask].isite_cp_t = ith_z *
m_Mz * Nxy;
120 double *v2,
const double fac,
const double *v1)
123 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
125 const int isite =
m_arg[itask].isite;
127 double *w2 = &v2[Nvcd * isite];
128 const double *w1 = &v1[Nvcd * isite];
130 for (
int it = 0; it <
m_Mt; ++it) {
131 for (
int iz = 0; iz <
m_Mz; ++iz) {
132 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
133 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
135 w2[iv] = fac * w2[iv] + w1[iv];
147 const int Nvxy = Nvcd *
m_Nx *
m_Ny;
149 const int isite =
m_arg[itask].isite;
151 double *w2 = &v2[Nvcd * isite];
153 for (
int it = 0; it <
m_Mt; ++it) {
154 for (
int iz = 0; iz <
m_Mz; ++iz) {
155 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
156 int iv = ivxy + Nvxy * (iz +
m_Nz * it);
167 double *vcp1,
const double *v1)
169 const int Nvc2 = 2 *
m_Nvc;
171 const int Nvcd2 = Nvcd / 2;
174 const int id2 =
m_Nvc;
175 const int id3 =
m_Nvc * 2;
176 const int id4 =
m_Nvc * 3;
182 const int isite =
m_arg[itask].isite;
183 const int isite_cp =
m_arg[itask].isite_cp_x;
185 double *w2 = &vcp1[Nvcd2 * isite_cp];
186 const double *w1 = &v1[Nvcd * isite];
189 for (
int it = 0; it <
m_Mt; ++it) {
190 for (
int iz = 0; iz <
m_Mz; ++iz) {
191 for (
int iy = 0; iy <
m_Ny; ++iy) {
192 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
193 int is2 = iy + m_Ny * (iz + m_Mz * it);
195 int ix1 = Nvc2 * is2;
196 int ix2 = ix1 +
m_Nvc;
198 for (
int ic = 0; ic <
m_Nc; ++ic) {
200 int ic_i = 2 * ic + 1;
202 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id4 + in]);
203 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id4 + in]);
204 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_i + id3 + in]);
205 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_r + id3 + in]);
215 double *v2,
const double *vcp2)
217 const int Nvc2 = 2 *
m_Nvc;
219 const int Nvcd2 = Nvcd / 2;
222 const int id2 =
m_Nvc;
223 const int id3 =
m_Nvc * 2;
224 const int id4 =
m_Nvc * 3;
227 const int ix =
m_Nx - 1;
229 const int isite =
m_arg[itask].isite;
230 const int isite_cp =
m_arg[itask].isite_cp_x;
232 double *w2 = &v2[Nvcd * isite];
233 const double *w1 = &vcp2[Nvcd2 * isite_cp];
237 for (
int it = 0; it <
m_Mt; ++it) {
238 for (
int iz = 0; iz <
m_Mz; ++iz) {
239 for (
int iy = 0; iy <
m_Ny; ++iy) {
240 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
241 int is2 = iy + m_Ny * (iz + m_Mz * it);
244 int ix1 = Nvc2 * is2;
245 int ix2 = ix1 +
m_Nvc;
247 for (
int ic = 0; ic <
m_Nc; ++ic) {
248 int ic2 = ic *
m_Nvc;
250 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
251 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
252 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
253 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
256 int ic_i = 2 * ic + 1;
258 w2[ic_r + id1 + iv] += wt1r;
259 w2[ic_i + id1 + iv] += wt1i;
260 w2[ic_r + id2 + iv] += wt2r;
261 w2[ic_i + id2 + iv] += wt2i;
263 w2[ic_r + id3 + iv] += wt2i;
264 w2[ic_i + id3 + iv] -= wt2r;
265 w2[ic_r + id4 + iv] += wt1i;
266 w2[ic_i + id4 + iv] -= wt1r;
276 double *v2,
const double *v1)
281 const int id2 =
m_Nvc;
282 const int id3 =
m_Nvc * 2;
283 const int id4 =
m_Nvc * 3;
287 const int isite =
m_arg[itask].isite;
289 double *w2 = &v2[Nvcd * isite];
290 const double *w1 = &v1[Nvcd * isite];
294 for (
int it = 0; it <
m_Mt; ++it) {
295 for (
int iz = 0; iz <
m_Mz; ++iz) {
296 for (
int iy = 0; iy <
m_Ny; ++iy) {
297 for (
int ix = 0; ix <
m_Nx - 1; ++ix) {
298 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
300 int in = Nvcd * (is + 1);
305 for (
int ic = 0; ic <
m_Nc; ++ic) {
307 int ic_i = 2 * ic + 1;
309 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id4 + in];
310 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id4 + in];
311 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id3 + in];
312 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id3 + in];
315 for (
int ic = 0; ic <
m_Nc; ++ic) {
316 int ic2 = ic *
m_Nvc;
318 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
319 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
320 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
321 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
324 int ic_i = 2 * ic + 1;
326 w2[ic_r + id1 + iv] += wt1r;
327 w2[ic_i + id1 + iv] += wt1i;
328 w2[ic_r + id2 + iv] += wt2r;
329 w2[ic_i + id2 + iv] += wt2i;
331 w2[ic_r + id3 + iv] += wt2i;
332 w2[ic_i + id3 + iv] -= wt2r;
333 w2[ic_r + id4 + iv] += wt1i;
334 w2[ic_i + id4 + iv] -= wt1r;
345 double *vcp1,
const double *v1)
347 const int Nvc2 = 2 *
m_Nvc;
349 const int Nvcd2 = Nvcd / 2;
352 const int id2 =
m_Nvc;
353 const int id3 =
m_Nvc * 2;
354 const int id4 =
m_Nvc * 3;
357 const int ix =
m_Nx - 1;
359 const int isite =
m_arg[itask].isite;
360 const int isite_cp =
m_arg[itask].isite_cp_x;
362 double *w2 = &vcp1[Nvcd2 * isite_cp];
363 const double *w1 = &v1[Nvcd * isite];
367 for (
int it = 0; it <
m_Mt; ++it) {
368 for (
int iz = 0; iz <
m_Mz; ++iz) {
369 for (
int iy = 0; iy <
m_Ny; ++iy) {
370 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
371 int is2 = iy + m_Ny * (iz + m_Mz * it);
374 int ix1 = Nvc2 * is2;
375 int ix2 = ix1 +
m_Nvc;
379 for (
int ic = 0; ic <
m_Nc; ++ic) {
381 int ic_i = 2 * ic + 1;
383 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
384 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
385 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
386 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
389 for (
int ic = 0; ic <
m_Nc; ++ic) {
393 int ic_i = 2 * ic + 1;
395 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
396 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
397 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
398 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
408 double *v2,
const double *vcp2)
410 const int Nvc2 = 2 *
m_Nvc;
412 const int Nvcd2 = Nvcd / 2;
415 const int id2 =
m_Nvc;
416 const int id3 =
m_Nvc * 2;
417 const int id4 =
m_Nvc * 3;
423 const int isite =
m_arg[itask].isite;
424 const int isite_cp =
m_arg[itask].isite_cp_x;
426 double *w2 = &v2[Nvcd * isite];
427 const double *w1 = &vcp2[Nvcd2 * isite_cp];
430 for (
int it = 0; it <
m_Mt; ++it) {
431 for (
int iz = 0; iz <
m_Mz; ++iz) {
432 for (
int iy = 0; iy <
m_Ny; ++iy) {
433 int is = ix +
m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
434 int is2 = iy + m_Ny * (iz + m_Mz * it);
436 int ix1 = Nvc2 * is2;
437 int ix2 = ix1 +
m_Nvc;
439 for (
int ic = 0; ic <
m_Nc; ++ic) {
441 int ic_i = 2 * ic + 1;
443 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
444 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
445 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
446 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
448 w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix2];
449 w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix2];
450 w2[ic_r + id4 + iv] -= bc2 * w1[ic_i + ix1];
451 w2[ic_i + id4 + iv] += bc2 * w1[ic_r + ix1];
461 double *v2,
const double *v1)
466 const int id2 =
m_Nvc;
467 const int id3 =
m_Nvc * 2;
468 const int id4 =
m_Nvc * 3;
472 const int isite =
m_arg[itask].isite;
474 double *w2 = &v2[Nvcd * isite];
475 const double *w1 = &v1[Nvcd * isite];
479 for (
int it = 0; it <
m_Mt; ++it) {
480 for (
int iz = 0; iz <
m_Mz; ++iz) {
481 for (
int iy = 0; iy <
m_Ny; ++iy) {
482 for (
int ix = 1; ix <
m_Nx; ++ix) {
483 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
485 int in = Nvcd * (is - 1);
486 int ig =
m_Ndf * (is - 1);
490 for (
int ic = 0; ic <
m_Nc; ++ic) {
492 int ic_i = 2 * ic + 1;
494 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id4 + in];
495 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id4 + in];
496 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id3 + in];
497 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id3 + in];
500 for (
int ic = 0; ic <
m_Nc; ++ic) {
503 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
504 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
505 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
506 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
509 int ic_i = 2 * ic + 1;
511 w2[ic_r + id1 + iv] += wt1r;
512 w2[ic_i + id1 + iv] += wt1i;
513 w2[ic_r + id2 + iv] += wt2r;
514 w2[ic_i + id2 + iv] += wt2i;
516 w2[ic_r + id3 + iv] -= wt2i;
517 w2[ic_i + id3 + iv] += wt2r;
518 w2[ic_r + id4 + iv] -= wt1i;
519 w2[ic_i + id4 + iv] += wt1r;
530 double *vcp1,
const double *v1)
532 const int Nvc2 = 2 *
m_Nvc;
534 const int Nvcd2 = Nvcd / 2;
537 const int id2 =
m_Nvc;
538 const int id3 =
m_Nvc * 2;
539 const int id4 =
m_Nvc * 3;
545 const int isite =
m_arg[itask].isite;
546 const int isite_cp =
m_arg[itask].isite_cp_y;
548 double *w2 = &vcp1[Nvcd2 * isite_cp];
549 const double *w1 = &v1[Nvcd * isite];
552 for (
int it = 0; it <
m_Mt; ++it) {
553 for (
int iz = 0; iz <
m_Mz; ++iz) {
554 for (
int ix = 0; ix <
m_Nx; ++ix) {
555 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
556 int is2 = ix + m_Nx * (iz + m_Mz * it);
558 int ix1 = Nvc2 * is2;
559 int ix2 = ix1 +
m_Nvc;
561 for (
int ic = 0; ic <
m_Nc; ++ic) {
563 int ic_i = 2 * ic + 1;
565 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id4 + in]);
566 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id4 + in]);
567 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] - w1[ic_r + id3 + in]);
568 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_i + id3 + in]);
578 double *v2,
const double *vcp2)
580 const int Nvc2 = 2 *
m_Nvc;
582 const int Nvcd2 = Nvcd / 2;
585 const int id2 =
m_Nvc;
586 const int id3 =
m_Nvc * 2;
587 const int id4 =
m_Nvc * 3;
590 const int iy =
m_Ny - 1;
592 const int isite =
m_arg[itask].isite;
593 const int isite_cp =
m_arg[itask].isite_cp_y;
595 double *w2 = &v2[Nvcd * isite];
596 const double *w1 = &vcp2[Nvcd2 * isite_cp];
600 for (
int it = 0; it <
m_Mt; ++it) {
601 for (
int iz = 0; iz <
m_Mz; ++iz) {
602 for (
int ix = 0; ix <
m_Nx; ++ix) {
603 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
604 int is2 = ix + m_Nx * (iz + m_Mz * it);
607 int ix1 = Nvc2 * is2;
608 int ix2 = ix1 +
m_Nvc;
610 for (
int ic = 0; ic <
m_Nc; ++ic) {
611 int ic2 = ic *
m_Nvc;
613 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
614 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
615 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
616 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
619 int ic_i = 2 * ic + 1;
621 w2[ic_r + id1 + iv] += wt1r;
622 w2[ic_i + id1 + iv] += wt1i;
623 w2[ic_r + id2 + iv] += wt2r;
624 w2[ic_i + id2 + iv] += wt2i;
626 w2[ic_r + id3 + iv] -= wt2r;
627 w2[ic_i + id3 + iv] -= wt2i;
628 w2[ic_r + id4 + iv] += wt1r;
629 w2[ic_i + id4 + iv] += wt1i;
639 double *v2,
const double *v1)
644 const int id2 =
m_Nvc;
645 const int id3 =
m_Nvc * 2;
646 const int id4 =
m_Nvc * 3;
650 const int isite =
m_arg[itask].isite;
652 double *w2 = &v2[Nvcd * isite];
653 const double *w1 = &v1[Nvcd * isite];
657 for (
int it = 0; it <
m_Mt; ++it) {
658 for (
int iz = 0; iz <
m_Mz; ++iz) {
659 for (
int iy = 0; iy <
m_Ny - 1; ++iy) {
660 for (
int ix = 0; ix <
m_Nx; ++ix) {
661 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
663 int in = Nvcd * (is +
m_Nx);
668 for (
int ic = 0; ic <
m_Nc; ++ic) {
670 int ic_i = 2 * ic + 1;
672 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id4 + in];
673 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id4 + in];
674 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id3 + in];
675 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id3 + in];
678 for (
int ic = 0; ic <
m_Nc; ++ic) {
679 int ic2 = ic *
m_Nvc;
681 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
682 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
683 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
684 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
687 int ic_i = 2 * ic + 1;
689 w2[ic_r + id1 + iv] += wt1r;
690 w2[ic_i + id1 + iv] += wt1i;
691 w2[ic_r + id2 + iv] += wt2r;
692 w2[ic_i + id2 + iv] += wt2i;
694 w2[ic_r + id3 + iv] -= wt2r;
695 w2[ic_i + id3 + iv] -= wt2i;
696 w2[ic_r + id4 + iv] += wt1r;
697 w2[ic_i + id4 + iv] += wt1i;
708 double *vcp1,
const double *v1)
710 const int Nvc2 = 2 *
m_Nvc;
712 const int Nvcd2 = Nvcd / 2;
715 const int id2 =
m_Nvc;
716 const int id3 =
m_Nvc * 2;
717 const int id4 =
m_Nvc * 3;
720 const int iy =
m_Ny - 1;
722 const int isite =
m_arg[itask].isite;
723 const int isite_cp =
m_arg[itask].isite_cp_y;
725 double *w2 = &vcp1[Nvcd2 * isite_cp];
726 const double *w1 = &v1[Nvcd * isite];
730 for (
int it = 0; it <
m_Mt; ++it) {
731 for (
int iz = 0; iz <
m_Mz; ++iz) {
732 for (
int ix = 0; ix <
m_Nx; ++ix) {
733 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
734 int is2 = ix + m_Nx * (iz + m_Mz * it);
737 int ix1 = Nvc2 * is2;
738 int ix2 = ix1 +
m_Nvc;
742 for (
int ic = 0; ic <
m_Nc; ++ic) {
744 int ic_i = 2 * ic + 1;
746 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
747 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
748 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
749 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
752 for (
int ic = 0; ic <
m_Nc; ++ic) {
756 int ic_i = 2 * ic + 1;
758 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
759 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
760 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
761 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
771 double *v2,
const double *vcp2)
773 const int Nvc2 = 2 *
m_Nvc;
775 const int Nvcd2 = Nvcd / 2;
778 const int id2 =
m_Nvc;
779 const int id3 =
m_Nvc * 2;
780 const int id4 =
m_Nvc * 3;
786 const int isite =
m_arg[itask].isite;
787 const int isite_cp =
m_arg[itask].isite_cp_y;
789 double *w2 = &v2[Nvcd * isite];
790 const double *w1 = &vcp2[Nvcd2 * isite_cp];
793 for (
int it = 0; it <
m_Mt; ++it) {
794 for (
int iz = 0; iz <
m_Mz; ++iz) {
795 for (
int ix = 0; ix <
m_Nx; ++ix) {
796 int is = ix + m_Nx * (iy +
m_Ny * (iz +
m_Nz * it));
797 int is2 = ix + m_Nx * (iz + m_Mz * it);
799 int ix1 = Nvc2 * is2;
800 int ix2 = ix1 +
m_Nvc;
802 for (
int ic = 0; ic <
m_Nc; ++ic) {
804 int ic_i = 2 * ic + 1;
806 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
807 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
808 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
809 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
811 w2[ic_r + id3 + iv] += bc2 * w1[ic_r + ix2];
812 w2[ic_i + id3 + iv] += bc2 * w1[ic_i + ix2];
813 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix1];
814 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix1];
824 double *v2,
const double *v1)
829 const int id2 =
m_Nvc;
830 const int id3 =
m_Nvc * 2;
831 const int id4 =
m_Nvc * 3;
835 const int isite =
m_arg[itask].isite;
837 double *w2 = &v2[Nvcd * isite];
838 const double *w1 = &v1[Nvcd * isite];
842 for (
int it = 0; it <
m_Mt; ++it) {
843 for (
int iz = 0; iz <
m_Mz; ++iz) {
844 for (
int iy = 1; iy <
m_Ny; ++iy) {
845 for (
int ix = 0; ix <
m_Nx; ++ix) {
846 int is = ix + m_Nx * (iy + m_Ny * (iz +
m_Nz * it));
848 int in = Nvcd * (is -
m_Nx);
853 for (
int ic = 0; ic <
m_Nc; ++ic) {
855 int ic_i = 2 * ic + 1;
857 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id4 + in];
858 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id4 + in];
859 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id3 + in];
860 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id3 + in];
863 for (
int ic = 0; ic <
m_Nc; ++ic) {
866 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
867 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
868 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
869 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
872 int ic_i = 2 * ic + 1;
874 w2[ic_r + id1 + iv] += wt1r;
875 w2[ic_i + id1 + iv] += wt1i;
876 w2[ic_r + id2 + iv] += wt2r;
877 w2[ic_i + id2 + iv] += wt2i;
879 w2[ic_r + id3 + iv] += wt2r;
880 w2[ic_i + id3 + iv] += wt2i;
881 w2[ic_r + id4 + iv] -= wt1r;
882 w2[ic_i + id4 + iv] -= wt1i;
893 double *vcp1,
const double *v1)
895 const int Nvc2 = 2 *
m_Nvc;
897 const int Nvcd2 = Nvcd / 2;
900 const int id2 =
m_Nvc;
901 const int id3 =
m_Nvc * 2;
902 const int id4 =
m_Nvc * 3;
907 const int isite =
m_arg[itask].isite;
908 const int isite_cp =
m_arg[itask].isite_cp_z;
910 double *w2 = &vcp1[Nvcd2 * isite_cp];
911 const double *w1 = &v1[Nvcd * isite];
914 if (
m_arg[itask].kz0 == 1) {
918 for (
int it = 0; it <
m_Mt; ++it) {
919 for (
int ixy = 0; ixy < Nxy; ++ixy) {
920 int is = ixy + Nxy * (iz +
m_Nz * it);
921 int is2 = ixy + Nxy * it;
923 int ix1 = Nvc2 * is2;
924 int ix2 = ix1 +
m_Nvc;
926 for (
int ic = 0; ic <
m_Nc; ++ic) {
928 int ic_i = 2 * ic + 1;
930 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] - w1[ic_i + id3 + in]);
931 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_r + id3 + in]);
932 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_i + id4 + in]);
933 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] - w1[ic_r + id4 + in]);
943 double *v2,
const double *vcp2)
945 const int Nvc2 = 2 *
m_Nvc;
947 const int Nvcd2 = Nvcd / 2;
950 const int id2 =
m_Nvc;
951 const int id3 =
m_Nvc * 2;
952 const int id4 =
m_Nvc * 3;
956 const int isite =
m_arg[itask].isite;
957 const int isite_cp =
m_arg[itask].isite_cp_z;
959 double *w2 = &v2[Nvcd * isite];
960 const double *w1 = &vcp2[Nvcd2 * isite_cp];
964 if (
m_arg[itask].kz1 == 1) {
966 const int iz =
m_Mz - 1;
968 for (
int it = 0; it <
m_Mt; ++it) {
969 for (
int ixy = 0; ixy < Nxy; ++ixy) {
970 int is = ixy + Nxy * (iz +
m_Nz * it);
971 int is2 = ixy + Nxy * it;
974 int ix1 = Nvc2 * is2;
975 int ix2 = ix1 +
m_Nvc;
977 for (
int ic = 0; ic <
m_Nc; ++ic) {
978 int ic2 = ic *
m_Nvc;
980 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
981 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
982 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
983 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
986 int ic_i = 2 * ic + 1;
988 w2[ic_r + id1 + iv] += wt1r;
989 w2[ic_i + id1 + iv] += wt1i;
990 w2[ic_r + id2 + iv] += wt2r;
991 w2[ic_i + id2 + iv] += wt2i;
993 w2[ic_r + id3 + iv] += wt1i;
994 w2[ic_i + id3 + iv] -= wt1r;
995 w2[ic_r + id4 + iv] -= wt2i;
996 w2[ic_i + id4 + iv] += wt2r;
1006 double *v2,
const double *v1)
1012 const int id2 =
m_Nvc;
1013 const int id3 =
m_Nvc * 2;
1014 const int id4 =
m_Nvc * 3;
1018 const int isite =
m_arg[itask].isite;
1019 const int kz1 =
m_arg[itask].kz1;
1021 double *w2 = &v2[Nvcd * isite];
1022 const double *w1 = &v1[Nvcd * isite];
1026 for (
int it = 0; it <
m_Mt; ++it) {
1027 for (
int iz = 0; iz <
m_Mz - kz1; ++iz) {
1028 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1029 int is = ixy + Nxy * (iz +
m_Nz * it);
1031 int in = Nvcd * (is + Nxy);
1032 int ig =
m_Ndf * is;
1036 for (
int ic = 0; ic <
m_Nc; ++ic) {
1038 int ic_i = 2 * ic + 1;
1040 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_i + id3 + in];
1041 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_r + id3 + in];
1042 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_i + id4 + in];
1043 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_r + id4 + in];
1046 for (
int ic = 0; ic <
m_Nc; ++ic) {
1047 int ic2 = ic *
m_Nvc;
1049 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1050 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1051 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1052 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1055 int ic_i = 2 * ic + 1;
1057 w2[ic_r + id1 + iv] += wt1r;
1058 w2[ic_i + id1 + iv] += wt1i;
1059 w2[ic_r + id2 + iv] += wt2r;
1060 w2[ic_i + id2 + iv] += wt2i;
1062 w2[ic_r + id3 + iv] += wt1i;
1063 w2[ic_i + id3 + iv] -= wt1r;
1064 w2[ic_r + id4 + iv] -= wt2i;
1065 w2[ic_i + id4 + iv] += wt2r;
1075 double *vcp1,
const double *v1)
1077 const int Nvc2 = 2 *
m_Nvc;
1079 const int Nvcd2 = Nvcd / 2;
1082 const int id2 =
m_Nvc;
1083 const int id3 =
m_Nvc * 2;
1084 const int id4 =
m_Nvc * 3;
1088 const int isite =
m_arg[itask].isite;
1089 const int isite_cp =
m_arg[itask].isite_cp_z;
1091 double *w2 = &vcp1[Nvcd2 * isite_cp];
1092 const double *w1 = &v1[Nvcd * isite];
1096 if (
m_arg[itask].kz1 == 1) {
1098 const int iz =
m_Mz - 1;
1100 for (
int it = 0; it <
m_Mt; ++it) {
1101 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1102 int is = ixy + Nxy * (iz +
m_Nz * it);
1103 int is2 = ixy + Nxy * it;
1105 int ig =
m_Ndf * is;
1106 int ix1 = Nvc2 * is2;
1107 int ix2 = ix1 +
m_Nvc;
1111 for (
int ic = 0; ic <
m_Nc; ++ic) {
1113 int ic_i = 2 * ic + 1;
1115 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1116 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1117 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1118 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1121 for (
int ic = 0; ic <
m_Nc; ++ic) {
1125 int ic_i = 2 * ic + 1;
1127 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1128 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1129 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1130 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1140 double *v2,
const double *vcp2)
1142 const int Nvc2 = 2 *
m_Nvc;
1144 const int Nvcd2 = Nvcd / 2;
1147 const int id2 =
m_Nvc;
1148 const int id3 =
m_Nvc * 2;
1149 const int id4 =
m_Nvc * 3;
1154 const int isite =
m_arg[itask].isite;
1155 const int isite_cp =
m_arg[itask].isite_cp_z;
1157 double *w2 = &v2[Nvcd * isite];
1158 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1161 if (
m_arg[itask].kz0 == 1) {
1165 for (
int it = 0; it <
m_Mt; ++it) {
1166 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1167 int is = ixy + Nxy * (iz +
m_Nz * it);
1168 int is2 = ixy + Nxy * it;
1170 int ix1 = Nvc2 * is2;
1171 int ix2 = ix1 +
m_Nvc;
1173 for (
int ic = 0; ic <
m_Nc; ++ic) {
1175 int ic_i = 2 * ic + 1;
1177 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1178 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1179 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1180 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1182 w2[ic_r + id3 + iv] -= bc2 * w1[ic_i + ix1];
1183 w2[ic_i + id3 + iv] += bc2 * w1[ic_r + ix1];
1184 w2[ic_r + id4 + iv] += bc2 * w1[ic_i + ix2];
1185 w2[ic_i + id4 + iv] -= bc2 * w1[ic_r + ix2];
1195 double *v2,
const double *v1)
1201 const int id2 =
m_Nvc;
1202 const int id3 =
m_Nvc * 2;
1203 const int id4 =
m_Nvc * 3;
1207 const int isite =
m_arg[itask].isite;
1208 const int kz0 =
m_arg[itask].kz0;
1210 double *w2 = &v2[Nvcd * isite];
1211 const double *w1 = &v1[Nvcd * isite];
1215 for (
int it = 0; it <
m_Mt; ++it) {
1216 for (
int iz = kz0; iz <
m_Mz; ++iz) {
1217 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1218 int is = ixy + Nxy * (iz +
m_Nz * it);
1220 int in = Nvcd * (is - Nxy);
1221 int ig =
m_Ndf * (is - Nxy);
1225 for (
int ic = 0; ic <
m_Nc; ++ic) {
1227 int ic_i = 2 * ic + 1;
1229 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_i + id3 + in];
1230 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_r + id3 + in];
1231 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_i + id4 + in];
1232 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_r + id4 + in];
1235 for (
int ic = 0; ic <
m_Nc; ++ic) {
1238 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1239 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1240 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1241 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1244 int ic_i = 2 * ic + 1;
1246 w2[ic_r + id1 + iv] += wt1r;
1247 w2[ic_i + id1 + iv] += wt1i;
1248 w2[ic_r + id2 + iv] += wt2r;
1249 w2[ic_i + id2 + iv] += wt2i;
1251 w2[ic_r + id3 + iv] -= wt1i;
1252 w2[ic_i + id3 + iv] += wt1r;
1253 w2[ic_r + id4 + iv] += wt2i;
1254 w2[ic_i + id4 + iv] -= wt2r;
1264 double *vcp1,
const double *v1)
1266 const int Nvc2 = 2 *
m_Nvc;
1268 const int Nvcd2 = Nvcd / 2;
1270 const int id3 =
m_Nvc * 2;
1271 const int id4 =
m_Nvc * 3;
1276 const int isite =
m_arg[itask].isite;
1277 const int isite_cp =
m_arg[itask].isite_cp_t;
1279 double *w2 = &vcp1[Nvcd2 * isite_cp];
1280 const double *w1 = &v1[Nvcd * isite];
1283 if (
m_arg[itask].kt0 == 1) {
1287 for (
int iz = 0; iz <
m_Mz; ++iz) {
1288 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1289 int is = ixy + Nxy * (iz +
m_Nz * it);
1290 int is2 = ixy + Nxy * iz;
1292 int ix1 = Nvc2 * is2;
1293 int ix2 = ix1 +
m_Nvc;
1295 for (
int ic = 0; ic <
m_Nc; ++ic) {
1297 int ic_i = 2 * ic + 1;
1299 w2[ic_r + ix1] = 2.0 * bc2 * w1[ic_r + id3 + in];
1300 w2[ic_i + ix1] = 2.0 * bc2 * w1[ic_i + id3 + in];
1301 w2[ic_r + ix2] = 2.0 * bc2 * w1[ic_r + id4 + in];
1302 w2[ic_i + ix2] = 2.0 * bc2 * w1[ic_i + id4 + in];
1312 double *v2,
const double *vcp2)
1314 const int Nvc2 = 2 *
m_Nvc;
1316 const int Nvcd2 = Nvcd / 2;
1318 const int id3 =
m_Nvc * 2;
1319 const int id4 =
m_Nvc * 3;
1323 const int isite =
m_arg[itask].isite;
1324 const int isite_cp =
m_arg[itask].isite_cp_t;
1326 double *w2 = &v2[Nvcd * isite];
1327 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1331 if (
m_arg[itask].kt1 == 1) {
1333 const int it =
m_Mt - 1;
1335 for (
int iz = 0; iz <
m_Mz; ++iz) {
1336 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1337 int is = ixy + Nxy * (iz +
m_Nz * it);
1338 int is2 = ixy + Nxy * iz;
1340 int ig =
m_Ndf * is;
1341 int ix1 = Nvc2 * is2;
1342 int ix2 = ix1 +
m_Nvc;
1344 for (
int ic = 0; ic <
m_Nc; ++ic) {
1345 int ic2 = ic *
m_Nvc;
1348 int ic_i = 2 * ic + 1;
1350 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1351 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1352 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1353 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1363 double *v2,
const double *v1)
1367 const int Nxyz =
m_Nx * m_Ny *
m_Nz;
1369 const int id3 =
m_Nvc * 2;
1370 const int id4 =
m_Nvc * 3;
1374 const int isite =
m_arg[itask].isite;
1375 const int kt1 =
m_arg[itask].kt1;
1377 double *w2 = &v2[Nvcd * isite];
1378 const double *w1 = &v1[Nvcd * isite];
1384 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1385 for (
int iz = 0; iz <
m_Mz; ++iz) {
1386 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1387 int is = ixy + Nxy * (iz + m_Nz * it);
1389 int in = Nvcd * (is + Nxyz);
1390 int ig =
m_Ndf * is;
1392 for (
int ic = 0; ic <
m_Nc; ++ic) {
1394 int ic_i = 2 * ic + 1;
1396 vt1[ic_r] = 2.0 * w1[ic_r + id3 + in];
1397 vt1[ic_i] = 2.0 * w1[ic_i + id3 + in];
1398 vt2[ic_r] = 2.0 * w1[ic_r + id4 + in];
1399 vt2[ic_i] = 2.0 * w1[ic_i + id4 + in];
1402 for (
int ic = 0; ic <
m_Nc; ++ic) {
1403 int ic2 = ic *
m_Nvc;
1406 int ic_i = 2 * ic + 1;
1408 w2[ic_r + id3 + iv] += mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1409 w2[ic_i + id3 + iv] += mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1410 w2[ic_r + id4 + iv] += mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1411 w2[ic_i + id4 + iv] += mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1421 double *vcp1,
const double *v1)
1423 const int Nvc2 = 2 *
m_Nvc;
1425 const int Nvcd2 = Nvcd / 2;
1428 const int id2 =
m_Nvc;
1432 const int isite =
m_arg[itask].isite;
1433 const int isite_cp =
m_arg[itask].isite_cp_t;
1435 double *w2 = &vcp1[Nvcd2 * isite_cp];
1436 const double *w1 = &v1[Nvcd * isite];
1440 if (
m_arg[itask].kt1 == 1) {
1442 const int it =
m_Mt - 1;
1444 for (
int iz = 0; iz <
m_Mz; ++iz) {
1445 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1446 int is = ixy + Nxy * (iz +
m_Nz * it);
1447 int is2 = ixy + Nxy * iz;
1449 int ig =
m_Ndf * is;
1450 int ix1 = Nvc2 * is2;
1451 int ix2 = ix1 +
m_Nvc;
1455 for (
int ic = 0; ic <
m_Nc; ++ic) {
1457 int ic_i = 2 * ic + 1;
1459 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1460 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1461 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1462 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1465 for (
int ic = 0; ic <
m_Nc; ++ic) {
1469 int ic_i = 2 * ic + 1;
1471 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1472 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1473 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1474 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1484 double *v2,
const double *vcp2)
1486 const int Nvc2 = 2 *
m_Nvc;
1488 const int Nvcd2 = Nvcd / 2;
1491 const int id2 =
m_Nvc;
1496 const int isite =
m_arg[itask].isite;
1497 const int isite_cp =
m_arg[itask].isite_cp_t;
1499 double *w2 = &v2[Nvcd * isite];
1500 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1503 if (
m_arg[itask].kt0 == 1) {
1507 for (
int iz = 0; iz <
m_Mz; ++iz) {
1508 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1509 int is = ixy + Nxy * (iz +
m_Nz * it);
1510 int is2 = ixy + Nxy * iz;
1512 int ix1 = Nvc2 * is2;
1513 int ix2 = ix1 +
m_Nvc;
1515 for (
int ic = 0; ic <
m_Nc; ++ic) {
1517 int ic_i = 2 * ic + 1;
1519 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1520 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1521 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1522 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
1532 double *v2,
const double *v1)
1536 const int Nxyz = Nxy *
m_Nz;
1539 const int id2 =
m_Nvc;
1543 const int isite =
m_arg[itask].isite;
1544 const int kt0 =
m_arg[itask].kt0;
1546 double *w2 = &v2[Nvcd * isite];
1547 const double *w1 = &v1[Nvcd * isite];
1551 for (
int it = kt0; it <
m_Mt; ++it) {
1552 for (
int iz = 0; iz <
m_Mz; ++iz) {
1553 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1554 int is = ixy + Nxy * (iz + m_Nz * it);
1556 int in = Nvcd * (is - Nxyz);
1557 int ig =
m_Ndf * (is - Nxyz);
1561 for (
int ic = 0; ic <
m_Nc; ++ic) {
1563 int ic_i = 2 * ic + 1;
1565 vt1[ic_r] = 2.0 * w1[ic_r + id1 + in];
1566 vt1[ic_i] = 2.0 * w1[ic_i + id1 + in];
1567 vt2[ic_r] = 2.0 * w1[ic_r + id2 + in];
1568 vt2[ic_i] = 2.0 * w1[ic_i + id2 + in];
1571 for (
int ic = 0; ic <
m_Nc; ++ic) {
1575 int ic_i = 2 * ic + 1;
1577 w2[ic_r + id1 + iv] += mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1578 w2[ic_i + id1 + iv] += mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1579 w2[ic_r + id2 + iv] += mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1580 w2[ic_i + id2 + iv] += mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1590 double *vcp1,
const double *v1)
1592 const int Nvc2 = 2 *
m_Nvc;
1594 const int Nvcd2 = Nvcd / 2;
1597 const int id2 =
m_Nvc;
1598 const int id3 =
m_Nvc * 2;
1599 const int id4 =
m_Nvc * 3;
1604 const int isite =
m_arg[itask].isite;
1605 const int isite_cp =
m_arg[itask].isite_cp_t;
1607 double *w2 = &vcp1[Nvcd2 * isite_cp];
1608 const double *w1 = &v1[Nvcd * isite];
1611 if (
m_arg[itask].kt0 == 1) {
1615 for (
int iz = 0; iz <
m_Mz; ++iz) {
1616 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1617 int is = ixy + Nxy * (iz +
m_Nz * it);
1618 int is2 = ixy + Nxy * iz;
1620 int ix1 = Nvc2 * is2;
1621 int ix2 = ix1 +
m_Nvc;
1623 for (
int ic = 0; ic <
m_Nc; ++ic) {
1625 int ic_i = 2 * ic + 1;
1627 w2[ic_r + ix1] = bc2 * (w1[ic_r + id1 + in] + w1[ic_r + id3 + in]);
1628 w2[ic_i + ix1] = bc2 * (w1[ic_i + id1 + in] + w1[ic_i + id3 + in]);
1629 w2[ic_r + ix2] = bc2 * (w1[ic_r + id2 + in] + w1[ic_r + id4 + in]);
1630 w2[ic_i + ix2] = bc2 * (w1[ic_i + id2 + in] + w1[ic_i + id4 + in]);
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy, id1,
// iv, ic_r and u. Presumably this is mult_tp2_chiral_thread -- confirm against
// the original source.
//
// Visible behaviour: for a task whose kt1 flag is 1, take the two m_Nvc-sized
// blocks stored per site in the buffer vcp2, apply mult_uv_r / mult_uv_i with
// the data at u[ic2 + ig], and accumulate the results into v2 on the slice
// it = m_Mt - 1 (first result into blocks id1 and id3, second into id2 and id4).
1640 double *v2,
const double *vcp2)
// Nvc2: number of doubles stored per site in the buffer (two blocks of m_Nvc).
1642 const int Nvc2 = 2 *
m_Nvc;
// Per-site stride used to offset the buffer pointer by isite_cp below.
1644 const int Nvcd2 = Nvcd / 2;
// Offsets of blocks 2..4 inside one site of the full field, in steps of m_Nvc.
1647 const int id2 =
m_Nvc;
1648 const int id3 =
m_Nvc * 2;
1649 const int id4 =
m_Nvc * 3;
// Per-task starting site in the full field (isite) and in the buffer (isite_cp_t).
1653 const int isite =
m_arg[itask].isite;
1654 const int isite_cp =
m_arg[itask].isite_cp_t;
// w2 accumulates into the output field, w1 reads from the buffer.
1656 double *w2 = &v2[Nvcd * isite];
1657 const double *w1 = &vcp2[Nvcd2 * isite_cp];
// Only tasks flagged kt1 == 1 do any work (the setup code sets kt1 = 1 for ith_t == m_Ntask_t - 1).
1661 if (
m_arg[itask].kt1 == 1) {
// Work is restricted to the last local slice in `it`.
1663 const int it =
m_Mt - 1;
1665 for (
int iz = 0; iz <
m_Mz; ++iz) {
1666 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index inside the full field; is2: site index inside the (z, xy) slab of the buffer.
1667 int is = ixy + Nxy * (iz +
m_Nz * it);
1668 int is2 = ixy + Nxy * iz;
// ig: offset of this site's data in u (m_Ndf doubles per site).
1670 int ig =
m_Ndf * is;
// ix1 / ix2: start of the first / second m_Nvc-sized block of this site in the buffer.
1671 int ix1 = Nvc2 * is2;
1672 int ix2 = ix1 +
m_Nvc;
1674 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic2 selects the ic-th m_Nvc-sized chunk of the site's data in u.
1675 int ic2 = ic *
m_Nvc;
// _r / _i results for each of the two buffer blocks.
1677 double wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1678 double wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1679 double wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1680 double wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// ic_i is the odd slot of component ic; ic_r is presumably the even slot 2 * ic (its line is missing).
1683 int ic_i = 2 * ic + 1;
// Accumulate the first result into block id1 and the second into block id2 ...
1685 w2[ic_r + id1 + iv] += wt1r;
1686 w2[ic_i + id1 + iv] += wt1i;
1687 w2[ic_r + id2 + iv] += wt2r;
1688 w2[ic_i + id2 + iv] += wt2i;
// ... and the same values, with the same sign, into blocks id3 and id4.
1690 w2[ic_r + id3 + iv] += wt1r;
1691 w2[ic_i + id3 + iv] += wt1i;
1692 w2[ic_r + id4 + iv] += wt2r;
1693 w2[ic_i + id4 + iv] += wt2i;
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy, id1,
// iv, ic_r, vt1, vt2 and u. Presumably this is mult_tpb_chiral_thread --
// confirm against the original source.
//
// Visible behaviour: for every site of the task (skipping the last `it` slice
// when kt1 == 1), read v1 at the site displaced by +Nxyz, form the sums
// (id1 + id3) and (id2 + id4), apply mult_uv_r / mult_uv_i with the data at
// u[ic2 + ig] of the current site, and accumulate into all four blocks of v2.
1703 double *v2,
const double *v1)
// Nxyz: number of sites in one full (x, y, z) volume; used as the site displacement below.
1707 const int Nxyz =
m_Nx * m_Ny *
m_Nz;
// Offsets of blocks 2..4 inside one site of the field, in steps of m_Nvc.
1710 const int id2 =
m_Nvc;
1711 const int id3 =
m_Nvc * 2;
1712 const int id4 =
m_Nvc * 3;
// Per-task starting site and the task's kt1 flag (set to 1 in setup for ith_t == m_Ntask_t - 1).
1716 const int isite =
m_arg[itask].isite;
1717 const int kt1 =
m_arg[itask].kt1;
1719 double *w2 = &v2[Nvcd * isite];
1720 const double *w1 = &v1[Nvcd * isite];
// Tasks with kt1 == 1 stop one slice early in `it`.
1724 for (
int it = 0; it <
m_Mt - kt1; ++it) {
1725 for (
int iz = 0; iz <
m_Mz; ++iz) {
1726 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1727 int is = ixy + Nxy * (iz + m_Nz * it);
// in: element offset of the site Nxyz sites ahead of `is`; ig: offset of the current site's data in u.
1729 int in = Nvcd * (is + Nxyz);
1730 int ig =
m_Ndf * is;
// First pass over ic: build the two temporary vectors from the displaced site.
1734 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic_i is the odd slot of component ic; ic_r is presumably the even slot 2 * ic (its line is missing).
1736 int ic_i = 2 * ic + 1;
// vt1 = block id1 + block id3, vt2 = block id2 + block id4.
1738 vt1[ic_r] = w1[ic_r + id1 + in] + w1[ic_r + id3 + in];
1739 vt1[ic_i] = w1[ic_i + id1 + in] + w1[ic_i + id3 + in];
1740 vt2[ic_r] = w1[ic_r + id2 + in] + w1[ic_r + id4 + in];
1741 vt2[ic_i] = w1[ic_i + id2 + in] + w1[ic_i + id4 + in];
// Second pass over ic: apply the mult_uv helpers and accumulate into v2.
1744 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic2 selects the ic-th m_Nvc-sized chunk of the site's data in u.
1745 int ic2 = ic *
m_Nvc;
1747 double wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1748 double wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1749 double wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1750 double wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1753 int ic_i = 2 * ic + 1;
// Accumulate the first result into block id1 and the second into block id2 ...
1755 w2[ic_r + id1 + iv] += wt1r;
1756 w2[ic_i + id1 + iv] += wt1i;
1757 w2[ic_r + id2 + iv] += wt2r;
1758 w2[ic_i + id2 + iv] += wt2i;
// ... and the same values, with the same sign, into blocks id3 and id4.
1760 w2[ic_r + id3 + iv] += wt1r;
1761 w2[ic_i + id3 + iv] += wt1i;
1762 w2[ic_r + id4 + iv] += wt2r;
1763 w2[ic_i + id4 + iv] += wt2i;
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy, id1,
// in, ic_r, ic2, vt1, vt2 and u. Presumably this is mult_tm1_chiral_thread --
// confirm against the original source.
//
// Visible behaviour: for a task whose kt1 flag is 1, on the slice
// it = m_Mt - 1, form the differences (id1 - id3) and (id2 - id4) of v1,
// apply mult_udagv_r / mult_udagv_i with the data at u[ic2 + ig], and store
// (not accumulate) the results into the buffer vcp1.
1773 double *vcp1,
const double *v1)
// Nvc2: number of doubles stored per site in the buffer (two blocks of m_Nvc).
1775 const int Nvc2 = 2 *
m_Nvc;
// Per-site stride used to offset the buffer pointer by isite_cp below.
1777 const int Nvcd2 = Nvcd / 2;
// Offsets of blocks 2..4 inside one site of the full field, in steps of m_Nvc.
1780 const int id2 =
m_Nvc;
1781 const int id3 =
m_Nvc * 2;
1782 const int id4 =
m_Nvc * 3;
// Per-task starting site in the full field (isite) and in the buffer (isite_cp_t).
1786 const int isite =
m_arg[itask].isite;
1787 const int isite_cp =
m_arg[itask].isite_cp_t;
// w2 is the write pointer into the buffer, w1 the read pointer into the field.
1789 double *w2 = &vcp1[Nvcd2 * isite_cp];
1790 const double *w1 = &v1[Nvcd * isite];
// Only tasks flagged kt1 == 1 do any work (the setup code sets kt1 = 1 for ith_t == m_Ntask_t - 1).
1794 if (
m_arg[itask].kt1 == 1) {
// Work is restricted to the last local slice in `it`.
1796 const int it =
m_Mt - 1;
1798 for (
int iz = 0; iz <
m_Mz; ++iz) {
1799 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index inside the full field; is2: site index inside the (z, xy) slab of the buffer.
1800 int is = ixy + Nxy * (iz +
m_Nz * it);
1801 int is2 = ixy + Nxy * iz;
// ig: offset of this site's data in u (m_Ndf doubles per site).
1803 int ig =
m_Ndf * is;
// ix1 / ix2: start of the first / second m_Nvc-sized block of this site in the buffer.
1804 int ix1 = Nvc2 * is2;
1805 int ix2 = ix1 +
m_Nvc;
// First pass over ic: build the two temporary vectors.
1809 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic_i is the odd slot of component ic; ic_r is presumably the even slot 2 * ic (its line is missing).
1811 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id3, vt2 = block id2 - block id4.
1813 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1814 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1815 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1816 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
// Second pass over ic: apply the mult_udagv helpers and store into the buffer.
1819 for (
int ic = 0; ic <
m_Nc; ++ic) {
1823 int ic_i = 2 * ic + 1;
1825 w2[ic_r + ix1] = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1826 w2[ic_i + ix1] = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1827 w2[ic_r + ix2] = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1828 w2[ic_i + ix2] = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy, id1,
// it, iv, ic_r and bc2. Presumably this is mult_tm2_chiral_thread -- confirm
// against the original source.
//
// Visible behaviour: for a task whose kt0 flag is 1, read the two m_Nvc-sized
// blocks stored per site in the buffer vcp2, scale them by bc2, add them to
// blocks id1 / id2 of v2 and subtract them from blocks id3 / id4.
1838 double *v2,
const double *vcp2)
// Nvc2: number of doubles stored per site in the buffer (two blocks of m_Nvc).
1840 const int Nvc2 = 2 *
m_Nvc;
// Per-site stride used to offset the buffer pointer by isite_cp below.
1842 const int Nvcd2 = Nvcd / 2;
// Offsets of blocks 2..4 inside one site of the full field, in steps of m_Nvc.
1845 const int id2 =
m_Nvc;
1846 const int id3 =
m_Nvc * 2;
1847 const int id4 =
m_Nvc * 3;
// Per-task starting site in the full field (isite) and in the buffer (isite_cp_t).
1852 const int isite =
m_arg[itask].isite;
1853 const int isite_cp =
m_arg[itask].isite_cp_t;
// w2 accumulates into the output field, w1 reads from the buffer.
1855 double *w2 = &v2[Nvcd * isite];
1856 const double *w1 = &vcp2[Nvcd2 * isite_cp];
// Only tasks flagged kt0 == 1 do any work (the setup code sets kt0 = 1 for ith_t == 0).
1859 if (
m_arg[itask].kt0 == 1) {
1863 for (
int iz = 0; iz <
m_Mz; ++iz) {
1864 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: site index inside the full field; is2: site index inside the (z, xy) slab of the buffer.
1865 int is = ixy + Nxy * (iz +
m_Nz * it);
1866 int is2 = ixy + Nxy * iz;
// ix1 / ix2: start of the first / second m_Nvc-sized block of this site in the buffer.
1868 int ix1 = Nvc2 * is2;
1869 int ix2 = ix1 +
m_Nvc;
1871 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic_i is the odd slot of component ic; ic_r is presumably the even slot 2 * ic (its line is missing).
1873 int ic_i = 2 * ic + 1;
// Blocks id1 / id2 receive +bc2 * buffer ...
1875 w2[ic_r + id1 + iv] += bc2 * w1[ic_r + ix1];
1876 w2[ic_i + id1 + iv] += bc2 * w1[ic_i + ix1];
1877 w2[ic_r + id2 + iv] += bc2 * w1[ic_r + ix2];
1878 w2[ic_i + id2 + iv] += bc2 * w1[ic_i + ix2];
// ... while blocks id3 / id4 receive -bc2 * buffer.
1880 w2[ic_r + id3 + iv] -= bc2 * w1[ic_r + ix1];
1881 w2[ic_i + id3 + iv] -= bc2 * w1[ic_i + ix1];
1882 w2[ic_r + id4 + iv] -= bc2 * w1[ic_r + ix2];
1883 w2[ic_i + id4 + iv] -= bc2 * w1[ic_i + ix2];
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy, id1,
// iv, ic_r, ic2, vt1, vt2 and u. Presumably this is mult_tmb_chiral_thread --
// confirm against the original source.
//
// Visible behaviour: for every site of the task (skipping the first `it`
// slice when kt0 == 1), read v1 and u at the site displaced by -Nxyz, form
// the differences (id1 - id3) and (id2 - id4), apply mult_udagv_r /
// mult_udagv_i, then add the results to blocks id1 / id2 of v2 and subtract
// them from blocks id3 / id4.
1893 double *v2,
const double *v1)
// Nxyz: Nxy * m_Nz sites, i.e. the index distance between consecutive `it` slices.
1897 const int Nxyz = Nxy *
m_Nz;
// Offsets of blocks 2..4 inside one site of the field, in steps of m_Nvc.
1900 const int id2 =
m_Nvc;
1901 const int id3 =
m_Nvc * 2;
1902 const int id4 =
m_Nvc * 3;
// Per-task starting site and the task's kt0 flag (set to 1 in setup for ith_t == 0).
1906 const int isite =
m_arg[itask].isite;
1907 const int kt0 =
m_arg[itask].kt0;
1909 double *w2 = &v2[Nvcd * isite];
1910 const double *w1 = &v1[Nvcd * isite];
// Tasks with kt0 == 1 start one slice late in `it`.
1914 for (
int it = kt0; it <
m_Mt; ++it) {
1915 for (
int iz = 0; iz <
m_Mz; ++iz) {
1916 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1917 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the field element offset (in) and the u offset (ig) refer to the site one `it` slice back.
1919 int in = Nvcd * (is - Nxyz);
1920 int ig =
m_Ndf * (is - Nxyz);
// First pass over ic: build the two temporary vectors from the displaced site.
1924 for (
int ic = 0; ic <
m_Nc; ++ic) {
// ic_i is the odd slot of component ic; ic_r is presumably the even slot 2 * ic (its line is missing).
1926 int ic_i = 2 * ic + 1;
// vt1 = block id1 - block id3, vt2 = block id2 - block id4.
1928 vt1[ic_r] = w1[ic_r + id1 + in] - w1[ic_r + id3 + in];
1929 vt1[ic_i] = w1[ic_i + id1 + in] - w1[ic_i + id3 + in];
1930 vt2[ic_r] = w1[ic_r + id2 + in] - w1[ic_r + id4 + in];
1931 vt2[ic_i] = w1[ic_i + id2 + in] - w1[ic_i + id4 + in];
// Second pass over ic: apply the mult_udagv helpers and accumulate into v2.
1934 for (
int ic = 0; ic <
m_Nc; ++ic) {
1937 double wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1938 double wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1939 double wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1940 double wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1943 int ic_i = 2 * ic + 1;
// Blocks id1 / id2 receive +result ...
1945 w2[ic_r + id1 + iv] += wt1r;
1946 w2[ic_i + id1 + iv] += wt1i;
1947 w2[ic_r + id2 + iv] += wt2r;
1948 w2[ic_i + id2 + iv] += wt2i;
// ... while blocks id3 / id4 receive -result.
1950 w2[ic_r + id3 + iv] -= wt1r;
1951 w2[ic_i + id3 + iv] -= wt1i;
1952 w2[ic_r + id4 + iv] -= wt2r;
1953 w2[ic_i + id4 + iv] -= wt2i;
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy and
// id1. Presumably this is gm5_dirac_thread -- confirm against the original
// source.
//
// Visible behaviour: for every site of the task, copy v1 into v2 with the
// m_Nvc-sized blocks exchanged pairwise: id1 <- id3, id2 <- id4, id3 <- id1,
// id4 <- id2. v2 is overwritten, not accumulated into.
1963 double *v2,
const double *v1)
// Offsets of blocks 2..4 inside one site of the field, in steps of m_Nvc.
1969 const int id2 =
m_Nvc;
1970 const int id3 =
m_Nvc * 2;
1971 const int id4 =
m_Nvc * 3;
// Starting site of this task; both pointers are offset by the same amount.
1973 const int isite =
m_arg[itask].isite;
1975 double *w2 = &v2[Nvcd * isite];
1976 const double *w1 = &v1[Nvcd * isite];
1978 for (
int it = 0; it <
m_Mt; ++it) {
1979 for (
int iz = 0; iz <
m_Mz; ++iz) {
1980 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: element offset of the current site (Nvcd doubles per site).
1981 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
1983 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
// Swap block pairs (id1, id3) and (id2, id4) while copying.
1984 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1985 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1986 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1987 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// NOTE(review): this listing is missing the line that carries the function
// name and its first parameter, as well as the definitions of Nvcd, Nxy and
// id1. Presumably this is gm5_chiral_thread -- confirm against the original
// source.
//
// Visible behaviour: for every site of the task, copy v1 into v2 keeping
// blocks id1 and id2 unchanged and flipping the sign of blocks id3 and id4.
// v2 is overwritten, not accumulated into.
1997 double *v2,
const double *v1)
// Offsets of blocks 2..4 inside one site of the field, in steps of m_Nvc.
2003 const int id2 =
m_Nvc;
2004 const int id3 =
m_Nvc * 2;
2005 const int id4 =
m_Nvc * 3;
// Starting site of this task; both pointers are offset by the same amount.
2007 const int isite =
m_arg[itask].isite;
2009 double *w2 = &v2[Nvcd * isite];
2010 const double *w1 = &v1[Nvcd * isite];
2013 for (
int it = 0; it <
m_Mt; ++it) {
2014 for (
int iz = 0; iz <
m_Mz; ++iz) {
2015 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// iv: element offset of the current site (Nvcd doubles per site).
2016 int iv = Nvcd * (ixy + Nxy * (iz +
m_Nz * it));
2018 for (
int ivc = 0; ivc <
m_Nvc; ++ivc) {
// Blocks id1 / id2 are copied as-is; blocks id3 / id4 are negated.
2019 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2020 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2021 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2022 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_ypb_thread(const int, double *, const double *)
void mult_yp2_thread(const int, double *, const double *)
const double * ptr(const int jin, const int site, const int jex) const
void mult_xmb_thread(const int, double *, const double *)
void mult_tmb_chiral_thread(const int, double *, const double *)
void general(const char *format,...)
void mult_yp1_thread(const int, double *, const double *)
void mult_xpb_thread(const int, double *, const double *)
std::vector< double > m_boundary_each_node
Boundary condition for each node.
void mult_tp1_chiral_thread(const int, double *, const double *)
Bridge::VerboseLevel m_vl
void clear_thread(const int, double *)
const Field_G * m_U
gauge configuration.
void mult_tpb_chiral_thread(const int, double *, const double *)
void mult_tpb_dirac_thread(const int, double *, const double *)
void mult_zm2_thread(const int, double *, const double *)
void mult_tp1_dirac_thread(const int, double *, const double *)
void mult_zp2_thread(const int, double *, const double *)
void mult_xm1_thread(const int, double *, const double *)
void gm5_chiral_thread(const int, double *, const double *)
void mult_xp2_thread(const int, double *, const double *)
void mult_zmb_thread(const int, double *, const double *)
void mult_ymb_thread(const int, double *, const double *)
void daypx_thread(const int, double *, const double, const double *)
void mult_tm2_dirac_thread(const int, double *, const double *)
void mult_tm1_chiral_thread(const int, double *, const double *)
void mult_xp1_thread(const int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_tmb_dirac_thread(const int, double *, const double *)
void mult_tm1_dirac_thread(const int, double *, const double *)
void mult_tp2_dirac_thread(const int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void gm5_dirac_thread(const int, double *, const double *)
void mult_zm1_thread(const int, double *, const double *)
void crucial(const char *format,...)
void mult_tm2_chiral_thread(const int, double *, const double *)
void mult_ym2_thread(const int, double *, const double *)
void mult_tp2_chiral_thread(const int, double *, const double *)
void mult_xm2_thread(const int, double *, const double *)
void mult_zp1_thread(const int, double *, const double *)
void mult_zpb_thread(const int, double *, const double *)
void mult_ym1_thread(const int, double *, const double *)
static const std::string class_name