22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
50 vout.
crucial(
m_vl,
"%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
60 vout.
crucial(
m_vl,
"%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
66 vout.
crucial(
m_vl,
"%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
96 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
97 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
98 int itask = ith_z + m_Ntask_z * ith_t;
102 m_arg[itask].kt0 = 0;
103 m_arg[itask].kt1 = 0;
104 m_arg[itask].kz0 = 0;
105 m_arg[itask].kz1 = 0;
106 if (ith_t == 0)
m_arg[itask].kt0 = 1;
107 if (ith_z == 0)
m_arg[itask].kz0 = 1;
108 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
109 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
113 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy;
114 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy;
122 double *v2,
double fac,
const double *v1)
124 int Nvcd = m_Nvc * m_Nd;
125 int Nvxy = Nvcd * m_Nx * m_Ny;
127 int isite = m_arg[itask].isite;
128 double *w2 = &v2[Nvcd * isite];
129 const double *w1 = &v1[Nvcd * isite];
131 for (
int it = 0; it < m_Mt; ++it) {
132 for (
int iz = 0; iz < m_Mz; ++iz) {
133 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
134 int iv = ivxy + Nvxy * (iz + m_Nz * it);
135 w2[iv] = fac * w2[iv] + w1[iv];
146 int Nvcd = m_Nvc * m_Nd;
147 int Nvxy = Nvcd * m_Nx * m_Ny;
149 int isite = m_arg[itask].isite;
150 double *w2 = &v2[Nvcd * isite];
152 for (
int it = 0; it < m_Mt; ++it) {
153 for (
int iz = 0; iz < m_Mz; ++iz) {
154 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
155 int iv = ivxy + Nvxy * (iz + m_Nz * it);
165 double *vcp1,
const double *v1)
167 int Nvc2 = 2 * m_Nvc;
168 int Nvcd = m_Nvc * m_Nd;
169 int Nvcd2 = Nvcd / 2;
176 int isite = m_arg[itask].isite;
177 int isite_cp = m_arg[itask].isite_cpx;
179 double *w2 = &vcp1[Nvcd2 * isite_cp];
180 const double *w1 = &v1[Nvcd * isite];
183 double bc2 = m_boundary2[idir];
187 for (
int it = 0; it < m_Mt; ++it) {
188 for (
int iz = 0; iz < m_Mz; ++iz) {
189 for (
int iy = 0; iy < m_Ny; ++iy) {
190 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
191 int is2 = iy + m_Ny * (iz + m_Mz * it);
193 int ix1 = Nvc2 * is2;
194 int ix2 = ix1 + m_Nvc;
196 for (
int ic = 0; ic < m_Nc; ++ic) {
197 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
198 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
199 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
200 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
210 double *v2,
const double *vcp2)
212 int Nvc2 = 2 * m_Nvc;
213 int Nvcd = m_Nvc * m_Nd;
214 int Nvcd2 = Nvcd / 2;
223 double wt1r, wt1i, wt2r, wt2i;
225 int isite = m_arg[itask].isite;
226 int isite_cp = m_arg[itask].isite_cpx;
228 double *w2 = &v2[Nvcd * isite];
229 const double *w1 = &vcp2[Nvcd2 * isite_cp];
230 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
234 for (
int it = 0; it < m_Mt; ++it) {
235 for (
int iz = 0; iz < m_Mz; ++iz) {
236 for (
int iy = 0; iy < m_Ny; ++iy) {
237 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
238 int is2 = iy + m_Ny * (iz + m_Mz * it);
241 int ix1 = Nvc2 * is2;
242 int ix2 = ix1 + m_Nvc;
244 for (
int ic = 0; ic < m_Nc; ++ic) {
245 int ic2 = ic * m_Nvc;
247 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
248 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
249 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
250 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
252 w2[2 * ic + id1 + iv] += wt1r;
253 w2[2 * ic + 1 + id1 + iv] += wt1i;
254 w2[2 * ic + id2 + iv] += wt2r;
255 w2[2 * ic + 1 + id2 + iv] += wt2i;
256 w2[2 * ic + id3 + iv] += wt2i;
257 w2[2 * ic + 1 + id3 + iv] += -wt2r;
258 w2[2 * ic + id4 + iv] += wt1i;
259 w2[2 * ic + 1 + id4 + iv] += -wt1r;
269 double *v2,
const double *v1)
271 int Nvcd = m_Nvc * m_Nd;
280 double vt1[m_Nvc], vt2[m_Nvc];
281 double wt1r, wt1i, wt2r, wt2i;
283 int isite = m_arg[itask].isite;
285 double *w2 = &v2[Nvcd * isite];
286 const double *w1 = &v1[Nvcd * isite];
287 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
289 for (
int it = 0; it < m_Mt; ++it) {
290 for (
int iz = 0; iz < m_Mz; ++iz) {
291 for (
int iy = 0; iy < m_Ny; ++iy) {
292 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
293 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
295 int in = Nvcd * (is + 1);
298 for (
int ic = 0; ic < m_Nc; ++ic) {
299 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
300 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
301 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
302 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
305 for (
int ic = 0; ic < m_Nc; ++ic) {
306 int ic2 = ic * m_Nvc;
308 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
309 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
310 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
311 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
313 w2[2 * ic + id1 + iv] += wt1r;
314 w2[2 * ic + 1 + id1 + iv] += wt1i;
315 w2[2 * ic + id2 + iv] += wt2r;
316 w2[2 * ic + 1 + id2 + iv] += wt2i;
317 w2[2 * ic + id3 + iv] += wt2i;
318 w2[2 * ic + 1 + id3 + iv] += -wt2r;
319 w2[2 * ic + id4 + iv] += wt1i;
320 w2[2 * ic + 1 + id4 + iv] += -wt1r;
331 double *vcp1,
const double *v1)
333 int Nvc2 = 2 * m_Nvc;
334 int Nvcd = m_Nvc * m_Nd;
335 int Nvcd2 = Nvcd / 2;
344 int isite = m_arg[itask].isite;
345 int isite_cp = m_arg[itask].isite_cpx;
347 double *w2 = &vcp1[Nvcd2 * isite_cp];
348 const double *w1 = &v1[Nvcd * isite];
349 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
351 double vt1[m_Nvc], vt2[m_Nvc];
355 for (
int it = 0; it < m_Mt; ++it) {
356 for (
int iz = 0; iz < m_Mz; ++iz) {
357 for (
int iy = 0; iy < m_Ny; ++iy) {
358 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
359 int is2 = iy + m_Ny * (iz + m_Mz * it);
362 int ix1 = Nvc2 * is2;
363 int ix2 = ix1 + m_Nvc;
365 for (
int ic = 0; ic < m_Nc; ++ic) {
366 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
367 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
368 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
369 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
372 for (
int ic = 0; ic < m_Nc; ++ic) {
374 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
375 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
376 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
377 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
387 double *v2,
const double *vcp2)
389 int Nvc2 = 2 * m_Nvc;
390 int Nvcd = m_Nvc * m_Nd;
391 int Nvcd2 = Nvcd / 2;
399 double bc2 = m_boundary2[idir];
403 int isite = m_arg[itask].isite;
404 int isite_cp = m_arg[itask].isite_cpx;
406 double *w2 = &v2[Nvcd * isite];
407 const double *w1 = &vcp2[Nvcd2 * isite_cp];
411 for (
int it = 0; it < m_Mt; ++it) {
412 for (
int iz = 0; iz < m_Mz; ++iz) {
413 for (
int iy = 0; iy < m_Ny; ++iy) {
414 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
415 int is2 = iy + m_Ny * (iz + m_Mz * it);
417 int ix1 = Nvc2 * is2;
418 int ix2 = ix1 + m_Nvc;
420 for (
int ic = 0; ic < m_Nc; ++ic) {
422 int ici = 2 * ic + 1;
423 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
424 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
425 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
426 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
427 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
428 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
429 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
430 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
440 double *v2,
const double *v1)
442 int Nvcd = m_Nvc * m_Nd;
451 double vt1[m_Nvc], vt2[m_Nvc];
452 double wt1r, wt1i, wt2r, wt2i;
454 int isite = m_arg[itask].isite;
456 double *w2 = &v2[Nvcd * isite];
457 const double *w1 = &v1[Nvcd * isite];
458 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
460 for (
int it = 0; it < m_Mt; ++it) {
461 for (
int iz = 0; iz < m_Mz; ++iz) {
462 for (
int iy = 0; iy < m_Ny; ++iy) {
463 for (
int ix = 1; ix < m_Nx; ++ix) {
464 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
466 int in = Nvcd * (is - 1);
467 int ig = m_Ndf * (is - 1);
469 for (
int ic = 0; ic < m_Nc; ++ic) {
470 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
471 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
472 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
473 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
476 for (
int ic = 0; ic < m_Nc; ++ic) {
479 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
480 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
481 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
482 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
484 w2[2 * ic + id1 + iv] += wt1r;
485 w2[2 * ic + 1 + id1 + iv] += wt1i;
486 w2[2 * ic + id2 + iv] += wt2r;
487 w2[2 * ic + 1 + id2 + iv] += wt2i;
488 w2[2 * ic + id3 + iv] += -wt2i;
489 w2[2 * ic + 1 + id3 + iv] += +wt2r;
490 w2[2 * ic + id4 + iv] += -wt1i;
491 w2[2 * ic + 1 + id4 + iv] += +wt1r;
502 double *vcp1,
const double *v1)
504 int Nvc2 = 2 * m_Nvc;
505 int Nvcd = m_Nvc * m_Nd;
506 int Nvcd2 = Nvcd / 2;
513 int isite = m_arg[itask].isite;
514 int isite_cp = m_arg[itask].isite_cpy;
516 double *w2 = &vcp1[Nvcd2 * isite_cp];
517 const double *w1 = &v1[Nvcd * isite];
520 double bc2 = m_boundary2[idir];
524 for (
int it = 0; it < m_Mt; ++it) {
525 for (
int iz = 0; iz < m_Mz; ++iz) {
526 for (
int ix = 0; ix < m_Nx; ++ix) {
527 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
528 int is2 = ix + m_Nx * (iz + m_Mz * it);
530 int ix1 = Nvc2 * is2;
531 int ix2 = ix1 + m_Nvc;
533 for (
int ic = 0; ic < m_Nc; ++ic) {
534 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
535 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
536 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
537 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
547 double *v2,
const double *vcp2)
549 int Nvc2 = 2 * m_Nvc;
550 int Nvcd = m_Nvc * m_Nd;
551 int Nvcd2 = Nvcd / 2;
560 double wt1r, wt1i, wt2r, wt2i;
562 int isite = m_arg[itask].isite;
563 int isite_cp = m_arg[itask].isite_cpy;
565 double *w2 = &v2[Nvcd * isite];
566 const double *w1 = &vcp2[Nvcd2 * isite_cp];
567 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
571 for (
int it = 0; it < m_Mt; ++it) {
572 for (
int iz = 0; iz < m_Mz; ++iz) {
573 for (
int ix = 0; ix < m_Nx; ++ix) {
574 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
575 int is2 = ix + m_Nx * (iz + m_Mz * it);
578 int ix1 = Nvc2 * is2;
579 int ix2 = ix1 + m_Nvc;
581 for (
int ic = 0; ic < m_Nc; ++ic) {
582 int ic2 = ic * m_Nvc;
584 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
585 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
586 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
587 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
589 w2[2 * ic + id1 + iv] += wt1r;
590 w2[2 * ic + 1 + id1 + iv] += wt1i;
591 w2[2 * ic + id2 + iv] += wt2r;
592 w2[2 * ic + 1 + id2 + iv] += wt2i;
593 w2[2 * ic + id3 + iv] += -wt2r;
594 w2[2 * ic + 1 + id3 + iv] += -wt2i;
595 w2[2 * ic + id4 + iv] += wt1r;
596 w2[2 * ic + 1 + id4 + iv] += wt1i;
606 double *v2,
const double *v1)
608 int Nvcd = m_Nvc * m_Nd;
617 double vt1[m_Nvc], vt2[m_Nvc];
618 double wt1r, wt1i, wt2r, wt2i;
620 int isite = m_arg[itask].isite;
622 double *w2 = &v2[Nvcd * isite];
623 const double *w1 = &v1[Nvcd * isite];
624 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
626 for (
int it = 0; it < m_Mt; ++it) {
627 for (
int iz = 0; iz < m_Mz; ++iz) {
628 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
629 for (
int ix = 0; ix < m_Nx; ++ix) {
630 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
632 int in = Nvcd * (is + m_Nx);
635 for (
int ic = 0; ic < m_Nc; ++ic) {
636 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
637 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
638 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
639 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
642 for (
int ic = 0; ic < m_Nc; ++ic) {
643 int ic2 = ic * m_Nvc;
645 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
646 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
647 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
648 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
650 w2[2 * ic + id1 + iv] += wt1r;
651 w2[2 * ic + 1 + id1 + iv] += wt1i;
652 w2[2 * ic + id2 + iv] += wt2r;
653 w2[2 * ic + 1 + id2 + iv] += wt2i;
654 w2[2 * ic + id3 + iv] += -wt2r;
655 w2[2 * ic + 1 + id3 + iv] += -wt2i;
656 w2[2 * ic + id4 + iv] += wt1r;
657 w2[2 * ic + 1 + id4 + iv] += wt1i;
668 double *vcp1,
const double *v1)
670 int Nvc2 = 2 * m_Nvc;
671 int Nvcd = m_Nvc * m_Nd;
672 int Nvcd2 = Nvcd / 2;
681 int isite = m_arg[itask].isite;
682 int isite_cp = m_arg[itask].isite_cpy;
684 double *w2 = &vcp1[Nvcd2 * isite_cp];
685 const double *w1 = &v1[Nvcd * isite];
686 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
688 double vt1[m_Nvc], vt2[m_Nvc];
692 for (
int it = 0; it < m_Mt; ++it) {
693 for (
int iz = 0; iz < m_Mz; ++iz) {
694 for (
int ix = 0; ix < m_Nx; ++ix) {
695 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
696 int is2 = ix + m_Nx * (iz + m_Mz * it);
699 int ix1 = Nvc2 * is2;
700 int ix2 = ix1 + m_Nvc;
702 for (
int ic = 0; ic < m_Nc; ++ic) {
703 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
704 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
705 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
706 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
709 for (
int ic = 0; ic < m_Nc; ++ic) {
711 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
712 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
713 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
714 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
724 double *v2,
const double *vcp2)
726 int Nvc2 = 2 * m_Nvc;
727 int Nvcd = m_Nvc * m_Nd;
728 int Nvcd2 = Nvcd / 2;
736 double bc2 = m_boundary2[idir];
740 int isite = m_arg[itask].isite;
741 int isite_cp = m_arg[itask].isite_cpy;
743 double *w2 = &v2[Nvcd * isite];
744 const double *w1 = &vcp2[Nvcd2 * isite_cp];
748 for (
int it = 0; it < m_Mt; ++it) {
749 for (
int iz = 0; iz < m_Mz; ++iz) {
750 for (
int ix = 0; ix < m_Nx; ++ix) {
751 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
752 int is2 = ix + m_Nx * (iz + m_Mz * it);
754 int ix1 = Nvc2 * is2;
755 int ix2 = ix1 + m_Nvc;
757 for (
int ic = 0; ic < m_Nc; ++ic) {
759 int ici = 2 * ic + 1;
760 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
761 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
762 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
763 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
764 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
765 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
766 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
767 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
777 double *v2,
const double *v1)
779 int Nvcd = m_Nvc * m_Nd;
788 double vt1[m_Nvc], vt2[m_Nvc];
789 double wt1r, wt1i, wt2r, wt2i;
791 int isite = m_arg[itask].isite;
793 double *w2 = &v2[Nvcd * isite];
794 const double *w1 = &v1[Nvcd * isite];
795 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
797 for (
int it = 0; it < m_Mt; ++it) {
798 for (
int iz = 0; iz < m_Mz; ++iz) {
799 for (
int iy = 1; iy < m_Ny; ++iy) {
800 for (
int ix = 0; ix < m_Nx; ++ix) {
801 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
803 int in = Nvcd * (is - m_Nx);
804 int ig = m_Ndf * (is - m_Nx);
806 for (
int ic = 0; ic < m_Nc; ++ic) {
807 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
808 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
809 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
810 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
813 for (
int ic = 0; ic < m_Nc; ++ic) {
815 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
816 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
817 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
818 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
820 w2[ic2 + id1 + iv] += wt1r;
821 w2[ic2 + 1 + id1 + iv] += wt1i;
822 w2[ic2 + id2 + iv] += wt2r;
823 w2[ic2 + 1 + id2 + iv] += wt2i;
824 w2[ic2 + id3 + iv] += wt2r;
825 w2[ic2 + 1 + id3 + iv] += wt2i;
826 w2[ic2 + id4 + iv] += -wt1r;
827 w2[ic2 + 1 + id4 + iv] += -wt1i;
838 double *vcp1,
const double *v1)
840 int Nvc2 = 2 * m_Nvc;
841 int Nvcd = m_Nvc * m_Nd;
842 int Nvcd2 = Nvcd / 2;
849 int isite = m_arg[itask].isite;
850 int isite_cp = m_arg[itask].isite_cpz;
852 double *w2 = &vcp1[Nvcd2 * isite_cp];
853 const double *w1 = &v1[Nvcd * isite];
856 double bc2 = m_boundary2[idir];
858 if (m_arg[itask].kz0 == 1) {
859 int Nxy = m_Nx * m_Ny;
861 for (
int it = 0; it < m_Mt; ++it) {
862 for (
int ixy = 0; ixy < Nxy; ++ixy) {
863 int is = ixy + Nxy * (iz + m_Nz * it);
864 int is2 = ixy + Nxy * it;
867 int ix1 = Nvc2 * is2;
868 int ix2 = ix1 + m_Nvc;
870 for (
int ic = 0; ic < m_Nc; ++ic) {
871 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
872 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
873 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
874 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
884 double *v2,
const double *vcp2)
886 int Nvc2 = 2 * m_Nvc;
887 int Nvcd = m_Nvc * m_Nd;
888 int Nvcd2 = Nvcd / 2;
897 double wt1r, wt1i, wt2r, wt2i;
899 int isite = m_arg[itask].isite;
900 int isite_cp = m_arg[itask].isite_cpz;
902 double *w2 = &v2[Nvcd * isite];
903 const double *w1 = &vcp2[Nvcd2 * isite_cp];
904 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
906 if (m_arg[itask].kz1 == 1) {
907 int Nxy = m_Nx * m_Ny;
909 for (
int it = 0; it < m_Mt; ++it) {
910 for (
int ixy = 0; ixy < Nxy; ++ixy) {
911 int is = ixy + Nxy * (iz + m_Nz * it);
912 int is2 = ixy + Nxy * it;
915 int ix1 = Nvc2 * is2;
916 int ix2 = ix1 + m_Nvc;
918 for (
int ic = 0; ic < m_Nc; ++ic) {
919 int ic2 = ic * m_Nvc;
921 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
922 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
923 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
924 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
926 w2[2 * ic + id1 + iv] += wt1r;
927 w2[2 * ic + 1 + id1 + iv] += wt1i;
928 w2[2 * ic + id2 + iv] += wt2r;
929 w2[2 * ic + 1 + id2 + iv] += wt2i;
930 w2[2 * ic + id3 + iv] += wt1i;
931 w2[2 * ic + 1 + id3 + iv] += -wt1r;
932 w2[2 * ic + id4 + iv] += -wt2i;
933 w2[2 * ic + 1 + id4 + iv] += wt2r;
943 double *v2,
const double *v1)
945 int Nvcd = m_Nvc * m_Nd;
954 double vt1[m_Nvc], vt2[m_Nvc];
955 double wt1r, wt1i, wt2r, wt2i;
957 int isite = m_arg[itask].isite;
959 double *w2 = &v2[Nvcd * isite];
960 const double *w1 = &v1[Nvcd * isite];
961 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
963 int kz1 = m_arg[itask].kz1;
964 int Nxy = m_Nx * m_Ny;
966 for (
int it = 0; it < m_Mt; ++it) {
967 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
968 for (
int ixy = 0; ixy < Nxy; ++ixy) {
969 int is = ixy + Nxy * (iz + m_Nz * it);
971 int in = Nvcd * (is + Nxy);
974 for (
int ic = 0; ic < m_Nc; ++ic) {
975 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
976 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
977 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
978 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
981 for (
int ic = 0; ic < m_Nc; ++ic) {
982 int ic2 = ic * m_Nvc;
984 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
985 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
986 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
987 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
989 w2[2 * ic + id1 + iv] += wt1r;
990 w2[2 * ic + 1 + id1 + iv] += wt1i;
991 w2[2 * ic + id2 + iv] += wt2r;
992 w2[2 * ic + 1 + id2 + iv] += wt2i;
993 w2[2 * ic + id3 + iv] += wt1i;
994 w2[2 * ic + 1 + id3 + iv] += -wt1r;
995 w2[2 * ic + id4 + iv] += -wt2i;
996 w2[2 * ic + 1 + id4 + iv] += wt2r;
1006 double *vcp1,
const double *v1)
1008 int Nvc2 = 2 * m_Nvc;
1009 int Nvcd = m_Nvc * m_Nd;
1010 int Nvcd2 = Nvcd / 2;
1014 int id3 = m_Nvc * 2;
1015 int id4 = m_Nvc * 3;
1019 int isite = m_arg[itask].isite;
1020 int isite_cp = m_arg[itask].isite_cpz;
1022 double *w2 = &vcp1[Nvcd2 * isite_cp];
1023 const double *w1 = &v1[Nvcd * isite];
1024 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1026 double vt1[m_Nvc], vt2[m_Nvc];
1028 if (m_arg[itask].kz1 == 1) {
1029 int Nxy = m_Nx * m_Ny;
1031 for (
int it = 0; it < m_Mt; ++it) {
1032 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1033 int is = ixy + Nxy * (iz + m_Nz * it);
1034 int is2 = ixy + Nxy * it;
1036 int ig = m_Ndf * is;
1037 int ix1 = Nvc2 * is2;
1038 int ix2 = ix1 + m_Nvc;
1040 for (
int ic = 0; ic < m_Nc; ++ic) {
1041 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1042 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1043 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1044 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1047 for (
int ic = 0; ic < m_Nc; ++ic) {
1049 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1050 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1051 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1052 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1062 double *v2,
const double *vcp2)
1064 int Nvc2 = 2 * m_Nvc;
1065 int Nvcd = m_Nvc * m_Nd;
1066 int Nvcd2 = Nvcd / 2;
1070 int id3 = m_Nvc * 2;
1071 int id4 = m_Nvc * 3;
1074 double bc2 = m_boundary2[idir];
1078 int isite = m_arg[itask].isite;
1079 int isite_cp = m_arg[itask].isite_cpz;
1081 double *w2 = &v2[Nvcd * isite];
1082 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1084 if (m_arg[itask].kz0 == 1) {
1085 int Nxy = m_Nx * m_Ny;
1088 for (
int it = 0; it < m_Mt; ++it) {
1089 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1090 int is = ixy + Nxy * (iz + m_Nz * it);
1091 int is2 = ixy + Nxy * it;
1093 int ix1 = Nvc2 * is2;
1094 int ix2 = ix1 + m_Nvc;
1096 for (
int ic = 0; ic < m_Nc; ++ic) {
1098 int ici = 2 * ic + 1;
1099 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1100 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1101 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1102 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1103 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1104 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1105 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1106 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1116 double *v2,
const double *v1)
1118 int Nvcd = m_Nvc * m_Nd;
1122 int id3 = m_Nvc * 2;
1123 int id4 = m_Nvc * 3;
1127 double vt1[m_Nvc], vt2[m_Nvc];
1128 double wt1r, wt1i, wt2r, wt2i;
1130 int isite = m_arg[itask].isite;
1132 double *w2 = &v2[Nvcd * isite];
1133 const double *w1 = &v1[Nvcd * isite];
1134 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1136 int kz0 = m_arg[itask].kz0;
1137 int Nxy = m_Nx * m_Ny;
1139 for (
int it = 0; it < m_Mt; ++it) {
1140 for (
int iz = kz0; iz < m_Mz; ++iz) {
1141 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1142 int is = ixy + Nxy * (iz + m_Nz * it);
1144 int in = Nvcd * (is - Nxy);
1145 int ig = m_Ndf * (is - Nxy);
1147 for (
int ic = 0; ic < m_Nc; ++ic) {
1148 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1149 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1150 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1151 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1154 for (
int ic = 0; ic < m_Nc; ++ic) {
1156 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1157 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1158 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1159 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1161 w2[ic2 + id1 + iv] += wt1r;
1162 w2[ic2 + 1 + id1 + iv] += wt1i;
1163 w2[ic2 + id2 + iv] += wt2r;
1164 w2[ic2 + 1 + id2 + iv] += wt2i;
1165 w2[ic2 + id3 + iv] += -wt1i;
1166 w2[ic2 + 1 + id3 + iv] += wt1r;
1167 w2[ic2 + id4 + iv] += wt2i;
1168 w2[ic2 + 1 + id4 + iv] += -wt2r;
1178 double *vcp1,
const double *v1)
1180 int Nvc2 = 2 * m_Nvc;
1181 int Nvcd = m_Nvc * m_Nd;
1182 int Nvcd2 = Nvcd / 2;
1186 int id3 = m_Nvc * 2;
1187 int id4 = m_Nvc * 3;
1189 int isite = m_arg[itask].isite;
1190 int isite_cp = m_arg[itask].isite_cpt;
1192 double *w2 = &vcp1[Nvcd2 * isite_cp];
1193 const double *w1 = &v1[Nvcd * isite];
1196 double bc2 = m_boundary2[idir];
1198 if (m_arg[itask].kt0 == 1) {
1199 int Nxy = m_Nx * m_Ny;
1201 for (
int iz = 0; iz < m_Mz; ++iz) {
1202 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1203 int is = ixy + Nxy * (iz + m_Nz * it);
1204 int is2 = ixy + Nxy * iz;
1207 int ix1 = Nvc2 * is2;
1208 int ix2 = ix1 + m_Nvc;
1210 for (
int ic = 0; ic < m_Nc; ++ic) {
1211 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1212 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1213 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1214 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1224 double *v2,
const double *vcp2)
1226 int Nvc2 = 2 * m_Nvc;
1227 int Nvcd = m_Nvc * m_Nd;
1228 int Nvcd2 = Nvcd / 2;
1232 int id3 = m_Nvc * 2;
1233 int id4 = m_Nvc * 3;
1237 double wt1r, wt1i, wt2r, wt2i;
1239 int isite = m_arg[itask].isite;
1240 int isite_cp = m_arg[itask].isite_cpt;
1242 double *w2 = &v2[Nvcd * isite];
1243 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1244 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1246 if (m_arg[itask].kt1 == 1) {
1247 int Nxy = m_Nx * m_Ny;
1249 for (
int iz = 0; iz < m_Mz; ++iz) {
1250 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1251 int is = ixy + Nxy * (iz + m_Nz * it);
1252 int is2 = ixy + Nxy * iz;
1254 int ig = m_Ndf * is;
1255 int ix1 = Nvc2 * is2;
1256 int ix2 = ix1 + m_Nvc;
1258 for (
int ic = 0; ic < m_Nc; ++ic) {
1259 int ic2 = ic * m_Nvc;
1261 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1262 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1263 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1264 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1266 w2[2 * ic + id3 + iv] += wt1r;
1267 w2[2 * ic + 1 + id3 + iv] += wt1i;
1268 w2[2 * ic + id4 + iv] += wt2r;
1269 w2[2 * ic + 1 + id4 + iv] += wt2i;
1279 double *v2,
const double *v1)
1281 int Nvcd = m_Nvc * m_Nd;
1285 int id3 = m_Nvc * 2;
1286 int id4 = m_Nvc * 3;
1290 double vt1[m_Nvc], vt2[m_Nvc];
1291 double wt1r, wt1i, wt2r, wt2i;
1293 int isite = m_arg[itask].isite;
1295 double *w2 = &v2[Nvcd * isite];
1296 const double *w1 = &v1[Nvcd * isite];
1297 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1299 int kt1 = m_arg[itask].kt1;
1300 int Nxy = m_Nx * m_Ny;
1301 int Nxyz = Nxy * m_Nz;
1303 for (
int it = 0; it < m_Mt - kt1; ++it) {
1304 for (
int iz = 0; iz < m_Mz; ++iz) {
1305 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1306 int is = ixy + Nxy * (iz + m_Nz * it);
1308 int in = Nvcd * (is + Nxyz);
1309 int ig = m_Ndf * is;
1311 for (
int ic = 0; ic < m_Nc; ++ic) {
1312 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1313 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1314 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1315 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1318 for (
int ic = 0; ic < m_Nc; ++ic) {
1319 int ic2 = ic * m_Nvc;
1321 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1322 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1323 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1324 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1326 w2[2 * ic + id3 + iv] += wt1r;
1327 w2[2 * ic + 1 + id3 + iv] += wt1i;
1328 w2[2 * ic + id4 + iv] += wt2r;
1329 w2[2 * ic + 1 + id4 + iv] += wt2i;
1339 double *vcp1,
const double *v1)
1341 int Nvc2 = 2 * m_Nvc;
1342 int Nvcd = m_Nvc * m_Nd;
1343 int Nvcd2 = Nvcd / 2;
1352 int isite = m_arg[itask].isite;
1353 int isite_cp = m_arg[itask].isite_cpt;
1355 double *w2 = &vcp1[Nvcd2 * isite_cp];
1356 const double *w1 = &v1[Nvcd * isite];
1357 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1359 double vt1[m_Nvc], vt2[m_Nvc];
1361 if (m_arg[itask].kt1 == 1) {
1362 int Nxy = m_Nx * m_Ny;
1364 for (
int iz = 0; iz < m_Mz; ++iz) {
1365 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1366 int is = ixy + Nxy * (iz + m_Nz * it);
1367 int is2 = ixy + Nxy * iz;
1369 int ig = m_Ndf * is;
1370 int ix1 = Nvc2 * is2;
1371 int ix2 = ix1 + m_Nvc;
1373 for (
int ic = 0; ic < m_Nc; ++ic) {
1374 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1375 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1376 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1377 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1380 for (
int ic = 0; ic < m_Nc; ++ic) {
1382 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1383 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1384 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1385 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1395 double *v2,
const double *vcp2)
1397 int Nvc2 = 2 * m_Nvc;
1398 int Nvcd = m_Nvc * m_Nd;
1399 int Nvcd2 = Nvcd / 2;
1407 double bc2 = m_boundary2[idir];
1411 int isite = m_arg[itask].isite;
1412 int isite_cp = m_arg[itask].isite_cpt;
1414 double *w2 = &v2[Nvcd * isite];
1415 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1417 if (m_arg[itask].kt0 == 1) {
1418 int Nxy = m_Nx * m_Ny;
1420 for (
int iz = 0; iz < m_Mz; ++iz) {
1421 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1422 int is = ixy + Nxy * (iz + m_Nz * it);
1423 int is2 = ixy + Nxy * iz;
1425 int ix1 = Nvc2 * is2;
1426 int ix2 = ix1 + m_Nvc;
1428 for (
int ic = 0; ic < m_Nc; ++ic) {
1430 int ici = 2 * ic + 1;
1431 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1432 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1433 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1434 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1444 double *v2,
const double *v1)
1446 int Nvcd = m_Nvc * m_Nd;
1455 double vt1[m_Nvc], vt2[m_Nvc];
1456 double wt1r, wt1i, wt2r, wt2i;
1458 int isite = m_arg[itask].isite;
1460 double *w2 = &v2[Nvcd * isite];
1461 const double *w1 = &v1[Nvcd * isite];
1462 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1464 int kt0 = m_arg[itask].kt0;
1465 int Nxy = m_Nx * m_Ny;
1466 int Nxyz = Nxy * m_Nz;
1468 for (
int it = kt0; it < m_Mt; ++it) {
1469 for (
int iz = 0; iz < m_Mz; ++iz) {
1470 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1471 int is = ixy + Nxy * (iz + m_Nz * it);
1473 int in = Nvcd * (is - Nxyz);
1474 int ig = m_Ndf * (is - Nxyz);
1476 for (
int ic = 0; ic < m_Nc; ++ic) {
1477 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1478 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1479 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1480 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1483 for (
int ic = 0; ic < m_Nc; ++ic) {
1485 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1486 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1487 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1488 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1490 w2[ic2 + id1 + iv] += wt1r;
1491 w2[ic2 + 1 + id1 + iv] += wt1i;
1492 w2[ic2 + id2 + iv] += wt2r;
1493 w2[ic2 + 1 + id2 + iv] += wt2i;
1503 double *vcp1,
const double *v1)
1505 int Nvc2 = 2 * m_Nvc;
1506 int Nvcd = m_Nvc * m_Nd;
1507 int Nvcd2 = Nvcd / 2;
1511 int id3 = m_Nvc * 2;
1512 int id4 = m_Nvc * 3;
1514 int isite = m_arg[itask].isite;
1515 int isite_cp = m_arg[itask].isite_cpt;
1517 double *w2 = &vcp1[Nvcd2 * isite_cp];
1518 const double *w1 = &v1[Nvcd * isite];
1521 double bc2 = m_boundary2[idir];
1523 if (m_arg[itask].kt0 == 1) {
1524 int Nxy = m_Nx * m_Ny;
1526 for (
int iz = 0; iz < m_Mz; ++iz) {
1527 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1528 int is = ixy + Nxy * (iz + m_Nz * it);
1529 int is2 = ixy + Nxy * iz;
1532 int ix1 = Nvc2 * is2;
1533 int ix2 = ix1 + m_Nvc;
1535 for (
int ic = 0; ic < m_Nc; ++ic) {
1536 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1537 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1538 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1539 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1549 double *v2,
const double *vcp2)
1551 int Nvc2 = 2 * m_Nvc;
1552 int Nvcd = m_Nvc * m_Nd;
1553 int Nvcd2 = Nvcd / 2;
1557 int id3 = m_Nvc * 2;
1558 int id4 = m_Nvc * 3;
1562 double wt1r, wt1i, wt2r, wt2i;
1564 int isite = m_arg[itask].isite;
1565 int isite_cp = m_arg[itask].isite_cpt;
1567 double *w2 = &v2[Nvcd * isite];
1568 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1569 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1571 if (m_arg[itask].kt1 == 1) {
1572 int Nxy = m_Nx * m_Ny;
1574 for (
int iz = 0; iz < m_Mz; ++iz) {
1575 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1576 int is = ixy + Nxy * (iz + m_Nz * it);
1577 int is2 = ixy + Nxy * iz;
1579 int ig = m_Ndf * is;
1580 int ix1 = Nvc2 * is2;
1581 int ix2 = ix1 + m_Nvc;
1583 for (
int ic = 0; ic < m_Nc; ++ic) {
1584 int ic2 = ic * m_Nvc;
1586 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1587 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1588 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1589 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1591 w2[2 * ic + id1 + iv] += wt1r;
1592 w2[2 * ic + 1 + id1 + iv] += wt1i;
1593 w2[2 * ic + id2 + iv] += wt2r;
1594 w2[2 * ic + 1 + id2 + iv] += wt2i;
1595 w2[2 * ic + id3 + iv] += wt1r;
1596 w2[2 * ic + 1 + id3 + iv] += wt1i;
1597 w2[2 * ic + id4 + iv] += wt2r;
1598 w2[2 * ic + 1 + id4 + iv] += wt2i;
1608 double *v2,
const double *v1)
1610 int Nvcd = m_Nvc * m_Nd;
1614 int id3 = m_Nvc * 2;
1615 int id4 = m_Nvc * 3;
1619 double vt1[m_Nvc], vt2[m_Nvc];
1620 double wt1r, wt1i, wt2r, wt2i;
1622 int isite = m_arg[itask].isite;
1624 double *w2 = &v2[Nvcd * isite];
1625 const double *w1 = &v1[Nvcd * isite];
1626 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1628 int kt1 = m_arg[itask].kt1;
1629 int Nxy = m_Nx * m_Ny;
1630 int Nxyz = Nxy * m_Nz;
1632 for (
int it = 0; it < m_Mt - kt1; ++it) {
1633 for (
int iz = 0; iz < m_Mz; ++iz) {
1634 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1635 int is = ixy + Nxy * (iz + m_Nz * it);
1637 int in = Nvcd * (is + Nxyz);
1638 int ig = m_Ndf * is;
1640 for (
int ic = 0; ic < m_Nc; ++ic) {
1641 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1642 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1643 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1644 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1647 for (
int ic = 0; ic < m_Nc; ++ic) {
1648 int ic2 = ic * m_Nvc;
1650 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1651 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1652 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1653 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1655 w2[2 * ic + id1 + iv] += wt1r;
1656 w2[2 * ic + 1 + id1 + iv] += wt1i;
1657 w2[2 * ic + id2 + iv] += wt2r;
1658 w2[2 * ic + 1 + id2 + iv] += wt2i;
1659 w2[2 * ic + id3 + iv] += wt1r;
1660 w2[2 * ic + 1 + id3 + iv] += wt1i;
1661 w2[2 * ic + id4 + iv] += wt2r;
1662 w2[2 * ic + 1 + id4 + iv] += wt2i;
1672 double *vcp1,
const double *v1)
1674 int Nvc2 = 2 * m_Nvc;
1675 int Nvcd = m_Nvc * m_Nd;
1676 int Nvcd2 = Nvcd / 2;
1680 int id3 = m_Nvc * 2;
1681 int id4 = m_Nvc * 3;
1685 int isite = m_arg[itask].isite;
1686 int isite_cp = m_arg[itask].isite_cpt;
1688 double *w2 = &vcp1[Nvcd2 * isite_cp];
1689 const double *w1 = &v1[Nvcd * isite];
1690 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1692 double vt1[m_Nvc], vt2[m_Nvc];
1694 if (m_arg[itask].kt1 == 1) {
1695 int Nxy = m_Nx * m_Ny;
1697 for (
int iz = 0; iz < m_Mz; ++iz) {
1698 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1699 int is = ixy + Nxy * (iz + m_Nz * it);
1700 int is2 = ixy + Nxy * iz;
1702 int ig = m_Ndf * is;
1703 int ix1 = Nvc2 * is2;
1704 int ix2 = ix1 + m_Nvc;
1706 for (
int ic = 0; ic < m_Nc; ++ic) {
1707 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1708 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1709 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1710 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1713 for (
int ic = 0; ic < m_Nc; ++ic) {
1715 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1716 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1717 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1718 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1728 double *v2,
const double *vcp2)
1730 int Nvc2 = 2 * m_Nvc;
1731 int Nvcd = m_Nvc * m_Nd;
1732 int Nvcd2 = Nvcd / 2;
1736 int id3 = m_Nvc * 2;
1737 int id4 = m_Nvc * 3;
1740 double bc2 = m_boundary2[idir];
1744 int isite = m_arg[itask].isite;
1745 int isite_cp = m_arg[itask].isite_cpt;
1747 double *w2 = &v2[Nvcd * isite];
1748 const double *w1 = &vcp2[Nvcd2 * isite_cp];
1750 if (m_arg[itask].kt0 == 1) {
1751 int Nxy = m_Nx * m_Ny;
1753 for (
int iz = 0; iz < m_Mz; ++iz) {
1754 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1755 int is = ixy + Nxy * (iz + m_Nz * it);
1756 int is2 = ixy + Nxy * iz;
1758 int ix1 = Nvc2 * is2;
1759 int ix2 = ix1 + m_Nvc;
1761 for (
int ic = 0; ic < m_Nc; ++ic) {
1763 int ici = 2 * ic + 1;
1764 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1765 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1766 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1767 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1768 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1769 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1770 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1771 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1781 double *v2,
const double *v1)
1783 int Nvcd = m_Nvc * m_Nd;
1787 int id3 = m_Nvc * 2;
1788 int id4 = m_Nvc * 3;
1792 double vt1[m_Nvc], vt2[m_Nvc];
1793 double wt1r, wt1i, wt2r, wt2i;
1795 int isite = m_arg[itask].isite;
1797 double *w2 = &v2[Nvcd * isite];
1798 const double *w1 = &v1[Nvcd * isite];
1799 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1801 int kt0 = m_arg[itask].kt0;
1802 int Nxy = m_Nx * m_Ny;
1803 int Nxyz = Nxy * m_Nz;
1805 for (
int it = kt0; it < m_Mt; ++it) {
1806 for (
int iz = 0; iz < m_Mz; ++iz) {
1807 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1808 int is = ixy + Nxy * (iz + m_Nz * it);
1810 int in = Nvcd * (is - Nxyz);
1811 int ig = m_Ndf * (is - Nxyz);
1813 for (
int ic = 0; ic < m_Nc; ++ic) {
1814 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1815 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1816 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1817 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1820 for (
int ic = 0; ic < m_Nc; ++ic) {
1822 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1823 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1824 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1825 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1827 w2[ic2 + id1 + iv] += wt1r;
1828 w2[ic2 + 1 + id1 + iv] += wt1i;
1829 w2[ic2 + id2 + iv] += wt2r;
1830 w2[ic2 + 1 + id2 + iv] += wt2i;
1831 w2[ic2 + id3 + iv] -= wt1r;
1832 w2[ic2 + 1 + id3 + iv] -= wt1i;
1833 w2[ic2 + id4 + iv] -= wt2r;
1834 w2[ic2 + 1 + id4 + iv] -= wt2i;
1844 double *v2,
const double *v1)
1846 int Nvcd = m_Nvc * m_Nd;
1847 int Nxy = m_Nx * m_Ny;
1851 int id3 = m_Nvc * 2;
1852 int id4 = m_Nvc * 3;
1854 int isite = m_arg[itask].isite;
1855 double *w2 = &v2[Nvcd * isite];
1856 const double *w1 = &v1[Nvcd * isite];
1858 for (
int it = 0; it < m_Mt; ++it) {
1859 for (
int iz = 0; iz < m_Mz; ++iz) {
1860 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1861 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1862 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1863 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1864 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1865 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1866 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
1876 double *v2,
const double *v1)
1878 int Nvcd = m_Nvc * m_Nd;
1879 int Nxy = m_Nx * m_Ny;
1883 int id3 = m_Nvc * 2;
1884 int id4 = m_Nvc * 3;
1886 int isite = m_arg[itask].isite;
1887 double *w2 = &v2[Nvcd * isite];
1888 const double *w1 = &v1[Nvcd * isite];
1890 for (
int it = 0; it < m_Mt; ++it) {
1891 for (
int iz = 0; iz < m_Mz; ++iz) {
1892 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1893 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1894 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1895 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
1896 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
1897 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1898 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_zp2_thread(int, double *, const double *)
void mult_yp2_thread(int, double *, const double *)
void mult_tm1_dirac_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void general(const char *format,...)
void mult_ym2_thread(int, double *, const double *)
void mult_xp2_thread(int, double *, const double *)
void mult_yp1_thread(int, double *, const double *)
void mult_tm1_chiral_thread(int, double *, const double *)
void mult_ym1_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *)
void mult_xm2_thread(int, double *, const double *)
void mult_tp1_dirac_thread(int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_ymb_thread(int, double *, const double *)
void mult_zpb_thread(int, double *, const double *)
void mult_xm1_thread(int, double *, const double *)
void daypx_thread(int, double *, double, const double *)
Bridge::VerboseLevel m_vl
void mult_tpb_chiral_thread(int, double *, const double *)
void mult_tp1_chiral_thread(int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
static const std::string class_name
void gm5_chiral_thread(int, double *, const double *)
void mult_tp2_chiral_thread(int, double *, const double *)
void mult_zp1_thread(int, double *, const double *)
void clear_thread(int, double *)
void mult_ypb_thread(int, double *, const double *)
void crucial(const char *format,...)
void mult_zmb_thread(int, double *, const double *)
void mult_tmb_dirac_thread(int, double *, const double *)
void mult_xmb_thread(int, double *, const double *)
void mult_xpb_thread(int, double *, const double *)
void mult_zm2_thread(int, double *, const double *)
void mult_tm2_dirac_thread(int, double *, const double *)
void mult_tpb_dirac_thread(int, double *, const double *)
void mult_tp2_dirac_thread(int, double *, const double *)
void mult_zm1_thread(int, double *, const double *)
void mult_tm2_chiral_thread(int, double *, const double *)
void mult_tmb_chiral_thread(int, double *, const double *)