24 #if defined USE_GROUP_SU3
25 #include "fopr_Wilson_impl_SU3.inc"
26 #elif defined USE_GROUP_SU2
27 #include "fopr_Wilson_impl_SU2.inc"
28 #elif defined USE_GROUP_SU_N
29 #include "fopr_Wilson_impl_SU_N.inc"
52 vout.
crucial(
m_vl,
"%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
62 vout.
crucial(
m_vl,
"%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
68 vout.
crucial(
m_vl,
"%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
98 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
99 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
100 int itask = ith_z + m_Ntask_z * ith_t;
104 m_arg[itask].kt0 = 0;
105 m_arg[itask].kt1 = 0;
106 m_arg[itask].kz0 = 0;
107 m_arg[itask].kz1 = 0;
108 if (ith_t == 0)
m_arg[itask].kt0 = 1;
109 if (ith_z == 0)
m_arg[itask].kz0 = 1;
110 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
111 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
115 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy;
116 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy;
124 double *v2,
double fac,
double *v1)
126 int Nvcd = m_Nvc * m_Nd;
127 int Nvxy = Nvcd * m_Nx * m_Ny;
129 int isite = m_arg[itask].isite;
130 double *w2 = &v2[Nvcd * isite];
131 double *w1 = &v1[Nvcd * isite];
133 for (
int it = 0; it < m_Mt; ++it) {
134 for (
int iz = 0; iz < m_Mz; ++iz) {
135 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
136 int iv = ivxy + Nvxy * (iz + m_Nz * it);
137 w2[iv] = fac * w2[iv] + w1[iv];
148 int Nvcd = m_Nvc * m_Nd;
149 int Nvxy = Nvcd * m_Nx * m_Ny;
151 int isite = m_arg[itask].isite;
152 double *w2 = &v2[Nvcd * isite];
154 for (
int it = 0; it < m_Mt; ++it) {
155 for (
int iz = 0; iz < m_Mz; ++iz) {
156 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
157 int iv = ivxy + Nvxy * (iz + m_Nz * it);
167 double *vcp1,
double *v1)
169 int Nvc2 = 2 * m_Nvc;
170 int Nvcd = m_Nvc * m_Nd;
171 int Nvcd2 = Nvcd / 2;
178 int isite = m_arg[itask].isite;
179 int isite_cp = m_arg[itask].isite_cpx;
181 double *w2 = &vcp1[Nvcd2 * isite_cp];
182 double *w1 = &v1[Nvcd * isite];
185 double bc2 = m_boundary2[idir];
189 for (
int it = 0; it < m_Mt; ++it) {
190 for (
int iz = 0; iz < m_Mz; ++iz) {
191 for (
int iy = 0; iy < m_Ny; ++iy) {
192 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
193 int is2 = iy + m_Ny * (iz + m_Mz * it);
195 int ix1 = Nvc2 * is2;
196 int ix2 = ix1 + m_Nvc;
198 for (
int ic = 0; ic < m_Nc; ++ic) {
199 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
200 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
201 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
202 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
212 double *v2,
double *vcp2)
214 int Nvc2 = 2 * m_Nvc;
215 int Nvcd = m_Nvc * m_Nd;
216 int Nvcd2 = Nvcd / 2;
225 double wt1r, wt1i, wt2r, wt2i;
227 int isite = m_arg[itask].isite;
228 int isite_cp = m_arg[itask].isite_cpx;
230 double *w2 = &v2[Nvcd * isite];
231 double *w1 = &vcp2[Nvcd2 * isite_cp];
232 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
236 for (
int it = 0; it < m_Mt; ++it) {
237 for (
int iz = 0; iz < m_Mz; ++iz) {
238 for (
int iy = 0; iy < m_Ny; ++iy) {
239 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
240 int is2 = iy + m_Ny * (iz + m_Mz * it);
243 int ix1 = Nvc2 * is2;
244 int ix2 = ix1 + m_Nvc;
246 for (
int ic = 0; ic < m_Nc; ++ic) {
247 int ic2 = ic * m_Nvc;
249 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
250 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
251 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
252 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
254 w2[2 * ic + id1 + iv] += wt1r;
255 w2[2 * ic + 1 + id1 + iv] += wt1i;
256 w2[2 * ic + id2 + iv] += wt2r;
257 w2[2 * ic + 1 + id2 + iv] += wt2i;
258 w2[2 * ic + id3 + iv] += wt2i;
259 w2[2 * ic + 1 + id3 + iv] += -wt2r;
260 w2[2 * ic + id4 + iv] += wt1i;
261 w2[2 * ic + 1 + id4 + iv] += -wt1r;
271 double *v2,
double *v1)
273 int Nvcd = m_Nvc * m_Nd;
282 double vt1[m_Nvc], vt2[m_Nvc];
283 double wt1r, wt1i, wt2r, wt2i;
285 int isite = m_arg[itask].isite;
287 double *w2 = &v2[Nvcd * isite];
288 double *w1 = &v1[Nvcd * isite];
289 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
291 for (
int it = 0; it < m_Mt; ++it) {
292 for (
int iz = 0; iz < m_Mz; ++iz) {
293 for (
int iy = 0; iy < m_Ny; ++iy) {
294 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
295 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
297 int in = Nvcd * (is + 1);
300 for (
int ic = 0; ic < m_Nc; ++ic) {
301 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
302 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
303 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
304 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
307 for (
int ic = 0; ic < m_Nc; ++ic) {
308 int ic2 = ic * m_Nvc;
310 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
311 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
312 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
313 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
315 w2[2 * ic + id1 + iv] += wt1r;
316 w2[2 * ic + 1 + id1 + iv] += wt1i;
317 w2[2 * ic + id2 + iv] += wt2r;
318 w2[2 * ic + 1 + id2 + iv] += wt2i;
319 w2[2 * ic + id3 + iv] += wt2i;
320 w2[2 * ic + 1 + id3 + iv] += -wt2r;
321 w2[2 * ic + id4 + iv] += wt1i;
322 w2[2 * ic + 1 + id4 + iv] += -wt1r;
333 double *vcp1,
double *v1)
335 int Nvc2 = 2 * m_Nvc;
336 int Nvcd = m_Nvc * m_Nd;
337 int Nvcd2 = Nvcd / 2;
346 int isite = m_arg[itask].isite;
347 int isite_cp = m_arg[itask].isite_cpx;
349 double *w2 = &vcp1[Nvcd2 * isite_cp];
350 double *w1 = &v1[Nvcd * isite];
351 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
353 double vt1[m_Nvc], vt2[m_Nvc];
357 for (
int it = 0; it < m_Mt; ++it) {
358 for (
int iz = 0; iz < m_Mz; ++iz) {
359 for (
int iy = 0; iy < m_Ny; ++iy) {
360 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
361 int is2 = iy + m_Ny * (iz + m_Mz * it);
364 int ix1 = Nvc2 * is2;
365 int ix2 = ix1 + m_Nvc;
367 for (
int ic = 0; ic < m_Nc; ++ic) {
368 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
369 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
370 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
371 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
374 for (
int ic = 0; ic < m_Nc; ++ic) {
376 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
377 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
378 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
379 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
389 double *v2,
double *vcp2)
391 int Nvc2 = 2 * m_Nvc;
392 int Nvcd = m_Nvc * m_Nd;
393 int Nvcd2 = Nvcd / 2;
401 double bc2 = m_boundary2[idir];
405 int isite = m_arg[itask].isite;
406 int isite_cp = m_arg[itask].isite_cpx;
408 double *w2 = &v2[Nvcd * isite];
409 double *w1 = &vcp2[Nvcd2 * isite_cp];
413 for (
int it = 0; it < m_Mt; ++it) {
414 for (
int iz = 0; iz < m_Mz; ++iz) {
415 for (
int iy = 0; iy < m_Ny; ++iy) {
416 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
417 int is2 = iy + m_Ny * (iz + m_Mz * it);
419 int ix1 = Nvc2 * is2;
420 int ix2 = ix1 + m_Nvc;
422 for (
int ic = 0; ic < m_Nc; ++ic) {
424 int ici = 2 * ic + 1;
425 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
426 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
427 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
428 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
429 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
430 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
431 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
432 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
442 double *v2,
double *v1)
444 int Nvcd = m_Nvc * m_Nd;
453 double vt1[m_Nvc], vt2[m_Nvc];
454 double wt1r, wt1i, wt2r, wt2i;
456 int isite = m_arg[itask].isite;
458 double *w2 = &v2[Nvcd * isite];
459 double *w1 = &v1[Nvcd * isite];
460 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
462 for (
int it = 0; it < m_Mt; ++it) {
463 for (
int iz = 0; iz < m_Mz; ++iz) {
464 for (
int iy = 0; iy < m_Ny; ++iy) {
465 for (
int ix = 1; ix < m_Nx; ++ix) {
466 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
468 int in = Nvcd * (is - 1);
469 int ig = m_Ndf * (is - 1);
471 for (
int ic = 0; ic < m_Nc; ++ic) {
472 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
473 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
474 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
475 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
478 for (
int ic = 0; ic < m_Nc; ++ic) {
481 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
482 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
483 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
484 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
486 w2[2 * ic + id1 + iv] += wt1r;
487 w2[2 * ic + 1 + id1 + iv] += wt1i;
488 w2[2 * ic + id2 + iv] += wt2r;
489 w2[2 * ic + 1 + id2 + iv] += wt2i;
490 w2[2 * ic + id3 + iv] += -wt2i;
491 w2[2 * ic + 1 + id3 + iv] += +wt2r;
492 w2[2 * ic + id4 + iv] += -wt1i;
493 w2[2 * ic + 1 + id4 + iv] += +wt1r;
504 double *vcp1,
double *v1)
506 int Nvc2 = 2 * m_Nvc;
507 int Nvcd = m_Nvc * m_Nd;
508 int Nvcd2 = Nvcd / 2;
515 int isite = m_arg[itask].isite;
516 int isite_cp = m_arg[itask].isite_cpy;
518 double *w2 = &vcp1[Nvcd2 * isite_cp];
519 double *w1 = &v1[Nvcd * isite];
522 double bc2 = m_boundary2[idir];
526 for (
int it = 0; it < m_Mt; ++it) {
527 for (
int iz = 0; iz < m_Mz; ++iz) {
528 for (
int ix = 0; ix < m_Nx; ++ix) {
529 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
530 int is2 = ix + m_Nx * (iz + m_Mz * it);
532 int ix1 = Nvc2 * is2;
533 int ix2 = ix1 + m_Nvc;
535 for (
int ic = 0; ic < m_Nc; ++ic) {
536 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
537 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
538 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
539 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
549 double *v2,
double *vcp2)
551 int Nvc2 = 2 * m_Nvc;
552 int Nvcd = m_Nvc * m_Nd;
553 int Nvcd2 = Nvcd / 2;
562 double wt1r, wt1i, wt2r, wt2i;
564 int isite = m_arg[itask].isite;
565 int isite_cp = m_arg[itask].isite_cpy;
567 double *w2 = &v2[Nvcd * isite];
568 double *w1 = &vcp2[Nvcd2 * isite_cp];
569 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
573 for (
int it = 0; it < m_Mt; ++it) {
574 for (
int iz = 0; iz < m_Mz; ++iz) {
575 for (
int ix = 0; ix < m_Nx; ++ix) {
576 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
577 int is2 = ix + m_Nx * (iz + m_Mz * it);
580 int ix1 = Nvc2 * is2;
581 int ix2 = ix1 + m_Nvc;
583 for (
int ic = 0; ic < m_Nc; ++ic) {
584 int ic2 = ic * m_Nvc;
586 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
587 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
588 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
589 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
591 w2[2 * ic + id1 + iv] += wt1r;
592 w2[2 * ic + 1 + id1 + iv] += wt1i;
593 w2[2 * ic + id2 + iv] += wt2r;
594 w2[2 * ic + 1 + id2 + iv] += wt2i;
595 w2[2 * ic + id3 + iv] += -wt2r;
596 w2[2 * ic + 1 + id3 + iv] += -wt2i;
597 w2[2 * ic + id4 + iv] += wt1r;
598 w2[2 * ic + 1 + id4 + iv] += wt1i;
608 double *v2,
double *v1)
610 int Nvcd = m_Nvc * m_Nd;
619 double vt1[m_Nvc], vt2[m_Nvc];
620 double wt1r, wt1i, wt2r, wt2i;
622 int isite = m_arg[itask].isite;
624 double *w2 = &v2[Nvcd * isite];
625 double *w1 = &v1[Nvcd * isite];
626 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
628 for (
int it = 0; it < m_Mt; ++it) {
629 for (
int iz = 0; iz < m_Mz; ++iz) {
630 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
631 for (
int ix = 0; ix < m_Nx; ++ix) {
632 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
634 int in = Nvcd * (is + m_Nx);
637 for (
int ic = 0; ic < m_Nc; ++ic) {
638 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
639 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
640 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
641 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
644 for (
int ic = 0; ic < m_Nc; ++ic) {
645 int ic2 = ic * m_Nvc;
647 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
648 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
649 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
650 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
652 w2[2 * ic + id1 + iv] += wt1r;
653 w2[2 * ic + 1 + id1 + iv] += wt1i;
654 w2[2 * ic + id2 + iv] += wt2r;
655 w2[2 * ic + 1 + id2 + iv] += wt2i;
656 w2[2 * ic + id3 + iv] += -wt2r;
657 w2[2 * ic + 1 + id3 + iv] += -wt2i;
658 w2[2 * ic + id4 + iv] += wt1r;
659 w2[2 * ic + 1 + id4 + iv] += wt1i;
670 double *vcp1,
double *v1)
672 int Nvc2 = 2 * m_Nvc;
673 int Nvcd = m_Nvc * m_Nd;
674 int Nvcd2 = Nvcd / 2;
683 int isite = m_arg[itask].isite;
684 int isite_cp = m_arg[itask].isite_cpy;
686 double *w2 = &vcp1[Nvcd2 * isite_cp];
687 double *w1 = &v1[Nvcd * isite];
688 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
690 double vt1[m_Nvc], vt2[m_Nvc];
694 for (
int it = 0; it < m_Mt; ++it) {
695 for (
int iz = 0; iz < m_Mz; ++iz) {
696 for (
int ix = 0; ix < m_Nx; ++ix) {
697 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
698 int is2 = ix + m_Nx * (iz + m_Mz * it);
701 int ix1 = Nvc2 * is2;
702 int ix2 = ix1 + m_Nvc;
704 for (
int ic = 0; ic < m_Nc; ++ic) {
705 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
706 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
707 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
708 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
711 for (
int ic = 0; ic < m_Nc; ++ic) {
713 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
714 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
715 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
716 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
726 double *v2,
double *vcp2)
728 int Nvc2 = 2 * m_Nvc;
729 int Nvcd = m_Nvc * m_Nd;
730 int Nvcd2 = Nvcd / 2;
738 double bc2 = m_boundary2[idir];
742 int isite = m_arg[itask].isite;
743 int isite_cp = m_arg[itask].isite_cpy;
745 double *w2 = &v2[Nvcd * isite];
746 double *w1 = &vcp2[Nvcd2 * isite_cp];
750 for (
int it = 0; it < m_Mt; ++it) {
751 for (
int iz = 0; iz < m_Mz; ++iz) {
752 for (
int ix = 0; ix < m_Nx; ++ix) {
753 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
754 int is2 = ix + m_Nx * (iz + m_Mz * it);
756 int ix1 = Nvc2 * is2;
757 int ix2 = ix1 + m_Nvc;
759 for (
int ic = 0; ic < m_Nc; ++ic) {
761 int ici = 2 * ic + 1;
762 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
763 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
764 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
765 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
766 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
767 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
768 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
769 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
779 double *v2,
double *v1)
781 int Nvcd = m_Nvc * m_Nd;
790 double vt1[m_Nvc], vt2[m_Nvc];
791 double wt1r, wt1i, wt2r, wt2i;
793 int isite = m_arg[itask].isite;
795 double *w2 = &v2[Nvcd * isite];
796 double *w1 = &v1[Nvcd * isite];
797 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
799 for (
int it = 0; it < m_Mt; ++it) {
800 for (
int iz = 0; iz < m_Mz; ++iz) {
801 for (
int iy = 1; iy < m_Ny; ++iy) {
802 for (
int ix = 0; ix < m_Nx; ++ix) {
803 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
805 int in = Nvcd * (is - m_Nx);
806 int ig = m_Ndf * (is - m_Nx);
808 for (
int ic = 0; ic < m_Nc; ++ic) {
809 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
810 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
811 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
812 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
815 for (
int ic = 0; ic < m_Nc; ++ic) {
817 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
818 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
819 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
820 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
822 w2[ic2 + id1 + iv] += wt1r;
823 w2[ic2 + 1 + id1 + iv] += wt1i;
824 w2[ic2 + id2 + iv] += wt2r;
825 w2[ic2 + 1 + id2 + iv] += wt2i;
826 w2[ic2 + id3 + iv] += wt2r;
827 w2[ic2 + 1 + id3 + iv] += wt2i;
828 w2[ic2 + id4 + iv] += -wt1r;
829 w2[ic2 + 1 + id4 + iv] += -wt1i;
840 double *vcp1,
double *v1)
842 int Nvc2 = 2 * m_Nvc;
843 int Nvcd = m_Nvc * m_Nd;
844 int Nvcd2 = Nvcd / 2;
851 int isite = m_arg[itask].isite;
852 int isite_cp = m_arg[itask].isite_cpz;
854 double *w2 = &vcp1[Nvcd2 * isite_cp];
855 double *w1 = &v1[Nvcd * isite];
858 double bc2 = m_boundary2[idir];
860 if (m_arg[itask].kz0 == 1) {
861 int Nxy = m_Nx * m_Ny;
863 for (
int it = 0; it < m_Mt; ++it) {
864 for (
int ixy = 0; ixy < Nxy; ++ixy) {
865 int is = ixy + Nxy * (iz + m_Nz * it);
866 int is2 = ixy + Nxy * it;
869 int ix1 = Nvc2 * is2;
870 int ix2 = ix1 + m_Nvc;
872 for (
int ic = 0; ic < m_Nc; ++ic) {
873 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
874 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
875 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
876 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
886 double *v2,
double *vcp2)
888 int Nvc2 = 2 * m_Nvc;
889 int Nvcd = m_Nvc * m_Nd;
890 int Nvcd2 = Nvcd / 2;
899 double wt1r, wt1i, wt2r, wt2i;
901 int isite = m_arg[itask].isite;
902 int isite_cp = m_arg[itask].isite_cpz;
904 double *w2 = &v2[Nvcd * isite];
905 double *w1 = &vcp2[Nvcd2 * isite_cp];
906 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
908 if (m_arg[itask].kz1 == 1) {
909 int Nxy = m_Nx * m_Ny;
911 for (
int it = 0; it < m_Mt; ++it) {
912 for (
int ixy = 0; ixy < Nxy; ++ixy) {
913 int is = ixy + Nxy * (iz + m_Nz * it);
914 int is2 = ixy + Nxy * it;
917 int ix1 = Nvc2 * is2;
918 int ix2 = ix1 + m_Nvc;
920 for (
int ic = 0; ic < m_Nc; ++ic) {
921 int ic2 = ic * m_Nvc;
923 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
924 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
925 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
926 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
928 w2[2 * ic + id1 + iv] += wt1r;
929 w2[2 * ic + 1 + id1 + iv] += wt1i;
930 w2[2 * ic + id2 + iv] += wt2r;
931 w2[2 * ic + 1 + id2 + iv] += wt2i;
932 w2[2 * ic + id3 + iv] += wt1i;
933 w2[2 * ic + 1 + id3 + iv] += -wt1r;
934 w2[2 * ic + id4 + iv] += -wt2i;
935 w2[2 * ic + 1 + id4 + iv] += wt2r;
945 double *v2,
double *v1)
947 int Nvcd = m_Nvc * m_Nd;
956 double vt1[m_Nvc], vt2[m_Nvc];
957 double wt1r, wt1i, wt2r, wt2i;
959 int isite = m_arg[itask].isite;
961 double *w2 = &v2[Nvcd * isite];
962 double *w1 = &v1[Nvcd * isite];
963 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
965 int kz1 = m_arg[itask].kz1;
966 int Nxy = m_Nx * m_Ny;
968 for (
int it = 0; it < m_Mt; ++it) {
969 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
970 for (
int ixy = 0; ixy < Nxy; ++ixy) {
971 int is = ixy + Nxy * (iz + m_Nz * it);
973 int in = Nvcd * (is + Nxy);
976 for (
int ic = 0; ic < m_Nc; ++ic) {
977 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
978 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
979 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
980 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
983 for (
int ic = 0; ic < m_Nc; ++ic) {
984 int ic2 = ic * m_Nvc;
986 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
987 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
988 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
989 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
991 w2[2 * ic + id1 + iv] += wt1r;
992 w2[2 * ic + 1 + id1 + iv] += wt1i;
993 w2[2 * ic + id2 + iv] += wt2r;
994 w2[2 * ic + 1 + id2 + iv] += wt2i;
995 w2[2 * ic + id3 + iv] += wt1i;
996 w2[2 * ic + 1 + id3 + iv] += -wt1r;
997 w2[2 * ic + id4 + iv] += -wt2i;
998 w2[2 * ic + 1 + id4 + iv] += wt2r;
1008 double *vcp1,
double *v1)
1010 int Nvc2 = 2 * m_Nvc;
1011 int Nvcd = m_Nvc * m_Nd;
1012 int Nvcd2 = Nvcd / 2;
1016 int id3 = m_Nvc * 2;
1017 int id4 = m_Nvc * 3;
1021 int isite = m_arg[itask].isite;
1022 int isite_cp = m_arg[itask].isite_cpz;
1024 double *w2 = &vcp1[Nvcd2 * isite_cp];
1025 double *w1 = &v1[Nvcd * isite];
1026 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1028 double vt1[m_Nvc], vt2[m_Nvc];
1030 if (m_arg[itask].kz1 == 1) {
1031 int Nxy = m_Nx * m_Ny;
1033 for (
int it = 0; it < m_Mt; ++it) {
1034 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1035 int is = ixy + Nxy * (iz + m_Nz * it);
1036 int is2 = ixy + Nxy * it;
1038 int ig = m_Ndf * is;
1039 int ix1 = Nvc2 * is2;
1040 int ix2 = ix1 + m_Nvc;
1042 for (
int ic = 0; ic < m_Nc; ++ic) {
1043 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1044 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1045 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1046 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1049 for (
int ic = 0; ic < m_Nc; ++ic) {
1051 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1052 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1053 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1054 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1064 double *v2,
double *vcp2)
1066 int Nvc2 = 2 * m_Nvc;
1067 int Nvcd = m_Nvc * m_Nd;
1068 int Nvcd2 = Nvcd / 2;
1072 int id3 = m_Nvc * 2;
1073 int id4 = m_Nvc * 3;
1076 double bc2 = m_boundary2[idir];
1080 int isite = m_arg[itask].isite;
1081 int isite_cp = m_arg[itask].isite_cpz;
1083 double *w2 = &v2[Nvcd * isite];
1084 double *w1 = &vcp2[Nvcd2 * isite_cp];
1086 if (m_arg[itask].kz0 == 1) {
1087 int Nxy = m_Nx * m_Ny;
1090 for (
int it = 0; it < m_Mt; ++it) {
1091 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1092 int is = ixy + Nxy * (iz + m_Nz * it);
1093 int is2 = ixy + Nxy * it;
1095 int ix1 = Nvc2 * is2;
1096 int ix2 = ix1 + m_Nvc;
1098 for (
int ic = 0; ic < m_Nc; ++ic) {
1100 int ici = 2 * ic + 1;
1101 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1102 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1103 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1104 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1105 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1106 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1107 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1108 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1118 double *v2,
double *v1)
1120 int Nvcd = m_Nvc * m_Nd;
1124 int id3 = m_Nvc * 2;
1125 int id4 = m_Nvc * 3;
1129 double vt1[m_Nvc], vt2[m_Nvc];
1130 double wt1r, wt1i, wt2r, wt2i;
1132 int isite = m_arg[itask].isite;
1134 double *w2 = &v2[Nvcd * isite];
1135 double *w1 = &v1[Nvcd * isite];
1136 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1138 int kz0 = m_arg[itask].kz0;
1139 int Nxy = m_Nx * m_Ny;
1141 for (
int it = 0; it < m_Mt; ++it) {
1142 for (
int iz = kz0; iz < m_Mz; ++iz) {
1143 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1144 int is = ixy + Nxy * (iz + m_Nz * it);
1146 int in = Nvcd * (is - Nxy);
1147 int ig = m_Ndf * (is - Nxy);
1149 for (
int ic = 0; ic < m_Nc; ++ic) {
1150 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1151 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1152 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1153 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1156 for (
int ic = 0; ic < m_Nc; ++ic) {
1158 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1159 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1160 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1161 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1163 w2[ic2 + id1 + iv] += wt1r;
1164 w2[ic2 + 1 + id1 + iv] += wt1i;
1165 w2[ic2 + id2 + iv] += wt2r;
1166 w2[ic2 + 1 + id2 + iv] += wt2i;
1167 w2[ic2 + id3 + iv] += -wt1i;
1168 w2[ic2 + 1 + id3 + iv] += wt1r;
1169 w2[ic2 + id4 + iv] += wt2i;
1170 w2[ic2 + 1 + id4 + iv] += -wt2r;
1180 double *vcp1,
double *v1)
1182 int Nvc2 = 2 * m_Nvc;
1183 int Nvcd = m_Nvc * m_Nd;
1184 int Nvcd2 = Nvcd / 2;
1188 int id3 = m_Nvc * 2;
1189 int id4 = m_Nvc * 3;
1191 int isite = m_arg[itask].isite;
1192 int isite_cp = m_arg[itask].isite_cpt;
1194 double *w2 = &vcp1[Nvcd2 * isite_cp];
1195 double *w1 = &v1[Nvcd * isite];
1198 double bc2 = m_boundary2[idir];
1200 if (m_arg[itask].kt0 == 1) {
1201 int Nxy = m_Nx * m_Ny;
1203 for (
int iz = 0; iz < m_Mz; ++iz) {
1204 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1205 int is = ixy + Nxy * (iz + m_Nz * it);
1206 int is2 = ixy + Nxy * iz;
1209 int ix1 = Nvc2 * is2;
1210 int ix2 = ix1 + m_Nvc;
1212 for (
int ic = 0; ic < m_Nc; ++ic) {
1213 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1214 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1215 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1216 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1226 double *v2,
double *vcp2)
1228 int Nvc2 = 2 * m_Nvc;
1229 int Nvcd = m_Nvc * m_Nd;
1230 int Nvcd2 = Nvcd / 2;
1234 int id3 = m_Nvc * 2;
1235 int id4 = m_Nvc * 3;
1239 double wt1r, wt1i, wt2r, wt2i;
1241 int isite = m_arg[itask].isite;
1242 int isite_cp = m_arg[itask].isite_cpt;
1244 double *w2 = &v2[Nvcd * isite];
1245 double *w1 = &vcp2[Nvcd2 * isite_cp];
1246 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1248 if (m_arg[itask].kt1 == 1) {
1249 int Nxy = m_Nx * m_Ny;
1251 for (
int iz = 0; iz < m_Mz; ++iz) {
1252 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1253 int is = ixy + Nxy * (iz + m_Nz * it);
1254 int is2 = ixy + Nxy * iz;
1256 int ig = m_Ndf * is;
1257 int ix1 = Nvc2 * is2;
1258 int ix2 = ix1 + m_Nvc;
1260 for (
int ic = 0; ic < m_Nc; ++ic) {
1261 int ic2 = ic * m_Nvc;
1263 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1264 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1265 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1266 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1268 w2[2 * ic + id3 + iv] += wt1r;
1269 w2[2 * ic + 1 + id3 + iv] += wt1i;
1270 w2[2 * ic + id4 + iv] += wt2r;
1271 w2[2 * ic + 1 + id4 + iv] += wt2i;
1281 double *v2,
double *v1)
1283 int Nvcd = m_Nvc * m_Nd;
1287 int id3 = m_Nvc * 2;
1288 int id4 = m_Nvc * 3;
1292 double vt1[m_Nvc], vt2[m_Nvc];
1293 double wt1r, wt1i, wt2r, wt2i;
1295 int isite = m_arg[itask].isite;
1297 double *w2 = &v2[Nvcd * isite];
1298 double *w1 = &v1[Nvcd * isite];
1299 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1301 int kt1 = m_arg[itask].kt1;
1302 int Nxy = m_Nx * m_Ny;
1303 int Nxyz = Nxy * m_Nz;
1305 for (
int it = 0; it < m_Mt - kt1; ++it) {
1306 for (
int iz = 0; iz < m_Mz; ++iz) {
1307 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1308 int is = ixy + Nxy * (iz + m_Nz * it);
1310 int in = Nvcd * (is + Nxyz);
1311 int ig = m_Ndf * is;
1313 for (
int ic = 0; ic < m_Nc; ++ic) {
1314 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1315 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1316 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1317 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1320 for (
int ic = 0; ic < m_Nc; ++ic) {
1321 int ic2 = ic * m_Nvc;
1323 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1324 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1325 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1326 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1328 w2[2 * ic + id3 + iv] += wt1r;
1329 w2[2 * ic + 1 + id3 + iv] += wt1i;
1330 w2[2 * ic + id4 + iv] += wt2r;
1331 w2[2 * ic + 1 + id4 + iv] += wt2i;
1341 double *vcp1,
double *v1)
1343 int Nvc2 = 2 * m_Nvc;
1344 int Nvcd = m_Nvc * m_Nd;
1345 int Nvcd2 = Nvcd / 2;
1354 int isite = m_arg[itask].isite;
1355 int isite_cp = m_arg[itask].isite_cpt;
1357 double *w2 = &vcp1[Nvcd2 * isite_cp];
1358 double *w1 = &v1[Nvcd * isite];
1359 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1361 double vt1[m_Nvc], vt2[m_Nvc];
1363 if (m_arg[itask].kt1 == 1) {
1364 int Nxy = m_Nx * m_Ny;
1366 for (
int iz = 0; iz < m_Mz; ++iz) {
1367 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1368 int is = ixy + Nxy * (iz + m_Nz * it);
1369 int is2 = ixy + Nxy * iz;
1371 int ig = m_Ndf * is;
1372 int ix1 = Nvc2 * is2;
1373 int ix2 = ix1 + m_Nvc;
1375 for (
int ic = 0; ic < m_Nc; ++ic) {
1376 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1377 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1378 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1379 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1382 for (
int ic = 0; ic < m_Nc; ++ic) {
1384 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1385 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1386 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1387 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1397 double *v2,
double *vcp2)
1399 int Nvc2 = 2 * m_Nvc;
1400 int Nvcd = m_Nvc * m_Nd;
1401 int Nvcd2 = Nvcd / 2;
1409 double bc2 = m_boundary2[idir];
1413 int isite = m_arg[itask].isite;
1414 int isite_cp = m_arg[itask].isite_cpt;
1416 double *w2 = &v2[Nvcd * isite];
1417 double *w1 = &vcp2[Nvcd2 * isite_cp];
1419 if (m_arg[itask].kt0 == 1) {
1420 int Nxy = m_Nx * m_Ny;
1422 for (
int iz = 0; iz < m_Mz; ++iz) {
1423 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1424 int is = ixy + Nxy * (iz + m_Nz * it);
1425 int is2 = ixy + Nxy * iz;
1427 int ix1 = Nvc2 * is2;
1428 int ix2 = ix1 + m_Nvc;
1430 for (
int ic = 0; ic < m_Nc; ++ic) {
1432 int ici = 2 * ic + 1;
1433 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1434 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1435 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1436 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1446 double *v2,
double *v1)
1448 int Nvcd = m_Nvc * m_Nd;
1457 double vt1[m_Nvc], vt2[m_Nvc];
1458 double wt1r, wt1i, wt2r, wt2i;
1460 int isite = m_arg[itask].isite;
1462 double *w2 = &v2[Nvcd * isite];
1463 double *w1 = &v1[Nvcd * isite];
1464 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1466 int kt0 = m_arg[itask].kt0;
1467 int Nxy = m_Nx * m_Ny;
1468 int Nxyz = Nxy * m_Nz;
1470 for (
int it = kt0; it < m_Mt; ++it) {
1471 for (
int iz = 0; iz < m_Mz; ++iz) {
1472 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1473 int is = ixy + Nxy * (iz + m_Nz * it);
1475 int in = Nvcd * (is - Nxyz);
1476 int ig = m_Ndf * (is - Nxyz);
1478 for (
int ic = 0; ic < m_Nc; ++ic) {
1479 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1480 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1481 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1482 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1485 for (
int ic = 0; ic < m_Nc; ++ic) {
1487 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1488 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1489 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1490 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1492 w2[ic2 + id1 + iv] += wt1r;
1493 w2[ic2 + 1 + id1 + iv] += wt1i;
1494 w2[ic2 + id2 + iv] += wt2r;
1495 w2[ic2 + 1 + id2 + iv] += wt2i;
1505 double *vcp1,
double *v1)
1507 int Nvc2 = 2 * m_Nvc;
1508 int Nvcd = m_Nvc * m_Nd;
1509 int Nvcd2 = Nvcd / 2;
1513 int id3 = m_Nvc * 2;
1514 int id4 = m_Nvc * 3;
1516 int isite = m_arg[itask].isite;
1517 int isite_cp = m_arg[itask].isite_cpt;
1519 double *w2 = &vcp1[Nvcd2 * isite_cp];
1520 double *w1 = &v1[Nvcd * isite];
1523 double bc2 = m_boundary2[idir];
1525 if (m_arg[itask].kt0 == 1) {
1526 int Nxy = m_Nx * m_Ny;
1528 for (
int iz = 0; iz < m_Mz; ++iz) {
1529 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1530 int is = ixy + Nxy * (iz + m_Nz * it);
1531 int is2 = ixy + Nxy * iz;
1534 int ix1 = Nvc2 * is2;
1535 int ix2 = ix1 + m_Nvc;
1537 for (
int ic = 0; ic < m_Nc; ++ic) {
1538 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1539 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1540 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1541 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1551 double *v2,
double *vcp2)
1553 int Nvc2 = 2 * m_Nvc;
1554 int Nvcd = m_Nvc * m_Nd;
1555 int Nvcd2 = Nvcd / 2;
1559 int id3 = m_Nvc * 2;
1560 int id4 = m_Nvc * 3;
1564 double wt1r, wt1i, wt2r, wt2i;
1566 int isite = m_arg[itask].isite;
1567 int isite_cp = m_arg[itask].isite_cpt;
1569 double *w2 = &v2[Nvcd * isite];
1570 double *w1 = &vcp2[Nvcd2 * isite_cp];
1571 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1573 if (m_arg[itask].kt1 == 1) {
1574 int Nxy = m_Nx * m_Ny;
1576 for (
int iz = 0; iz < m_Mz; ++iz) {
1577 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1578 int is = ixy + Nxy * (iz + m_Nz * it);
1579 int is2 = ixy + Nxy * iz;
1581 int ig = m_Ndf * is;
1582 int ix1 = Nvc2 * is2;
1583 int ix2 = ix1 + m_Nvc;
1585 for (
int ic = 0; ic < m_Nc; ++ic) {
1586 int ic2 = ic * m_Nvc;
1588 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1589 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1590 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1591 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1593 w2[2 * ic + id1 + iv] += wt1r;
1594 w2[2 * ic + 1 + id1 + iv] += wt1i;
1595 w2[2 * ic + id2 + iv] += wt2r;
1596 w2[2 * ic + 1 + id2 + iv] += wt2i;
1597 w2[2 * ic + id3 + iv] += wt1r;
1598 w2[2 * ic + 1 + id3 + iv] += wt1i;
1599 w2[2 * ic + id4 + iv] += wt2r;
1600 w2[2 * ic + 1 + id4 + iv] += wt2i;
1610 double *v2,
double *v1)
1612 int Nvcd = m_Nvc * m_Nd;
1616 int id3 = m_Nvc * 2;
1617 int id4 = m_Nvc * 3;
1621 double vt1[m_Nvc], vt2[m_Nvc];
1622 double wt1r, wt1i, wt2r, wt2i;
1624 int isite = m_arg[itask].isite;
1626 double *w2 = &v2[Nvcd * isite];
1627 double *w1 = &v1[Nvcd * isite];
1628 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1630 int kt1 = m_arg[itask].kt1;
1631 int Nxy = m_Nx * m_Ny;
1632 int Nxyz = Nxy * m_Nz;
1634 for (
int it = 0; it < m_Mt - kt1; ++it) {
1635 for (
int iz = 0; iz < m_Mz; ++iz) {
1636 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1637 int is = ixy + Nxy * (iz + m_Nz * it);
1639 int in = Nvcd * (is + Nxyz);
1640 int ig = m_Ndf * is;
1642 for (
int ic = 0; ic < m_Nc; ++ic) {
1643 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1644 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1645 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1646 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1649 for (
int ic = 0; ic < m_Nc; ++ic) {
1650 int ic2 = ic * m_Nvc;
1652 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1653 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1654 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1655 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1657 w2[2 * ic + id1 + iv] += wt1r;
1658 w2[2 * ic + 1 + id1 + iv] += wt1i;
1659 w2[2 * ic + id2 + iv] += wt2r;
1660 w2[2 * ic + 1 + id2 + iv] += wt2i;
1661 w2[2 * ic + id3 + iv] += wt1r;
1662 w2[2 * ic + 1 + id3 + iv] += wt1i;
1663 w2[2 * ic + id4 + iv] += wt2r;
1664 w2[2 * ic + 1 + id4 + iv] += wt2i;
1674 double *vcp1,
double *v1)
1676 int Nvc2 = 2 * m_Nvc;
1677 int Nvcd = m_Nvc * m_Nd;
1678 int Nvcd2 = Nvcd / 2;
1682 int id3 = m_Nvc * 2;
1683 int id4 = m_Nvc * 3;
1687 int isite = m_arg[itask].isite;
1688 int isite_cp = m_arg[itask].isite_cpt;
1690 double *w2 = &vcp1[Nvcd2 * isite_cp];
1691 double *w1 = &v1[Nvcd * isite];
1692 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1694 double vt1[m_Nvc], vt2[m_Nvc];
1696 if (m_arg[itask].kt1 == 1) {
1697 int Nxy = m_Nx * m_Ny;
1699 for (
int iz = 0; iz < m_Mz; ++iz) {
1700 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1701 int is = ixy + Nxy * (iz + m_Nz * it);
1702 int is2 = ixy + Nxy * iz;
1704 int ig = m_Ndf * is;
1705 int ix1 = Nvc2 * is2;
1706 int ix2 = ix1 + m_Nvc;
1708 for (
int ic = 0; ic < m_Nc; ++ic) {
1709 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1710 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1711 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1712 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1715 for (
int ic = 0; ic < m_Nc; ++ic) {
1717 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1718 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1719 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1720 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1730 double *v2,
double *vcp2)
1732 int Nvc2 = 2 * m_Nvc;
1733 int Nvcd = m_Nvc * m_Nd;
1734 int Nvcd2 = Nvcd / 2;
1738 int id3 = m_Nvc * 2;
1739 int id4 = m_Nvc * 3;
1742 double bc2 = m_boundary2[idir];
1746 int isite = m_arg[itask].isite;
1747 int isite_cp = m_arg[itask].isite_cpt;
1749 double *w2 = &v2[Nvcd * isite];
1750 double *w1 = &vcp2[Nvcd2 * isite_cp];
1752 if (m_arg[itask].kt0 == 1) {
1753 int Nxy = m_Nx * m_Ny;
1755 for (
int iz = 0; iz < m_Mz; ++iz) {
1756 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1757 int is = ixy + Nxy * (iz + m_Nz * it);
1758 int is2 = ixy + Nxy * iz;
1760 int ix1 = Nvc2 * is2;
1761 int ix2 = ix1 + m_Nvc;
1763 for (
int ic = 0; ic < m_Nc; ++ic) {
1765 int ici = 2 * ic + 1;
1766 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1767 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1768 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1769 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1770 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1771 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1772 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1773 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1783 double *v2,
double *v1)
1785 int Nvcd = m_Nvc * m_Nd;
1789 int id3 = m_Nvc * 2;
1790 int id4 = m_Nvc * 3;
1794 double vt1[m_Nvc], vt2[m_Nvc];
1795 double wt1r, wt1i, wt2r, wt2i;
1797 int isite = m_arg[itask].isite;
1799 double *w2 = &v2[Nvcd * isite];
1800 double *w1 = &v1[Nvcd * isite];
1801 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1803 int kt0 = m_arg[itask].kt0;
1804 int Nxy = m_Nx * m_Ny;
1805 int Nxyz = Nxy * m_Nz;
1807 for (
int it = kt0; it < m_Mt; ++it) {
1808 for (
int iz = 0; iz < m_Mz; ++iz) {
1809 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1810 int is = ixy + Nxy * (iz + m_Nz * it);
1812 int in = Nvcd * (is - Nxyz);
1813 int ig = m_Ndf * (is - Nxyz);
1815 for (
int ic = 0; ic < m_Nc; ++ic) {
1816 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1817 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1818 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1819 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1822 for (
int ic = 0; ic < m_Nc; ++ic) {
1824 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1825 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1826 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1827 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1829 w2[ic2 + id1 + iv] += wt1r;
1830 w2[ic2 + 1 + id1 + iv] += wt1i;
1831 w2[ic2 + id2 + iv] += wt2r;
1832 w2[ic2 + 1 + id2 + iv] += wt2i;
1833 w2[ic2 + id3 + iv] -= wt1r;
1834 w2[ic2 + 1 + id3 + iv] -= wt1i;
1835 w2[ic2 + id4 + iv] -= wt2r;
1836 w2[ic2 + 1 + id4 + iv] -= wt2i;
1846 double *v2,
double *v1)
1848 int Nvcd = m_Nvc * m_Nd;
1849 int Nxy = m_Nx * m_Ny;
1853 int id3 = m_Nvc * 2;
1854 int id4 = m_Nvc * 3;
1856 int isite = m_arg[itask].isite;
1857 double *w2 = &v2[Nvcd * isite];
1858 double *w1 = &v1[Nvcd * isite];
1860 for (
int it = 0; it < m_Mt; ++it) {
1861 for (
int iz = 0; iz < m_Mz; ++iz) {
1862 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1863 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1864 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1865 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1866 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1867 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1868 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
1878 double *v2,
double *v1)
1880 int Nvcd = m_Nvc * m_Nd;
1881 int Nxy = m_Nx * m_Ny;
1885 int id3 = m_Nvc * 2;
1886 int id4 = m_Nvc * 3;
1888 int isite = m_arg[itask].isite;
1889 double *w2 = &v2[Nvcd * isite];
1890 double *w1 = &v1[Nvcd * isite];
1892 for (
int it = 0; it < m_Mt; ++it) {
1893 for (
int iz = 0; iz < m_Mz; ++iz) {
1894 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1895 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1896 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1897 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
1898 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
1899 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1900 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_tpb_dirac_thread(int, double *, double *)
void mult_tm1_chiral_thread(int, double *, double *)
void general(const char *format,...)
void mult_xp1_thread(int, double *, double *)
void daypx_thread(int, double *, double, double *)
void mult_xm2_thread(int, double *, double *)
void mult_zm2_thread(int, double *, double *)
void mult_xp2_thread(int, double *, double *)
void mult_ymb_thread(int, double *, double *)
void mult_tm2_chiral_thread(int, double *, double *)
void gm5_dirac_thread(int, double *, double *)
void mult_zmb_thread(int, double *, double *)
void mult_tm2_dirac_thread(int, double *, double *)
void mult_zp1_thread(int, double *, double *)
void mult_ym1_thread(int, double *, double *)
void mult_yp2_thread(int, double *, double *)
void mult_zp2_thread(int, double *, double *)
void mult_tm1_dirac_thread(int, double *, double *)
void mult_tp1_chiral_thread(int, double *, double *)
void mult_xmb_thread(int, double *, double *)
Bridge::VerboseLevel m_vl
void mult_xm1_thread(int, double *, double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
static const std::string class_name
void mult_tp1_dirac_thread(int, double *, double *)
void clear_thread(int, double *)
void mult_zm1_thread(int, double *, double *)
void mult_xpb_thread(int, double *, double *)
void mult_tmb_dirac_thread(int, double *, double *)
void crucial(const char *format,...)
void mult_tp2_chiral_thread(int, double *, double *)
void mult_ypb_thread(int, double *, double *)
void mult_tp2_dirac_thread(int, double *, double *)
void gm5_chiral_thread(int, double *, double *)
void mult_zpb_thread(int, double *, double *)
void mult_ym2_thread(int, double *, double *)
void mult_tpb_chiral_thread(int, double *, double *)
void mult_yp1_thread(int, double *, double *)
void mult_tmb_chiral_thread(int, double *, double *)
valarray< mult_arg > m_arg