23 #if defined USE_GROUP_SU3
24 #include "fopr_Wilson_impl_SU3.inc"
25 #elif defined USE_GROUP_SU2
26 #include "fopr_Wilson_impl_SU2.inc"
27 #elif defined USE_GROUP_SU_N
28 #include "fopr_Wilson_impl_SU_N.inc"
49 vout.
crucial(
m_vl,
"%s: Nz = %d and Nt = %d do not match Nthread = %d\n",
59 vout.
crucial(
m_vl,
"%s: Mz = %d and Ntask_z = %d do not match Nz = %d\n",
65 vout.
crucial(
m_vl,
"%s: Mt = %d and Ntask_t = %d do not match Nt = %d\n",
78 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
79 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
80 int itask = ith_z + m_Ntask_z * ith_t;
88 if (ith_t == 0)
m_arg[itask].kt0 = 1;
89 if (ith_z == 0)
m_arg[itask].kz0 = 1;
90 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
91 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
95 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy2;
96 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy2;
104 double *w,
double fac)
106 int Nvcd = m_Nvc *
m_Nd;
107 int Nvxy = Nvcd * m_Nx2 * m_Ny;
109 int isite = m_arg[itask].isite;
110 double *wp = &w[Nvcd * isite];
112 for (
int it = 0; it < m_Mt; ++it) {
113 for (
int iz = 0; iz < m_Mz; ++iz) {
114 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
115 int iv = ivxy + Nvxy * (iz + m_Nz * it);
116 wp[iv] = fac * wp[iv];
127 int Nvcd = m_Nvc *
m_Nd;
128 int Nvxy = Nvcd * m_Nx2 * m_Ny;
130 int isite = m_arg[itask].isite;
131 double *wp = &v[Nvcd * isite];
133 for (
int it = 0; it < m_Mt; ++it) {
134 for (
int iz = 0; iz < m_Mz; ++iz) {
135 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
136 int iv = ivxy + Nvxy * (iz + m_Nz * it);
146 double *vcp1,
double *
v1,
int ieo)
148 int Nvc2 = 2 * m_Nvc;
149 int Nvcd = m_Nvc *
m_Nd;
150 int Nvcd2 = Nvcd / 2;
157 int isite = m_arg[itask].isite;
158 int isite_cp = m_arg[itask].isite_cpx;
159 int iyzt0 = isite / m_Nx2;
161 double *w2 = &vcp1[Nvcd2 * isite_cp];
162 double *w1 = &v1[Nvcd * isite];
165 double bc2 = m_boundary2[idir];
170 for (
int it = 0; it < m_Mt; ++it) {
171 for (
int iz = 0; iz < m_Mz; ++iz) {
172 for (
int iy = 0; iy < m_Ny; ++iy) {
173 int iyzt = iy + m_Ny * (iz + m_Nz * it);
174 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
176 int is = ix + m_Nx2 * iyzt;
179 int ix1 = Nvc2 * ibf;
180 int ix2 = ix1 + m_Nvc;
182 for (
int ic = 0; ic <
m_Nc; ++ic) {
183 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
184 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
185 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
186 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
198 double *
v2,
double *vcp2,
int ieo)
200 int Nvc2 = 2 * m_Nvc;
201 int Nvcd = m_Nvc *
m_Nd;
202 int Nvcd2 = Nvcd / 2;
211 double wt1r, wt1i, wt2r, wt2i;
213 int isite = m_arg[itask].isite;
214 int isite_cp = m_arg[itask].isite_cpx;
215 int iyzt0 = isite / m_Nx2;
217 double *w2 = &v2[Nvcd * isite];
218 double *w1 = &vcp2[Nvcd2 * isite_cp];
219 double *u =
const_cast<Field_G *
>(m_U)->ptr(
225 for (
int it = 0; it < m_Mt; ++it) {
226 for (
int iz = 0; iz < m_Mz; ++iz) {
227 for (
int iy = 0; iy < m_Ny; ++iy) {
228 int iyzt = iy + m_Ny * (iz + m_Nz * it);
229 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
232 int is = ix + m_Nx2 * iyzt;
235 int ix1 = Nvc2 * ibf;
236 int ix2 = ix1 + m_Nvc;
238 for (
int ic = 0; ic <
m_Nc; ++ic) {
239 int ic2 = ic * m_Nvc;
240 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
241 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
242 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
243 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
244 w2[2 * ic + id1 + iv] += wt1r;
245 w2[2 * ic + 1 + id1 + iv] += wt1i;
246 w2[2 * ic + id2 + iv] += wt2r;
247 w2[2 * ic + 1 + id2 + iv] += wt2i;
248 w2[2 * ic + id3 + iv] += wt2i;
249 w2[2 * ic + 1 + id3 + iv] += -wt2r;
250 w2[2 * ic + id4 + iv] += wt1i;
251 w2[2 * ic + 1 + id4 + iv] += -wt1r;
263 double *
v2,
double *
v1,
int ieo)
265 int Nvcd = m_Nvc *
m_Nd;
274 double vt1[m_Nvc], vt2[m_Nvc];
275 double wt1r, wt1i, wt2r, wt2i;
277 int isite = m_arg[itask].isite;
278 int iyzt0 = isite / m_Nx2;
280 double *w2 = &v2[Nvcd * isite];
281 double *w1 = &v1[Nvcd * isite];
282 double *u =
const_cast<Field_G *
>(m_U)->ptr(
285 for (
int it = 0; it < m_Mt; ++it) {
286 for (
int iz = 0; iz < m_Mz; ++iz) {
287 for (
int iy = 0; iy < m_Ny; ++iy) {
288 int iyzt = iy + m_Ny * (iz + m_Nz * it);
289 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
290 for (
int ix = 0; ix < m_Nx2 - Leo; ++ix) {
291 int is = ix + m_Nx2 * iyzt;
293 int in = Nvcd * (is + Leo);
296 for (
int ic = 0; ic <
m_Nc; ++ic) {
297 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
298 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
299 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
300 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
303 for (
int ic = 0; ic <
m_Nc; ++ic) {
304 int ic2 = ic * m_Nvc;
306 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
307 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
308 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
309 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
311 w2[2 * ic + id1 + iv] += wt1r;
312 w2[2 * ic + 1 + id1 + iv] += wt1i;
313 w2[2 * ic + id2 + iv] += wt2r;
314 w2[2 * ic + 1 + id2 + iv] += wt2i;
315 w2[2 * ic + id3 + iv] += wt2i;
316 w2[2 * ic + 1 + id3 + iv] += -wt2r;
317 w2[2 * ic + id4 + iv] += wt1i;
318 w2[2 * ic + 1 + id4 + iv] += -wt1r;
329 double *vcp1,
double *
v1,
int ieo)
331 int Nvc2 = 2 * m_Nvc;
332 int Nvcd = m_Nvc *
m_Nd;
333 int Nvcd2 = Nvcd / 2;
342 int isite = m_arg[itask].isite;
343 int isite_cp = m_arg[itask].isite_cpx;
344 int iyzt0 = isite / m_Nx2;
346 double *w2 = &vcp1[Nvcd2 * isite_cp];
347 double *w1 = &v1[Nvcd * isite];
348 double *u =
const_cast<Field_G *
>(m_U)->ptr(
349 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
351 double vt1[m_Nvc], vt2[m_Nvc];
356 for (
int it = 0; it < m_Mt; ++it) {
357 for (
int iz = 0; iz < m_Mz; ++iz) {
358 for (
int iy = 0; iy < m_Ny; ++iy) {
359 int iyzt = iy + m_Ny * (iz + m_Nz * it);
360 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
362 int is = ix + m_Nx2 * iyzt;
366 int ix1 = Nvc2 * ibf;
367 int ix2 = ix1 + m_Nvc;
369 for (
int ic = 0; ic <
m_Nc; ++ic) {
370 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
371 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
372 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
373 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
376 for (
int ic = 0; ic <
m_Nc; ++ic) {
378 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
379 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
380 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
381 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
393 double *
v2,
double *vcp2,
int ieo)
395 int Nvc2 = 2 * m_Nvc;
396 int Nvcd = m_Nvc *
m_Nd;
397 int Nvcd2 = Nvcd / 2;
405 double bc2 = m_boundary2[idir];
409 int isite = m_arg[itask].isite;
410 int isite_cp = m_arg[itask].isite_cpx;
411 int iyzt0 = isite / m_Nx2;
413 double *w2 = &v2[Nvcd * isite];
414 double *w1 = &vcp2[Nvcd2 * isite_cp];
419 for (
int it = 0; it < m_Mt; ++it) {
420 for (
int iz = 0; iz < m_Mz; ++iz) {
421 for (
int iy = 0; iy < m_Ny; ++iy) {
422 int iyzt = iy + m_Ny * (iz + m_Nz * it);
423 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
425 int is = ix + m_Nx2 * iyzt;
428 int ix1 = Nvc2 * ibf;
429 int ix2 = ix1 + m_Nvc;
431 for (
int ic = 0; ic <
m_Nc; ++ic) {
433 int ici = 2 * ic + 1;
434 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
435 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
436 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
437 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
438 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
439 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
440 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
441 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
453 double *
v2,
double *
v1,
int ieo)
455 int Nvcd = m_Nvc *
m_Nd;
464 double vt1[m_Nvc], vt2[m_Nvc];
465 double wt1r, wt1i, wt2r, wt2i;
467 int isite = m_arg[itask].isite;
468 int iyzt0 = isite / m_Nx2;
470 double *w2 = &v2[Nvcd * isite];
471 double *w1 = &v1[Nvcd * isite];
472 double *u =
const_cast<Field_G *
>(m_U)->ptr(
473 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
475 for (
int it = 0; it < m_Mt; ++it) {
476 for (
int iz = 0; iz < m_Mz; ++iz) {
477 for (
int iy = 0; iy < m_Ny; ++iy) {
478 int iyzt = iy + m_Ny * (iz + m_Nz * it);
479 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
481 for (
int ix = Meo; ix < m_Nx2; ++ix) {
482 int is = ix + m_Nx2 * iyzt;
484 int in = Nvcd * (is -
Meo);
485 int ig = m_Ndf * (is -
Meo);
487 for (
int ic = 0; ic <
m_Nc; ++ic) {
488 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
489 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
490 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
491 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
494 for (
int ic = 0; ic <
m_Nc; ++ic) {
497 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
498 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
499 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
500 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
502 w2[2 * ic + id1 + iv] += wt1r;
503 w2[2 * ic + 1 + id1 + iv] += wt1i;
504 w2[2 * ic + id2 + iv] += wt2r;
505 w2[2 * ic + 1 + id2 + iv] += wt2i;
506 w2[2 * ic + id3 + iv] += -wt2i;
507 w2[2 * ic + 1 + id3 + iv] += +wt2r;
508 w2[2 * ic + id4 + iv] += -wt1i;
509 w2[2 * ic + 1 + id4 + iv] += +wt1r;
520 double *vcp1,
double *
v1,
int ieo)
522 int Nvc2 = 2 * m_Nvc;
523 int Nvcd = m_Nvc *
m_Nd;
524 int Nvcd2 = Nvcd / 2;
531 int isite = m_arg[itask].isite;
532 int isite_cp = m_arg[itask].isite_cpy;
534 double *w2 = &vcp1[Nvcd2 * isite_cp];
535 double *w1 = &v1[Nvcd * isite];
538 double bc2 = m_boundary2[idir];
542 for (
int it = 0; it < m_Mt; ++it) {
543 for (
int iz = 0; iz < m_Mz; ++iz) {
544 for (
int ix = 0; ix < m_Nx2; ++ix) {
545 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
546 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
548 int ix1 = Nvc2 * is2;
549 int ix2 = ix1 + m_Nvc;
551 for (
int ic = 0; ic <
m_Nc; ++ic) {
552 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
553 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
554 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
555 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
565 double *
v2,
double *vcp2,
int ieo)
567 int Nvc2 = 2 * m_Nvc;
568 int Nvcd = m_Nvc *
m_Nd;
569 int Nvcd2 = Nvcd / 2;
578 double wt1r, wt1i, wt2r, wt2i;
580 int isite = m_arg[itask].isite;
581 int isite_cp = m_arg[itask].isite_cpy;
583 double *w2 = &v2[Nvcd * isite];
584 double *w1 = &vcp2[Nvcd2 * isite_cp];
585 double *u =
const_cast<Field_G *
>(m_U)->ptr(
590 for (
int it = 0; it < m_Mt; ++it) {
591 for (
int iz = 0; iz < m_Mz; ++iz) {
592 for (
int ix = 0; ix < m_Nx2; ++ix) {
593 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
594 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
597 int ix1 = Nvc2 * is2;
598 int ix2 = ix1 + m_Nvc;
600 for (
int ic = 0; ic <
m_Nc; ++ic) {
601 int ic2 = ic * m_Nvc;
603 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
604 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
605 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
606 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
608 w2[2 * ic + id1 + iv] += wt1r;
609 w2[2 * ic + 1 + id1 + iv] += wt1i;
610 w2[2 * ic + id2 + iv] += wt2r;
611 w2[2 * ic + 1 + id2 + iv] += wt2i;
612 w2[2 * ic + id3 + iv] += -wt2r;
613 w2[2 * ic + 1 + id3 + iv] += -wt2i;
614 w2[2 * ic + id4 + iv] += wt1r;
615 w2[2 * ic + 1 + id4 + iv] += wt1i;
625 double *
v2,
double *
v1,
int ieo)
627 int Nvcd = m_Nvc *
m_Nd;
636 double vt1[m_Nvc], vt2[m_Nvc];
637 double wt1r, wt1i, wt2r, wt2i;
639 int isite = m_arg[itask].isite;
641 double *w2 = &v2[Nvcd * isite];
642 double *w1 = &v1[Nvcd * isite];
643 double *u =
const_cast<Field_G *
>(m_U)->ptr(
646 for (
int it = 0; it < m_Mt; ++it) {
647 for (
int iz = 0; iz < m_Mz; ++iz) {
648 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
649 for (
int ix = 0; ix < m_Nx2; ++ix) {
650 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
652 int in = Nvcd * (is + m_Nx2);
655 for (
int ic = 0; ic <
m_Nc; ++ic) {
656 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
657 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
658 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
659 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
662 for (
int ic = 0; ic <
m_Nc; ++ic) {
663 int ic2 = ic * m_Nvc;
665 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
666 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
667 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
668 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
670 w2[2 * ic + id1 + iv] += wt1r;
671 w2[2 * ic + 1 + id1 + iv] += wt1i;
672 w2[2 * ic + id2 + iv] += wt2r;
673 w2[2 * ic + 1 + id2 + iv] += wt2i;
674 w2[2 * ic + id3 + iv] += -wt2r;
675 w2[2 * ic + 1 + id3 + iv] += -wt2i;
676 w2[2 * ic + id4 + iv] += wt1r;
677 w2[2 * ic + 1 + id4 + iv] += wt1i;
688 double *vcp1,
double *
v1,
int ieo)
690 int Nvc2 = 2 * m_Nvc;
691 int Nvcd = m_Nvc *
m_Nd;
692 int Nvcd2 = Nvcd / 2;
701 int isite = m_arg[itask].isite;
702 int isite_cp = m_arg[itask].isite_cpy;
704 double *w2 = &vcp1[Nvcd2 * isite_cp];
705 double *w1 = &v1[Nvcd * isite];
706 double *u =
const_cast<Field_G *
>(m_U)->ptr(
707 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
709 double vt1[m_Nvc], vt2[m_Nvc];
713 for (
int it = 0; it < m_Mt; ++it) {
714 for (
int iz = 0; iz < m_Mz; ++iz) {
715 for (
int ix = 0; ix < m_Nx2; ++ix) {
716 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
717 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
720 int ix1 = Nvc2 * is2;
721 int ix2 = ix1 + m_Nvc;
723 for (
int ic = 0; ic <
m_Nc; ++ic) {
724 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
725 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
726 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
727 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
730 for (
int ic = 0; ic <
m_Nc; ++ic) {
732 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
733 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
734 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
735 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
745 double *
v2,
double *vcp2,
int ieo)
747 int Nvc2 = 2 * m_Nvc;
748 int Nvcd = m_Nvc *
m_Nd;
749 int Nvcd2 = Nvcd / 2;
757 double bc2 = m_boundary2[idir];
761 int isite = m_arg[itask].isite;
762 int isite_cp = m_arg[itask].isite_cpy;
764 double *w2 = &v2[Nvcd * isite];
765 double *w1 = &vcp2[Nvcd2 * isite_cp];
769 for (
int it = 0; it < m_Mt; ++it) {
770 for (
int iz = 0; iz < m_Mz; ++iz) {
771 for (
int ix = 0; ix < m_Nx2; ++ix) {
772 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
773 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
775 int ix1 = Nvc2 * is2;
776 int ix2 = ix1 + m_Nvc;
778 for (
int ic = 0; ic <
m_Nc; ++ic) {
780 int ici = 2 * ic + 1;
781 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
782 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
783 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
784 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
785 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
786 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
787 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
788 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
798 double *
v2,
double *
v1,
int ieo)
800 int Nvcd = m_Nvc *
m_Nd;
809 double vt1[m_Nvc], vt2[m_Nvc];
810 double wt1r, wt1i, wt2r, wt2i;
812 int isite = m_arg[itask].isite;
814 double *w2 = &v2[Nvcd * isite];
815 double *w1 = &v1[Nvcd * isite];
816 double *u =
const_cast<Field_G *
>(m_U)->ptr(
817 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
819 for (
int it = 0; it < m_Mt; ++it) {
820 for (
int iz = 0; iz < m_Mz; ++iz) {
821 for (
int iy = 1; iy < m_Ny; ++iy) {
822 for (
int ix = 0; ix < m_Nx2; ++ix) {
823 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
825 int in = Nvcd * (is - m_Nx2);
826 int ig = m_Ndf * (is - m_Nx2);
828 for (
int ic = 0; ic <
m_Nc; ++ic) {
829 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
830 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
831 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
832 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
835 for (
int ic = 0; ic <
m_Nc; ++ic) {
837 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
838 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
839 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
840 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
842 w2[ic2 + id1 + iv] += wt1r;
843 w2[ic2 + 1 + id1 + iv] += wt1i;
844 w2[ic2 + id2 + iv] += wt2r;
845 w2[ic2 + 1 + id2 + iv] += wt2i;
846 w2[ic2 + id3 + iv] += wt2r;
847 w2[ic2 + 1 + id3 + iv] += wt2i;
848 w2[ic2 + id4 + iv] += -wt1r;
849 w2[ic2 + 1 + id4 + iv] += -wt1i;
860 double *vcp1,
double *
v1,
int ieo)
862 int Nvc2 = 2 * m_Nvc;
863 int Nvcd = m_Nvc *
m_Nd;
864 int Nvcd2 = Nvcd / 2;
871 int isite = m_arg[itask].isite;
872 int isite_cp = m_arg[itask].isite_cpz;
874 double *w2 = &vcp1[Nvcd2 * isite_cp];
875 double *w1 = &v1[Nvcd * isite];
878 double bc2 = m_boundary2[idir];
880 if (m_arg[itask].kz0 == 1) {
881 int Nxy = m_Nx2 * m_Ny;
883 for (
int it = 0; it < m_Mt; ++it) {
884 for (
int ixy = 0; ixy < Nxy; ++ixy) {
885 int is = ixy + Nxy * (iz + m_Nz * it);
886 int is2 = ixy + Nxy * it;
889 int ix1 = Nvc2 * is2;
890 int ix2 = ix1 + m_Nvc;
892 for (
int ic = 0; ic <
m_Nc; ++ic) {
893 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
894 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
895 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
896 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
906 double *
v2,
double *vcp2,
int ieo)
908 int Nvc2 = 2 * m_Nvc;
909 int Nvcd = m_Nvc *
m_Nd;
910 int Nvcd2 = Nvcd / 2;
919 double wt1r, wt1i, wt2r, wt2i;
921 int isite = m_arg[itask].isite;
922 int isite_cp = m_arg[itask].isite_cpz;
924 double *w2 = &v2[Nvcd * isite];
925 double *w1 = &vcp2[Nvcd2 * isite_cp];
926 double *u =
const_cast<Field_G *
>(m_U)->ptr(
929 if (m_arg[itask].kz1 == 1) {
930 int Nxy = m_Nx2 * m_Ny;
932 for (
int it = 0; it < m_Mt; ++it) {
933 for (
int ixy = 0; ixy < Nxy; ++ixy) {
934 int is = ixy + Nxy * (iz + m_Nz * it);
935 int is2 = ixy + Nxy * it;
938 int ix1 = Nvc2 * is2;
939 int ix2 = ix1 + m_Nvc;
941 for (
int ic = 0; ic <
m_Nc; ++ic) {
942 int ic2 = ic * m_Nvc;
944 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
945 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
946 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
947 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
949 w2[2 * ic + id1 + iv] += wt1r;
950 w2[2 * ic + 1 + id1 + iv] += wt1i;
951 w2[2 * ic + id2 + iv] += wt2r;
952 w2[2 * ic + 1 + id2 + iv] += wt2i;
953 w2[2 * ic + id3 + iv] += wt1i;
954 w2[2 * ic + 1 + id3 + iv] += -wt1r;
955 w2[2 * ic + id4 + iv] += -wt2i;
956 w2[2 * ic + 1 + id4 + iv] += wt2r;
966 double *
v2,
double *
v1,
int ieo)
968 int Nvcd = m_Nvc *
m_Nd;
977 double vt1[m_Nvc], vt2[m_Nvc];
978 double wt1r, wt1i, wt2r, wt2i;
980 int isite = m_arg[itask].isite;
982 double *w2 = &v2[Nvcd * isite];
983 double *w1 = &v1[Nvcd * isite];
984 double *u =
const_cast<Field_G *
>(m_U)->ptr(
987 int kz1 = m_arg[itask].kz1;
988 int Nxy = m_Nx2 * m_Ny;
990 for (
int it = 0; it < m_Mt; ++it) {
991 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
992 for (
int ixy = 0; ixy < Nxy; ++ixy) {
993 int is = ixy + Nxy * (iz + m_Nz * it);
995 int in = Nvcd * (is + Nxy);
998 for (
int ic = 0; ic <
m_Nc; ++ic) {
999 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1000 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1001 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1002 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1005 for (
int ic = 0; ic <
m_Nc; ++ic) {
1006 int ic2 = ic * m_Nvc;
1008 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1009 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1010 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1011 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1013 w2[2 * ic + id1 + iv] += wt1r;
1014 w2[2 * ic + 1 + id1 + iv] += wt1i;
1015 w2[2 * ic + id2 + iv] += wt2r;
1016 w2[2 * ic + 1 + id2 + iv] += wt2i;
1017 w2[2 * ic + id3 + iv] += wt1i;
1018 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1019 w2[2 * ic + id4 + iv] += -wt2i;
1020 w2[2 * ic + 1 + id4 + iv] += wt2r;
1030 double *vcp1,
double *
v1,
int ieo)
1032 int Nvc2 = 2 * m_Nvc;
1033 int Nvcd = m_Nvc *
m_Nd;
1034 int Nvcd2 = Nvcd / 2;
1038 int id3 = m_Nvc * 2;
1039 int id4 = m_Nvc * 3;
1043 int isite = m_arg[itask].isite;
1044 int isite_cp = m_arg[itask].isite_cpz;
1046 double *w2 = &vcp1[Nvcd2 * isite_cp];
1047 double *w1 = &v1[Nvcd * isite];
1048 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1049 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1051 double vt1[m_Nvc], vt2[m_Nvc];
1053 if (m_arg[itask].kz1 == 1) {
1054 int Nxy = m_Nx2 * m_Ny;
1056 for (
int it = 0; it < m_Mt; ++it) {
1057 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1058 int is = ixy + Nxy * (iz + m_Nz * it);
1059 int is2 = ixy + Nxy * it;
1061 int ig = m_Ndf * is;
1062 int ix1 = Nvc2 * is2;
1063 int ix2 = ix1 + m_Nvc;
1065 for (
int ic = 0; ic <
m_Nc; ++ic) {
1066 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1067 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1068 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1069 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1072 for (
int ic = 0; ic <
m_Nc; ++ic) {
1074 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1075 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1076 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1077 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1087 double *
v2,
double *vcp2,
int ieo)
1089 int Nvc2 = 2 * m_Nvc;
1090 int Nvcd = m_Nvc *
m_Nd;
1091 int Nvcd2 = Nvcd / 2;
1095 int id3 = m_Nvc * 2;
1096 int id4 = m_Nvc * 3;
1099 double bc2 = m_boundary2[idir];
1103 int isite = m_arg[itask].isite;
1104 int isite_cp = m_arg[itask].isite_cpz;
1106 double *w2 = &v2[Nvcd * isite];
1107 double *w1 = &vcp2[Nvcd2 * isite_cp];
1109 if (m_arg[itask].kz0 == 1) {
1110 int Nxy = m_Nx2 * m_Ny;
1113 for (
int it = 0; it < m_Mt; ++it) {
1114 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1115 int is = ixy + Nxy * (iz + m_Nz * it);
1116 int is2 = ixy + Nxy * it;
1118 int ix1 = Nvc2 * is2;
1119 int ix2 = ix1 + m_Nvc;
1121 for (
int ic = 0; ic <
m_Nc; ++ic) {
1123 int ici = 2 * ic + 1;
1124 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1125 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1126 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1127 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1128 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1129 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1130 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1131 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1141 double *
v2,
double *
v1,
int ieo)
1143 int Nvcd = m_Nvc *
m_Nd;
1147 int id3 = m_Nvc * 2;
1148 int id4 = m_Nvc * 3;
1152 double vt1[m_Nvc], vt2[m_Nvc];
1153 double wt1r, wt1i, wt2r, wt2i;
1155 int isite = m_arg[itask].isite;
1157 double *w2 = &v2[Nvcd * isite];
1158 double *w1 = &v1[Nvcd * isite];
1159 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1160 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1162 int kz0 = m_arg[itask].kz0;
1163 int Nxy = m_Nx2 * m_Ny;
1165 for (
int it = 0; it < m_Mt; ++it) {
1166 for (
int iz = kz0; iz < m_Mz; ++iz) {
1167 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1168 int is = ixy + Nxy * (iz + m_Nz * it);
1170 int in = Nvcd * (is - Nxy);
1171 int ig = m_Ndf * (is - Nxy);
1173 for (
int ic = 0; ic <
m_Nc; ++ic) {
1174 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1175 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1176 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1177 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1180 for (
int ic = 0; ic <
m_Nc; ++ic) {
1182 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1183 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1184 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1185 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1187 w2[ic2 + id1 + iv] += wt1r;
1188 w2[ic2 + 1 + id1 + iv] += wt1i;
1189 w2[ic2 + id2 + iv] += wt2r;
1190 w2[ic2 + 1 + id2 + iv] += wt2i;
1191 w2[ic2 + id3 + iv] += -wt1i;
1192 w2[ic2 + 1 + id3 + iv] += wt1r;
1193 w2[ic2 + id4 + iv] += wt2i;
1194 w2[ic2 + 1 + id4 + iv] += -wt2r;
1204 double *vcp1,
double *
v1,
int ieo)
1206 int Nvc2 = 2 * m_Nvc;
1207 int Nvcd = m_Nvc *
m_Nd;
1208 int Nvcd2 = Nvcd / 2;
1212 int id3 = m_Nvc * 2;
1213 int id4 = m_Nvc * 3;
1215 int isite = m_arg[itask].isite;
1216 int isite_cp = m_arg[itask].isite_cpt;
1218 double *w2 = &vcp1[Nvcd2 * isite_cp];
1219 double *w1 = &v1[Nvcd * isite];
1222 double bc2 = m_boundary2[idir];
1224 if (m_arg[itask].kt0 == 1) {
1225 int Nxy = m_Nx2 * m_Ny;
1227 for (
int iz = 0; iz < m_Mz; ++iz) {
1228 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1229 int is = ixy + Nxy * (iz + m_Nz * it);
1230 int is2 = ixy + Nxy * iz;
1233 int ix1 = Nvc2 * is2;
1234 int ix2 = ix1 + m_Nvc;
1236 for (
int ic = 0; ic <
m_Nc; ++ic) {
1237 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1238 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1239 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1240 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1250 double *
v2,
double *vcp2,
int ieo)
1252 int Nvc2 = 2 * m_Nvc;
1253 int Nvcd = m_Nvc *
m_Nd;
1254 int Nvcd2 = Nvcd / 2;
1258 int id3 = m_Nvc * 2;
1259 int id4 = m_Nvc * 3;
1263 double wt1r, wt1i, wt2r, wt2i;
1265 int isite = m_arg[itask].isite;
1266 int isite_cp = m_arg[itask].isite_cpt;
1268 double *w2 = &v2[Nvcd * isite];
1269 double *w1 = &vcp2[Nvcd2 * isite_cp];
1270 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1273 if (m_arg[itask].kt1 == 1) {
1274 int Nxy = m_Nx2 * m_Ny;
1276 for (
int iz = 0; iz < m_Mz; ++iz) {
1277 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1278 int is = ixy + Nxy * (iz + m_Nz * it);
1279 int is2 = ixy + Nxy * iz;
1281 int ig = m_Ndf * is;
1282 int ix1 = Nvc2 * is2;
1283 int ix2 = ix1 + m_Nvc;
1285 for (
int ic = 0; ic <
m_Nc; ++ic) {
1286 int ic2 = ic * m_Nvc;
1288 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1289 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1290 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1291 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1293 w2[2 * ic + id3 + iv] += wt1r;
1294 w2[2 * ic + 1 + id3 + iv] += wt1i;
1295 w2[2 * ic + id4 + iv] += wt2r;
1296 w2[2 * ic + 1 + id4 + iv] += wt2i;
1306 double *
v2,
double *
v1,
int ieo)
1308 int Nvcd = m_Nvc *
m_Nd;
1312 int id3 = m_Nvc * 2;
1313 int id4 = m_Nvc * 3;
1317 double vt1[m_Nvc], vt2[m_Nvc];
1318 double wt1r, wt1i, wt2r, wt2i;
1320 int isite = m_arg[itask].isite;
1322 double *w2 = &v2[Nvcd * isite];
1323 double *w1 = &v1[Nvcd * isite];
1324 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1327 int kt1 = m_arg[itask].kt1;
1328 int Nxy = m_Nx2 * m_Ny;
1329 int Nxyz = Nxy * m_Nz;
1331 for (
int it = 0; it < m_Mt - kt1; ++it) {
1332 for (
int iz = 0; iz < m_Mz; ++iz) {
1333 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1334 int is = ixy + Nxy * (iz + m_Nz * it);
1336 int in = Nvcd * (is + Nxyz);
1337 int ig = m_Ndf * is;
1339 for (
int ic = 0; ic <
m_Nc; ++ic) {
1340 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1341 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1342 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1343 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1346 for (
int ic = 0; ic <
m_Nc; ++ic) {
1347 int ic2 = ic * m_Nvc;
1349 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1350 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1351 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1352 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1354 w2[2 * ic + id3 + iv] += wt1r;
1355 w2[2 * ic + 1 + id3 + iv] += wt1i;
1356 w2[2 * ic + id4 + iv] += wt2r;
1357 w2[2 * ic + 1 + id4 + iv] += wt2i;
1367 double *vcp1,
double *
v1,
int ieo)
1369 int Nvc2 = 2 * m_Nvc;
1370 int Nvcd = m_Nvc *
m_Nd;
1371 int Nvcd2 = Nvcd / 2;
1380 int isite = m_arg[itask].isite;
1381 int isite_cp = m_arg[itask].isite_cpt;
1383 double *w2 = &vcp1[Nvcd2 * isite_cp];
1384 double *w1 = &v1[Nvcd * isite];
1385 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1386 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1388 double vt1[m_Nvc], vt2[m_Nvc];
1390 if (m_arg[itask].kt1 == 1) {
1391 int Nxy = m_Nx2 * m_Ny;
1393 for (
int iz = 0; iz < m_Mz; ++iz) {
1394 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1395 int is = ixy + Nxy * (iz + m_Nz * it);
1396 int is2 = ixy + Nxy * iz;
1398 int ig = m_Ndf * is;
1399 int ix1 = Nvc2 * is2;
1400 int ix2 = ix1 + m_Nvc;
1402 for (
int ic = 0; ic <
m_Nc; ++ic) {
1403 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1404 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1405 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1406 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1409 for (
int ic = 0; ic <
m_Nc; ++ic) {
1411 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1412 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1413 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1414 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1424 double *
v2,
double *vcp2,
int ieo)
1426 int Nvc2 = 2 * m_Nvc;
1427 int Nvcd = m_Nvc *
m_Nd;
1428 int Nvcd2 = Nvcd / 2;
1436 double bc2 = m_boundary2[idir];
1440 int isite = m_arg[itask].isite;
1441 int isite_cp = m_arg[itask].isite_cpt;
1443 double *w2 = &v2[Nvcd * isite];
1444 double *w1 = &vcp2[Nvcd2 * isite_cp];
1446 if (m_arg[itask].kt0 == 1) {
1447 int Nxy = m_Nx2 * m_Ny;
1449 for (
int iz = 0; iz < m_Mz; ++iz) {
1450 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1451 int is = ixy + Nxy * (iz + m_Nz * it);
1452 int is2 = ixy + Nxy * iz;
1454 int ix1 = Nvc2 * is2;
1455 int ix2 = ix1 + m_Nvc;
1457 for (
int ic = 0; ic <
m_Nc; ++ic) {
1459 int ici = 2 * ic + 1;
1460 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1461 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1462 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1463 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1473 double *
v2,
double *
v1,
int ieo)
1475 int Nvcd = m_Nvc *
m_Nd;
1484 double vt1[m_Nvc], vt2[m_Nvc];
1485 double wt1r, wt1i, wt2r, wt2i;
1487 int isite = m_arg[itask].isite;
1489 double *w2 = &v2[Nvcd * isite];
1490 double *w1 = &v1[Nvcd * isite];
1491 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1492 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1494 int kt0 = m_arg[itask].kt0;
1495 int Nxy = m_Nx2 * m_Ny;
1496 int Nxyz = Nxy * m_Nz;
1498 for (
int it = kt0; it < m_Mt; ++it) {
1499 for (
int iz = 0; iz < m_Mz; ++iz) {
1500 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1501 int is = ixy + Nxy * (iz + m_Nz * it);
1503 int in = Nvcd * (is - Nxyz);
1504 int ig = m_Ndf * (is - Nxyz);
1506 for (
int ic = 0; ic <
m_Nc; ++ic) {
1507 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1508 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1509 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1510 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1513 for (
int ic = 0; ic <
m_Nc; ++ic) {
1515 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1516 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1517 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1518 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1520 w2[ic2 + id1 + iv] += wt1r;
1521 w2[ic2 + 1 + id1 + iv] += wt1i;
1522 w2[ic2 + id2 + iv] += wt2r;
1523 w2[ic2 + 1 + id2 + iv] += wt2i;
1533 double *vcp1,
double *
v1,
int ieo)
1535 int Nvc2 = 2 * m_Nvc;
1536 int Nvcd = m_Nvc *
m_Nd;
1537 int Nvcd2 = Nvcd / 2;
1541 int id3 = m_Nvc * 2;
1542 int id4 = m_Nvc * 3;
1544 int isite = m_arg[itask].isite;
1545 int isite_cp = m_arg[itask].isite_cpt;
1547 double *w2 = &vcp1[Nvcd2 * isite_cp];
1548 double *w1 = &v1[Nvcd * isite];
1551 double bc2 = m_boundary2[idir];
1553 if (m_arg[itask].kt0 == 1) {
1554 int Nxy = m_Nx2 * m_Ny;
1556 for (
int iz = 0; iz < m_Mz; ++iz) {
1557 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1558 int is = ixy + Nxy * (iz + m_Nz * it);
1559 int is2 = ixy + Nxy * iz;
1562 int ix1 = Nvc2 * is2;
1563 int ix2 = ix1 + m_Nvc;
1565 for (
int ic = 0; ic <
m_Nc; ++ic) {
1566 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1567 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1568 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1569 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1579 double *
v2,
double *vcp2,
int ieo)
1581 int Nvc2 = 2 * m_Nvc;
1582 int Nvcd = m_Nvc *
m_Nd;
1583 int Nvcd2 = Nvcd / 2;
1587 int id3 = m_Nvc * 2;
1588 int id4 = m_Nvc * 3;
1592 double wt1r, wt1i, wt2r, wt2i;
1594 int isite = m_arg[itask].isite;
1595 int isite_cp = m_arg[itask].isite_cpt;
1597 double *w2 = &v2[Nvcd * isite];
1598 double *w1 = &vcp2[Nvcd2 * isite_cp];
1599 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1602 if (m_arg[itask].kt1 == 1) {
1603 int Nxy = m_Nx2 * m_Ny;
1605 for (
int iz = 0; iz < m_Mz; ++iz) {
1606 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1607 int is = ixy + Nxy * (iz + m_Nz * it);
1608 int is2 = ixy + Nxy * iz;
1610 int ig = m_Ndf * is;
1611 int ix1 = Nvc2 * is2;
1612 int ix2 = ix1 + m_Nvc;
1614 for (
int ic = 0; ic <
m_Nc; ++ic) {
1615 int ic2 = ic * m_Nvc;
1617 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1618 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1619 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1620 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1622 w2[2 * ic + id1 + iv] += wt1r;
1623 w2[2 * ic + 1 + id1 + iv] += wt1i;
1624 w2[2 * ic + id2 + iv] += wt2r;
1625 w2[2 * ic + 1 + id2 + iv] += wt2i;
1626 w2[2 * ic + id3 + iv] += wt1r;
1627 w2[2 * ic + 1 + id3 + iv] += wt1i;
1628 w2[2 * ic + id4 + iv] += wt2r;
1629 w2[2 * ic + 1 + id4 + iv] += wt2i;
1639 double *
v2,
double *
v1,
int ieo)
1641 int Nvcd = m_Nvc *
m_Nd;
1645 int id3 = m_Nvc * 2;
1646 int id4 = m_Nvc * 3;
1650 double vt1[m_Nvc], vt2[m_Nvc];
1651 double wt1r, wt1i, wt2r, wt2i;
1653 int isite = m_arg[itask].isite;
1655 double *w2 = &v2[Nvcd * isite];
1656 double *w1 = &v1[Nvcd * isite];
1657 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1660 int kt1 = m_arg[itask].kt1;
1661 int Nxy = m_Nx2 * m_Ny;
1662 int Nxyz = Nxy * m_Nz;
1664 for (
int it = 0; it < m_Mt - kt1; ++it) {
1665 for (
int iz = 0; iz < m_Mz; ++iz) {
1666 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1667 int is = ixy + Nxy * (iz + m_Nz * it);
1669 int in = Nvcd * (is + Nxyz);
1670 int ig = m_Ndf * is;
1672 for (
int ic = 0; ic <
m_Nc; ++ic) {
1673 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1674 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1675 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1676 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1679 for (
int ic = 0; ic <
m_Nc; ++ic) {
1680 int ic2 = ic * m_Nvc;
1682 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1683 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1684 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1685 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1687 w2[2 * ic + id1 + iv] += wt1r;
1688 w2[2 * ic + 1 + id1 + iv] += wt1i;
1689 w2[2 * ic + id2 + iv] += wt2r;
1690 w2[2 * ic + 1 + id2 + iv] += wt2i;
1691 w2[2 * ic + id3 + iv] += wt1r;
1692 w2[2 * ic + 1 + id3 + iv] += wt1i;
1693 w2[2 * ic + id4 + iv] += wt2r;
1694 w2[2 * ic + 1 + id4 + iv] += wt2i;
1704 double *vcp1,
double *
v1,
int ieo)
1706 int Nvc2 = 2 * m_Nvc;
1707 int Nvcd = m_Nvc *
m_Nd;
1708 int Nvcd2 = Nvcd / 2;
1712 int id3 = m_Nvc * 2;
1713 int id4 = m_Nvc * 3;
1717 int isite = m_arg[itask].isite;
1718 int isite_cp = m_arg[itask].isite_cpt;
1720 double *w2 = &vcp1[Nvcd2 * isite_cp];
1721 double *w1 = &v1[Nvcd * isite];
1722 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1723 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1725 double vt1[m_Nvc], vt2[m_Nvc];
1727 if (m_arg[itask].kt1 == 1) {
1728 int Nxy = m_Nx2 * m_Ny;
1730 for (
int iz = 0; iz < m_Mz; ++iz) {
1731 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1732 int is = ixy + Nxy * (iz + m_Nz * it);
1733 int is2 = ixy + Nxy * iz;
1735 int ig = m_Ndf * is;
1736 int ix1 = Nvc2 * is2;
1737 int ix2 = ix1 + m_Nvc;
1739 for (
int ic = 0; ic <
m_Nc; ++ic) {
1740 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1741 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1742 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1743 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1746 for (
int ic = 0; ic <
m_Nc; ++ic) {
1748 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1749 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1750 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1751 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1761 double *
v2,
double *vcp2,
int ieo)
1763 int Nvc2 = 2 * m_Nvc;
1764 int Nvcd = m_Nvc *
m_Nd;
1765 int Nvcd2 = Nvcd / 2;
1769 int id3 = m_Nvc * 2;
1770 int id4 = m_Nvc * 3;
1773 double bc2 = m_boundary2[idir];
1777 int isite = m_arg[itask].isite;
1778 int isite_cp = m_arg[itask].isite_cpt;
1780 double *w2 = &v2[Nvcd * isite];
1781 double *w1 = &vcp2[Nvcd2 * isite_cp];
1783 if (m_arg[itask].kt0 == 1) {
1784 int Nxy = m_Nx2 * m_Ny;
1786 for (
int iz = 0; iz < m_Mz; ++iz) {
1787 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1788 int is = ixy + Nxy * (iz + m_Nz * it);
1789 int is2 = ixy + Nxy * iz;
1791 int ix1 = Nvc2 * is2;
1792 int ix2 = ix1 + m_Nvc;
1794 for (
int ic = 0; ic <
m_Nc; ++ic) {
1796 int ici = 2 * ic + 1;
1797 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1798 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1799 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1800 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1801 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1802 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1803 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1804 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
1814 double *
v2,
double *
v1,
int ieo)
1816 int Nvcd = m_Nvc *
m_Nd;
1820 int id3 = m_Nvc * 2;
1821 int id4 = m_Nvc * 3;
1825 double vt1[m_Nvc], vt2[m_Nvc];
1826 double wt1r, wt1i, wt2r, wt2i;
1828 int isite = m_arg[itask].isite;
1830 double *w2 = &v2[Nvcd * isite];
1831 double *w1 = &v1[Nvcd * isite];
1832 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1833 m_Ndf * (isite + (1 - ieo) *
m_Nvol / 2 + idir *
m_Nvol));
1835 int kt0 = m_arg[itask].kt0;
1836 int Nxy = m_Nx2 * m_Ny;
1837 int Nxyz = Nxy * m_Nz;
1839 for (
int it = kt0; it < m_Mt; ++it) {
1840 for (
int iz = 0; iz < m_Mz; ++iz) {
1841 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1842 int is = ixy + Nxy * (iz + m_Nz * it);
1844 int in = Nvcd * (is - Nxyz);
1845 int ig = m_Ndf * (is - Nxyz);
1847 for (
int ic = 0; ic <
m_Nc; ++ic) {
1848 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1849 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1850 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1851 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1854 for (
int ic = 0; ic <
m_Nc; ++ic) {
1856 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1857 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1858 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1859 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1861 w2[ic2 + id1 + iv] += wt1r;
1862 w2[ic2 + 1 + id1 + iv] += wt1i;
1863 w2[ic2 + id2 + iv] += wt2r;
1864 w2[ic2 + 1 + id2 + iv] += wt2i;
1865 w2[ic2 + id3 + iv] -= wt1r;
1866 w2[ic2 + 1 + id3 + iv] -= wt1i;
1867 w2[ic2 + id4 + iv] -= wt2r;
1868 w2[ic2 + 1 + id4 + iv] -= wt2i;
1878 double *
v2,
double *
v1)
1880 int Nvcd = m_Nvc *
m_Nd;
1881 int Nxy = m_Nx2 * m_Ny;
1885 int id3 = m_Nvc * 2;
1886 int id4 = m_Nvc * 3;
1888 int isite = m_arg[itask].isite;
1889 double *w2 = &v2[Nvcd * isite];
1890 double *w1 = &v1[Nvcd * isite];
1892 for (
int it = 0; it < m_Mt; ++it) {
1893 for (
int iz = 0; iz < m_Mz; ++iz) {
1894 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1895 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1896 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1897 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
1898 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
1899 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
1900 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
1910 double *
v2,
double *
v1)
1912 int Nvcd = m_Nvc *
m_Nd;
1913 int Nxy = m_Nx2 * m_Ny;
1917 int id3 = m_Nvc * 2;
1918 int id4 = m_Nvc * 3;
1920 int isite = m_arg[itask].isite;
1921 double *w2 = &v2[Nvcd * isite];
1922 double *w1 = &v1[Nvcd * isite];
1924 for (
int it = 0; it < m_Mt; ++it) {
1925 for (
int iz = 0; iz < m_Mz; ++iz) {
1926 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1927 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1928 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1929 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
1930 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
1931 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1932 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
1944 int Nvcd = m_Nvc *
m_Nd;
1945 int Nxy = m_Nx2 * m_Ny;
1949 int id3 = m_Nvc * 2;
1950 int id4 = m_Nvc * 3;
1952 int isite = m_arg[itask].isite;
1953 double *w1 = &v1[Nvcd * isite];
1955 for (
int it = 0; it < m_Mt; ++it) {
1956 for (
int iz = 0; iz < m_Mz; ++iz) {
1957 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1958 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1959 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1960 double wt1 = w1[ivc + id1 + iv];
1961 double wt2 = w1[ivc + id2 + iv];
1962 w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
1963 w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
1964 w1[ivc + id3 + iv] = wt1;
1965 w1[ivc + id4 + iv] = wt2;
1977 int Nvcd = m_Nvc *
m_Nd;
1978 int Nxy = m_Nx2 * m_Ny;
1982 int id3 = m_Nvc * 2;
1983 int id4 = m_Nvc * 3;
1985 int isite = m_arg[itask].isite;
1986 double *w1 = &v1[Nvcd * isite];
1988 for (
int it = 0; it < m_Mt; ++it) {
1989 for (
int iz = 0; iz < m_Mz; ++iz) {
1990 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1991 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
1992 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
1993 w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
1994 w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_tm2_dirac_thread(int, double *, double *, int)
const Field_F Meo(const Field_F &, const int ieo)
void mult_tp1_dirac_thread(int, double *, double *, int)
void mult_ym1_thread(int, double *, double *, int)
void mult_xp1_thread(int, double *, double *, int)
static const std::string class_name
void general(const char *format,...)
void mult_tm2_chiral_thread(int, double *, double *, int)
void mult_ymb_thread(int, double *, double *, int)
void clear_thread(int, double *)
valarray< mult_arg > m_arg
void mult_tmb_dirac_thread(int, double *, double *, int)
void mult_zp1_thread(int, double *, double *, int)
void gm5_dirac_thread(int, double *, double *)
void mult_xp2_thread(int, double *, double *, int)
void mult_ypb_thread(int, double *, double *, int)
void mult_tm1_dirac_thread(int, double *, double *, int)
void mult_zm2_thread(int, double *, double *, int)
void mult_tmb_chiral_thread(int, double *, double *, int)
void mult_xm2_thread(int, double *, double *, int)
void mult_ym2_thread(int, double *, double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_xm1_thread(int, double *, double *, int)
void mult_tp1_chiral_thread(int, double *, double *, int)
void crucial(const char *format,...)
void mult_tp2_dirac_thread(int, double *, double *, int)
void mult_tp2_chiral_thread(int, double *, double *, int)
void mult_tpb_chiral_thread(int, double *, double *, int)
void gm5_chiral_thread(int, double *, double *)
void mult_zp2_thread(int, double *, double *, int)
void mult_xmb_thread(int, double *, double *, int)
void mult_zmb_thread(int, double *, double *, int)
Bridge::VerboseLevel m_vl
void mult_yp1_thread(int, double *, double *, int)
void mult_zm1_thread(int, double *, double *, int)
void mult_zpb_thread(int, double *, double *, int)
void mult_yp2_thread(int, double *, double *, int)
void scal_thread(int, double *, double)
void mult_xpb_thread(int, double *, double *, int)
void mult_tm1_chiral_thread(int, double *, double *, int)
void mult_tpb_dirac_thread(int, double *, double *, int)