22 #if defined USE_GROUP_SU3
23 #include "fopr_Wilson_impl_SU3.inc"
24 #elif defined USE_GROUP_SU2
25 #include "fopr_Wilson_impl_SU2.inc"
26 #elif defined USE_GROUP_SU_N
27 #include "fopr_Wilson_impl_SU_N.inc"
62 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
63 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
64 int itask = ith_z + m_Ntask_z * ith_t;
72 if (ith_t == 0)
m_arg[itask].kt0 = 1;
73 if (ith_z == 0)
m_arg[itask].kz0 = 1;
74 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
75 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
79 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy2;
80 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy2;
87 int Nvcd2 = 2 * Nc * Nd / 2;
91 valarray<int> datasize(
m_Ntask);
92 valarray<int> offset_up(
m_Ntask);
93 valarray<int> offset_lw(
m_Ntask);
94 valarray<int> datasize_up(
m_Ntask);
95 valarray<int> datasize_lw(
m_Ntask);
98 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
99 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
101 int isite_cp = itask *
m_Mz *
m_Mt * (m_Ny / 2);
102 destid[itask] = itask;
103 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
104 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * (m_Ny / 2);
113 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
114 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
117 destid[itask] = itask;
118 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
119 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Nx2;
128 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
129 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
130 int itask = ith_z + m_Ntask_z * ith_t;
132 offset_up[itask] = 0;
133 offset_lw[itask] = 0;
134 datasize_up[itask] = 0;
135 datasize_lw[itask] = 0;
137 destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
138 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx2 * m_Ny;
139 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx2 * m_Ny;
141 if (ith_z == m_Ntask_z - 1) {
143 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx2 * m_Ny;
144 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx2 * m_Ny;
154 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
155 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
156 int itask = ith_z + m_Ntask_z * ith_t;
158 offset_up[itask] = 0;
159 offset_lw[itask] = 0;
160 datasize_up[itask] = 0;
161 datasize_lw[itask] = 0;
163 destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
164 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx2 * m_Ny;
165 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx2 * m_Ny;
167 if (ith_t == m_Ntask_t - 1) {
168 destid[itask] = ith_z;
169 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx2 * m_Ny;
170 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx2 * m_Ny;
182 double *w,
double fac)
184 int Nvcd = m_Nvc *
m_Nd;
185 int Nvxy = Nvcd * m_Nx2 * m_Ny;
187 int isite = m_arg[itask].isite;
188 double *wp = &w[Nvcd * isite];
190 for (
int it = 0; it < m_Mt; ++it) {
191 for (
int iz = 0; iz < m_Mz; ++iz) {
192 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
193 int iv = ivxy + Nvxy * (iz + m_Nz * it);
194 wp[iv] = fac * wp[iv];
205 int Nvcd = m_Nvc *
m_Nd;
206 int Nvxy = Nvcd * m_Nx2 * m_Ny;
208 int isite = m_arg[itask].isite;
209 double *wp = &v[Nvcd * isite];
211 for (
int it = 0; it < m_Mt; ++it) {
212 for (
int iz = 0; iz < m_Mz; ++iz) {
213 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
214 int iv = ivxy + Nvxy * (iz + m_Nz * it);
224 double *vcp1,
double *
v1,
int ieo)
226 int Nvc2 = 2 * m_Nvc;
227 int Nvcd = m_Nvc *
m_Nd;
228 int Nvcd2 = Nvcd / 2;
237 int isite = m_arg[itask].isite;
238 int isite_cp = m_arg[itask].isite_cpx;
239 int iyzt0 = isite / m_Nx2;
243 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
244 double *w1 = &v1[Nvcd * isite];
246 double bc2 = m_boundary2[idir];
251 for (
int it = 0; it < m_Mt; ++it) {
252 for (
int iz = 0; iz < m_Mz; ++iz) {
253 for (
int iy = 0; iy < m_Ny; ++iy) {
254 int iyzt = iy + m_Ny * (iz + m_Nz * it);
255 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
257 int is = ix + m_Nx2 * iyzt;
260 int ix1 = Nvc2 * ibf;
261 int ix2 = ix1 + m_Nvc;
263 for (
int ic = 0; ic <
m_Nc; ++ic) {
264 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
265 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
266 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
267 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
275 m_bw_send[idir]->start_thread(itask);
280 double *
v2,
double *vcp2,
int ieo)
282 int Nvc2 = 2 * m_Nvc;
283 int Nvcd = m_Nvc *
m_Nd;
284 int Nvcd2 = Nvcd / 2;
293 double wt1r, wt1i, wt2r, wt2i;
295 int isite = m_arg[itask].isite;
296 int isite_cp = m_arg[itask].isite_cpx;
297 int iyzt0 = isite / m_Nx2;
299 double *w2 = &v2[Nvcd * isite];
302 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
303 double *u =
const_cast<Field_G *
>(m_U)->ptr(
306 m_bw_recv[idir]->wait_thread(itask);
310 for (
int it = 0; it < m_Mt; ++it) {
311 for (
int iz = 0; iz < m_Mz; ++iz) {
312 for (
int iy = 0; iy < m_Ny; ++iy) {
313 int iyzt = iy + m_Ny * (iz + m_Nz * it);
314 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
317 int is = ix + m_Nx2 * iyzt;
320 int ix1 = Nvc2 * ibf;
321 int ix2 = ix1 + m_Nvc;
323 for (
int ic = 0; ic <
m_Nc; ++ic) {
324 int ic2 = ic * m_Nvc;
325 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
326 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
327 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
328 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
329 w2[2 * ic + id1 + iv] += wt1r;
330 w2[2 * ic + 1 + id1 + iv] += wt1i;
331 w2[2 * ic + id2 + iv] += wt2r;
332 w2[2 * ic + 1 + id2 + iv] += wt2i;
333 w2[2 * ic + id3 + iv] += wt2i;
334 w2[2 * ic + 1 + id3 + iv] += -wt2r;
335 w2[2 * ic + id4 + iv] += wt1i;
336 w2[2 * ic + 1 + id4 + iv] += -wt1r;
348 double *v2,
double *v1,
int ieo)
350 int Nvcd = m_Nvc *
m_Nd;
359 double vt1[m_Nvc], vt2[m_Nvc];
360 double wt1r, wt1i, wt2r, wt2i;
362 int isite = m_arg[itask].isite;
363 int iyzt0 = isite / m_Nx2;
365 double *w2 = &v2[Nvcd * isite];
366 double *w1 = &v1[Nvcd * isite];
367 double *u =
const_cast<Field_G *
>(m_U)->ptr(
368 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
370 for (
int it = 0; it < m_Mt; ++it) {
371 for (
int iz = 0; iz < m_Mz; ++iz) {
372 for (
int iy = 0; iy < m_Ny; ++iy) {
373 int iyzt = iy + m_Ny * (iz + m_Nz * it);
374 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
375 for (
int ix = 0; ix < m_Nx2 - Leo; ++ix) {
376 int is = ix + m_Nx2 * iyzt;
378 int in = Nvcd * (is + Leo);
381 for (
int ic = 0; ic <
m_Nc; ++ic) {
382 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
383 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
384 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
385 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
388 for (
int ic = 0; ic <
m_Nc; ++ic) {
389 int ic2 = ic * m_Nvc;
391 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
392 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
393 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
394 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
396 w2[2 * ic + id1 + iv] += wt1r;
397 w2[2 * ic + 1 + id1 + iv] += wt1i;
398 w2[2 * ic + id2 + iv] += wt2r;
399 w2[2 * ic + 1 + id2 + iv] += wt2i;
400 w2[2 * ic + id3 + iv] += wt2i;
401 w2[2 * ic + 1 + id3 + iv] += -wt2r;
402 w2[2 * ic + id4 + iv] += wt1i;
403 w2[2 * ic + 1 + id4 + iv] += -wt1r;
414 double *vcp1,
double *v1,
int ieo)
416 int Nvc2 = 2 * m_Nvc;
417 int Nvcd = m_Nvc *
m_Nd;
418 int Nvcd2 = Nvcd / 2;
427 int isite = m_arg[itask].isite;
428 int isite_cp = m_arg[itask].isite_cpx;
429 int iyzt0 = isite / m_Nx2;
433 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
434 double *w1 = &v1[Nvcd * isite];
435 double *u =
const_cast<Field_G *
>(m_U)->ptr(
436 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir *
m_Nvol));
438 double vt1[m_Nvc], vt2[m_Nvc];
443 for (
int it = 0; it < m_Mt; ++it) {
444 for (
int iz = 0; iz < m_Mz; ++iz) {
445 for (
int iy = 0; iy < m_Ny; ++iy) {
446 int iyzt = iy + m_Ny * (iz + m_Nz * it);
447 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
449 int is = ix + m_Nx2 * iyzt;
453 int ix1 = Nvc2 * ibf;
454 int ix2 = ix1 + m_Nvc;
456 for (
int ic = 0; ic <
m_Nc; ++ic) {
457 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
458 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
459 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
460 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
463 for (
int ic = 0; ic <
m_Nc; ++ic) {
465 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
466 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
467 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
468 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
476 m_fw_send[idir]->start_thread(itask);
481 double *v2,
double *vcp2,
int ieo)
483 int Nvc2 = 2 * m_Nvc;
484 int Nvcd = m_Nvc *
m_Nd;
485 int Nvcd2 = Nvcd / 2;
493 double bc2 = m_boundary2[idir];
495 double wt1r, wt1i, wt2r, wt2i;
497 int isite = m_arg[itask].isite;
498 int isite_cp = m_arg[itask].isite_cpx;
499 int iyzt0 = isite / m_Nx2;
501 double *w2 = &v2[Nvcd * isite];
504 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
506 m_fw_recv[idir]->wait_thread(itask);
510 for (
int it = 0; it < m_Mt; ++it) {
511 for (
int iz = 0; iz < m_Mz; ++iz) {
512 for (
int iy = 0; iy < m_Ny; ++iy) {
513 int iyzt = iy + m_Ny * (iz + m_Nz * it);
514 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
516 int is = ix + m_Nx2 * iyzt;
519 int ix1 = Nvc2 * ibf;
520 int ix2 = ix1 + m_Nvc;
522 for (
int ic = 0; ic <
m_Nc; ++ic) {
524 int ici = 2 * ic + 1;
525 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
526 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
527 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
528 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
529 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
530 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
531 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
532 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
544 double *v2,
double *v1,
int ieo)
546 int Nvcd = m_Nvc *
m_Nd;
555 double vt1[m_Nvc], vt2[m_Nvc];
556 double wt1r, wt1i, wt2r, wt2i;
558 int isite = m_arg[itask].isite;
559 int iyzt0 = isite / m_Nx2;
561 double *w2 = &v2[Nvcd * isite];
562 double *w1 = &v1[Nvcd * isite];
563 double *u =
const_cast<Field_G *
>(m_U)->ptr(
564 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
566 for (
int it = 0; it < m_Mt; ++it) {
567 for (
int iz = 0; iz < m_Mz; ++iz) {
568 for (
int iy = 0; iy < m_Ny; ++iy) {
569 int iyzt = iy + m_Ny * (iz + m_Nz * it);
570 int Leo = ieo + (1 - 2 * ieo) * m_Leo[iyzt0 + iyzt];
572 for (
int ix = Meo; ix < m_Nx2; ++ix) {
573 int is = ix + m_Nx2 * iyzt;
575 int in = Nvcd * (is -
Meo);
576 int ig = m_Ndf * (is -
Meo);
578 for (
int ic = 0; ic <
m_Nc; ++ic) {
579 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
580 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
581 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
582 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
585 for (
int ic = 0; ic <
m_Nc; ++ic) {
588 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
589 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
590 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
591 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
593 w2[2 * ic + id1 + iv] += wt1r;
594 w2[2 * ic + 1 + id1 + iv] += wt1i;
595 w2[2 * ic + id2 + iv] += wt2r;
596 w2[2 * ic + 1 + id2 + iv] += wt2i;
597 w2[2 * ic + id3 + iv] += -wt2i;
598 w2[2 * ic + 1 + id3 + iv] += +wt2r;
599 w2[2 * ic + id4 + iv] += -wt1i;
600 w2[2 * ic + 1 + id4 + iv] += +wt1r;
611 double *vcp1,
double *v1,
int ieo)
613 int Nvc2 = 2 * m_Nvc;
614 int Nvcd = m_Nvc *
m_Nd;
615 int Nvcd2 = Nvcd / 2;
624 int isite = m_arg[itask].isite;
625 int isite_cp = m_arg[itask].isite_cpy;
629 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
630 double *w1 = &v1[Nvcd * isite];
632 double bc2 = m_boundary2[idir];
636 for (
int it = 0; it < m_Mt; ++it) {
637 for (
int iz = 0; iz < m_Mz; ++iz) {
638 for (
int ix = 0; ix < m_Nx2; ++ix) {
639 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
640 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
642 int ix1 = Nvc2 * is2;
643 int ix2 = ix1 + m_Nvc;
645 for (
int ic = 0; ic <
m_Nc; ++ic) {
646 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
647 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
648 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
649 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
655 m_bw_send[idir]->start_thread(itask);
660 double *v2,
double *vcp2,
int ieo)
662 int Nvc2 = 2 * m_Nvc;
663 int Nvcd = m_Nvc *
m_Nd;
664 int Nvcd2 = Nvcd / 2;
673 double wt1r, wt1i, wt2r, wt2i;
675 int isite = m_arg[itask].isite;
676 int isite_cp = m_arg[itask].isite_cpy;
678 double *w2 = &v2[Nvcd * isite];
681 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
682 double *u =
const_cast<Field_G *
>(m_U)->ptr(
683 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
685 m_bw_recv[idir]->wait_thread(itask);
688 for (
int it = 0; it < m_Mt; ++it) {
689 for (
int iz = 0; iz < m_Mz; ++iz) {
690 for (
int ix = 0; ix < m_Nx2; ++ix) {
691 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
692 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
695 int ix1 = Nvc2 * is2;
696 int ix2 = ix1 + m_Nvc;
698 for (
int ic = 0; ic <
m_Nc; ++ic) {
699 int ic2 = ic * m_Nvc;
701 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
702 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
703 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
704 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
706 w2[2 * ic + id1 + iv] += wt1r;
707 w2[2 * ic + 1 + id1 + iv] += wt1i;
708 w2[2 * ic + id2 + iv] += wt2r;
709 w2[2 * ic + 1 + id2 + iv] += wt2i;
710 w2[2 * ic + id3 + iv] += -wt2r;
711 w2[2 * ic + 1 + id3 + iv] += -wt2i;
712 w2[2 * ic + id4 + iv] += wt1r;
713 w2[2 * ic + 1 + id4 + iv] += wt1i;
723 double *v2,
double *v1,
int ieo)
725 int Nvcd = m_Nvc *
m_Nd;
734 double vt1[m_Nvc], vt2[m_Nvc];
735 double wt1r, wt1i, wt2r, wt2i;
737 int isite = m_arg[itask].isite;
739 double *w2 = &v2[Nvcd * isite];
740 double *w1 = &v1[Nvcd * isite];
741 double *u =
const_cast<Field_G *
>(m_U)->ptr(
742 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
744 for (
int it = 0; it < m_Mt; ++it) {
745 for (
int iz = 0; iz < m_Mz; ++iz) {
746 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
747 for (
int ix = 0; ix < m_Nx2; ++ix) {
748 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
750 int in = Nvcd * (is + m_Nx2);
753 for (
int ic = 0; ic <
m_Nc; ++ic) {
754 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
755 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
756 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
757 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
760 for (
int ic = 0; ic <
m_Nc; ++ic) {
761 int ic2 = ic * m_Nvc;
763 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
764 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
765 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
766 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
768 w2[2 * ic + id1 + iv] += wt1r;
769 w2[2 * ic + 1 + id1 + iv] += wt1i;
770 w2[2 * ic + id2 + iv] += wt2r;
771 w2[2 * ic + 1 + id2 + iv] += wt2i;
772 w2[2 * ic + id3 + iv] += -wt2r;
773 w2[2 * ic + 1 + id3 + iv] += -wt2i;
774 w2[2 * ic + id4 + iv] += wt1r;
775 w2[2 * ic + 1 + id4 + iv] += wt1i;
786 double *vcp1,
double *v1,
int ieo)
788 int Nvc2 = 2 * m_Nvc;
789 int Nvcd = m_Nvc *
m_Nd;
790 int Nvcd2 = Nvcd / 2;
799 int isite = m_arg[itask].isite;
800 int isite_cp = m_arg[itask].isite_cpy;
804 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
805 double *w1 = &v1[Nvcd * isite];
806 double *u =
const_cast<Field_G *
>(m_U)->ptr(
807 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir *
m_Nvol));
809 double vt1[m_Nvc], vt2[m_Nvc];
813 for (
int it = 0; it < m_Mt; ++it) {
814 for (
int iz = 0; iz < m_Mz; ++iz) {
815 for (
int ix = 0; ix < m_Nx2; ++ix) {
816 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
817 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
820 int ix1 = Nvc2 * is2;
821 int ix2 = ix1 + m_Nvc;
823 for (
int ic = 0; ic <
m_Nc; ++ic) {
824 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
825 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
826 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
827 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
830 for (
int ic = 0; ic <
m_Nc; ++ic) {
832 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
833 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
834 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
835 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
841 m_fw_send[idir]->start_thread(itask);
846 double *v2,
double *vcp2,
int ieo)
848 int Nvc2 = 2 * m_Nvc;
849 int Nvcd = m_Nvc *
m_Nd;
850 int Nvcd2 = Nvcd / 2;
858 double bc2 = m_boundary2[idir];
860 double wt1r, wt1i, wt2r, wt2i;
862 int isite = m_arg[itask].isite;
863 int isite_cp = m_arg[itask].isite_cpy;
865 double *w2 = &v2[Nvcd * isite];
868 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
870 m_fw_recv[idir]->wait_thread(itask);
873 for (
int it = 0; it < m_Mt; ++it) {
874 for (
int iz = 0; iz < m_Mz; ++iz) {
875 for (
int ix = 0; ix < m_Nx2; ++ix) {
876 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
877 int is2 = ix + m_Nx2 * (iz + m_Mz * it);
879 int ix1 = Nvc2 * is2;
880 int ix2 = ix1 + m_Nvc;
882 for (
int ic = 0; ic <
m_Nc; ++ic) {
884 int ici = 2 * ic + 1;
885 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
886 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
887 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
888 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
889 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
890 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
891 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
892 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
902 double *v2,
double *v1,
int ieo)
904 int Nvcd = m_Nvc *
m_Nd;
913 double vt1[m_Nvc], vt2[m_Nvc];
914 double wt1r, wt1i, wt2r, wt2i;
916 int isite = m_arg[itask].isite;
918 double *w2 = &v2[Nvcd * isite];
919 double *w1 = &v1[Nvcd * isite];
920 double *u =
const_cast<Field_G *
>(m_U)->ptr(
921 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
923 for (
int it = 0; it < m_Mt; ++it) {
924 for (
int iz = 0; iz < m_Mz; ++iz) {
925 for (
int iy = 1; iy < m_Ny; ++iy) {
926 for (
int ix = 0; ix < m_Nx2; ++ix) {
927 int is = ix + m_Nx2 * (iy + m_Ny * (iz + m_Nz * it));
929 int in = Nvcd * (is - m_Nx2);
930 int ig = m_Ndf * (is - m_Nx2);
932 for (
int ic = 0; ic <
m_Nc; ++ic) {
933 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
934 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
935 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
936 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
939 for (
int ic = 0; ic <
m_Nc; ++ic) {
941 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
942 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
943 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
944 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
946 w2[ic2 + id1 + iv] += wt1r;
947 w2[ic2 + 1 + id1 + iv] += wt1i;
948 w2[ic2 + id2 + iv] += wt2r;
949 w2[ic2 + 1 + id2 + iv] += wt2i;
950 w2[ic2 + id3 + iv] += wt2r;
951 w2[ic2 + 1 + id3 + iv] += wt2i;
952 w2[ic2 + id4 + iv] += -wt1r;
953 w2[ic2 + 1 + id4 + iv] += -wt1i;
964 double *vcp1,
double *v1,
int ieo)
966 int Nvc2 = 2 * m_Nvc;
967 int Nvcd = m_Nvc *
m_Nd;
968 int Nvcd2 = Nvcd / 2;
977 int isite = m_arg[itask].isite;
978 int isite_cp = m_arg[itask].isite_cpz;
982 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
983 double *w1 = &v1[Nvcd * isite];
985 double bc2 = m_boundary2[idir];
987 if (m_arg[itask].kz0 == 1) {
988 int Nxy = m_Nx2 * m_Ny;
990 for (
int it = 0; it < m_Mt; ++it) {
991 for (
int ixy = 0; ixy < Nxy; ++ixy) {
992 int is = ixy + Nxy * (iz + m_Nz * it);
993 int is2 = ixy + Nxy * it;
996 int ix1 = Nvc2 * is2;
997 int ix2 = ix1 + m_Nvc;
999 for (
int ic = 0; ic <
m_Nc; ++ic) {
1000 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
1001 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
1002 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
1003 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
1009 m_bw_send[idir]->start_thread(itask);
1014 double *v2,
double *vcp2,
int ieo)
1016 int Nvc2 = 2 * m_Nvc;
1017 int Nvcd = m_Nvc *
m_Nd;
1018 int Nvcd2 = Nvcd / 2;
1022 int id3 = m_Nvc * 2;
1023 int id4 = m_Nvc * 3;
1027 double wt1r, wt1i, wt2r, wt2i;
1029 int isite = m_arg[itask].isite;
1030 int isite_cp = m_arg[itask].isite_cpz;
1032 double *w2 = &v2[Nvcd * isite];
1035 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1036 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1037 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1039 m_bw_recv[idir]->wait_thread(itask);
1041 if (m_arg[itask].kz1 == 1) {
1042 int Nxy = m_Nx2 * m_Ny;
1044 for (
int it = 0; it < m_Mt; ++it) {
1045 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1046 int is = ixy + Nxy * (iz + m_Nz * it);
1047 int is2 = ixy + Nxy * it;
1049 int ig = m_Ndf * is;
1050 int ix1 = Nvc2 * is2;
1051 int ix2 = ix1 + m_Nvc;
1053 for (
int ic = 0; ic <
m_Nc; ++ic) {
1054 int ic2 = ic * m_Nvc;
1056 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1057 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1058 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1059 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1061 w2[2 * ic + id1 + iv] += wt1r;
1062 w2[2 * ic + 1 + id1 + iv] += wt1i;
1063 w2[2 * ic + id2 + iv] += wt2r;
1064 w2[2 * ic + 1 + id2 + iv] += wt2i;
1065 w2[2 * ic + id3 + iv] += wt1i;
1066 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1067 w2[2 * ic + id4 + iv] += -wt2i;
1068 w2[2 * ic + 1 + id4 + iv] += wt2r;
1078 double *v2,
double *v1,
int ieo)
1080 int Nvcd = m_Nvc *
m_Nd;
1084 int id3 = m_Nvc * 2;
1085 int id4 = m_Nvc * 3;
1089 double vt1[m_Nvc], vt2[m_Nvc];
1090 double wt1r, wt1i, wt2r, wt2i;
1092 int isite = m_arg[itask].isite;
1094 double *w2 = &v2[Nvcd * isite];
1095 double *w1 = &v1[Nvcd * isite];
1096 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1097 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1099 int kz1 = m_arg[itask].kz1;
1100 int Nxy = m_Nx2 * m_Ny;
1102 for (
int it = 0; it < m_Mt; ++it) {
1103 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1104 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1105 int is = ixy + Nxy * (iz + m_Nz * it);
1107 int in = Nvcd * (is + Nxy);
1108 int ig = m_Ndf * is;
1110 for (
int ic = 0; ic <
m_Nc; ++ic) {
1111 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1112 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1113 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1114 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1117 for (
int ic = 0; ic <
m_Nc; ++ic) {
1118 int ic2 = ic * m_Nvc;
1120 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1121 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1122 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1123 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1125 w2[2 * ic + id1 + iv] += wt1r;
1126 w2[2 * ic + 1 + id1 + iv] += wt1i;
1127 w2[2 * ic + id2 + iv] += wt2r;
1128 w2[2 * ic + 1 + id2 + iv] += wt2i;
1129 w2[2 * ic + id3 + iv] += wt1i;
1130 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1131 w2[2 * ic + id4 + iv] += -wt2i;
1132 w2[2 * ic + 1 + id4 + iv] += wt2r;
1142 double *vcp1,
double *v1,
int ieo)
1144 int Nvc2 = 2 * m_Nvc;
1145 int Nvcd = m_Nvc *
m_Nd;
1146 int Nvcd2 = Nvcd / 2;
1150 int id3 = m_Nvc * 2;
1151 int id4 = m_Nvc * 3;
1155 int isite = m_arg[itask].isite;
1156 int isite_cp = m_arg[itask].isite_cpz;
1160 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1161 double *w1 = &v1[Nvcd * isite];
1162 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1163 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir *
m_Nvol));
1165 double vt1[m_Nvc], vt2[m_Nvc];
1167 if (m_arg[itask].kz1 == 1) {
1168 int Nxy = m_Nx2 * m_Ny;
1170 for (
int it = 0; it < m_Mt; ++it) {
1171 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1172 int is = ixy + Nxy * (iz + m_Nz * it);
1173 int is2 = ixy + Nxy * it;
1175 int ig = m_Ndf * is;
1176 int ix1 = Nvc2 * is2;
1177 int ix2 = ix1 + m_Nvc;
1179 for (
int ic = 0; ic <
m_Nc; ++ic) {
1180 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1181 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1182 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1183 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1186 for (
int ic = 0; ic <
m_Nc; ++ic) {
1188 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1189 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1190 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1191 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1197 m_fw_send[idir]->start_thread(itask);
1202 double *v2,
double *vcp2,
int ieo)
1204 int Nvc2 = 2 * m_Nvc;
1205 int Nvcd = m_Nvc *
m_Nd;
1206 int Nvcd2 = Nvcd / 2;
1210 int id3 = m_Nvc * 2;
1211 int id4 = m_Nvc * 3;
1214 double bc2 = m_boundary2[idir];
1216 double wt1r, wt1i, wt2r, wt2i;
1218 int isite = m_arg[itask].isite;
1219 int isite_cp = m_arg[itask].isite_cpz;
1221 double *w2 = &v2[Nvcd * isite];
1224 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1226 m_fw_recv[idir]->wait_thread(itask);
1228 if (m_arg[itask].kz0 == 1) {
1229 int Nxy = m_Nx2 * m_Ny;
1232 for (
int it = 0; it < m_Mt; ++it) {
1233 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1234 int is = ixy + Nxy * (iz + m_Nz * it);
1235 int is2 = ixy + Nxy * it;
1237 int ix1 = Nvc2 * is2;
1238 int ix2 = ix1 + m_Nvc;
1240 for (
int ic = 0; ic <
m_Nc; ++ic) {
1242 int ici = 2 * ic + 1;
1243 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1244 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1245 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1246 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1247 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1248 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1249 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1250 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1260 double *v2,
double *v1,
int ieo)
1262 int Nvcd = m_Nvc *
m_Nd;
1266 int id3 = m_Nvc * 2;
1267 int id4 = m_Nvc * 3;
1271 double vt1[m_Nvc], vt2[m_Nvc];
1272 double wt1r, wt1i, wt2r, wt2i;
1274 int isite = m_arg[itask].isite;
1276 double *w2 = &v2[Nvcd * isite];
1277 double *w1 = &v1[Nvcd * isite];
1278 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1279 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
1281 int kz0 = m_arg[itask].kz0;
1282 int Nxy = m_Nx2 * m_Ny;
1284 for (
int it = 0; it < m_Mt; ++it) {
1285 for (
int iz = kz0; iz < m_Mz; ++iz) {
1286 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1287 int is = ixy + Nxy * (iz + m_Nz * it);
1289 int in = Nvcd * (is - Nxy);
1290 int ig = m_Ndf * (is - Nxy);
1292 for (
int ic = 0; ic <
m_Nc; ++ic) {
1293 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1294 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1295 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1296 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1299 for (
int ic = 0; ic <
m_Nc; ++ic) {
1301 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1302 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1303 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1304 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1306 w2[ic2 + id1 + iv] += wt1r;
1307 w2[ic2 + 1 + id1 + iv] += wt1i;
1308 w2[ic2 + id2 + iv] += wt2r;
1309 w2[ic2 + 1 + id2 + iv] += wt2i;
1310 w2[ic2 + id3 + iv] += -wt1i;
1311 w2[ic2 + 1 + id3 + iv] += wt1r;
1312 w2[ic2 + id4 + iv] += wt2i;
1313 w2[ic2 + 1 + id4 + iv] += -wt2r;
1323 double *vcp1,
double *v1,
int ieo)
1325 int Nvc2 = 2 * m_Nvc;
1326 int Nvcd = m_Nvc *
m_Nd;
1327 int Nvcd2 = Nvcd / 2;
1331 int id3 = m_Nvc * 2;
1332 int id4 = m_Nvc * 3;
1336 int isite = m_arg[itask].isite;
1337 int isite_cp = m_arg[itask].isite_cpt;
1341 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1342 double *w1 = &v1[Nvcd * isite];
1344 double bc2 = m_boundary2[idir];
1346 if (m_arg[itask].kt0 == 1) {
1347 int Nxy = m_Nx2 * m_Ny;
1349 for (
int iz = 0; iz < m_Mz; ++iz) {
1350 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1351 int is = ixy + Nxy * (iz + m_Nz * it);
1352 int is2 = ixy + Nxy * iz;
1355 int ix1 = Nvc2 * is2;
1356 int ix2 = ix1 + m_Nvc;
1358 for (
int ic = 0; ic <
m_Nc; ++ic) {
1359 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1360 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1361 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1362 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1368 m_bw_send[idir]->start_thread(itask);
1373 double *v2,
double *vcp2,
int ieo)
1375 int Nvc2 = 2 * m_Nvc;
1376 int Nvcd = m_Nvc *
m_Nd;
1377 int Nvcd2 = Nvcd / 2;
1381 int id3 = m_Nvc * 2;
1382 int id4 = m_Nvc * 3;
1386 double wt1r, wt1i, wt2r, wt2i;
1388 int isite = m_arg[itask].isite;
1389 int isite_cp = m_arg[itask].isite_cpt;
1391 double *w2 = &v2[Nvcd * isite];
1394 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1395 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1396 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1398 m_bw_recv[idir]->wait_thread(itask);
1400 if (m_arg[itask].kt1 == 1) {
1401 int Nxy = m_Nx2 * m_Ny;
1403 for (
int iz = 0; iz < m_Mz; ++iz) {
1404 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1405 int is = ixy + Nxy * (iz + m_Nz * it);
1406 int is2 = ixy + Nxy * iz;
1408 int ig = m_Ndf * is;
1409 int ix1 = Nvc2 * is2;
1410 int ix2 = ix1 + m_Nvc;
1412 for (
int ic = 0; ic <
m_Nc; ++ic) {
1413 int ic2 = ic * m_Nvc;
1415 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1416 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1417 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1418 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1420 w2[2 * ic + id3 + iv] += wt1r;
1421 w2[2 * ic + 1 + id3 + iv] += wt1i;
1422 w2[2 * ic + id4 + iv] += wt2r;
1423 w2[2 * ic + 1 + id4 + iv] += wt2i;
1433 double *v2,
double *v1,
int ieo)
1435 int Nvcd = m_Nvc *
m_Nd;
1439 int id3 = m_Nvc * 2;
1440 int id4 = m_Nvc * 3;
1444 double vt1[m_Nvc], vt2[m_Nvc];
1445 double wt1r, wt1i, wt2r, wt2i;
1447 int isite = m_arg[itask].isite;
1449 double *w2 = &v2[Nvcd * isite];
1450 double *w1 = &v1[Nvcd * isite];
1451 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1452 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
1454 int kt1 = m_arg[itask].kt1;
1455 int Nxy = m_Nx2 * m_Ny;
1456 int Nxyz = Nxy * m_Nz;
1458 for (
int it = 0; it < m_Mt - kt1; ++it) {
1459 for (
int iz = 0; iz < m_Mz; ++iz) {
1460 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1461 int is = ixy + Nxy * (iz + m_Nz * it);
1463 int in = Nvcd * (is + Nxyz);
1464 int ig = m_Ndf * is;
1466 for (
int ic = 0; ic <
m_Nc; ++ic) {
1467 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1468 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1469 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1470 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1473 for (
int ic = 0; ic <
m_Nc; ++ic) {
1474 int ic2 = ic * m_Nvc;
1476 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1477 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1478 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1479 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1481 w2[2 * ic + id3 + iv] += wt1r;
1482 w2[2 * ic + 1 + id3 + iv] += wt1i;
1483 w2[2 * ic + id4 + iv] += wt2r;
1484 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Fragment of a per-task kernel that fills a send buffer and starts the
// transfer. The line with the function name is not part of this excerpt
// (presumably mult_tm1_dirac_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v1 is read; vcp1 is not referenced in the visible
// lines; ieo enters the gauge-field offset below.
1494 double *vcp1,
double *v1,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1496 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v1.
1497 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1498 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (not referenced in the
// visible lines; id1/id2 used below are defined on lines not shown).
1502 int id3 = m_Nvc * 2;
1503 int id4 = m_Nvc * 3;
// First site of this task in the field and in the communication buffer.
1507 int isite = m_arg[itask].isite;
1508 int isite_cp = m_arg[itask].isite_cpt;
// Pointer into the send buffer m_fw_send[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w2", as w2
// is written below -- TODO confirm).
1512 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Task-local view of the input array.
1513 double *w1 = &v1[Nvcd * isite];
// Pointer into the gauge field m_U; uses the (1 - ieo) half. idir is defined
// on a line not shown.
1514 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1515 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir *
m_Nvol));
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time
// constant (compiler extension in C++) -- confirm.
1517 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks flagged with kt1 == 1 pack data.
1519 if (m_arg[itask].kt1 == 1) {
1520 int Nxy = m_Nx2 * m_Ny;
// "it" and "in" used below are defined on lines not shown in this excerpt.
1522 for (
int iz = 0; iz < m_Mz; ++iz) {
1523 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1524 int is = ixy + Nxy * (iz + m_Nz * it);
1525 int is2 = ixy + Nxy * iz;
// ig: link-data offset; ix1/ix2: the two m_Nvc-sized groups of buffer site is2.
1527 int ig = m_Ndf * is;
1528 int ix1 = Nvc2 * is2;
1529 int ix2 = ix1 + m_Nvc;
// Load twice the id1/id2 component groups of the source site.
1531 for (
int ic = 0; ic <
m_Nc; ++ic) {
1532 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1533 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1534 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1535 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
// Store the results of mult_udagv_r / mult_udagv_i (helpers defined
// elsewhere) into the buffer; icr is defined on a line not shown.
1538 for (
int ic = 0; ic <
m_Nc; ++ic) {
1540 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1541 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1542 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1543 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Start the send for this task. Brace lines are missing from the excerpt, so
// whether this call sits inside the if-branch cannot be told from here.
1549 m_fw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that waits for received data and adds it to
// the output. The line with the function name is not part of this excerpt
// (presumably mult_tm2_dirac_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v2 is accumulated into; vcp2 and ieo are not
// referenced in the visible lines.
1554 double *v2,
double *vcp2,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1556 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v2.
1557 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1558 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (not referenced in the
// visible lines; id1/id2 used below are defined on lines not shown).
1562 int id3 = m_Nvc * 2;
1563 int id4 = m_Nvc * 3;
// Factor applied to every received value; idir is defined on a line not shown.
1566 double bc2 = m_boundary2[idir];
// NOTE(review): these four locals are not used in the visible lines.
1568 double wt1r, wt1i, wt2r, wt2i;
// First site of this task in the field and in the communication buffer.
1570 int isite = m_arg[itask].isite;
1571 int isite_cp = m_arg[itask].isite_cpt;
// Task-local view of the output array.
1573 double *w2 = &v2[Nvcd * isite];
// Pointer into the receive buffer m_fw_recv[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w1", as w1
// is read below -- TODO confirm).
1576 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Wait on the receive channel for this task before the buffer is read.
1578 m_fw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt0 == 1 consume received data.
1580 if (m_arg[itask].kt0 == 1) {
1581 int Nxy = m_Nx2 * m_Ny;
// "it" and "iv" used below are defined on lines not shown in this excerpt.
1583 for (
int iz = 0; iz < m_Mz; ++iz) {
1584 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1585 int is = ixy + Nxy * (iz + m_Nz * it);
1586 int is2 = ixy + Nxy * iz;
// The two m_Nvc-sized groups of buffer site is2.
1588 int ix1 = Nvc2 * is2;
1589 int ix2 = ix1 + m_Nvc;
// Add bc2 times the received values to the id1/id2 groups of the output.
// icr is defined on a line not shown; ici is the element following 2 * ic.
1591 for (
int ic = 0; ic <
m_Nc; ++ic) {
1593 int ici = 2 * ic + 1;
1594 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1595 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1596 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1597 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// Fragment of a per-task kernel. The line with the function name is not part
// of this excerpt (presumably mult_tmb_dirac_thread(int itask, ...) -- TODO
// confirm).
// Visible parameters: v2 is accumulated into, v1 is read, ieo enters the
// gauge-field offset below.
1607 double *v2,
double *v1,
int ieo)
// Stride, in doubles, between consecutive sites of v1/v2.
1609 int Nvcd = m_Nvc *
m_Nd;
// Offsets of the third and fourth component groups (not referenced in the
// visible lines; id1/id2 used below are defined on lines not shown).
1613 int id3 = m_Nvc * 2;
1614 int id4 = m_Nvc * 3;
// Scratch vectors for the doubled neighbour components.
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time
// constant (compiler extension in C++) -- confirm.
1618 double vt1[m_Nvc], vt2[m_Nvc];
1619 double wt1r, wt1i, wt2r, wt2i;
// First site handled by this task.
1621 int isite = m_arg[itask].isite;
// Task-local views of the output and input arrays.
1623 double *w2 = &v2[Nvcd * isite];
1624 double *w1 = &v1[Nvcd * isite];
// Pointer into the gauge field m_U; uses the (1 - ieo) half. idir is defined
// on a line not shown.
1625 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1626 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
// kt0 is used as the loop start: when it is 1 the first "it" value of the
// task is skipped here (presumably handled by a boundary kernel).
1628 int kt0 = m_arg[itask].kt0;
1629 int Nxy = m_Nx2 * m_Ny;
// Site-index distance corresponding to it -> it - 1 (subtracted below).
1630 int Nxyz = Nxy * m_Nz;
1632 for (
int it = kt0; it < m_Mt; ++it) {
1633 for (
int iz = 0; iz < m_Mz; ++iz) {
1634 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Task-local site index.
1635 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the source vector and the link data are taken from the site at it - 1.
// (iv, used below, is defined on a line not shown.)
1637 int in = Nvcd * (is - Nxyz);
1638 int ig = m_Ndf * (is - Nxyz);
// Load twice the id1/id2 component groups of the neighbour.
1640 for (
int ic = 0; ic <
m_Nc; ++ic) {
1641 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1642 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1643 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1644 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
// Accumulate the results of mult_udagv_r / mult_udagv_i (helpers defined
// elsewhere) into the id1/id2 groups; ic2 is defined on a line not shown.
1647 for (
int ic = 0; ic <
m_Nc; ++ic) {
1649 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1650 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1651 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1652 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1654 w2[ic2 + id1 + iv] += wt1r;
1655 w2[ic2 + 1 + id1 + iv] += wt1i;
1656 w2[ic2 + id2 + iv] += wt2r;
1657 w2[ic2 + 1 + id2 + iv] += wt2i;
// Fragment of a per-task kernel that fills a send buffer and starts the
// transfer. The line with the function name is not part of this excerpt
// (presumably mult_tp1_chiral_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v1 is read; vcp1 and ieo are not referenced in the
// visible lines.
1667 double *vcp1,
double *v1,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1669 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v1.
1670 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1671 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1675 int id3 = m_Nvc * 2;
1676 int id4 = m_Nvc * 3;
// First site of this task in the field and in the communication buffer.
1680 int isite = m_arg[itask].isite;
1681 int isite_cp = m_arg[itask].isite_cpt;
// Pointer into the send buffer m_bw_send[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w2", as w2
// is written below -- TODO confirm).
1685 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Task-local view of the input array.
1686 double *w1 = &v1[Nvcd * isite];
// Factor applied to every packed value; idir is defined on a line not shown.
1688 double bc2 = m_boundary2[idir];
// Only tasks flagged with kt0 == 1 pack data.
1690 if (m_arg[itask].kt0 == 1) {
1691 int Nxy = m_Nx2 * m_Ny;
// "it" and "in" used below are defined on lines not shown in this excerpt.
1693 for (
int iz = 0; iz < m_Mz; ++iz) {
1694 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1695 int is = ixy + Nxy * (iz + m_Nz * it);
1696 int is2 = ixy + Nxy * iz;
// The two m_Nvc-sized groups of buffer site is2.
1699 int ix1 = Nvc2 * is2;
1700 int ix2 = ix1 + m_Nvc;
// Pack bc2 * (group id1 + group id3) and bc2 * (group id2 + group id4).
1702 for (
int ic = 0; ic <
m_Nc; ++ic) {
1703 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1704 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1705 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1706 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// Start the send for this task. Brace lines are missing from the excerpt, so
// whether this call sits inside the if-branch cannot be told from here.
1712 m_bw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that waits for received data, multiplies it
// with link data and adds the result to the output. The line with the
// function name is not part of this excerpt (presumably
// mult_tp2_chiral_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v2 is accumulated into; vcp2 is not referenced in the
// visible lines; ieo enters the gauge-field offset below.
1717 double *v2,
double *vcp2,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1719 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v2.
1720 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1721 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1725 int id3 = m_Nvc * 2;
1726 int id4 = m_Nvc * 3;
1730 double wt1r, wt1i, wt2r, wt2i;
// First site of this task in the field and in the communication buffer.
1732 int isite = m_arg[itask].isite;
1733 int isite_cp = m_arg[itask].isite_cpt;
// Task-local view of the output array.
1735 double *w2 = &v2[Nvcd * isite];
// Pointer into the receive buffer m_bw_recv[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w1", as w1
// is read below -- TODO confirm).
1738 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Pointer into the gauge field m_U; idir is defined on a line not shown.
1739 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1740 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
// Wait on the receive channel for this task before the buffer is read.
1742 m_bw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt1 == 1 consume received data.
1744 if (m_arg[itask].kt1 == 1) {
1745 int Nxy = m_Nx2 * m_Ny;
// "it" and "iv" used below are defined on lines not shown in this excerpt.
1747 for (
int iz = 0; iz < m_Mz; ++iz) {
1748 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1749 int is = ixy + Nxy * (iz + m_Nz * it);
1750 int is2 = ixy + Nxy * iz;
// ig: link-data offset; ix1/ix2: the two m_Nvc-sized groups of buffer site is2.
1752 int ig = m_Ndf * is;
1753 int ix1 = Nvc2 * is2;
1754 int ix2 = ix1 + m_Nvc;
// Apply mult_uv_r / mult_uv_i (helpers defined elsewhere) to the two received
// groups. The same result is added to the id1 and id3 groups (first group)
// and to the id2 and id4 groups (second group).
1756 for (
int ic = 0; ic <
m_Nc; ++ic) {
1757 int ic2 = ic * m_Nvc;
1759 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1760 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1761 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1762 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1764 w2[2 * ic + id1 + iv] += wt1r;
1765 w2[2 * ic + 1 + id1 + iv] += wt1i;
1766 w2[2 * ic + id2 + iv] += wt2r;
1767 w2[2 * ic + 1 + id2 + iv] += wt2i;
1768 w2[2 * ic + id3 + iv] += wt1r;
1769 w2[2 * ic + 1 + id3 + iv] += wt1i;
1770 w2[2 * ic + id4 + iv] += wt2r;
1771 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Fragment of a per-task kernel. The line with the function name is not part
// of this excerpt (presumably mult_tpb_chiral_thread(int itask, ...) -- TODO
// confirm).
// Visible parameters: v2 is accumulated into, v1 is read, ieo enters the
// gauge-field offset below.
1781 double *v2,
double *v1,
int ieo)
// Stride, in doubles, between consecutive sites of v1/v2.
1783 int Nvcd = m_Nvc *
m_Nd;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1787 int id3 = m_Nvc * 2;
1788 int id4 = m_Nvc * 3;
// Scratch vectors for the combined neighbour components.
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time
// constant (compiler extension in C++) -- confirm.
1792 double vt1[m_Nvc], vt2[m_Nvc];
1793 double wt1r, wt1i, wt2r, wt2i;
// First site handled by this task.
1795 int isite = m_arg[itask].isite;
// Task-local views of the output and input arrays.
1797 double *w2 = &v2[Nvcd * isite];
1798 double *w1 = &v1[Nvcd * isite];
// Pointer into the gauge field m_U; idir is defined on a line not shown.
1799 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1800 m_Ndf * (isite + ieo * m_Nvol / 2 + idir * m_Nvol));
// kt1 shortens the outer loop by one when set to 1, so the last "it" value
// of the task is excluded here (presumably handled by a boundary kernel).
1802 int kt1 = m_arg[itask].kt1;
1803 int Nxy = m_Nx2 * m_Ny;
// Site-index distance corresponding to it -> it + 1.
1804 int Nxyz = Nxy * m_Nz;
1806 for (
int it = 0; it < m_Mt - kt1; ++it) {
1807 for (
int iz = 0; iz < m_Mz; ++iz) {
1808 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Task-local site index.
1809 int is = ixy + Nxy * (iz + m_Nz * it);
// in: offset of the neighbouring site at it + 1; ig: offset of this site's
// link data in u. (iv, used below, is defined on a line not shown.)
1811 int in = Nvcd * (is + Nxyz);
1812 int ig = m_Ndf * is;
// vt1 = group id1 + group id3, vt2 = group id2 + group id4 of the neighbour.
1814 for (
int ic = 0; ic <
m_Nc; ++ic) {
1815 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1816 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1817 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1818 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
// Apply mult_uv_r / mult_uv_i (helpers defined elsewhere). The vt1 result is
// added to groups id1 and id3, the vt2 result to groups id2 and id4.
1821 for (
int ic = 0; ic <
m_Nc; ++ic) {
1822 int ic2 = ic * m_Nvc;
1824 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1825 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1826 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1827 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1829 w2[2 * ic + id1 + iv] += wt1r;
1830 w2[2 * ic + 1 + id1 + iv] += wt1i;
1831 w2[2 * ic + id2 + iv] += wt2r;
1832 w2[2 * ic + 1 + id2 + iv] += wt2i;
1833 w2[2 * ic + id3 + iv] += wt1r;
1834 w2[2 * ic + 1 + id3 + iv] += wt1i;
1835 w2[2 * ic + id4 + iv] += wt2r;
1836 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Fragment of a per-task kernel that fills a send buffer and starts the
// transfer. The line with the function name is not part of this excerpt
// (presumably mult_tm1_chiral_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v1 is read; vcp1 is not referenced in the visible
// lines; ieo enters the gauge-field offset below.
1846 double *vcp1,
double *v1,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1848 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v1.
1849 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1850 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1854 int id3 = m_Nvc * 2;
1855 int id4 = m_Nvc * 3;
// First site of this task in the field and in the communication buffer.
1859 int isite = m_arg[itask].isite;
1860 int isite_cp = m_arg[itask].isite_cpt;
// Pointer into the send buffer m_fw_send[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w2", as w2
// is written below -- TODO confirm).
1864 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Task-local view of the input array.
1865 double *w1 = &v1[Nvcd * isite];
// Pointer into the gauge field m_U; uses the (1 - ieo) half. idir is defined
// on a line not shown.
1866 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1867 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir *
m_Nvol));
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time
// constant (compiler extension in C++) -- confirm.
1869 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks flagged with kt1 == 1 pack data.
1871 if (m_arg[itask].kt1 == 1) {
1872 int Nxy = m_Nx2 * m_Ny;
// "it" and "in" used below are defined on lines not shown in this excerpt.
1874 for (
int iz = 0; iz < m_Mz; ++iz) {
1875 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1876 int is = ixy + Nxy * (iz + m_Nz * it);
1877 int is2 = ixy + Nxy * iz;
// ig: link-data offset; ix1/ix2: the two m_Nvc-sized groups of buffer site is2.
1879 int ig = m_Ndf * is;
1880 int ix1 = Nvc2 * is2;
1881 int ix2 = ix1 + m_Nvc;
// vt1 = group id1 - group id3, vt2 = group id2 - group id4 of the source site.
1883 for (
int ic = 0; ic <
m_Nc; ++ic) {
1884 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1885 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1886 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1887 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// Store the results of mult_udagv_r / mult_udagv_i (helpers defined
// elsewhere) into the buffer; icr is defined on a line not shown.
1890 for (
int ic = 0; ic <
m_Nc; ++ic) {
1892 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1893 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1894 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1895 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Start the send for this task. Brace lines are missing from the excerpt, so
// whether this call sits inside the if-branch cannot be told from here.
1901 m_fw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that waits for received data and combines it
// into the output. The line with the function name is not part of this
// excerpt (presumably mult_tm2_chiral_thread(int itask, ...) -- TODO confirm).
// Visible parameters: v2 is updated; vcp2 and ieo are not referenced in the
// visible lines.
1906 double *v2,
double *vcp2,
int ieo)
// Number of doubles stored per site in the communication buffer (see ix1).
1908 int Nvc2 = 2 * m_Nvc;
// Stride, in doubles, between consecutive sites of v2.
1909 int Nvcd = m_Nvc *
m_Nd;
// Half of the per-site stride; used for the byte offset into the buffer.
1910 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1914 int id3 = m_Nvc * 2;
1915 int id4 = m_Nvc * 3;
// Factor applied to every received value; idir is defined on a line not shown.
1918 double bc2 = m_boundary2[idir];
// NOTE(review): these four locals are not used in the visible lines.
1920 double wt1r, wt1i, wt2r, wt2i;
// First site of this task in the field and in the communication buffer.
1922 int isite = m_arg[itask].isite;
1923 int isite_cp = m_arg[itask].isite_cpt;
// Task-local view of the output array.
1925 double *w2 = &v2[Nvcd * isite];
// Pointer into the receive buffer m_fw_recv[idir] at this task's byte offset.
// The left-hand side is on a line not shown (presumably "double *w1", as w1
// is read below -- TODO confirm).
1928 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Wait on the receive channel for this task before the buffer is read.
1930 m_fw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt0 == 1 consume received data.
1932 if (m_arg[itask].kt0 == 1) {
1933 int Nxy = m_Nx2 * m_Ny;
// "it" and "iv" used below are defined on lines not shown in this excerpt.
1935 for (
int iz = 0; iz < m_Mz; ++iz) {
1936 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: task-local site index; is2: index of the site within its (iz, ixy)
// slice, used to address the buffer.
1937 int is = ixy + Nxy * (iz + m_Nz * it);
1938 int is2 = ixy + Nxy * iz;
// The two m_Nvc-sized groups of buffer site is2.
1940 int ix1 = Nvc2 * is2;
1941 int ix2 = ix1 + m_Nvc;
// bc2 times the received values is added to groups id1/id2 and subtracted
// from groups id3/id4. icr is defined on a line not shown.
1943 for (
int ic = 0; ic <
m_Nc; ++ic) {
1945 int ici = 2 * ic + 1;
1946 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1947 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1948 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1949 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1950 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1951 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1952 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1953 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// Fragment of a per-task kernel. The line with the function name is not part
// of this excerpt (presumably mult_tmb_chiral_thread(int itask, ...) -- TODO
// confirm).
// Visible parameters: v2 is updated, v1 is read, ieo enters the gauge-field
// offset below.
1963 double *v2,
double *v1,
int ieo)
// Stride, in doubles, between consecutive sites of v1/v2.
1965 int Nvcd = m_Nvc *
m_Nd;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
1969 int id3 = m_Nvc * 2;
1970 int id4 = m_Nvc * 3;
// Scratch vectors for the combined neighbour components.
// NOTE(review): variable-length arrays unless m_Nvc is a compile-time
// constant (compiler extension in C++) -- confirm.
1974 double vt1[m_Nvc], vt2[m_Nvc];
1975 double wt1r, wt1i, wt2r, wt2i;
// First site handled by this task.
1977 int isite = m_arg[itask].isite;
// Task-local views of the output and input arrays.
1979 double *w2 = &v2[Nvcd * isite];
1980 double *w1 = &v1[Nvcd * isite];
// Pointer into the gauge field m_U; uses the (1 - ieo) half. idir is defined
// on a line not shown.
1981 double *u =
const_cast<Field_G *
>(m_U)->ptr(
1982 m_Ndf * (isite + (1 - ieo) * m_Nvol / 2 + idir * m_Nvol));
// kt0 is used as the loop start: when it is 1 the first "it" value of the
// task is skipped here (presumably handled by a boundary kernel).
1984 int kt0 = m_arg[itask].kt0;
1985 int Nxy = m_Nx2 * m_Ny;
// Site-index distance corresponding to it -> it - 1 (subtracted below).
1986 int Nxyz = Nxy * m_Nz;
1988 for (
int it = kt0; it < m_Mt; ++it) {
1989 for (
int iz = 0; iz < m_Mz; ++iz) {
1990 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Task-local site index.
1991 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the source vector and the link data are taken from the site at it - 1.
// (iv, used below, is defined on a line not shown.)
1993 int in = Nvcd * (is - Nxyz);
1994 int ig = m_Ndf * (is - Nxyz);
// vt1 = group id1 - group id3, vt2 = group id2 - group id4 of the neighbour.
1996 for (
int ic = 0; ic <
m_Nc; ++ic) {
1997 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1998 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1999 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
2000 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// Apply mult_udagv_r / mult_udagv_i (helpers defined elsewhere). Results are
// added to groups id1/id2 and subtracted from groups id3/id4. ic2 is defined
// on a line not shown.
2003 for (
int ic = 0; ic <
m_Nc; ++ic) {
2005 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
2006 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
2007 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
2008 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
2010 w2[ic2 + id1 + iv] += wt1r;
2011 w2[ic2 + 1 + id1 + iv] += wt1i;
2012 w2[ic2 + id2 + iv] += wt2r;
2013 w2[ic2 + 1 + id2 + iv] += wt2i;
2014 w2[ic2 + id3 + iv] -= wt1r;
2015 w2[ic2 + 1 + id3 + iv] -= wt1i;
2016 w2[ic2 + id4 + iv] -= wt2r;
2017 w2[ic2 + 1 + id4 + iv] -= wt2i;
// Fragment of a per-task kernel that writes v2 from v1 with the upper and
// lower pairs of component groups exchanged. The line with the function name
// is not part of this excerpt (presumably gm5_dirac_thread(int itask, ...)
// -- TODO confirm).
2027 double *v2,
double *v1)
// Stride, in doubles, between consecutive sites of v1/v2.
2029 int Nvcd = m_Nvc *
m_Nd;
2030 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
2034 int id3 = m_Nvc * 2;
2035 int id4 = m_Nvc * 3;
// Task-local views of the output and input arrays.
2037 int isite = m_arg[itask].isite;
2038 double *w2 = &v2[Nvcd * isite];
2039 double *w1 = &v1[Nvcd * isite];
// Visit every site of the task.
2041 for (
int it = 0; it < m_Mt; ++it) {
2042 for (
int iz = 0; iz < m_Mz; ++iz) {
2043 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of the site's first component.
2044 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Output groups 1,2,3,4 take input groups 3,4,1,2 respectively.
2045 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2046 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2047 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2048 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2049 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// Fragment of a per-task kernel that writes v2 from v1, copying the first two
// component groups and negating the last two. The line with the function
// name is not part of this excerpt (presumably
// gm5_chiral_thread(int itask, ...) -- TODO confirm).
2059 double *v2,
double *v1)
// Stride, in doubles, between consecutive sites of v1/v2.
2061 int Nvcd = m_Nvc *
m_Nd;
2062 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
2066 int id3 = m_Nvc * 2;
2067 int id4 = m_Nvc * 3;
// Task-local views of the output and input arrays.
2069 int isite = m_arg[itask].isite;
2070 double *w2 = &v2[Nvcd * isite];
2071 double *w1 = &v1[Nvcd * isite];
// Visit every site of the task.
2073 for (
int it = 0; it < m_Mt; ++it) {
2074 for (
int iz = 0; iz < m_Mz; ++iz) {
2075 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of the site's first component.
2076 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Groups id1/id2 are copied unchanged; groups id3/id4 change sign.
2077 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2078 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2079 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2080 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2081 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
// Fragment of a per-task kernel that exchanges, in place, the upper and lower
// pairs of component groups of v1. The signature lines are not part of this
// excerpt (presumably an in-place overload of gm5_dirac_thread -- TODO
// confirm); itask and v1 are used below.
// Stride, in doubles, between consecutive sites of v1.
2093 int Nvcd = m_Nvc *
m_Nd;
2094 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth component groups (id1/id2 are defined on
// lines not shown).
2098 int id3 = m_Nvc * 2;
2099 int id4 = m_Nvc * 3;
// Task-local view of the array being modified.
2101 int isite = m_arg[itask].isite;
2102 double *w1 = &v1[Nvcd * isite];
// Visit every site of the task.
2104 for (
int it = 0; it < m_Mt; ++it) {
2105 for (
int iz = 0; iz < m_Mz; ++iz) {
2106 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of the site's first component.
2107 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Swap group id1 with id3 and group id2 with id4; the old id1/id2 values are
// saved in wt1/wt2 before they are overwritten.
2108 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2109 double wt1 = w1[ivc + id1 + iv];
2110 double wt2 = w1[ivc + id2 + iv];
2111 w1[ivc + id1 + iv] = w1[ivc + id3 + iv];
2112 w1[ivc + id2 + iv] = w1[ivc + id4 + iv];
2113 w1[ivc + id3 + iv] = wt1;
2114 w1[ivc + id4 + iv] = wt2;
// Fragment of a per-task kernel that negates, in place, the third and fourth
// component groups of v1. The signature lines are not part of this excerpt
// (presumably an in-place overload of gm5_chiral_thread -- TODO confirm);
// itask and v1 are used below.
// Stride, in doubles, between consecutive sites of v1.
2126 int Nvcd = m_Nvc *
m_Nd;
2127 int Nxy = m_Nx2 * m_Ny;
// Offsets of the third and fourth component groups.
2131 int id3 = m_Nvc * 2;
2132 int id4 = m_Nvc * 3;
// Task-local view of the array being modified.
2134 int isite = m_arg[itask].isite;
2135 double *w1 = &v1[Nvcd * isite];
// Visit every site of the task.
2137 for (
int it = 0; it < m_Mt; ++it) {
2138 for (
int iz = 0; iz < m_Mz; ++iz) {
2139 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of the site's first component.
2140 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Flip the sign of groups id3 and id4; the first two groups are untouched.
2141 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2142 w1[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2143 w1[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_tm2_dirac_thread(int, double *, double *, int)
const Field_F Meo(const Field_F &, const int ieo)
void mult_tp1_dirac_thread(int, double *, double *, int)
void mult_ym1_thread(int, double *, double *, int)
void mult_xp1_thread(int, double *, double *, int)
void general(const char *format,...)
void mult_tm2_chiral_thread(int, double *, double *, int)
std::valarray< Channel * > m_bw_recv
void mult_ymb_thread(int, double *, double *, int)
void clear_thread(int, double *)
std::valarray< Channel * > m_fw_recv
valarray< mult_arg > m_arg
void mult_tmb_dirac_thread(int, double *, double *, int)
void mult_zp1_thread(int, double *, double *, int)
void gm5_dirac_thread(int, double *, double *)
void mult_xp2_thread(int, double *, double *, int)
void mult_ypb_thread(int, double *, double *, int)
void mult_tm1_dirac_thread(int, double *, double *, int)
void mult_zm2_thread(int, double *, double *, int)
std::valarray< Channel * > m_bw_send
void mult_tmb_chiral_thread(int, double *, double *, int)
void mult_xm2_thread(int, double *, double *, int)
void mult_ym2_thread(int, double *, double *, int)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_xm1_thread(int, double *, double *, int)
void mult_tp1_chiral_thread(int, double *, double *, int)
std::valarray< Channel * > m_fw_send
void mult_tp2_dirac_thread(int, double *, double *, int)
void mult_tp2_chiral_thread(int, double *, double *, int)
void mult_tpb_chiral_thread(int, double *, double *, int)
void gm5_chiral_thread(int, double *, double *)
void mult_zp2_thread(int, double *, double *, int)
void mult_xmb_thread(int, double *, double *, int)
void mult_zmb_thread(int, double *, double *, int)
Bridge::VerboseLevel m_vl
void mult_yp1_thread(int, double *, double *, int)
void mult_zm1_thread(int, double *, double *, int)
void mult_zpb_thread(int, double *, double *, int)
void mult_yp2_thread(int, double *, double *, int)
void scal_thread(int, double *, double)
void mult_xpb_thread(int, double *, double *, int)
void mult_tm1_chiral_thread(int, double *, double *, int)
void mult_tpb_dirac_thread(int, double *, double *, int)