28 #if defined USE_GROUP_SU3
29 #include "fopr_Wilson_impl_SU3.inc"
30 #elif defined USE_GROUP_SU2
31 #include "fopr_Wilson_impl_SU2.inc"
32 #elif defined USE_GROUP_SU_N
33 #include "fopr_Wilson_impl_SU_N.inc"
// Per-task setup: visits every (ith_t, ith_z) pair, fills that task's entry in
// m_arg, then fills per-task destination/offset/size tables in bytes.
// NOTE(review): the integer leading some lines looks like an original line number
// left by a lossy extraction; statements on the skipped lines (function head,
// closing braces, uses of the tables) are not visible here.
85 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
86 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
// Flattened task index; ith_z is the fastest-running index.
87 int itask = ith_z + m_Ntask_z * ith_t;
// Flag tasks that own the first (k?0) or last (k?1) slab along t and z.
95 if (ith_t == 0)
m_arg[itask].kt0 = 1;
96 if (ith_z == 0)
m_arg[itask].kz0 = 1;
97 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
98 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
// Site offsets into the z and t buffers: the z one depends only on ith_t,
// the t one only on ith_z.
102 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy;
103 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy;
// Doubles per site in the buffers; evaluates to Nc * Nd (the factors 2 and /2 cancel).
110 int Nvcd2 = 2 * Nc * Nd / 2;
// One entry per task. All offsets/sizes below are scaled by sizeof(double), i.e. bytes.
112 std::vector<int> destid(
m_Ntask);
113 std::vector<int> offset(
m_Ntask);
114 std::vector<int> datasize(
m_Ntask);
115 std::vector<int> offset_up(
m_Ntask);
116 std::vector<int> offset_lw(
m_Ntask);
117 std::vector<int> datasize_up(
m_Ntask);
118 std::vector<int> datasize_lw(
m_Ntask);
// First table pass: each task targets itself; size covers m_Mz*m_Mt*m_Ny sites.
// NOTE(review): presumably the x-direction boundary; itask and isite_cp are
// defined on lines not shown.
121 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
122 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
125 destid[itask] = itask;
126 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
127 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Ny;
// Second pass: same layout, size covers m_Mz*m_Mt*m_Nx sites.
// NOTE(review): presumably the y-direction boundary — confirm.
136 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
137 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
140 destid[itask] = itask;
141 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
142 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Nx;
// Third pass: separate "up"/"lw" offsets and sizes, indexed by ith_t, with
// sizes of m_Mt*m_Nx*m_Ny sites. All four are reset to zero first.
151 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
152 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
153 int itask = ith_z + m_Ntask_z * ith_t;
155 offset_up[itask] = 0;
156 offset_lw[itask] = 0;
157 datasize_up[itask] = 0;
158 datasize_lw[itask] = 0;
// Destination is the last-z task of the same ith_t row.
// NOTE(review): any condition guarding the next three assignments is on a
// line not shown.
160 destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
161 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
162 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
// Only the last task along z gets a non-zero "up" region.
164 if (ith_z == m_Ntask_z - 1) {
166 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
167 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
// Fourth pass: same scheme with the roles of z and t swapped; offsets are
// indexed by ith_z and sizes cover m_Mz*m_Nx*m_Ny sites.
177 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
178 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
179 int itask = ith_z + m_Ntask_z * ith_t;
181 offset_up[itask] = 0;
182 offset_lw[itask] = 0;
183 datasize_up[itask] = 0;
184 datasize_lw[itask] = 0;
// Destination is the last-t task of the same ith_z column.
// NOTE(review): any guarding condition is on a line not shown.
186 destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
187 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
188 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
// The last task along t targets the first-t task with the same ith_z.
190 if (ith_t == m_Ntask_t - 1) {
191 destid[itask] = ith_z;
192 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
193 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
// Per-task in-place update of v2: w2[iv] = fac * w2[iv] + w1[iv] over the
// task's m_Mt x m_Mz slab.
//   itask : index into m_arg selecting the task's starting site
//   v2    : field updated in place
//   fac   : scale applied to the existing v2 value
//   v1    : field added after scaling
// NOTE(review): the function name and return type are on a line not shown
// (leading integers look like extracted original line numbers).
206 int itask,
double *v2,
double fac,
const double *v1)
// Doubles per site, and doubles per full xy-plane.
208 int Nvcd = m_Nvc * m_Nd;
209 int Nvxy = Nvcd * m_Nx * m_Ny;
211 int isite = m_arg[itask].isite;
// Shift both fields to the task's first site so the loops can start at zero.
213 double *w2 = &v2[Nvcd * isite];
214 const double *w1 = &v1[Nvcd * isite];
216 for (
int it = 0; it < m_Mt; ++it) {
217 for (
int iz = 0; iz < m_Mz; ++iz) {
218 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
// The t stride uses the full extent m_Nz, while iz only covers the task's m_Mz slices.
219 int iv = ivxy + Nvxy * (iz + m_Nz * it);
220 w2[iv] = fac * w2[iv] + w1[iv];
// Per-task traversal of v2 over the task's m_Mt x m_Mz slab, one double at a time.
// NOTE(review): the function head and the statement that uses iv are on lines
// not shown; presumably this stores a constant into w2[iv] — confirm.
231 int Nvcd = m_Nvc * m_Nd;
232 int Nvxy = Nvcd * m_Nx * m_Ny;
234 int isite = m_arg[itask].isite;
// Shift to the task's first site.
235 double *w2 = &v2[Nvcd * isite];
237 for (
int it = 0; it < m_Mt; ++it) {
238 for (
int iz = 0; iz < m_Mz; ++iz) {
239 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
// The t stride uses the full extent m_Nz; iz spans only the task's m_Mz slices.
240 int iv = ivxy + Nvxy * (iz + m_Nz * it);
// Packs two combined Nvc-length vectors per (it, iz, iy) site from v1 into the
// m_bw_send[idir] buffer, scaled by the boundary factor bc2, then calls
// start_thread(itask) on that buffer.
// NOTE(review): the function name, idir, ix, id1..id4, in, and the declaration
// receiving the buffer pointer (presumably w2) are on lines not shown.
// vcp1 is not referenced in the visible lines.
250 int itask,
double *vcp1,
const double *v1)
252 int Nvc2 = 2 * m_Nvc;
253 int Nvcd = m_Nvc * m_Nd;
254 int Nvcd2 = Nvcd / 2;
262 double bc2 = m_boundary2[idir];
264 int isite = m_arg[itask].isite;
265 int isite_cp = m_arg[itask].isite_cpx;
// Byte offset of this task's region inside the send buffer.
269 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
270 const double *w1 = &v1[Nvcd * isite];
274 for (
int it = 0; it < m_Mt; ++it) {
275 for (
int iz = 0; iz < m_Mz; ++iz) {
276 for (
int iy = 0; iy < m_Ny; ++iy) {
// is: site index in the field (t stride uses m_Nz);
// is2: compact index in the buffer (t stride uses m_Mz).
277 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
278 int is2 = iy + m_Ny * (iz + m_Mz * it);
// Two consecutive Nvc-length slots per buffer site.
280 int ix1 = Nvc2 * is2;
281 int ix2 = ix1 + m_Nvc;
283 for (
int ic = 0; ic < m_Nc; ++ic) {
// Even/odd entries are treated as real/imaginary parts: block id1 is paired
// with id4 and id2 with id3, mixing real with imaginary.
284 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
285 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
286 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
287 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
293 m_bw_send[idir]->start_thread(itask);
// Waits on m_bw_recv[idir] for this task, multiplies the two received
// Nvc-length vectors per site by the link data u (mult_uv_r / mult_uv_i), and
// accumulates the result into four blocks of v2.
// NOTE(review): the function name, idir, ix, id1..id4, iv, ig, and the
// declaration receiving the buffer pointer (presumably w1) are on lines not
// shown. vcp2 is not referenced in the visible lines.
299 int itask,
double *v2,
const double *vcp2)
301 int Nvc2 = 2 * m_Nvc;
302 int Nvcd = m_Nvc * m_Nd;
303 int Nvcd2 = Nvcd / 2;
312 double wt1r, wt1i, wt2r, wt2i;
314 int isite = m_arg[itask].isite;
315 int isite_cp = m_arg[itask].isite_cpx;
317 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
320 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Link data for direction idir, starting at the task's first site.
321 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// The wait call precedes every read of the received data below.
323 m_bw_recv[idir]->wait_thread(itask);
326 for (
int it = 0; it < m_Mt; ++it) {
327 for (
int iz = 0; iz < m_Mz; ++iz) {
328 for (
int iy = 0; iy < m_Ny; ++iy) {
// is: field site index; is2: compact buffer index (t stride m_Mz).
329 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
330 int is2 = iy + m_Ny * (iz + m_Mz * it);
333 int ix1 = Nvc2 * is2;
334 int ix2 = ix1 + m_Nvc;
336 for (
int ic = 0; ic < m_Nc; ++ic) {
// Offset of row ic in the link data.
337 int ic2 = ic * m_Nvc;
339 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
340 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
341 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
342 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// Blocks id1/id2 take the products directly; id3/id4 take them with real
// and imaginary parts swapped and one sign flipped.
344 w2[2 * ic + id1 + iv] += wt1r;
345 w2[2 * ic + 1 + id1 + iv] += wt1i;
346 w2[2 * ic + id2 + iv] += wt2r;
347 w2[2 * ic + 1 + id2 + iv] += wt2i;
348 w2[2 * ic + id3 + iv] += wt2i;
349 w2[2 * ic + 1 + id3 + iv] += -wt2r;
350 w2[2 * ic + id4 + iv] += wt1i;
351 w2[2 * ic + 1 + id4 + iv] += -wt1r;
// Interior counterpart of the pack/unpack pair: for every site with
// ix < m_Nx - 1 it reads the neighbour at is + 1, forms two combined vectors,
// multiplies them by the link data u and accumulates into four blocks of v2.
// NOTE(review): the function name, idir, id1..id4, iv and ig are on lines not shown.
361 int itask,
double *v2,
const double *v1)
363 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
372 double vt1[m_Nvc], vt2[m_Nvc];
373 double wt1r, wt1i, wt2r, wt2i;
375 int isite = m_arg[itask].isite;
377 double *w2 = &v2[Nvcd * isite];
378 const double *w1 = &v1[Nvcd * isite];
379 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
381 for (
int it = 0; it < m_Mt; ++it) {
382 for (
int iz = 0; iz < m_Mz; ++iz) {
383 for (
int iy = 0; iy < m_Ny; ++iy) {
// The last x site is excluded, so is + 1 stays within the same x-row.
384 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
385 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Offset of the neighbouring site one step up in x.
387 int in = Nvcd * (is + 1);
390 for (
int ic = 0; ic < m_Nc; ++ic) {
// Same combination as the send-side packing, without the boundary factor.
391 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
392 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
393 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
394 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
397 for (
int ic = 0; ic < m_Nc; ++ic) {
398 int ic2 = ic * m_Nvc;
400 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
401 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
402 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
403 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// Same accumulation pattern as the receive-side routine for this direction.
405 w2[2 * ic + id1 + iv] += wt1r;
406 w2[2 * ic + 1 + id1 + iv] += wt1i;
407 w2[2 * ic + id2 + iv] += wt2r;
408 w2[2 * ic + 1 + id2 + iv] += wt2i;
409 w2[2 * ic + id3 + iv] += wt2i;
410 w2[2 * ic + 1 + id3 + iv] += -wt2r;
411 w2[2 * ic + id4 + iv] += wt1i;
412 w2[2 * ic + 1 + id4 + iv] += -wt1r;
// Builds two combined vectors per (it, iz, iy) site from v1, multiplies them
// with mult_udagv_r / mult_udagv_i against the link data u, stores the result
// in the m_fw_send[idir] buffer, then calls start_thread(itask) on it.
// NOTE(review): the function name, idir, ix, id1..id4, in, ig, icr, and the
// declaration receiving the buffer pointer (presumably w2) are on lines not
// shown. vcp1 is not referenced in the visible lines.
423 int itask,
double *vcp1,
const double *v1)
425 int Nvc2 = 2 * m_Nvc;
426 int Nvcd = m_Nvc * m_Nd;
427 int Nvcd2 = Nvcd / 2;
436 int isite = m_arg[itask].isite;
437 int isite_cp = m_arg[itask].isite_cpx;
// Byte offset of this task's region inside the send buffer.
441 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
442 const double *w1 = &v1[Nvcd * isite];
443 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
445 double vt1[m_Nvc], vt2[m_Nvc];
449 for (
int it = 0; it < m_Mt; ++it) {
450 for (
int iz = 0; iz < m_Mz; ++iz) {
451 for (
int iy = 0; iy < m_Ny; ++iy) {
// is: field site index; is2: compact buffer index (t stride m_Mz).
452 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
453 int is2 = iy + m_Ny * (iz + m_Mz * it);
456 int ix1 = Nvc2 * is2;
457 int ix2 = ix1 + m_Nvc;
459 for (
int ic = 0; ic < m_Nc; ++ic) {
// Opposite relative signs to the "plus" direction packing.
460 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
461 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
462 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
463 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
466 for (
int ic = 0; ic < m_Nc; ++ic) {
// The link multiplication happens before sending, so the buffer holds the products.
468 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
469 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
470 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
471 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
477 m_fw_send[idir]->start_thread(itask);
// Waits on m_fw_recv[idir] for this task, then accumulates the received
// vectors, scaled by bc2, into four blocks of v2. No link multiplication is
// done in the visible lines.
// NOTE(review): the function name, idir, ix, id1..id4, iv, icr, and the
// declaration receiving the buffer pointer (presumably w1) are on lines not
// shown. vcp2 and wt1r..wt2i are not referenced in the visible lines.
483 int itask,
double *v2,
const double *vcp2)
485 int Nvc2 = 2 * m_Nvc;
486 int Nvcd = m_Nvc * m_Nd;
487 int Nvcd2 = Nvcd / 2;
495 double bc2 = m_boundary2[idir];
497 double wt1r, wt1i, wt2r, wt2i;
499 int isite = m_arg[itask].isite;
500 int isite_cp = m_arg[itask].isite_cpx;
502 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
505 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
507 m_fw_recv[idir]->wait_thread(itask);
510 for (
int it = 0; it < m_Mt; ++it) {
511 for (
int iz = 0; iz < m_Mz; ++iz) {
512 for (
int iy = 0; iy < m_Ny; ++iy) {
// is: field site index; is2: compact buffer index (t stride m_Mz).
513 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
514 int is2 = iy + m_Ny * (iz + m_Mz * it);
516 int ix1 = Nvc2 * is2;
517 int ix2 = ix1 + m_Nvc;
519 for (
int ic = 0; ic < m_Nc; ++ic) {
// ici is the odd (imaginary) partner of icr, which is defined on a line not shown.
521 int ici = 2 * ic + 1;
522 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
523 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
524 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
525 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// Blocks id3/id4 receive the swapped real/imaginary parts with opposite signs.
526 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
527 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
528 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
529 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
// Interior routine for the "minus" x step: for every site with ix >= 1 it
// reads the neighbour at is - 1, forms two combined vectors, multiplies them
// with mult_udagv_r / mult_udagv_i using the link at that neighbour, and
// accumulates into four blocks of v2.
// NOTE(review): the function name, idir, id1..id4, iv and ic2 are on lines not shown.
539 int itask,
double *v2,
const double *v1)
541 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
550 double vt1[m_Nvc], vt2[m_Nvc];
551 double wt1r, wt1i, wt2r, wt2i;
553 int isite = m_arg[itask].isite;
555 double *w2 = &v2[Nvcd * isite];
556 const double *w1 = &v1[Nvcd * isite];
557 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
559 for (
int it = 0; it < m_Mt; ++it) {
560 for (
int iz = 0; iz < m_Mz; ++iz) {
561 for (
int iy = 0; iy < m_Ny; ++iy) {
// The first x site is excluded, so is - 1 stays within the same x-row.
562 for (
int ix = 1; ix < m_Nx; ++ix) {
563 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Both the field and the link are read at the neighbour one step down in x.
565 int in = Nvcd * (is - 1);
566 int ig = m_Ndf * (is - 1);
568 for (
int ic = 0; ic < m_Nc; ++ic) {
569 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
570 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
571 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
572 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
575 for (
int ic = 0; ic < m_Nc; ++ic) {
578 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
579 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
580 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
581 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Signs on id3/id4 are opposite to the "plus" direction interior routine.
583 w2[2 * ic + id1 + iv] += wt1r;
584 w2[2 * ic + 1 + id1 + iv] += wt1i;
585 w2[2 * ic + id2 + iv] += wt2r;
586 w2[2 * ic + 1 + id2 + iv] += wt2i;
587 w2[2 * ic + id3 + iv] += -wt2i;
588 w2[2 * ic + 1 + id3 + iv] += +wt2r;
589 w2[2 * ic + id4 + iv] += -wt1i;
590 w2[2 * ic + 1 + id4 + iv] += +wt1r;
// Packs two combined Nvc-length vectors per (it, iz, ix) site from v1 into the
// m_bw_send[idir] buffer, scaled by bc2, then calls start_thread(itask).
// Unlike the x routines, the combinations here pair real with real and
// imaginary with imaginary.
// NOTE(review): the function name, idir, iy, id1..id4, in, and the declaration
// receiving the buffer pointer (presumably w2) are on lines not shown.
// vcp1 is not referenced in the visible lines.
601 int itask,
double *vcp1,
const double *v1)
603 int Nvc2 = 2 * m_Nvc;
604 int Nvcd = m_Nvc * m_Nd;
605 int Nvcd2 = Nvcd / 2;
612 int isite = m_arg[itask].isite;
613 int isite_cp = m_arg[itask].isite_cpy;
616 double bc2 = m_boundary2[idir];
// Byte offset of this task's region inside the send buffer.
620 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
621 const double *w1 = &v1[Nvcd * isite];
625 for (
int it = 0; it < m_Mt; ++it) {
626 for (
int iz = 0; iz < m_Mz; ++iz) {
627 for (
int ix = 0; ix < m_Nx; ++ix) {
// is: field site index; is2: compact buffer index over (ix, iz, it).
628 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
629 int is2 = ix + m_Nx * (iz + m_Mz * it);
631 int ix1 = Nvc2 * is2;
632 int ix2 = ix1 + m_Nvc;
634 for (
int ic = 0; ic < m_Nc; ++ic) {
635 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
636 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
637 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
638 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
644 m_bw_send[idir]->start_thread(itask);
// Waits on m_bw_recv[idir] for this task, multiplies the two received vectors
// per (it, iz, ix) site by the link data u, and accumulates into four blocks of v2.
// NOTE(review): the function name, idir, iy, id1..id4, iv, ig, and the
// declaration receiving the buffer pointer (presumably w1) are on lines not
// shown. vcp2 is not referenced in the visible lines.
650 int itask,
double *v2,
const double *vcp2)
652 int Nvc2 = 2 * m_Nvc;
653 int Nvcd = m_Nvc * m_Nd;
654 int Nvcd2 = Nvcd / 2;
663 double wt1r, wt1i, wt2r, wt2i;
665 int isite = m_arg[itask].isite;
666 int isite_cp = m_arg[itask].isite_cpy;
668 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
671 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
672 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
674 m_bw_recv[idir]->wait_thread(itask);
677 for (
int it = 0; it < m_Mt; ++it) {
678 for (
int iz = 0; iz < m_Mz; ++iz) {
679 for (
int ix = 0; ix < m_Nx; ++ix) {
// is: field site index; is2: compact buffer index over (ix, iz, it).
680 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
681 int is2 = ix + m_Nx * (iz + m_Mz * it);
684 int ix1 = Nvc2 * is2;
685 int ix2 = ix1 + m_Nvc;
687 for (
int ic = 0; ic < m_Nc; ++ic) {
688 int ic2 = ic * m_Nvc;
690 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
691 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
692 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
693 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// id3 takes the second product negated; id4 takes the first product unchanged.
695 w2[2 * ic + id1 + iv] += wt1r;
696 w2[2 * ic + 1 + id1 + iv] += wt1i;
697 w2[2 * ic + id2 + iv] += wt2r;
698 w2[2 * ic + 1 + id2 + iv] += wt2i;
699 w2[2 * ic + id3 + iv] += -wt2r;
700 w2[2 * ic + 1 + id3 + iv] += -wt2i;
701 w2[2 * ic + id4 + iv] += wt1r;
702 w2[2 * ic + 1 + id4 + iv] += wt1i;
// Interior routine for the "plus" y step: for every site with iy < m_Ny - 1 it
// reads the neighbour at is + m_Nx, forms two combined vectors, multiplies
// them by the link data u, and accumulates into four blocks of v2.
// NOTE(review): the function name, idir, id1..id4, iv and ig are on lines not shown.
712 int itask,
double *v2,
const double *v1)
714 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
723 double vt1[m_Nvc], vt2[m_Nvc];
724 double wt1r, wt1i, wt2r, wt2i;
726 int isite = m_arg[itask].isite;
728 double *w2 = &v2[Nvcd * isite];
729 const double *w1 = &v1[Nvcd * isite];
730 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
732 for (
int it = 0; it < m_Mt; ++it) {
733 for (
int iz = 0; iz < m_Mz; ++iz) {
// The last y row is excluded, so is + m_Nx stays within the same xy-plane.
734 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
735 for (
int ix = 0; ix < m_Nx; ++ix) {
736 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Offset of the neighbouring site one step up in y.
738 int in = Nvcd * (is + m_Nx);
741 for (
int ic = 0; ic < m_Nc; ++ic) {
742 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
743 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
744 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
745 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
748 for (
int ic = 0; ic < m_Nc; ++ic) {
749 int ic2 = ic * m_Nvc;
751 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
752 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
753 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
754 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// id3 takes the second product negated; id4 takes the first product unchanged.
756 w2[2 * ic + id1 + iv] += wt1r;
757 w2[2 * ic + 1 + id1 + iv] += wt1i;
758 w2[2 * ic + id2 + iv] += wt2r;
759 w2[2 * ic + 1 + id2 + iv] += wt2i;
760 w2[2 * ic + id3 + iv] += -wt2r;
761 w2[2 * ic + 1 + id3 + iv] += -wt2i;
762 w2[2 * ic + id4 + iv] += wt1r;
763 w2[2 * ic + 1 + id4 + iv] += wt1i;
// Builds two combined vectors per (it, iz, ix) site from v1, multiplies them
// with mult_udagv_r / mult_udagv_i against the link data u, stores the result
// in the m_fw_send[idir] buffer, then calls start_thread(itask) on it.
// NOTE(review): the function name, idir, iy, id1..id4, in, ig, icr, and the
// declaration receiving the buffer pointer (presumably w2) are on lines not
// shown. vcp1 is not referenced in the visible lines.
774 int itask,
double *vcp1,
const double *v1)
776 int Nvc2 = 2 * m_Nvc;
777 int Nvcd = m_Nvc * m_Nd;
778 int Nvcd2 = Nvcd / 2;
787 int isite = m_arg[itask].isite;
788 int isite_cp = m_arg[itask].isite_cpy;
// Byte offset of this task's region inside the send buffer.
792 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
794 const double *w1 = &v1[Nvcd * isite];
795 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
797 double vt1[m_Nvc], vt2[m_Nvc];
801 for (
int it = 0; it < m_Mt; ++it) {
802 for (
int iz = 0; iz < m_Mz; ++iz) {
803 for (
int ix = 0; ix < m_Nx; ++ix) {
// is: field site index; is2: compact buffer index over (ix, iz, it).
804 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
805 int is2 = ix + m_Nx * (iz + m_Mz * it);
808 int ix1 = Nvc2 * is2;
809 int ix2 = ix1 + m_Nvc;
811 for (
int ic = 0; ic < m_Nc; ++ic) {
// Opposite relative signs to the "plus" y packing.
812 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
813 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
814 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
815 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
818 for (
int ic = 0; ic < m_Nc; ++ic) {
820 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
821 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
822 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
823 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
829 m_fw_send[idir]->start_thread(itask);
// Waits on m_fw_recv[idir] for this task, then accumulates the received
// vectors, scaled by bc2, into four blocks of v2.
// NOTE(review): the function name, idir, iy, id1..id4, iv, icr, and the
// declaration receiving the buffer pointer (presumably w1) are on lines not
// shown. vcp2 and wt1r..wt2i are not referenced in the visible lines.
835 int itask,
double *v2,
const double *vcp2)
837 int Nvc2 = 2 * m_Nvc;
838 int Nvcd = m_Nvc * m_Nd;
839 int Nvcd2 = Nvcd / 2;
847 double bc2 = m_boundary2[idir];
849 double wt1r, wt1i, wt2r, wt2i;
851 int isite = m_arg[itask].isite;
852 int isite_cp = m_arg[itask].isite_cpy;
854 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
857 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
859 m_fw_recv[idir]->wait_thread(itask);
862 for (
int it = 0; it < m_Mt; ++it) {
863 for (
int iz = 0; iz < m_Mz; ++iz) {
864 for (
int ix = 0; ix < m_Nx; ++ix) {
// is: field site index; is2: compact buffer index over (ix, iz, it).
865 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
866 int is2 = ix + m_Nx * (iz + m_Mz * it);
868 int ix1 = Nvc2 * is2;
869 int ix2 = ix1 + m_Nvc;
871 for (
int ic = 0; ic < m_Nc; ++ic) {
// ici is the odd partner of icr, which is defined on a line not shown.
873 int ici = 2 * ic + 1;
874 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
875 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
876 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
877 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// id3 takes the second vector unchanged; id4 takes the first vector negated.
878 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
879 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
880 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
881 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
// Interior routine for the "minus" y step: for every site with iy >= 1 it
// reads the neighbour at is - m_Nx, forms two combined vectors, multiplies
// them with mult_udagv_r / mult_udagv_i using the link at that neighbour, and
// accumulates into four blocks of v2.
// NOTE(review): the function name, idir, id1..id4, iv and ic2 are on lines not
// shown; ic2 is used here both as the link offset and as the even index into
// w2, so it is presumably 2 * ic — confirm.
891 int itask,
double *v2,
const double *v1)
893 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
902 double vt1[m_Nvc], vt2[m_Nvc];
903 double wt1r, wt1i, wt2r, wt2i;
905 int isite = m_arg[itask].isite;
907 double *w2 = &v2[Nvcd * isite];
908 const double *w1 = &v1[Nvcd * isite];
909 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
911 for (
int it = 0; it < m_Mt; ++it) {
912 for (
int iz = 0; iz < m_Mz; ++iz) {
// The first y row is excluded, so is - m_Nx stays within the same xy-plane.
913 for (
int iy = 1; iy < m_Ny; ++iy) {
914 for (
int ix = 0; ix < m_Nx; ++ix) {
915 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Both the field and the link are read at the neighbour one step down in y.
917 int in = Nvcd * (is - m_Nx);
918 int ig = m_Ndf * (is - m_Nx);
920 for (
int ic = 0; ic < m_Nc; ++ic) {
921 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
922 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
923 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
924 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
927 for (
int ic = 0; ic < m_Nc; ++ic) {
929 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
930 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
931 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
932 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// id3 takes the second product unchanged; id4 takes the first product negated.
934 w2[ic2 + id1 + iv] += wt1r;
935 w2[ic2 + 1 + id1 + iv] += wt1i;
936 w2[ic2 + id2 + iv] += wt2r;
937 w2[ic2 + 1 + id2 + iv] += wt2i;
938 w2[ic2 + id3 + iv] += wt2r;
939 w2[ic2 + 1 + id3 + iv] += wt2i;
940 w2[ic2 + id4 + iv] += -wt1r;
941 w2[ic2 + 1 + id4 + iv] += -wt1i;
// For tasks flagged kz0 == 1, packs two combined vectors per (it, ixy) site
// from v1 into the m_bw_send[idir] buffer, scaled by bc2. start_thread(itask)
// is called on that buffer afterwards.
// NOTE(review): the function name, idir, iz, id1..id4, in, and the declaration
// receiving the buffer pointer (presumably w2) are on lines not shown; whether
// start_thread sits inside or outside the kz0 branch cannot be told from here.
// vcp1 is not referenced in the visible lines.
952 int itask,
double *vcp1,
const double *v1)
954 int Nvc2 = 2 * m_Nvc;
955 int Nvcd = m_Nvc * m_Nd;
956 int Nvcd2 = Nvcd / 2;
963 int isite = m_arg[itask].isite;
964 int isite_cp = m_arg[itask].isite_cpz;
967 double bc2 = m_boundary2[idir];
// Byte offset of this task's region inside the send buffer.
971 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
972 const double *w1 = &v1[Nvcd * isite];
// Only tasks holding the first z slab pack data.
974 if (m_arg[itask].kz0 == 1) {
975 int Nxy = m_Nx * m_Ny;
977 for (
int it = 0; it < m_Mt; ++it) {
978 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, it).
979 int is = ixy + Nxy * (iz + m_Nz * it);
980 int is2 = ixy + Nxy * it;
983 int ix1 = Nvc2 * is2;
984 int ix2 = ix1 + m_Nvc;
986 for (
int ic = 0; ic < m_Nc; ++ic) {
// Block id1 is paired with id3 and id2 with id4, mixing real with imaginary.
987 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
988 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
989 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
990 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
996 m_bw_send[idir]->start_thread(itask);
// Waits on m_bw_recv[idir] for this task; then, for tasks flagged kz1 == 1,
// multiplies the two received vectors per (it, ixy) site by the link data u
// and accumulates into four blocks of v2.
// NOTE(review): the function name, idir, iz, id1, id2, iv, and the declaration
// receiving the buffer pointer (presumably w1) are on lines not shown.
// vcp2 is not referenced in the visible lines.
1002 int itask,
double *v2,
const double *vcp2)
1004 int Nvc2 = 2 * m_Nvc;
1005 int Nvcd = m_Nvc * m_Nd;
1006 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth Nvc-length blocks within a site.
1010 int id3 = m_Nvc * 2;
1011 int id4 = m_Nvc * 3;
1015 double wt1r, wt1i, wt2r, wt2i;
1017 int isite = m_arg[itask].isite;
1018 int isite_cp = m_arg[itask].isite_cpz;
1020 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
1023 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1024 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// The wait is issued before the kz1 test, so it runs for every task.
1026 m_bw_recv[idir]->wait_thread(itask);
1028 if (m_arg[itask].kz1 == 1) {
1029 int Nxy = m_Nx * m_Ny;
1031 for (
int it = 0; it < m_Mt; ++it) {
1032 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, it).
1033 int is = ixy + Nxy * (iz + m_Nz * it);
1034 int is2 = ixy + Nxy * it;
1036 int ig = m_Ndf * is;
1037 int ix1 = Nvc2 * is2;
1038 int ix2 = ix1 + m_Nvc;
1040 for (
int ic = 0; ic < m_Nc; ++ic) {
1041 int ic2 = ic * m_Nvc;
1043 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1044 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1045 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1046 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// id3/id4 take the first/second product with real and imaginary swapped and
// opposite sign patterns.
1048 w2[2 * ic + id1 + iv] += wt1r;
1049 w2[2 * ic + 1 + id1 + iv] += wt1i;
1050 w2[2 * ic + id2 + iv] += wt2r;
1051 w2[2 * ic + 1 + id2 + iv] += wt2i;
1052 w2[2 * ic + id3 + iv] += wt1i;
1053 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1054 w2[2 * ic + id4 + iv] += -wt2i;
1055 w2[2 * ic + 1 + id4 + iv] += wt2r;
// Interior routine for the "plus" z step: for each site in the task's slab it
// reads the neighbour at is + Nxy, forms two combined vectors, multiplies them
// by the link data u, and accumulates into four blocks of v2. Tasks flagged
// kz1 skip their last z slice.
// NOTE(review): the function name, idir, id1, id2 and iv are on lines not shown.
1065 int itask,
double *v2,
const double *v1)
1067 int Nvcd = m_Nvc * m_Nd;
// Offsets of the third and fourth Nvc-length blocks within a site.
1071 int id3 = m_Nvc * 2;
1072 int id4 = m_Nvc * 3;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
1076 double vt1[m_Nvc], vt2[m_Nvc];
1077 double wt1r, wt1i, wt2r, wt2i;
1079 int isite = m_arg[itask].isite;
1081 double *w2 = &v2[Nvcd * isite];
1082 const double *w1 = &v1[Nvcd * isite];
1083 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1085 int kz1 = m_arg[itask].kz1;
1086 int Nxy = m_Nx * m_Ny;
1088 for (
int it = 0; it < m_Mt; ++it) {
// kz1 (0 or 1 in the visible setup code) shortens the range by one slice.
1089 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1090 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1091 int is = ixy + Nxy * (iz + m_Nz * it);
// Field is read one z step up; the link is read at the current site.
1093 int in = Nvcd * (is + Nxy);
1094 int ig = m_Ndf * is;
1096 for (
int ic = 0; ic < m_Nc; ++ic) {
1097 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1098 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1099 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1100 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1103 for (
int ic = 0; ic < m_Nc; ++ic) {
1104 int ic2 = ic * m_Nvc;
1106 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1107 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1108 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1109 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1111 w2[2 * ic + id1 + iv] += wt1r;
1112 w2[2 * ic + 1 + id1 + iv] += wt1i;
1113 w2[2 * ic + id2 + iv] += wt2r;
1114 w2[2 * ic + 1 + id2 + iv] += wt2i;
1115 w2[2 * ic + id3 + iv] += wt1i;
1116 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1117 w2[2 * ic + id4 + iv] += -wt2i;
1118 w2[2 * ic + 1 + id4 + iv] += wt2r;
// For tasks flagged kz1 == 1, builds two combined vectors per (it, ixy) site
// from v1, multiplies them with mult_udagv_r / mult_udagv_i against the link
// data u, and stores the result in the m_fw_send[idir] buffer.
// start_thread(itask) is called on that buffer afterwards.
// NOTE(review): the function name, idir, iz, id1, id2, in, icr, and the
// declaration receiving the buffer pointer (presumably w2) are on lines not
// shown; whether start_thread sits inside the kz1 branch cannot be told from
// here. vcp1 is not referenced in the visible lines.
1128 int itask,
double *vcp1,
const double *v1)
1130 int Nvc2 = 2 * m_Nvc;
1131 int Nvcd = m_Nvc * m_Nd;
1132 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth Nvc-length blocks within a site.
1136 int id3 = m_Nvc * 2;
1137 int id4 = m_Nvc * 3;
1141 int isite = m_arg[itask].isite;
1142 int isite_cp = m_arg[itask].isite_cpz;
// Byte offset of this task's region inside the send buffer.
1146 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1147 const double *w1 = &v1[Nvcd * isite];
1148 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
1150 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks holding the last z slab pack data.
1152 if (m_arg[itask].kz1 == 1) {
1153 int Nxy = m_Nx * m_Ny;
1155 for (
int it = 0; it < m_Mt; ++it) {
1156 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, it).
1157 int is = ixy + Nxy * (iz + m_Nz * it);
1158 int is2 = ixy + Nxy * it;
1160 int ig = m_Ndf * is;
1161 int ix1 = Nvc2 * is2;
1162 int ix2 = ix1 + m_Nvc;
1164 for (
int ic = 0; ic < m_Nc; ++ic) {
// Opposite relative signs to the "plus" z packing.
1165 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1166 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1167 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1168 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1171 for (
int ic = 0; ic < m_Nc; ++ic) {
1173 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1174 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1175 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1176 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1182 m_fw_send[idir]->start_thread(itask);
// Waits on m_fw_recv[idir] for this task; then, for tasks flagged kz0 == 1,
// accumulates the received vectors, scaled by bc2, into four blocks of v2.
// NOTE(review): the function name, idir, iz, id1, id2, iv, icr, and the
// declaration receiving the buffer pointer (presumably w1) are on lines not
// shown. vcp2 and wt1r..wt2i are not referenced in the visible lines.
1188 int itask,
double *v2,
const double *vcp2)
1190 int Nvc2 = 2 * m_Nvc;
1191 int Nvcd = m_Nvc * m_Nd;
1192 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth Nvc-length blocks within a site.
1196 int id3 = m_Nvc * 2;
1197 int id4 = m_Nvc * 3;
1200 double bc2 = m_boundary2[idir];
1202 double wt1r, wt1i, wt2r, wt2i;
1204 int isite = m_arg[itask].isite;
1205 int isite_cp = m_arg[itask].isite_cpz;
1207 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
1210 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// The wait is issued before the kz0 test, so it runs for every task.
1212 m_fw_recv[idir]->wait_thread(itask);
1214 if (m_arg[itask].kz0 == 1) {
1215 int Nxy = m_Nx * m_Ny;
1218 for (
int it = 0; it < m_Mt; ++it) {
1219 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, it).
1220 int is = ixy + Nxy * (iz + m_Nz * it);
1221 int is2 = ixy + Nxy * it;
1223 int ix1 = Nvc2 * is2;
1224 int ix2 = ix1 + m_Nvc;
1226 for (
int ic = 0; ic < m_Nc; ++ic) {
// ici is the odd partner of icr, which is defined on a line not shown.
1228 int ici = 2 * ic + 1;
1229 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1230 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1231 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1232 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// id3/id4 take the first/second vector with real and imaginary swapped.
1233 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1234 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1235 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1236 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
// Interior routine for the "minus" z step: for each site in the task's slab it
// reads the neighbour at is - Nxy, forms two combined vectors, multiplies them
// with mult_udagv_r / mult_udagv_i using the link at that neighbour, and
// accumulates into four blocks of v2. Tasks flagged kz0 skip their first z slice.
// NOTE(review): the function name, idir, id1, id2, iv and ic2 are on lines not
// shown; ic2 is used both as link offset and as even index into w2, so it is
// presumably 2 * ic — confirm.
1246 int itask,
double *v2,
const double *v1)
1248 int Nvcd = m_Nvc * m_Nd;
// Offsets of the third and fourth Nvc-length blocks within a site.
1252 int id3 = m_Nvc * 2;
1253 int id4 = m_Nvc * 3;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
1257 double vt1[m_Nvc], vt2[m_Nvc];
1258 double wt1r, wt1i, wt2r, wt2i;
1260 int isite = m_arg[itask].isite;
1262 double *w2 = &v2[Nvcd * isite];
1263 const double *w1 = &v1[Nvcd * isite];
1264 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1266 int kz0 = m_arg[itask].kz0;
1267 int Nxy = m_Nx * m_Ny;
1269 for (
int it = 0; it < m_Mt; ++it) {
// kz0 (0 or 1 in the visible setup code) moves the start past the first slice.
1270 for (
int iz = kz0; iz < m_Mz; ++iz) {
1271 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1272 int is = ixy + Nxy * (iz + m_Nz * it);
// Both the field and the link are read one z step down.
1274 int in = Nvcd * (is - Nxy);
1275 int ig = m_Ndf * (is - Nxy);
1277 for (
int ic = 0; ic < m_Nc; ++ic) {
1278 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1279 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1280 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1281 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1284 for (
int ic = 0; ic < m_Nc; ++ic) {
1286 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1287 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1288 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1289 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1291 w2[ic2 + id1 + iv] += wt1r;
1292 w2[ic2 + 1 + id1 + iv] += wt1i;
1293 w2[ic2 + id2 + iv] += wt2r;
1294 w2[ic2 + 1 + id2 + iv] += wt2i;
1295 w2[ic2 + id3 + iv] += -wt1i;
1296 w2[ic2 + 1 + id3 + iv] += wt1r;
1297 w2[ic2 + id4 + iv] += wt2i;
1298 w2[ic2 + 1 + id4 + iv] += -wt2r;
// For tasks flagged kt0 == 1, copies blocks id3 and id4 of v1, scaled by
// 2.0 * bc2, into the m_bw_send[idir] buffer for each (iz, ixy) site.
// start_thread(itask) is called on that buffer afterwards.
// NOTE(review): the function name, idir, it, in, and the declaration receiving
// the buffer pointer (presumably w2) are on lines not shown; whether
// start_thread sits inside the kt0 branch cannot be told from here.
// vcp1 is not referenced in the visible lines.
1308 int itask,
double *vcp1,
const double *v1)
1310 int Nvc2 = 2 * m_Nvc;
1311 int Nvcd = m_Nvc * m_Nd;
1312 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth Nvc-length blocks within a site.
1316 int id3 = m_Nvc * 2;
1317 int id4 = m_Nvc * 3;
1319 int isite = m_arg[itask].isite;
1320 int isite_cp = m_arg[itask].isite_cpt;
1323 double bc2 = m_boundary2[idir];
// Byte offset of this task's region inside the send buffer.
1327 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1328 const double *w1 = &v1[Nvcd * isite];
// Only tasks holding the first t slab pack data.
1330 if (m_arg[itask].kt0 == 1) {
1331 int Nxy = m_Nx * m_Ny;
1333 for (
int iz = 0; iz < m_Mz; ++iz) {
1334 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, iz).
1335 int is = ixy + Nxy * (iz + m_Nz * it);
1336 int is2 = ixy + Nxy * iz;
1339 int ix1 = Nvc2 * is2;
1340 int ix2 = ix1 + m_Nvc;
1342 for (
int ic = 0; ic < m_Nc; ++ic) {
// No mixing between blocks here: each buffer slot is a single scaled block.
1343 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1344 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1345 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1346 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1352 m_bw_send[idir]->start_thread(itask);
// Waits on m_bw_recv[idir] for this task; then, for tasks flagged kt1 == 1,
// multiplies the two received vectors per (iz, ixy) site by the link data u
// and accumulates into blocks id3 and id4 of v2 only.
// NOTE(review): the function name, idir, it, iv, and the declaration receiving
// the buffer pointer (presumably w1) are on lines not shown.
// vcp2 is not referenced in the visible lines.
1358 int itask,
double *v2,
const double *vcp2)
1360 int Nvc2 = 2 * m_Nvc;
1361 int Nvcd = m_Nvc * m_Nd;
1362 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth Nvc-length blocks within a site.
1366 int id3 = m_Nvc * 2;
1367 int id4 = m_Nvc * 3;
1371 double wt1r, wt1i, wt2r, wt2i;
1373 int isite = m_arg[itask].isite;
1374 int isite_cp = m_arg[itask].isite_cpt;
1376 double *w2 = &v2[Nvcd * isite];
// Byte offset of this task's region inside the receive buffer.
1379 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1380 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// The wait is issued before the kt1 test, so it runs for every task.
1382 m_bw_recv[idir]->wait_thread(itask);
1384 if (m_arg[itask].kt1 == 1) {
1385 int Nxy = m_Nx * m_Ny;
1387 for (
int iz = 0; iz < m_Mz; ++iz) {
1388 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, iz).
1389 int is = ixy + Nxy * (iz + m_Nz * it);
1390 int is2 = ixy + Nxy * iz;
1392 int ig = m_Ndf * is;
1393 int ix1 = Nvc2 * is2;
1394 int ix2 = ix1 + m_Nvc;
1396 for (
int ic = 0; ic < m_Nc; ++ic) {
1397 int ic2 = ic * m_Nvc;
1399 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1400 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1401 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1402 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1404 w2[2 * ic + id3 + iv] += wt1r;
1405 w2[2 * ic + 1 + id3 + iv] += wt1i;
1406 w2[2 * ic + id4 + iv] += wt2r;
1407 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Interior routine for the "plus" t step: for each site in the task's slab it
// reads blocks id3 and id4 of the neighbour at is + Nxyz, doubles them,
// multiplies by the link data u, and accumulates into blocks id3 and id4 of
// v2. Tasks flagged kt1 skip their last t slice.
// NOTE(review): the function name, idir and iv are on lines not shown.
1417 int itask,
double *v2,
const double *v1)
1419 int Nvcd = m_Nvc * m_Nd;
// Offsets of the third and fourth Nvc-length blocks within a site.
1423 int id3 = m_Nvc * 2;
1424 int id4 = m_Nvc * 3;
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
1428 double vt1[m_Nvc], vt2[m_Nvc];
1429 double wt1r, wt1i, wt2r, wt2i;
1431 int isite = m_arg[itask].isite;
1433 double *w2 = &v2[Nvcd * isite];
1434 const double *w1 = &v1[Nvcd * isite];
1435 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1437 int kt1 = m_arg[itask].kt1;
1438 int Nxy = m_Nx * m_Ny;
// Number of sites in one full t slice.
1439 int Nxyz = Nxy * m_Nz;
// kt1 (0 or 1 in the visible setup code) shortens the range by one slice.
1441 for (
int it = 0; it < m_Mt - kt1; ++it) {
1442 for (
int iz = 0; iz < m_Mz; ++iz) {
1443 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1444 int is = ixy + Nxy * (iz + m_Nz * it);
// Field is read one t step up; the link is read at the current site.
1446 int in = Nvcd * (is + Nxyz);
1447 int ig = m_Ndf * is;
1449 for (
int ic = 0; ic < m_Nc; ++ic) {
1450 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1451 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1452 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1453 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1456 for (
int ic = 0; ic < m_Nc; ++ic) {
1457 int ic2 = ic * m_Nvc;
1459 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1460 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1461 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1462 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1464 w2[2 * ic + id3 + iv] += wt1r;
1465 w2[2 * ic + 1 + id3 + iv] += wt1i;
1466 w2[2 * ic + id4 + iv] += wt2r;
1467 w2[2 * ic + 1 + id4 + iv] += wt2i;
// For tasks flagged kt1 == 1, doubles blocks id1 and id2 of v1 per (iz, ixy)
// site, multiplies them with mult_udagv_r / mult_udagv_i against the link data
// u, and stores the result in the m_fw_send[idir] buffer.
// start_thread(itask) is called on that buffer afterwards.
// NOTE(review): the function name, idir, it, id1, id2, in, icr, and the
// declaration receiving the buffer pointer (presumably w2) are on lines not
// shown; whether start_thread sits inside the kt1 branch cannot be told from
// here. vcp1, id3 and id4 are not referenced in the visible lines.
1477 int itask,
double *vcp1,
const double *v1)
1479 int Nvc2 = 2 * m_Nvc;
1480 int Nvcd = m_Nvc * m_Nd;
1481 int Nvcd2 = Nvcd / 2;
1485 int id3 = m_Nvc * 2;
1486 int id4 = m_Nvc * 3;
1490 int isite = m_arg[itask].isite;
1491 int isite_cp = m_arg[itask].isite_cpt;
// Byte offset of this task's region inside the send buffer.
1495 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1496 const double *w1 = &v1[Nvcd * isite];
1497 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): array bound is the member m_Nvc; this needs VLA support unless
// m_Nvc is a constant expression — confirm.
1499 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks holding the last t slab pack data.
1501 if (m_arg[itask].kt1 == 1) {
1502 int Nxy = m_Nx * m_Ny;
1504 for (
int iz = 0; iz < m_Mz; ++iz) {
1505 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// is: field site index; is2: compact buffer index over (ixy, iz).
1506 int is = ixy + Nxy * (iz + m_Nz * it);
1507 int is2 = ixy + Nxy * iz;
1509 int ig = m_Ndf * is;
1510 int ix1 = Nvc2 * is2;
1511 int ix2 = ix1 + m_Nvc;
1513 for (
int ic = 0; ic < m_Nc; ++ic) {
1514 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1515 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1516 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1517 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1520 for (
int ic = 0; ic < m_Nc; ++ic) {
1522 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1523 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1524 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1525 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1531 m_fw_send[idir]->start_thread(itask);
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tm2_dirac_thread -- confirm against the full file.
// itask selects the m_arg entry to work on and v2 is the array that is accumulated into.
// vcp2 is not referenced in any of the lines visible here.
1537 int itask,
double *v2,
const double *vcp2)
1539 int Nvc2 = 2 * m_Nvc;
1540 int Nvcd = m_Nvc * m_Nd;
1541 int Nvcd2 = Nvcd / 2;
1545 int id3 = m_Nvc * 2;
1546 int id4 = m_Nvc * 3;
// bc2 multiplies every value taken from the receive buffer below.
1549 double bc2 = m_boundary2[idir];
// wt1r..wt2i are not used in the lines visible here.
1551 double wt1r, wt1i, wt2r, wt2i;
1553 int isite = m_arg[itask].isite;
1554 int isite_cp = m_arg[itask].isite_cpt;
1556 double *w2 = &v2[Nvcd * isite];
// Continuation of a declaration whose first line is missing from this listing
// (presumably the buffer pointer used as w1 below). It addresses m_fw_recv[idir]
// at a byte offset of sizeof(double) * Nvcd2 * isite_cp.
1559 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread(itask) is called before the receive buffer is read.
1561 m_fw_recv[idir]->wait_thread(itask);
// Only tasks with kt0 set consume the buffer. The setup code near the top of the
// file sets kt0 = 1 for tasks with ith_t == 0.
1563 if (m_arg[itask].kt0 == 1) {
1564 int Nxy = m_Nx * m_Ny;
1566 for (
int iz = 0; iz < m_Mz; ++iz) {
1567 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// `it`, `iv`, `id1`, `id2` and `icr` are used below but defined on lines not shown here.
1568 int is = ixy + Nxy * (iz + m_Nz * it);
1569 int is2 = ixy + Nxy * iz;
1571 int ix1 = Nvc2 * is2;
1572 int ix2 = ix1 + m_Nvc;
// Add bc2 times the buffered values at ix1 / ix2 to the id1 / id2 sub-blocks of w2.
1574 for (
int ic = 0; ic < m_Nc; ++ic) {
1576 int ici = 2 * ic + 1;
1577 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1578 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1579 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1580 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tmb_dirac_thread -- confirm against the full file.
// itask selects the m_arg entry; v1 is read and v2 is accumulated into.
1590 int itask,
double *v2,
const double *v1)
1592 int Nvcd = m_Nvc * m_Nd;
1596 int id3 = m_Nvc * 2;
1597 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are variable-length
// arrays, which standard C++ does not provide -- confirm.
1601 double vt1[m_Nvc], vt2[m_Nvc];
1602 double wt1r, wt1i, wt2r, wt2i;
1604 int isite = m_arg[itask].isite;
1606 double *w2 = &v2[Nvcd * isite];
1607 const double *w1 = &v1[Nvcd * isite];
1608 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1610 int kt0 = m_arg[itask].kt0;
1611 int Nxy = m_Nx * m_Ny;
1612 int Nxyz = Nxy * m_Nz;
// The it loop starts at kt0, so tasks with kt0 == 1 skip it == 0 here.
1614 for (
int it = kt0; it < m_Mt; ++it) {
1615 for (
int iz = 0; iz < m_Mz; ++iz) {
1616 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1617 int is = ixy + Nxy * (iz + m_Nz * it);
// Input and link data are both taken at site is - Nxyz, i.e. one step lower in it.
1619 int in = Nvcd * (is - Nxyz);
1620 int ig = m_Ndf * (is - Nxyz);
// vt1 / vt2 receive twice the id1 / id2 sub-blocks of w1.
// `id1`, `id2`, `iv` and `ic2` are used below but defined on lines not shown here.
1622 for (
int ic = 0; ic < m_Nc; ++ic) {
1623 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1624 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1625 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1626 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
// Accumulate the mult_udagv_r / mult_udagv_i results into the id1 / id2 sub-blocks of w2.
1629 for (
int ic = 0; ic < m_Nc; ++ic) {
1631 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1632 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1633 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1634 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1636 w2[ic2 + id1 + iv] += wt1r;
1637 w2[ic2 + 1 + id1 + iv] += wt1i;
1638 w2[ic2 + id2 + iv] += wt2r;
1639 w2[ic2 + 1 + id2 + iv] += wt2i;
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tp1_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry to work on and v1 is the input array.
// vcp1 is not referenced in any of the lines visible here.
1649 int itask,
double *vcp1,
const double *v1)
1651 int Nvc2 = 2 * m_Nvc;
1652 int Nvcd = m_Nvc * m_Nd;
1653 int Nvcd2 = Nvcd / 2;
1657 int id3 = m_Nvc * 2;
1658 int id4 = m_Nvc * 3;
1660 int isite = m_arg[itask].isite;
1661 int isite_cp = m_arg[itask].isite_cpt;
// bc2 multiplies every value written to the send buffer below.
1664 double bc2 = m_boundary2[idir];
// Continuation of a declaration whose first line is missing from this listing
// (presumably the buffer pointer used as w2 below). It addresses m_bw_send[idir]
// at a byte offset of sizeof(double) * Nvcd2 * isite_cp.
1668 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1669 const double *w1 = &v1[Nvcd * isite];
// Only tasks with kt0 set fill the buffer. The setup code near the top of the file
// sets kt0 = 1 for tasks with ith_t == 0.
1671 if (m_arg[itask].kt0 == 1) {
1672 int Nxy = m_Nx * m_Ny;
1674 for (
int iz = 0; iz < m_Mz; ++iz) {
1675 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// `it`, `in`, `id1` and `id2` are used below but defined on lines not shown here.
1676 int is = ixy + Nxy * (iz + m_Nz * it);
1677 int is2 = ixy + Nxy * iz;
1680 int ix1 = Nvc2 * is2;
1681 int ix2 = ix1 + m_Nvc;
// Buffer slot ix1 gets bc2 * (id1 + id3 sub-blocks), slot ix2 gets bc2 * (id2 + id4).
1683 for (
int ic = 0; ic < m_Nc; ++ic) {
1684 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1685 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1686 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1687 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// Calls start_thread(itask) on m_bw_send[idir] once the loops above are done.
1693 m_bw_send[idir]->start_thread(itask);
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tp2_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry to work on and v2 is the array that is accumulated into.
// vcp2 is not referenced in any of the lines visible here.
1699 int itask,
double *v2,
const double *vcp2)
1701 int Nvc2 = 2 * m_Nvc;
1702 int Nvcd = m_Nvc * m_Nd;
1703 int Nvcd2 = Nvcd / 2;
1707 int id3 = m_Nvc * 2;
1708 int id4 = m_Nvc * 3;
1712 double wt1r, wt1i, wt2r, wt2i;
1714 int isite = m_arg[itask].isite;
1715 int isite_cp = m_arg[itask].isite_cpt;
1717 double *w2 = &v2[Nvcd * isite];
// Continuation of a declaration whose first line is missing from this listing
// (presumably the buffer pointer used as w1 below). It addresses m_bw_recv[idir]
// at a byte offset of sizeof(double) * Nvcd2 * isite_cp.
1720 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1721 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread(itask) is called before the receive buffer is read.
1723 m_bw_recv[idir]->wait_thread(itask);
// Only tasks with kt1 set consume the buffer. The setup code near the top of the
// file sets kt1 = 1 for tasks with ith_t == m_Ntask_t - 1.
1725 if (m_arg[itask].kt1 == 1) {
1726 int Nxy = m_Nx * m_Ny;
1728 for (
int iz = 0; iz < m_Mz; ++iz) {
1729 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// `it`, `iv`, `id1` and `id2` are used below but defined on lines not shown here.
1730 int is = ixy + Nxy * (iz + m_Nz * it);
1731 int is2 = ixy + Nxy * iz;
1733 int ig = m_Ndf * is;
1734 int ix1 = Nvc2 * is2;
1735 int ix2 = ix1 + m_Nvc;
// mult_uv_r / mult_uv_i are applied to the link data at u[ic2 + ig] and the buffered
// values at ix1 / ix2. The same results are added to both the id1 / id2 and the
// id3 / id4 sub-blocks of w2.
1737 for (
int ic = 0; ic < m_Nc; ++ic) {
1738 int ic2 = ic * m_Nvc;
1740 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1741 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1742 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1743 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1745 w2[2 * ic + id1 + iv] += wt1r;
1746 w2[2 * ic + 1 + id1 + iv] += wt1i;
1747 w2[2 * ic + id2 + iv] += wt2r;
1748 w2[2 * ic + 1 + id2 + iv] += wt2i;
1749 w2[2 * ic + id3 + iv] += wt1r;
1750 w2[2 * ic + 1 + id3 + iv] += wt1i;
1751 w2[2 * ic + id4 + iv] += wt2r;
1752 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tpb_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry; v1 is read and v2 is accumulated into.
1762 int itask,
double *v2,
const double *v1)
1764 int Nvcd = m_Nvc * m_Nd;
1768 int id3 = m_Nvc * 2;
1769 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are variable-length
// arrays, which standard C++ does not provide -- confirm.
1773 double vt1[m_Nvc], vt2[m_Nvc];
1774 double wt1r, wt1i, wt2r, wt2i;
1776 int isite = m_arg[itask].isite;
1778 double *w2 = &v2[Nvcd * isite];
1779 const double *w1 = &v1[Nvcd * isite];
1780 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1782 int kt1 = m_arg[itask].kt1;
1783 int Nxy = m_Nx * m_Ny;
1784 int Nxyz = Nxy * m_Nz;
// The it loop stops at m_Mt - kt1, so tasks with kt1 == 1 skip it == m_Mt - 1 here.
1786 for (
int it = 0; it < m_Mt - kt1; ++it) {
1787 for (
int iz = 0; iz < m_Mz; ++iz) {
1788 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1789 int is = ixy + Nxy * (iz + m_Nz * it);
// Input is taken at site is + Nxyz (one step higher in it); link data at site is.
1791 int in = Nvcd * (is + Nxyz);
1792 int ig = m_Ndf * is;
// vt1 = id1 + id3 sub-blocks of w1, vt2 = id2 + id4 sub-blocks.
// `id1`, `id2` and `iv` are used below but defined on lines not shown here.
1794 for (
int ic = 0; ic < m_Nc; ++ic) {
1795 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1796 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1797 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1798 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
// The mult_uv_r / mult_uv_i results are added to all four sub-blocks of w2
// (wt1 to id1 and id3, wt2 to id2 and id4).
1801 for (
int ic = 0; ic < m_Nc; ++ic) {
1802 int ic2 = ic * m_Nvc;
1804 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1805 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1806 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1807 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1809 w2[2 * ic + id1 + iv] += wt1r;
1810 w2[2 * ic + 1 + id1 + iv] += wt1i;
1811 w2[2 * ic + id2 + iv] += wt2r;
1812 w2[2 * ic + 1 + id2 + iv] += wt2i;
1813 w2[2 * ic + id3 + iv] += wt1r;
1814 w2[2 * ic + 1 + id3 + iv] += wt1i;
1815 w2[2 * ic + id4 + iv] += wt2r;
1816 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tm1_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry to work on and v1 is the input array.
// vcp1 is not referenced in any of the lines visible here.
1826 int itask,
double *vcp1,
const double *v1)
1828 int Nvc2 = 2 * m_Nvc;
1829 int Nvcd = m_Nvc * m_Nd;
1830 int Nvcd2 = Nvcd / 2;
1834 int id3 = m_Nvc * 2;
1835 int id4 = m_Nvc * 3;
1839 int isite = m_arg[itask].isite;
1840 int isite_cp = m_arg[itask].isite_cpt;
// Continuation of a declaration whose first line is missing from this listing
// (presumably the buffer pointer used as w2 below). It addresses m_fw_send[idir]
// at a byte offset of sizeof(double) * Nvcd2 * isite_cp.
1844 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1845 const double *w1 = &v1[Nvcd * isite];
1846 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): if m_Nvc is not a compile-time constant these are variable-length
// arrays, which standard C++ does not provide -- confirm.
1848 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks with kt1 set fill the buffer. The setup code near the top of the file
// sets kt1 = 1 for tasks with ith_t == m_Ntask_t - 1.
1850 if (m_arg[itask].kt1 == 1) {
1851 int Nxy = m_Nx * m_Ny;
1853 for (
int iz = 0; iz < m_Mz; ++iz) {
1854 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// `it`, `in`, `id1`, `id2` and `icr` are used below but defined on lines not shown here.
1855 int is = ixy + Nxy * (iz + m_Nz * it);
1856 int is2 = ixy + Nxy * iz;
1858 int ig = m_Ndf * is;
1859 int ix1 = Nvc2 * is2;
1860 int ix2 = ix1 + m_Nvc;
// vt1 = id1 - id3 sub-blocks of w1, vt2 = id2 - id4 sub-blocks.
1862 for (
int ic = 0; ic < m_Nc; ++ic) {
1863 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1864 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1865 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1866 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// Store mult_udagv_r / mult_udagv_i of the link data at u[icr + ig] with vt1 / vt2
// into the buffer slots ix1 / ix2.
1869 for (
int ic = 0; ic < m_Nc; ++ic) {
1871 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1872 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1873 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1874 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Calls start_thread(itask) on m_fw_send[idir] once the loops above are done.
1880 m_fw_send[idir]->start_thread(itask);
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tm2_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry to work on and v2 is the array that is accumulated into.
// vcp2 is not referenced in any of the lines visible here.
1886 int itask,
double *v2,
const double *vcp2)
1888 int Nvc2 = 2 * m_Nvc;
1889 int Nvcd = m_Nvc * m_Nd;
1890 int Nvcd2 = Nvcd / 2;
1894 int id3 = m_Nvc * 2;
1895 int id4 = m_Nvc * 3;
// bc2 multiplies every value taken from the receive buffer below.
1898 double bc2 = m_boundary2[idir];
// wt1r..wt2i are not used in the lines visible here.
1900 double wt1r, wt1i, wt2r, wt2i;
1902 int isite = m_arg[itask].isite;
1903 int isite_cp = m_arg[itask].isite_cpt;
1905 double *w2 = &v2[Nvcd * isite];
// Continuation of a declaration whose first line is missing from this listing
// (presumably the buffer pointer used as w1 below). It addresses m_fw_recv[idir]
// at a byte offset of sizeof(double) * Nvcd2 * isite_cp.
1908 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread(itask) is called before the receive buffer is read.
1910 m_fw_recv[idir]->wait_thread(itask);
// Only tasks with kt0 set consume the buffer. The setup code near the top of the
// file sets kt0 = 1 for tasks with ith_t == 0.
1912 if (m_arg[itask].kt0 == 1) {
1913 int Nxy = m_Nx * m_Ny;
1915 for (
int iz = 0; iz < m_Mz; ++iz) {
1916 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// `it`, `iv`, `id1`, `id2` and `icr` are used below but defined on lines not shown here.
1917 int is = ixy + Nxy * (iz + m_Nz * it);
1918 int is2 = ixy + Nxy * iz;
1920 int ix1 = Nvc2 * is2;
1921 int ix2 = ix1 + m_Nvc;
// bc2 times the buffered values is added to the id1 / id2 sub-blocks of w2 and
// subtracted from the id3 / id4 sub-blocks.
1923 for (
int ic = 0; ic < m_Nc; ++ic) {
1925 int ici = 2 * ic + 1;
1926 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1927 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1928 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1929 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1930 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1931 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1932 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1933 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably mult_tmb_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry; v1 is read and v2 is accumulated into.
1943 int itask,
double *v2,
const double *v1)
1945 int Nvcd = m_Nvc * m_Nd;
1949 int id3 = m_Nvc * 2;
1950 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are variable-length
// arrays, which standard C++ does not provide -- confirm.
1954 double vt1[m_Nvc], vt2[m_Nvc];
1955 double wt1r, wt1i, wt2r, wt2i;
1957 int isite = m_arg[itask].isite;
1959 double *w2 = &v2[Nvcd * isite];
1960 const double *w1 = &v1[Nvcd * isite];
1961 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1963 int kt0 = m_arg[itask].kt0;
1964 int Nxy = m_Nx * m_Ny;
1965 int Nxyz = Nxy * m_Nz;
// The it loop starts at kt0, so tasks with kt0 == 1 skip it == 0 here.
1967 for (
int it = kt0; it < m_Mt; ++it) {
1968 for (
int iz = 0; iz < m_Mz; ++iz) {
1969 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1970 int is = ixy + Nxy * (iz + m_Nz * it);
// Input and link data are both taken at site is - Nxyz, i.e. one step lower in it.
1972 int in = Nvcd * (is - Nxyz);
1973 int ig = m_Ndf * (is - Nxyz);
// vt1 = id1 - id3 sub-blocks of w1, vt2 = id2 - id4 sub-blocks.
// `id1`, `id2`, `iv` and `ic2` are used below but defined on lines not shown here.
1975 for (
int ic = 0; ic < m_Nc; ++ic) {
1976 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1977 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1978 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1979 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// The mult_udagv_r / mult_udagv_i results are added to the id1 / id2 sub-blocks
// of w2 and subtracted from the id3 / id4 sub-blocks.
1982 for (
int ic = 0; ic < m_Nc; ++ic) {
1984 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1985 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1986 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1987 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1989 w2[ic2 + id1 + iv] += wt1r;
1990 w2[ic2 + 1 + id1 + iv] += wt1i;
1991 w2[ic2 + id2 + iv] += wt2r;
1992 w2[ic2 + 1 + id2 + iv] += wt2i;
1993 w2[ic2 + id3 + iv] -= wt1r;
1994 w2[ic2 + 1 + id3 + iv] -= wt1i;
1995 w2[ic2 + id4 + iv] -= wt2r;
1996 w2[ic2 + 1 + id4 + iv] -= wt2i;
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably gm5_dirac_thread -- confirm against the full file.
// itask selects the m_arg entry; v1 is read and v2 is overwritten.
2006 int itask,
double *v2,
const double *v1)
2008 int Nvcd = m_Nvc * m_Nd;
2009 int Nxy = m_Nx * m_Ny;
2013 int id3 = m_Nvc * 2;
2014 int id4 = m_Nvc * 3;
2016 int isite = m_arg[itask].isite;
2017 double *w2 = &v2[Nvcd * isite];
2018 const double *w1 = &v1[Nvcd * isite];
// Visits every site of the task's block (m_Mt x m_Mz x Nxy); iv is the site's offset.
2020 for (
int it = 0; it < m_Mt; ++it) {
2021 for (
int iz = 0; iz < m_Mz; ++iz) {
2022 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2023 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Per site the output sub-blocks are the input sub-blocks swapped pairwise:
// id1 <- id3, id2 <- id4, id3 <- id1, id4 <- id2.
// `id1` and `id2` are defined on lines not shown here.
2024 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2025 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2026 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2027 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2028 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// NOTE(review): the line naming this routine is not part of this listing; from its
// position it is presumably gm5_chiral_thread -- confirm against the full file.
// itask selects the m_arg entry; v1 is read and v2 is overwritten.
2038 int itask,
double *v2,
const double *v1)
2040 int Nvcd = m_Nvc * m_Nd;
2041 int Nxy = m_Nx * m_Ny;
2045 int id3 = m_Nvc * 2;
2046 int id4 = m_Nvc * 3;
2048 int isite = m_arg[itask].isite;
2049 double *w2 = &v2[Nvcd * isite];
2050 const double *w1 = &v1[Nvcd * isite];
// Visits every site of the task's block (m_Mt x m_Mz x Nxy); iv is the site's offset.
2052 for (
int it = 0; it < m_Mt; ++it) {
2053 for (
int iz = 0; iz < m_Mz; ++iz) {
2054 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2055 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Per site the id1 / id2 sub-blocks are copied unchanged and the id3 / id4
// sub-blocks are copied with their sign flipped.
// `id1` and `id2` are defined on lines not shown here.
2056 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2057 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2058 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2059 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2060 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_zp2_thread(int, double *, const double *)
void mult_yp2_thread(int, double *, const double *)
void mult_tm1_dirac_thread(int, double *, const double *)
void gm5_dirac_thread(int, double *, const double *)
void general(const char *format,...)
std::vector< Channel * > m_bw_recv
void mult_ym2_thread(int, double *, const double *)
void mult_xp2_thread(int, double *, const double *)
void mult_yp1_thread(int, double *, const double *)
void mult_tm1_chiral_thread(int, double *, const double *)
void mult_ym1_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *)
std::vector< Channel * > m_fw_recv
std::vector< Channel * > m_fw_send
void mult_xm2_thread(int, double *, const double *)
void mult_tp1_dirac_thread(int, double *, const double *)
std::vector< mult_arg > m_arg
void mult_ymb_thread(int, double *, const double *)
void mult_zpb_thread(int, double *, const double *)
void mult_xm1_thread(int, double *, const double *)
void daypx_thread(int, double *, double, const double *)
Bridge::VerboseLevel m_vl
void mult_tpb_chiral_thread(int, double *, const double *)
void mult_tp1_chiral_thread(int, double *, const double *)
std::vector< Channel * > m_bw_send
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void gm5_chiral_thread(int, double *, const double *)
void mult_tp2_chiral_thread(int, double *, const double *)
void mult_zp1_thread(int, double *, const double *)
void clear_thread(int, double *)
void mult_ypb_thread(int, double *, const double *)
void crucial(const char *format,...)
void mult_zmb_thread(int, double *, const double *)
void mult_tmb_dirac_thread(int, double *, const double *)
void mult_xmb_thread(int, double *, const double *)
void mult_xpb_thread(int, double *, const double *)
void mult_zm2_thread(int, double *, const double *)
void mult_tm2_dirac_thread(int, double *, const double *)
void mult_tpb_dirac_thread(int, double *, const double *)
void mult_tp2_dirac_thread(int, double *, const double *)
void mult_zm1_thread(int, double *, const double *)
void mult_tm2_chiral_thread(int, double *, const double *)
void mult_tmb_chiral_thread(int, double *, const double *)