19 #if defined USE_GROUP_SU3
20 #include "fopr_Wilson_impl_SU3.inc"
21 #elif defined USE_GROUP_SU2
22 #include "fopr_Wilson_impl_SU2.inc"
23 #elif defined USE_GROUP_SU_N
24 #include "fopr_Wilson_impl_SU_N.inc"
// Walk the (ith_t, ith_z) task grid. Tasks are numbered
// itask = ith_z + m_Ntask_z * ith_t.
76 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
77 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
78 int itask = ith_z + m_Ntask_z * ith_t;
// Edge flags: kt0/kz0 are set to 1 on the first t/z slot of the grid,
// kt1/kz1 on the last one.
86 if (ith_t == 0)
m_arg[itask].kt0 = 1;
87 if (ith_z == 0)
m_arg[itask].kz0 = 1;
88 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
89 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
// Note the crossing: isite_cpz scales with the t slot (ith_t * m_Mt) and
// isite_cpt with the z slot (ith_z * m_Mz), each multiplied by Nxy.
93 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy;
94 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy;
// Doubles per site used in the offset/size formulas below: 2 * Nc * Nd / 2.
// Nc and Nd are declared on lines not visible here.
101 int Nvcd2 = 2 * Nc * Nd / 2;
// Per-task tables, one int entry per task (m_Ntask entries each).
103 std::vector<int> destid(
m_Ntask);
104 std::vector<int> offset(
m_Ntask);
105 std::vector<int> datasize(
m_Ntask);
106 std::vector<int> offset_up(
m_Ntask);
107 std::vector<int> offset_lw(
m_Ntask);
108 std::vector<int> datasize_up(
m_Ntask);
109 std::vector<int> datasize_lw(
m_Ntask);
// Fill destid/offset/datasize for every task in the (ith_t, ith_z) grid.
// Each task's destination id is its own index; offset and datasize are in
// bytes, with datasize covering m_Mz * m_Mt * m_Ny sites of Nvcd2 doubles.
// NOTE(review): itask and isite_cp are assigned on lines not visible here.
112 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
113 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
116 destid[itask] = itask;
// The sizeof-based product is an unsigned size_t value stored into an int entry.
117 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
118 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Ny;
// Fill destid/offset/datasize for every task in the (ith_t, ith_z) grid.
// Each task's destination id is its own index; offset and datasize are in
// bytes, with datasize covering m_Mz * m_Mt * m_Nx sites of Nvcd2 doubles.
// NOTE(review): itask and isite_cp are assigned on lines not visible here.
127 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
128 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
131 destid[itask] = itask;
// The sizeof-based product is an unsigned size_t value stored into an int entry.
132 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
133 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Nx;
// Fill the *_up / *_lw tables for every task: all four entries are first
// reset to 0, then byte offsets/sizes are derived from the t slot (ith_t)
// with m_Mt * m_Nx * m_Ny sites of Nvcd2 doubles per task.
142 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
143 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
144 int itask = ith_z + m_Ntask_z * ith_t;
146 offset_up[itask] = 0;
147 offset_lw[itask] = 0;
148 datasize_up[itask] = 0;
149 datasize_lw[itask] = 0;
// Destination is the task in the last z slot of the same t row.
// NOTE(review): a guard around the *_lw assignments may sit on a line that
// is not visible here - confirm against the full file.
151 destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
152 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
153 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
// Only tasks in the last z slot get non-zero *_up entries.
155 if (ith_z == m_Ntask_z - 1) {
157 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
158 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
// Fill the *_up / *_lw tables for every task: all four entries are first
// reset to 0, then byte offsets/sizes are derived from the z slot (ith_z)
// with m_Mz * m_Nx * m_Ny sites of Nvcd2 doubles per task.
168 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
169 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
170 int itask = ith_z + m_Ntask_z * ith_t;
172 offset_up[itask] = 0;
173 offset_lw[itask] = 0;
174 datasize_up[itask] = 0;
175 datasize_lw[itask] = 0;
// Destination is the task in the last t slot with the same z slot.
// NOTE(review): a guard around the *_lw assignments may sit on a line that
// is not visible here - confirm against the full file.
177 destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
178 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
179 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
// Tasks in the last t slot instead target the task in the first t slot
// (index ith_z) and get non-zero *_up entries.
181 if (ith_t == m_Ntask_t - 1) {
182 destid[itask] = ith_z;
183 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
184 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
// In-place update v2 = fac * v2 + v1 restricted to one task's site block.
//   itask : index into m_arg selecting the block's first site
//   v2    : field updated in place
//   fac   : factor applied to the old v2 value
//   v1    : field that is added
197 int itask,
double *v2,
double fac,
const double *v1)
199 int Nvcd = m_Nvc * m_Nd;
200 int Nvxy = Nvcd * m_Nx * m_Ny;
202 int isite = m_arg[itask].isite;
204 const double *w1 = &v1[Nvcd * isite];
205 double *w2 = &v2[Nvcd * isite];
// The block spans m_Mt x m_Mz xy-planes; the plane stride in t uses the
// full extent m_Nz, not the per-task extent m_Mz.
207 for (
int it = 0; it < m_Mt; ++it) {
208 for (
int iz = 0; iz < m_Mz; ++iz) {
209 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
210 int iv = ivxy + Nvxy * (iz + m_Nz * it);
211 w2[iv] = fac * w2[iv] + w1[iv];
// Visits every component of one task's site block of v2 (m_Mt x m_Mz
// xy-planes starting at m_arg[itask].isite).
// NOTE(review): the parameter list and the statement that uses iv are on
// lines not visible here, so what is written through w2 cannot be stated.
222 int Nvcd = m_Nvc * m_Nd;
223 int Nvxy = Nvcd * m_Nx * m_Ny;
225 int isite = m_arg[itask].isite;
226 double *w2 = &v2[Nvcd * isite];
228 for (
int it = 0; it < m_Mt; ++it) {
229 for (
int iz = 0; iz < m_Mz; ++iz) {
230 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
// Plane stride in t uses the full extent m_Nz.
231 int iv = ivxy + Nvxy * (iz + m_Nz * it);
// Packs per-site data from v1 into the m_bw_send[idir] buffer for one task
// and then calls start_thread(itask) on that buffer.
//   itask : index into m_arg (isite, isite_cpx are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1..id4, ix, in and the declaration of w2 are on
// lines not visible here; w2 presumably names the pointer obtained from
// m_bw_send[idir]->ptr(...) below.
241 int itask,
double *vcp1,
const double *v1)
243 int Nvc2 = 2 * m_Nvc;
244 int Nvcd = m_Nvc * m_Nd;
245 int Nvcd2 = Nvcd / 2;
253 double bc2 = m_boundary2[idir];
255 int isite = m_arg[itask].isite;
256 int isite_cp = m_arg[itask].isite_cpx;
259 const double *w1 = &v1[Nvcd * isite];
261 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Loop over (it, iz, iy); "is" indexes the field with full z extent m_Nz,
// "is2" indexes the buffer with the per-task extent m_Mz.
265 for (
int it = 0; it < m_Mt; ++it) {
266 for (
int iz = 0; iz < m_Mz; ++iz) {
267 for (
int iy = 0; iy < m_Ny; ++iy) {
268 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
269 int is2 = iy + m_Ny * (iz + m_Mz * it);
// Two groups of m_Nvc doubles per buffer site.
271 int ix1 = Nvc2 * is2;
272 int ix2 = ix1 + m_Nvc;
// Each buffer entry is bc2 times a two-term combination that pairs the
// id1 block with id4 and the id2 block with id3.
274 for (
int ic = 0; ic < m_Nc; ++ic) {
275 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
276 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
277 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
278 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
284 m_bw_send[idir]->start_thread(itask);
// Waits for the m_bw_recv[idir] buffer of one task, multiplies the received
// data by the link field via mult_uv_r / mult_uv_i and accumulates into v2.
//   itask : index into m_arg (isite, isite_cpx are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1..id4, ix, iv, ig and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_bw_recv[idir]->ptr(...) below.
290 int itask,
double *v2,
const double *vcp2)
292 int Nvc2 = 2 * m_Nvc;
293 int Nvcd = m_Nvc * m_Nd;
294 int Nvcd2 = Nvcd / 2;
303 double wt1r, wt1i, wt2r, wt2i;
305 int isite = m_arg[itask].isite;
306 int isite_cp = m_arg[itask].isite_cpx;
308 double *w2 = &v2[Nvcd * isite];
311 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
312 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread is called before any buffer element is read.
314 m_bw_recv[idir]->wait_thread(itask);
317 for (
int it = 0; it < m_Mt; ++it) {
318 for (
int iz = 0; iz < m_Mz; ++iz) {
319 for (
int iy = 0; iy < m_Ny; ++iy) {
320 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
321 int is2 = iy + m_Ny * (iz + m_Mz * it);
324 int ix1 = Nvc2 * is2;
325 int ix2 = ix1 + m_Nvc;
327 for (
int ic = 0; ic < m_Nc; ++ic) {
328 int ic2 = ic * m_Nvc;
330 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
331 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
332 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
333 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// id1/id2 receive the products directly; id3/id4 receive them with real and
// imaginary parts swapped and the signs shown.
335 w2[2 * ic + id1 + iv] += wt1r;
336 w2[2 * ic + 1 + id1 + iv] += wt1i;
337 w2[2 * ic + id2 + iv] += wt2r;
338 w2[2 * ic + 1 + id2 + iv] += wt2i;
339 w2[2 * ic + id3 + iv] += wt2i;
340 w2[2 * ic + 1 + id3 + iv] += -wt2r;
341 w2[2 * ic + id4 + iv] += wt1i;
342 w2[2 * ic + 1 + id4 + iv] += -wt1r;
// Interior update for one task: for every site with ix < m_Nx - 1, combines
// the field at the neighbouring site is + 1, multiplies by the link field
// via mult_uv_r / mult_uv_i and accumulates into v2.
//   itask : index into m_arg (isite is read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1..id4, iv and ig are on lines not visible here.
352 int itask,
double *v2,
const double *v1)
354 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
363 double vt1[m_Nvc], vt2[m_Nvc];
364 double wt1r, wt1i, wt2r, wt2i;
366 int isite = m_arg[itask].isite;
368 const double *w1 = &v1[Nvcd * isite];
369 double *w2 = &v2[Nvcd * isite];
370 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
372 for (
int it = 0; it < m_Mt; ++it) {
373 for (
int iz = 0; iz < m_Mz; ++iz) {
374 for (
int iy = 0; iy < m_Ny; ++iy) {
// The last ix is excluded from this loop.
375 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
376 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Source data is read one site further along ix.
378 int in = Nvcd * (is + 1);
381 for (
int ic = 0; ic < m_Nc; ++ic) {
382 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
383 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
384 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
385 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
388 for (
int ic = 0; ic < m_Nc; ++ic) {
389 int ic2 = ic * m_Nvc;
391 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
392 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
393 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
394 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
396 w2[2 * ic + id1 + iv] += wt1r;
397 w2[2 * ic + 1 + id1 + iv] += wt1i;
398 w2[2 * ic + id2 + iv] += wt2r;
399 w2[2 * ic + 1 + id2 + iv] += wt2i;
400 w2[2 * ic + id3 + iv] += wt2i;
401 w2[2 * ic + 1 + id3 + iv] += -wt2r;
402 w2[2 * ic + id4 + iv] += wt1i;
403 w2[2 * ic + 1 + id4 + iv] += -wt1r;
// Builds two m_Nvc-sized combinations of v1 per site, multiplies them by the
// link field via mult_udagv_r / mult_udagv_i, stores the result in the
// m_fw_send[idir] buffer and then calls start_thread(itask) on that buffer.
//   itask : index into m_arg (isite, isite_cpx are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1..id4, ix, in, ig, icr and the declaration of w2
// are on lines not visible here; w2 presumably names the pointer obtained
// from m_fw_send[idir]->ptr(...) below.
414 int itask,
double *vcp1,
const double *v1)
416 int Nvc2 = 2 * m_Nvc;
417 int Nvcd = m_Nvc * m_Nd;
418 int Nvcd2 = Nvcd / 2;
427 int isite = m_arg[itask].isite;
428 int isite_cp = m_arg[itask].isite_cpx;
430 const double *w1 = &v1[Nvcd * isite];
433 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
434 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
436 double vt1[m_Nvc], vt2[m_Nvc];
440 for (
int it = 0; it < m_Mt; ++it) {
441 for (
int iz = 0; iz < m_Mz; ++iz) {
442 for (
int iy = 0; iy < m_Ny; ++iy) {
443 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
444 int is2 = iy + m_Ny * (iz + m_Mz * it);
447 int ix1 = Nvc2 * is2;
448 int ix2 = ix1 + m_Nvc;
450 for (
int ic = 0; ic < m_Nc; ++ic) {
451 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
452 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
453 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
454 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
457 for (
int ic = 0; ic < m_Nc; ++ic) {
459 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
460 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
461 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
462 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
468 m_fw_send[idir]->start_thread(itask);
// Waits for the m_fw_recv[idir] buffer of one task and accumulates the
// received data, scaled by bc2, into v2. No link multiplication appears in
// the visible lines.
//   itask : index into m_arg (isite, isite_cpx are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1..id4, ix, iv, icr and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_fw_recv[idir]->ptr(...) below. wt1r..wt2i are not used in the visible lines.
474 int itask,
double *v2,
const double *vcp2)
476 int Nvc2 = 2 * m_Nvc;
477 int Nvcd = m_Nvc * m_Nd;
478 int Nvcd2 = Nvcd / 2;
486 double bc2 = m_boundary2[idir];
488 double wt1r, wt1i, wt2r, wt2i;
490 int isite = m_arg[itask].isite;
491 int isite_cp = m_arg[itask].isite_cpx;
493 double *w2 = &v2[Nvcd * isite];
496 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread is called before any buffer element is read.
498 m_fw_recv[idir]->wait_thread(itask);
501 for (
int it = 0; it < m_Mt; ++it) {
502 for (
int iz = 0; iz < m_Mz; ++iz) {
503 for (
int iy = 0; iy < m_Ny; ++iy) {
504 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
505 int is2 = iy + m_Ny * (iz + m_Mz * it);
507 int ix1 = Nvc2 * is2;
508 int ix2 = ix1 + m_Nvc;
510 for (
int ic = 0; ic < m_Nc; ++ic) {
512 int ici = 2 * ic + 1;
// id1/id2 take the buffer values directly; id3/id4 take them with real and
// imaginary parts swapped and the signs shown.
513 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
514 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
515 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
516 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
517 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
518 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
519 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
520 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
// Interior update for one task: for every site with ix >= 1, combines the
// field at the neighbouring site is - 1, multiplies by the link stored at
// that same neighbour via mult_udagv_r / mult_udagv_i and accumulates into v2.
//   itask : index into m_arg (isite is read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1..id4, iv and ic2 are on lines not visible here.
530 int itask,
double *v2,
const double *v1)
532 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
541 double vt1[m_Nvc], vt2[m_Nvc];
542 double wt1r, wt1i, wt2r, wt2i;
544 int isite = m_arg[itask].isite;
546 const double *w1 = &v1[Nvcd * isite];
547 double *w2 = &v2[Nvcd * isite];
548 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
550 for (
int it = 0; it < m_Mt; ++it) {
551 for (
int iz = 0; iz < m_Mz; ++iz) {
552 for (
int iy = 0; iy < m_Ny; ++iy) {
// The first ix is excluded from this loop.
553 for (
int ix = 1; ix < m_Nx; ++ix) {
554 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Both the source data and the link are read one site back along ix.
556 int in = Nvcd * (is - 1);
557 int ig = m_Ndf * (is - 1);
559 for (
int ic = 0; ic < m_Nc; ++ic) {
560 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
561 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
562 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
563 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
566 for (
int ic = 0; ic < m_Nc; ++ic) {
569 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
570 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
571 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
572 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
574 w2[2 * ic + id1 + iv] += wt1r;
575 w2[2 * ic + 1 + id1 + iv] += wt1i;
576 w2[2 * ic + id2 + iv] += wt2r;
577 w2[2 * ic + 1 + id2 + iv] += wt2i;
578 w2[2 * ic + id3 + iv] += -wt2i;
579 w2[2 * ic + 1 + id3 + iv] += +wt2r;
580 w2[2 * ic + id4 + iv] += -wt1i;
581 w2[2 * ic + 1 + id4 + iv] += +wt1r;
// Packs per-site data from v1 into the m_bw_send[idir] buffer for one task
// and then calls start_thread(itask) on that buffer.
//   itask : index into m_arg (isite, isite_cpy are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1..id4, iy, in and the declaration of w2 are on
// lines not visible here; w2 presumably names the pointer obtained from
// m_bw_send[idir]->ptr(...) below.
592 int itask,
double *vcp1,
const double *v1)
594 int Nvc2 = 2 * m_Nvc;
595 int Nvcd = m_Nvc * m_Nd;
596 int Nvcd2 = Nvcd / 2;
603 int isite = m_arg[itask].isite;
604 int isite_cp = m_arg[itask].isite_cpy;
607 double bc2 = m_boundary2[idir];
609 const double *w1 = &v1[Nvcd * isite];
612 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// Loop over (it, iz, ix); "is" indexes the field with full z extent m_Nz,
// "is2" indexes the buffer with the per-task extent m_Mz.
617 for (
int it = 0; it < m_Mt; ++it) {
618 for (
int iz = 0; iz < m_Mz; ++iz) {
619 for (
int ix = 0; ix < m_Nx; ++ix) {
620 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
621 int is2 = ix + m_Nx * (iz + m_Mz * it);
623 int ix1 = Nvc2 * is2;
624 int ix2 = ix1 + m_Nvc;
// Each buffer entry is bc2 times a same-parity sum (id1 + id4) or
// difference (id2 - id3); real and imaginary parts are not mixed here.
626 for (
int ic = 0; ic < m_Nc; ++ic) {
627 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
628 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
629 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
630 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
636 m_bw_send[idir]->start_thread(itask);
// Waits for the m_bw_recv[idir] buffer of one task, multiplies the received
// data by the link field via mult_uv_r / mult_uv_i and accumulates into v2.
//   itask : index into m_arg (isite, isite_cpy are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1..id4, iy, iv, ig and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_bw_recv[idir]->ptr(...) below.
642 int itask,
double *v2,
const double *vcp2)
644 int Nvc2 = 2 * m_Nvc;
645 int Nvcd = m_Nvc * m_Nd;
646 int Nvcd2 = Nvcd / 2;
655 double wt1r, wt1i, wt2r, wt2i;
657 int isite = m_arg[itask].isite;
658 int isite_cp = m_arg[itask].isite_cpy;
660 double *w2 = &v2[Nvcd * isite];
663 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
664 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread is called before any buffer element is read.
666 m_bw_recv[idir]->wait_thread(itask);
669 for (
int it = 0; it < m_Mt; ++it) {
670 for (
int iz = 0; iz < m_Mz; ++iz) {
671 for (
int ix = 0; ix < m_Nx; ++ix) {
672 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
673 int is2 = ix + m_Nx * (iz + m_Mz * it);
676 int ix1 = Nvc2 * is2;
677 int ix2 = ix1 + m_Nvc;
679 for (
int ic = 0; ic < m_Nc; ++ic) {
680 int ic2 = ic * m_Nvc;
682 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
683 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
684 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
685 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// id1/id2 receive the products directly; id3 receives minus the second
// product and id4 receives the first product.
687 w2[2 * ic + id1 + iv] += wt1r;
688 w2[2 * ic + 1 + id1 + iv] += wt1i;
689 w2[2 * ic + id2 + iv] += wt2r;
690 w2[2 * ic + 1 + id2 + iv] += wt2i;
691 w2[2 * ic + id3 + iv] += -wt2r;
692 w2[2 * ic + 1 + id3 + iv] += -wt2i;
693 w2[2 * ic + id4 + iv] += wt1r;
694 w2[2 * ic + 1 + id4 + iv] += wt1i;
// Interior update for one task: for every site with iy < m_Ny - 1, combines
// the field at the neighbouring site is + m_Nx, multiplies by the link field
// via mult_uv_r / mult_uv_i and accumulates into v2.
//   itask : index into m_arg (isite is read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1..id4, iv and ig are on lines not visible here.
704 int itask,
double *v2,
const double *v1)
706 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
715 double vt1[m_Nvc], vt2[m_Nvc];
716 double wt1r, wt1i, wt2r, wt2i;
718 int isite = m_arg[itask].isite;
720 double *w2 = &v2[Nvcd * isite];
721 const double *w1 = &v1[Nvcd * isite];
722 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
724 for (
int it = 0; it < m_Mt; ++it) {
725 for (
int iz = 0; iz < m_Mz; ++iz) {
// The last iy is excluded from this loop.
726 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
727 for (
int ix = 0; ix < m_Nx; ++ix) {
728 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Source data is read one row (m_Nx sites) further along iy.
730 int in = Nvcd * (is + m_Nx);
733 for (
int ic = 0; ic < m_Nc; ++ic) {
734 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
735 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
736 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
737 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
740 for (
int ic = 0; ic < m_Nc; ++ic) {
741 int ic2 = ic * m_Nvc;
743 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
744 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
745 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
746 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
748 w2[2 * ic + id1 + iv] += wt1r;
749 w2[2 * ic + 1 + id1 + iv] += wt1i;
750 w2[2 * ic + id2 + iv] += wt2r;
751 w2[2 * ic + 1 + id2 + iv] += wt2i;
752 w2[2 * ic + id3 + iv] += -wt2r;
753 w2[2 * ic + 1 + id3 + iv] += -wt2i;
754 w2[2 * ic + id4 + iv] += wt1r;
755 w2[2 * ic + 1 + id4 + iv] += wt1i;
// Builds two m_Nvc-sized combinations of v1 per site, multiplies them by the
// link field via mult_udagv_r / mult_udagv_i, stores the result in the
// m_fw_send[idir] buffer and then calls start_thread(itask) on that buffer.
//   itask : index into m_arg (isite, isite_cpy are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1..id4, iy, in, ig, icr and the declaration of w2
// are on lines not visible here; w2 presumably names the pointer obtained
// from m_fw_send[idir]->ptr(...) below.
766 int itask,
double *vcp1,
const double *v1)
768 int Nvc2 = 2 * m_Nvc;
769 int Nvcd = m_Nvc * m_Nd;
770 int Nvcd2 = Nvcd / 2;
779 int isite = m_arg[itask].isite;
780 int isite_cp = m_arg[itask].isite_cpy;
784 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
786 const double *w1 = &v1[Nvcd * isite];
787 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
789 double vt1[m_Nvc], vt2[m_Nvc];
793 for (
int it = 0; it < m_Mt; ++it) {
794 for (
int iz = 0; iz < m_Mz; ++iz) {
795 for (
int ix = 0; ix < m_Nx; ++ix) {
796 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
797 int is2 = ix + m_Nx * (iz + m_Mz * it);
800 int ix1 = Nvc2 * is2;
801 int ix2 = ix1 + m_Nvc;
803 for (
int ic = 0; ic < m_Nc; ++ic) {
804 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
805 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
806 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
807 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
810 for (
int ic = 0; ic < m_Nc; ++ic) {
812 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
813 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
814 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
815 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
821 m_fw_send[idir]->start_thread(itask);
// Waits for the m_fw_recv[idir] buffer of one task and accumulates the
// received data, scaled by bc2, into v2. No link multiplication appears in
// the visible lines.
//   itask : index into m_arg (isite, isite_cpy are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1..id4, iy, iv, icr and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_fw_recv[idir]->ptr(...) below. wt1r..wt2i are not used in the visible lines.
827 int itask,
double *v2,
const double *vcp2)
829 int Nvc2 = 2 * m_Nvc;
830 int Nvcd = m_Nvc * m_Nd;
831 int Nvcd2 = Nvcd / 2;
839 double bc2 = m_boundary2[idir];
841 double wt1r, wt1i, wt2r, wt2i;
843 int isite = m_arg[itask].isite;
844 int isite_cp = m_arg[itask].isite_cpy;
846 double *w2 = &v2[Nvcd * isite];
849 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread is called before any buffer element is read.
851 m_fw_recv[idir]->wait_thread(itask);
854 for (
int it = 0; it < m_Mt; ++it) {
855 for (
int iz = 0; iz < m_Mz; ++iz) {
856 for (
int ix = 0; ix < m_Nx; ++ix) {
857 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
858 int is2 = ix + m_Nx * (iz + m_Mz * it);
860 int ix1 = Nvc2 * is2;
861 int ix2 = ix1 + m_Nvc;
863 for (
int ic = 0; ic < m_Nc; ++ic) {
865 int ici = 2 * ic + 1;
// id1/id2 take the buffer values directly; id3 takes the second group with
// a plus sign and id4 the first group with a minus sign.
866 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
867 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
868 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
869 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
870 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
871 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
872 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
873 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
// Interior update for one task: for every site with iy >= 1, combines the
// field at the neighbouring site is - m_Nx, multiplies by the link stored at
// that same neighbour via mult_udagv_r / mult_udagv_i and accumulates into v2.
//   itask : index into m_arg (isite is read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1..id4, iv and ic2 are on lines not visible here.
// Unlike the mult_uv-based kernels, this one also uses ic2 as the offset
// into w2, so its definition should be checked in the full file.
883 int itask,
double *v2,
const double *v1)
885 int Nvcd = m_Nvc * m_Nd;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
894 double vt1[m_Nvc], vt2[m_Nvc];
895 double wt1r, wt1i, wt2r, wt2i;
897 int isite = m_arg[itask].isite;
899 double *w2 = &v2[Nvcd * isite];
900 const double *w1 = &v1[Nvcd * isite];
901 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
903 for (
int it = 0; it < m_Mt; ++it) {
904 for (
int iz = 0; iz < m_Mz; ++iz) {
// The first iy is excluded from this loop.
905 for (
int iy = 1; iy < m_Ny; ++iy) {
906 for (
int ix = 0; ix < m_Nx; ++ix) {
907 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
// Both the source data and the link are read one row (m_Nx sites) back.
909 int in = Nvcd * (is - m_Nx);
910 int ig = m_Ndf * (is - m_Nx);
912 for (
int ic = 0; ic < m_Nc; ++ic) {
913 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
914 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
915 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
916 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
919 for (
int ic = 0; ic < m_Nc; ++ic) {
921 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
922 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
923 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
924 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
926 w2[ic2 + id1 + iv] += wt1r;
927 w2[ic2 + 1 + id1 + iv] += wt1i;
928 w2[ic2 + id2 + iv] += wt2r;
929 w2[ic2 + 1 + id2 + iv] += wt2i;
930 w2[ic2 + id3 + iv] += wt2r;
931 w2[ic2 + 1 + id3 + iv] += wt2i;
932 w2[ic2 + id4 + iv] += -wt1r;
933 w2[ic2 + 1 + id4 + iv] += -wt1i;
// Packs per-site data from v1 into the m_bw_send[idir] buffer for one task
// when the task's kz0 flag is 1, and calls start_thread(itask) on the buffer.
//   itask : index into m_arg (isite, isite_cpz, kz0 are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1..id4, iz, in and the declaration of w2 are on
// lines not visible here; closing braces are not visible either, so whether
// start_thread sits inside or after the kz0 branch cannot be told from here.
944 int itask,
double *vcp1,
const double *v1)
946 int Nvc2 = 2 * m_Nvc;
947 int Nvcd = m_Nvc * m_Nd;
948 int Nvcd2 = Nvcd / 2;
955 int isite = m_arg[itask].isite;
956 int isite_cp = m_arg[itask].isite_cpz;
959 double bc2 = m_boundary2[idir];
963 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
964 const double *w1 = &v1[Nvcd * isite];
966 if (m_arg[itask].kz0 == 1) {
967 int Nxy = m_Nx * m_Ny;
// One xy-plane per it: "is" indexes the field, "is2" the buffer.
969 for (
int it = 0; it < m_Mt; ++it) {
970 for (
int ixy = 0; ixy < Nxy; ++ixy) {
971 int is = ixy + Nxy * (iz + m_Nz * it);
972 int is2 = ixy + Nxy * it;
975 int ix1 = Nvc2 * is2;
976 int ix2 = ix1 + m_Nvc;
// Each buffer entry is bc2 times a two-term combination that pairs the
// id1 block with id3 and the id2 block with id4.
978 for (
int ic = 0; ic < m_Nc; ++ic) {
979 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
980 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
981 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
982 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
988 m_bw_send[idir]->start_thread(itask);
// Waits for the m_bw_recv[idir] buffer of one task and, when the task's kz1
// flag is 1, multiplies the received data by the link field via
// mult_uv_r / mult_uv_i and accumulates into v2.
//   itask : index into m_arg (isite, isite_cpz, kz1 are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1, id2, iz, iv and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_bw_recv[idir]->ptr(...) below.
994 int itask,
double *v2,
const double *vcp2)
996 int Nvc2 = 2 * m_Nvc;
997 int Nvcd = m_Nvc * m_Nd;
998 int Nvcd2 = Nvcd / 2;
// Component offsets of the third and fourth m_Nvc-sized groups of a site.
1002 int id3 = m_Nvc * 2;
1003 int id4 = m_Nvc * 3;
1007 double wt1r, wt1i, wt2r, wt2i;
1009 int isite = m_arg[itask].isite;
1010 int isite_cp = m_arg[itask].isite_cpz;
1012 double *w2 = &v2[Nvcd * isite];
1015 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1016 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread is called unconditionally, before the kz1 check.
1018 m_bw_recv[idir]->wait_thread(itask);
1020 if (m_arg[itask].kz1 == 1) {
1021 int Nxy = m_Nx * m_Ny;
1023 for (
int it = 0; it < m_Mt; ++it) {
1024 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1025 int is = ixy + Nxy * (iz + m_Nz * it);
1026 int is2 = ixy + Nxy * it;
1028 int ig = m_Ndf * is;
1029 int ix1 = Nvc2 * is2;
1030 int ix2 = ix1 + m_Nvc;
1032 for (
int ic = 0; ic < m_Nc; ++ic) {
1033 int ic2 = ic * m_Nvc;
1035 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1036 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1037 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1038 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// id1/id2 receive the products directly; id3/id4 receive them with real and
// imaginary parts swapped and the signs shown.
1040 w2[2 * ic + id1 + iv] += wt1r;
1041 w2[2 * ic + 1 + id1 + iv] += wt1i;
1042 w2[2 * ic + id2 + iv] += wt2r;
1043 w2[2 * ic + 1 + id2 + iv] += wt2i;
1044 w2[2 * ic + id3 + iv] += wt1i;
1045 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1046 w2[2 * ic + id4 + iv] += -wt2i;
1047 w2[2 * ic + 1 + id4 + iv] += wt2r;
// Interior update for one task: for every site whose neighbour one xy-plane
// further (is + Nxy) is handled locally, combines the neighbour's field,
// multiplies by the link field via mult_uv_r / mult_uv_i and accumulates
// into v2.
//   itask : index into m_arg (isite, kz1 are read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1, id2 and iv are on lines not visible here.
1057 int itask,
double *v2,
const double *v1)
1059 int Nvcd = m_Nvc * m_Nd;
1063 int id3 = m_Nvc * 2;
1064 int id4 = m_Nvc * 3;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
1068 double vt1[m_Nvc], vt2[m_Nvc];
1069 double wt1r, wt1i, wt2r, wt2i;
1071 int isite = m_arg[itask].isite;
1073 double *w2 = &v2[Nvcd * isite];
1074 const double *w1 = &v1[Nvcd * isite];
1075 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1077 int kz1 = m_arg[itask].kz1;
1078 int Nxy = m_Nx * m_Ny;
1080 for (
int it = 0; it < m_Mt; ++it) {
// When kz1 is 1 the last iz of the task is left out of this loop.
1081 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1082 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1083 int is = ixy + Nxy * (iz + m_Nz * it);
1085 int in = Nvcd * (is + Nxy);
1086 int ig = m_Ndf * is;
1088 for (
int ic = 0; ic < m_Nc; ++ic) {
1089 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1090 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1091 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1092 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1095 for (
int ic = 0; ic < m_Nc; ++ic) {
1096 int ic2 = ic * m_Nvc;
1098 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1099 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1100 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1101 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1103 w2[2 * ic + id1 + iv] += wt1r;
1104 w2[2 * ic + 1 + id1 + iv] += wt1i;
1105 w2[2 * ic + id2 + iv] += wt2r;
1106 w2[2 * ic + 1 + id2 + iv] += wt2i;
1107 w2[2 * ic + id3 + iv] += wt1i;
1108 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1109 w2[2 * ic + id4 + iv] += -wt2i;
1110 w2[2 * ic + 1 + id4 + iv] += wt2r;
// When the task's kz1 flag is 1: builds two m_Nvc-sized combinations of v1
// per site, multiplies them by the link field via mult_udagv_r /
// mult_udagv_i and stores the result in the m_fw_send[idir] buffer.
// start_thread(itask) is called on that buffer.
//   itask : index into m_arg (isite, isite_cpz, kz1 are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1, id2, iz, in, icr and the declaration of w2 are on
// lines not visible here; closing braces are not visible either, so whether
// start_thread sits inside or after the kz1 branch cannot be told from here.
1120 int itask,
double *vcp1,
const double *v1)
1122 int Nvc2 = 2 * m_Nvc;
1123 int Nvcd = m_Nvc * m_Nd;
1124 int Nvcd2 = Nvcd / 2;
1128 int id3 = m_Nvc * 2;
1129 int id4 = m_Nvc * 3;
1133 int isite = m_arg[itask].isite;
1134 int isite_cp = m_arg[itask].isite_cpz;
1138 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1139 const double *w1 = &v1[Nvcd * isite];
1140 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
1142 double vt1[m_Nvc], vt2[m_Nvc];
1144 if (m_arg[itask].kz1 == 1) {
1145 int Nxy = m_Nx * m_Ny;
1147 for (
int it = 0; it < m_Mt; ++it) {
1148 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1149 int is = ixy + Nxy * (iz + m_Nz * it);
1150 int is2 = ixy + Nxy * it;
1152 int ig = m_Ndf * is;
1153 int ix1 = Nvc2 * is2;
1154 int ix2 = ix1 + m_Nvc;
1156 for (
int ic = 0; ic < m_Nc; ++ic) {
1157 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1158 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1159 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1160 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1163 for (
int ic = 0; ic < m_Nc; ++ic) {
1165 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1166 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1167 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1168 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1174 m_fw_send[idir]->start_thread(itask);
// Waits for the m_fw_recv[idir] buffer of one task and, when the task's kz0
// flag is 1, accumulates the received data, scaled by bc2, into v2. No link
// multiplication appears in the visible lines.
//   itask : index into m_arg (isite, isite_cpz, kz0 are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, id1, id2, iz, iv, icr and the declaration of w1 are on
// lines not visible here; w1 presumably names the pointer obtained from
// m_fw_recv[idir]->ptr(...) below. wt1r..wt2i are not used in the visible lines.
1180 int itask,
double *v2,
const double *vcp2)
1182 int Nvc2 = 2 * m_Nvc;
1183 int Nvcd = m_Nvc * m_Nd;
1184 int Nvcd2 = Nvcd / 2;
1188 int id3 = m_Nvc * 2;
1189 int id4 = m_Nvc * 3;
1192 double bc2 = m_boundary2[idir];
1194 double wt1r, wt1i, wt2r, wt2i;
1196 int isite = m_arg[itask].isite;
1197 int isite_cp = m_arg[itask].isite_cpz;
1199 double *w2 = &v2[Nvcd * isite];
1202 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread is called unconditionally, before the kz0 check.
1204 m_fw_recv[idir]->wait_thread(itask);
1206 if (m_arg[itask].kz0 == 1) {
1207 int Nxy = m_Nx * m_Ny;
1210 for (
int it = 0; it < m_Mt; ++it) {
1211 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1212 int is = ixy + Nxy * (iz + m_Nz * it);
1213 int is2 = ixy + Nxy * it;
1215 int ix1 = Nvc2 * is2;
1216 int ix2 = ix1 + m_Nvc;
1218 for (
int ic = 0; ic < m_Nc; ++ic) {
1220 int ici = 2 * ic + 1;
// id1/id2 take the buffer values directly; id3/id4 take them with real and
// imaginary parts swapped and the signs shown.
1221 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1222 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1223 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1224 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1225 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1226 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1227 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1228 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
// Interior update for one task: for every site whose neighbour one xy-plane
// back (is - Nxy) is handled locally, combines the neighbour's field,
// multiplies by the link stored at that neighbour via mult_udagv_r /
// mult_udagv_i and accumulates into v2.
//   itask : index into m_arg (isite, kz0 are read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir, id1, id2, iv and ic2 are on lines not visible here.
// Unlike the mult_uv-based kernels, this one also uses ic2 as the offset
// into w2, so its definition should be checked in the full file.
1238 int itask,
double *v2,
const double *v1)
1240 int Nvcd = m_Nvc * m_Nd;
1244 int id3 = m_Nvc * 2;
1245 int id4 = m_Nvc * 3;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
1249 double vt1[m_Nvc], vt2[m_Nvc];
1250 double wt1r, wt1i, wt2r, wt2i;
1252 int isite = m_arg[itask].isite;
1254 double *w2 = &v2[Nvcd * isite];
1255 const double *w1 = &v1[Nvcd * isite];
1256 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1258 int kz0 = m_arg[itask].kz0;
1259 int Nxy = m_Nx * m_Ny;
1261 for (
int it = 0; it < m_Mt; ++it) {
// When kz0 is 1 the first iz of the task is left out of this loop.
1262 for (
int iz = kz0; iz < m_Mz; ++iz) {
1263 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1264 int is = ixy + Nxy * (iz + m_Nz * it);
1266 int in = Nvcd * (is - Nxy);
1267 int ig = m_Ndf * (is - Nxy);
1269 for (
int ic = 0; ic < m_Nc; ++ic) {
1270 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1271 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1272 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1273 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1276 for (
int ic = 0; ic < m_Nc; ++ic) {
1278 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1279 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1280 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1281 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1283 w2[ic2 + id1 + iv] += wt1r;
1284 w2[ic2 + 1 + id1 + iv] += wt1i;
1285 w2[ic2 + id2 + iv] += wt2r;
1286 w2[ic2 + 1 + id2 + iv] += wt2i;
1287 w2[ic2 + id3 + iv] += -wt1i;
1288 w2[ic2 + 1 + id3 + iv] += wt1r;
1289 w2[ic2 + id4 + iv] += wt2i;
1290 w2[ic2 + 1 + id4 + iv] += -wt2r;
// Packs per-site data from v1 into the m_bw_send[idir] buffer for one task
// when the task's kt0 flag is 1, and calls start_thread(itask) on the buffer.
// Only the id3 and id4 groups of v1 are read, each scaled by 2.0 * bc2.
//   itask : index into m_arg (isite, isite_cpt, kt0 are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, it, in and the declaration of w2 are on lines not
// visible here; closing braces are not visible either, so whether
// start_thread sits inside or after the kt0 branch cannot be told from here.
1300 int itask,
double *vcp1,
const double *v1)
1302 int Nvc2 = 2 * m_Nvc;
1303 int Nvcd = m_Nvc * m_Nd;
1304 int Nvcd2 = Nvcd / 2;
1308 int id3 = m_Nvc * 2;
1309 int id4 = m_Nvc * 3;
1311 int isite = m_arg[itask].isite;
1312 int isite_cp = m_arg[itask].isite_cpt;
1315 double bc2 = m_boundary2[idir];
1319 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1320 const double *w1 = &v1[Nvcd * isite];
1322 if (m_arg[itask].kt0 == 1) {
1323 int Nxy = m_Nx * m_Ny;
// One xy-plane per iz: "is" indexes the field, "is2" the buffer.
1325 for (
int iz = 0; iz < m_Mz; ++iz) {
1326 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1327 int is = ixy + Nxy * (iz + m_Nz * it);
1328 int is2 = ixy + Nxy * iz;
1331 int ix1 = Nvc2 * is2;
1332 int ix2 = ix1 + m_Nvc;
1334 for (
int ic = 0; ic < m_Nc; ++ic) {
1335 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1336 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1337 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1338 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1344 m_bw_send[idir]->start_thread(itask);
// Waits for the m_bw_recv[idir] buffer of one task and, when the task's kt1
// flag is 1, multiplies the received data by the link field via
// mult_uv_r / mult_uv_i and accumulates into the id3 and id4 groups of v2.
//   itask : index into m_arg (isite, isite_cpt, kt1 are read)
//   v2    : field accumulated into
//   vcp2  : not referenced in the visible lines
// NOTE(review): idir, it, iv and the declaration of w1 are on lines not
// visible here; w1 presumably names the pointer obtained from
// m_bw_recv[idir]->ptr(...) below.
1350 int itask,
double *v2,
const double *vcp2)
1352 int Nvc2 = 2 * m_Nvc;
1353 int Nvcd = m_Nvc * m_Nd;
1354 int Nvcd2 = Nvcd / 2;
1358 int id3 = m_Nvc * 2;
1359 int id4 = m_Nvc * 3;
1363 double wt1r, wt1i, wt2r, wt2i;
1365 int isite = m_arg[itask].isite;
1366 int isite_cp = m_arg[itask].isite_cpt;
1368 double *w2 = &v2[Nvcd * isite];
1371 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1372 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread is called unconditionally, before the kt1 check.
1374 m_bw_recv[idir]->wait_thread(itask);
1376 if (m_arg[itask].kt1 == 1) {
1377 int Nxy = m_Nx * m_Ny;
1379 for (
int iz = 0; iz < m_Mz; ++iz) {
1380 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1381 int is = ixy + Nxy * (iz + m_Nz * it);
1382 int is2 = ixy + Nxy * iz;
1384 int ig = m_Ndf * is;
1385 int ix1 = Nvc2 * is2;
1386 int ix2 = ix1 + m_Nvc;
1388 for (
int ic = 0; ic < m_Nc; ++ic) {
1389 int ic2 = ic * m_Nvc;
1391 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1392 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1393 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1394 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1396 w2[2 * ic + id3 + iv] += wt1r;
1397 w2[2 * ic + 1 + id3 + iv] += wt1i;
1398 w2[2 * ic + id4 + iv] += wt2r;
1399 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Interior update for one task: for every site whose neighbour one xyz-volume
// further (is + Nxyz) is handled locally, takes twice the id3/id4 groups of
// the neighbour, multiplies by the link field via mult_uv_r / mult_uv_i and
// accumulates into the id3/id4 groups of v2.
//   itask : index into m_arg (isite, kt1 are read)
//   v2    : field accumulated into
//   v1    : source field
// NOTE(review): idir and iv are on lines not visible here.
1409 int itask,
double *v2,
const double *v1)
1411 int Nvcd = m_Nvc * m_Nd;
1415 int id3 = m_Nvc * 2;
1416 int id4 = m_Nvc * 3;
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
1420 double vt1[m_Nvc], vt2[m_Nvc];
1421 double wt1r, wt1i, wt2r, wt2i;
1423 int isite = m_arg[itask].isite;
1425 double *w2 = &v2[Nvcd * isite];
1426 const double *w1 = &v1[Nvcd * isite];
1427 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
1429 int kt1 = m_arg[itask].kt1;
1430 int Nxy = m_Nx * m_Ny;
1431 int Nxyz = Nxy * m_Nz;
// When kt1 is 1 the last it of the task is left out of this loop.
1433 for (
int it = 0; it < m_Mt - kt1; ++it) {
1434 for (
int iz = 0; iz < m_Mz; ++iz) {
1435 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1436 int is = ixy + Nxy * (iz + m_Nz * it);
1438 int in = Nvcd * (is + Nxyz);
1439 int ig = m_Ndf * is;
1441 for (
int ic = 0; ic < m_Nc; ++ic) {
1442 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1443 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1444 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1445 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1448 for (
int ic = 0; ic < m_Nc; ++ic) {
1449 int ic2 = ic * m_Nvc;
1451 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1452 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1453 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1454 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1456 w2[2 * ic + id3 + iv] += wt1r;
1457 w2[2 * ic + 1 + id3 + iv] += wt1i;
1458 w2[2 * ic + id4 + iv] += wt2r;
1459 w2[2 * ic + 1 + id4 + iv] += wt2i;
// When the task's kt1 flag is 1: takes twice the id1/id2 groups of v1 per
// site, multiplies them by the link field via mult_udagv_r / mult_udagv_i
// and stores the result in the m_fw_send[idir] buffer.
// start_thread(itask) is called on that buffer.
//   itask : index into m_arg (isite, isite_cpt, kt1 are read)
//   vcp1  : not referenced in the visible lines
//   v1    : source field
// NOTE(review): idir, id1, id2, it, in, icr and the declaration of w2 are on
// lines not visible here; closing braces are not visible either, so whether
// start_thread sits inside or after the kt1 branch cannot be told from here.
1469 int itask,
double *vcp1,
const double *v1)
1471 int Nvc2 = 2 * m_Nvc;
1472 int Nvcd = m_Nvc * m_Nd;
1473 int Nvcd2 = Nvcd / 2;
1477 int id3 = m_Nvc * 2;
1478 int id4 = m_Nvc * 3;
1482 int isite = m_arg[itask].isite;
1483 int isite_cp = m_arg[itask].isite_cpt;
1487 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1488 const double *w1 = &v1[Nvcd * isite];
1489 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): arrays sized by m_Nvc; unless m_Nvc is a compile-time
// constant this relies on the variable-length-array compiler extension.
1491 double vt1[m_Nvc], vt2[m_Nvc];
1493 if (m_arg[itask].kt1 == 1) {
1494 int Nxy = m_Nx * m_Ny;
1496 for (
int iz = 0; iz < m_Mz; ++iz) {
1497 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1498 int is = ixy + Nxy * (iz + m_Nz * it);
1499 int is2 = ixy + Nxy * iz;
1501 int ig = m_Ndf * is;
1502 int ix1 = Nvc2 * is2;
1503 int ix2 = ix1 + m_Nvc;
1505 for (
int ic = 0; ic < m_Nc; ++ic) {
1506 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1507 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1508 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1509 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1512 for (
int ic = 0; ic < m_Nc; ++ic) {
1514 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1515 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1516 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1517 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1523 m_fw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that calls wait_thread on m_fw_recv[idir] and
// accumulates buffer values, scaled by bc2, into v2.
// NOTE(review): presumably mult_tm2_dirac_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, it, iv, icr and the
// buffer pointer are not visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field.
// vcp2 is not referenced in the visible lines.
1529 int itask,
double *v2,
const double *vcp2)
1531 int Nvc2 = 2 * m_Nvc;
1532 int Nvcd = m_Nvc * m_Nd;
1533 int Nvcd2 = Nvcd / 2;
// id3/id4 are not used in the visible lines of this fragment.
1537 int id3 = m_Nvc * 2;
1538 int id4 = m_Nvc * 3;
// Factor taken from m_boundary2 for direction idir; multiplies every received value.
1541 double bc2 = m_boundary2[idir];
// Not used in the visible lines of this fragment.
1543 double wt1r, wt1i, wt2r, wt2i;
1545 int isite = m_arg[itask].isite;
1546 int isite_cp = m_arg[itask].isite_cpt;
1548 double *w2 = &v2[Nvcd * isite];
// Pointer into the forward receive buffer at byte offset
// sizeof(double) * Nvcd2 * isite_cp (left-hand side is on a line not shown;
// presumably the w1 read below).
1551 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread is called before any buffer element is read.
1553 m_fw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt0 == 1 consume the received data.
1555 if (m_arg[itask].kt0 == 1) {
1556 int Nxy = m_Nx * m_Ny;
1558 for (
int iz = 0; iz < m_Mz; ++iz) {
1559 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1560 int is = ixy + Nxy * (iz + m_Nz * it);
1561 int is2 = ixy + Nxy * iz;
1563 int ix1 = Nvc2 * is2;
1564 int ix2 = ix1 + m_Nvc;
// Add bc2 times buffer slots ix1/ix2 to the id1/id2 blocks of w2.
1566 for (
int ic = 0; ic < m_Nc; ++ic) {
1568 int ici = 2 * ic + 1;
1569 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1570 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1571 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1572 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// Fragment of a per-task kernel that, for it in [kt0, m_Mt), reads the site
// Nxyz below the current one, applies mult_udagv_r/_i, and accumulates into v2.
// NOTE(review): presumably mult_tmb_dirac_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, iv and ic2 are not
// visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field;
// v1 is the input field.
1582 int itask,
double *v2,
const double *v1)
1584 int Nvcd = m_Nvc * m_Nd;
// id3/id4 are not used in the visible lines of this fragment.
1588 int id3 = m_Nvc * 2;
1589 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, a compiler extension in C++ -- confirm.
1593 double vt1[m_Nvc], vt2[m_Nvc];
1594 double wt1r, wt1i, wt2r, wt2i;
1596 int isite = m_arg[itask].isite;
1598 double *w2 = &v2[Nvcd * isite];
1599 const double *w1 = &v1[Nvcd * isite];
1600 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt0 is used as the loop start, so a task with kt0 == 1 skips it == 0 here.
1602 int kt0 = m_arg[itask].kt0;
1603 int Nxy = m_Nx * m_Ny;
1604 int Nxyz = Nxy * m_Nz;
1606 for (
int it = kt0; it < m_Mt; ++it) {
1607 for (
int iz = 0; iz < m_Mz; ++iz) {
1608 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1609 int is = ixy + Nxy * (iz + m_Nz * it);
// Field and link offsets of the site Nxyz positions before is.
1611 int in = Nvcd * (is - Nxyz);
1612 int ig = m_Ndf * (is - Nxyz);
// Load twice the id1 and id2 blocks of that site into the temporaries.
1614 for (
int ic = 0; ic < m_Nc; ++ic) {
1615 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1616 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1617 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1618 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
// Accumulate the mult_udagv results into the id1/id2 blocks of w2.
1621 for (
int ic = 0; ic < m_Nc; ++ic) {
1623 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1624 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1625 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1626 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1628 w2[ic2 + id1 + iv] += wt1r;
1629 w2[ic2 + 1 + id1 + iv] += wt1i;
1630 w2[ic2 + id2 + iv] += wt2r;
1631 w2[ic2 + 1 + id2 + iv] += wt2i;
// Fragment of a per-task kernel that fills the m_bw_send[idir] buffer with
// bc2-scaled sums of field blocks and then calls start_thread on it.
// NOTE(review): presumably mult_tp1_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, it, in and the buffer
// pointer are not visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v1 is the input field.
// vcp1 is not referenced in the visible lines.
1641 int itask,
double *vcp1,
const double *v1)
1643 int Nvc2 = 2 * m_Nvc;
1644 int Nvcd = m_Nvc * m_Nd;
1645 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1649 int id3 = m_Nvc * 2;
1650 int id4 = m_Nvc * 3;
1652 int isite = m_arg[itask].isite;
1653 int isite_cp = m_arg[itask].isite_cpt;
// Factor taken from m_boundary2 for direction idir; applied before sending.
1656 double bc2 = m_boundary2[idir];
// Pointer into the backward send buffer at byte offset
// sizeof(double) * Nvcd2 * isite_cp (left-hand side is on a line not shown;
// presumably the w2 written below).
1660 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1661 const double *w1 = &v1[Nvcd * isite];
// Only tasks flagged with kt0 == 1 write into the send buffer.
1663 if (m_arg[itask].kt0 == 1) {
1664 int Nxy = m_Nx * m_Ny;
1666 for (
int iz = 0; iz < m_Mz; ++iz) {
1667 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1668 int is = ixy + Nxy * (iz + m_Nz * it);
1669 int is2 = ixy + Nxy * iz;
1672 int ix1 = Nvc2 * is2;
1673 int ix2 = ix1 + m_Nvc;
// Slot ix1 receives bc2 * (block id1 + block id3); slot ix2 receives
// bc2 * (block id2 + block id4).
1675 for (
int ic = 0; ic < m_Nc; ++ic) {
1676 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1677 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1678 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1679 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// Called for every task, whether or not the kt0 branch above ran
// (assuming the omitted braces close before this line -- confirm).
1685 m_bw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that calls wait_thread on m_bw_recv[idir],
// applies mult_uv_r/_i to the received data, and adds the result to all four
// blocks of v2.
// NOTE(review): presumably mult_tp2_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, it, iv and the buffer
// pointer are not visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field.
// vcp2 is not referenced in the visible lines.
1691 int itask,
double *v2,
const double *vcp2)
1693 int Nvc2 = 2 * m_Nvc;
1694 int Nvcd = m_Nvc * m_Nd;
1695 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1699 int id3 = m_Nvc * 2;
1700 int id4 = m_Nvc * 3;
1704 double wt1r, wt1i, wt2r, wt2i;
1706 int isite = m_arg[itask].isite;
1707 int isite_cp = m_arg[itask].isite_cpt;
1709 double *w2 = &v2[Nvcd * isite];
// Pointer into the backward receive buffer at byte offset
// sizeof(double) * Nvcd2 * isite_cp (left-hand side is on a line not shown;
// presumably the w1 read below).
1712 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1713 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// wait_thread is called before any buffer element is read.
1715 m_bw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt1 == 1 consume the received data.
1717 if (m_arg[itask].kt1 == 1) {
1718 int Nxy = m_Nx * m_Ny;
1720 for (
int iz = 0; iz < m_Mz; ++iz) {
1721 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1722 int is = ixy + Nxy * (iz + m_Nz * it);
1723 int is2 = ixy + Nxy * iz;
1725 int ig = m_Ndf * is;
1726 int ix1 = Nvc2 * is2;
1727 int ix2 = ix1 + m_Nvc;
1729 for (
int ic = 0; ic < m_Nc; ++ic) {
1730 int ic2 = ic * m_Nvc;
// Apply the link at site is directly to buffer slots ix1 and ix2.
1732 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1733 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1734 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1735 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// The same pair of results is added to blocks id1/id2 and to blocks id3/id4.
1737 w2[2 * ic + id1 + iv] += wt1r;
1738 w2[2 * ic + 1 + id1 + iv] += wt1i;
1739 w2[2 * ic + id2 + iv] += wt2r;
1740 w2[2 * ic + 1 + id2 + iv] += wt2i;
1741 w2[2 * ic + id3 + iv] += wt1r;
1742 w2[2 * ic + 1 + id3 + iv] += wt1i;
1743 w2[2 * ic + id4 + iv] += wt2r;
1744 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Fragment of a per-task kernel that, for it in [0, m_Mt - kt1), reads the
// site Nxyz above the current one, applies mult_uv_r/_i, and adds the result
// to all four blocks of v2.
// NOTE(review): presumably mult_tpb_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2 and iv are not visible
// in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field;
// v1 is the input field.
1754 int itask,
double *v2,
const double *v1)
1756 int Nvcd = m_Nvc * m_Nd;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1760 int id3 = m_Nvc * 2;
1761 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, a compiler extension in C++ -- confirm.
1765 double vt1[m_Nvc], vt2[m_Nvc];
1766 double wt1r, wt1i, wt2r, wt2i;
1768 int isite = m_arg[itask].isite;
1770 double *w2 = &v2[Nvcd * isite];
1771 const double *w1 = &v1[Nvcd * isite];
1772 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt1 shortens the it loop by one, so a task with kt1 == 1 skips its last it here.
1774 int kt1 = m_arg[itask].kt1;
1775 int Nxy = m_Nx * m_Ny;
1776 int Nxyz = Nxy * m_Nz;
1778 for (
int it = 0; it < m_Mt - kt1; ++it) {
1779 for (
int iz = 0; iz < m_Mz; ++iz) {
1780 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1781 int is = ixy + Nxy * (iz + m_Nz * it);
// Field offset of the site Nxyz positions after is; the link is taken at is.
1783 int in = Nvcd * (is + Nxyz);
1784 int ig = m_Ndf * is;
// vt1 = block id1 + block id3, vt2 = block id2 + block id4 of that site.
1786 for (
int ic = 0; ic < m_Nc; ++ic) {
1787 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1788 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1789 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1790 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1793 for (
int ic = 0; ic < m_Nc; ++ic) {
1794 int ic2 = ic * m_Nvc;
1796 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1797 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1798 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1799 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// The same pair of results is added to blocks id1/id2 and to blocks id3/id4.
1801 w2[2 * ic + id1 + iv] += wt1r;
1802 w2[2 * ic + 1 + id1 + iv] += wt1i;
1803 w2[2 * ic + id2 + iv] += wt2r;
1804 w2[2 * ic + 1 + id2 + iv] += wt2i;
1805 w2[2 * ic + id3 + iv] += wt1r;
1806 w2[2 * ic + 1 + id3 + iv] += wt1i;
1807 w2[2 * ic + id4 + iv] += wt2r;
1808 w2[2 * ic + 1 + id4 + iv] += wt2i;
// Fragment of a per-task kernel that fills the m_fw_send[idir] buffer with
// mult_udagv_r/_i applied to block differences, then calls start_thread on it.
// NOTE(review): presumably mult_tm1_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, it, in, icr and the
// buffer pointer are not visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v1 is the input field.
// vcp1 is not referenced in the visible lines.
1818 int itask,
double *vcp1,
const double *v1)
1820 int Nvc2 = 2 * m_Nvc;
1821 int Nvcd = m_Nvc * m_Nd;
1822 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1826 int id3 = m_Nvc * 2;
1827 int id4 = m_Nvc * 3;
1831 int isite = m_arg[itask].isite;
1832 int isite_cp = m_arg[itask].isite_cpt;
// Pointer into the forward send buffer at byte offset
// sizeof(double) * Nvcd2 * isite_cp (left-hand side is on a line not shown;
// presumably the w2 written below).
1836 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1837 const double *w1 = &v1[Nvcd * isite];
1838 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, a compiler extension in C++ -- confirm.
1840 double vt1[m_Nvc], vt2[m_Nvc];
// Only tasks flagged with kt1 == 1 write into the send buffer.
1842 if (m_arg[itask].kt1 == 1) {
1843 int Nxy = m_Nx * m_Ny;
1845 for (
int iz = 0; iz < m_Mz; ++iz) {
1846 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1847 int is = ixy + Nxy * (iz + m_Nz * it);
1848 int is2 = ixy + Nxy * iz;
1850 int ig = m_Ndf * is;
1851 int ix1 = Nvc2 * is2;
1852 int ix2 = ix1 + m_Nvc;
// vt1 = block id1 - block id3, vt2 = block id2 - block id4.
1854 for (
int ic = 0; ic < m_Nc; ++ic) {
1855 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1856 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1857 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1858 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// Store the mult_udagv results for vt1 and vt2 in buffer slots ix1 and ix2.
1861 for (
int ic = 0; ic < m_Nc; ++ic) {
1863 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1864 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1865 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1866 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// Called for every task, whether or not the kt1 branch above ran
// (assuming the omitted braces close before this line -- confirm).
1872 m_fw_send[idir]->start_thread(itask);
// Fragment of a per-task kernel that calls wait_thread on m_fw_recv[idir] and
// combines bc2-scaled buffer values into v2: added to blocks id1/id2,
// subtracted from blocks id3/id4.
// NOTE(review): presumably mult_tm2_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, it, iv, icr and the
// buffer pointer are not visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field.
// vcp2 is not referenced in the visible lines.
1878 int itask,
double *v2,
const double *vcp2)
1880 int Nvc2 = 2 * m_Nvc;
1881 int Nvcd = m_Nvc * m_Nd;
1882 int Nvcd2 = Nvcd / 2;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1886 int id3 = m_Nvc * 2;
1887 int id4 = m_Nvc * 3;
// Factor taken from m_boundary2 for direction idir; multiplies every received value.
1890 double bc2 = m_boundary2[idir];
// Not used in the visible lines of this fragment.
1892 double wt1r, wt1i, wt2r, wt2i;
1894 int isite = m_arg[itask].isite;
1895 int isite_cp = m_arg[itask].isite_cpt;
1897 double *w2 = &v2[Nvcd * isite];
// Pointer into the forward receive buffer at byte offset
// sizeof(double) * Nvcd2 * isite_cp (left-hand side is on a line not shown;
// presumably the w1 read below).
1900 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// wait_thread is called before any buffer element is read.
1902 m_fw_recv[idir]->wait_thread(itask);
// Only tasks flagged with kt0 == 1 consume the received data.
1904 if (m_arg[itask].kt0 == 1) {
1905 int Nxy = m_Nx * m_Ny;
1907 for (
int iz = 0; iz < m_Mz; ++iz) {
1908 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1909 int is = ixy + Nxy * (iz + m_Nz * it);
1910 int is2 = ixy + Nxy * iz;
1912 int ix1 = Nvc2 * is2;
1913 int ix2 = ix1 + m_Nvc;
// Blocks id1/id2 gain bc2 * buffer; blocks id3/id4 lose the same amount.
1915 for (
int ic = 0; ic < m_Nc; ++ic) {
1917 int ici = 2 * ic + 1;
1918 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1919 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1920 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1921 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1922 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1923 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1924 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1925 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// Fragment of a per-task kernel that, for it in [kt0, m_Mt), reads the site
// Nxyz below the current one, applies mult_udagv_r/_i to block differences,
// and adds the result to blocks id1/id2 while subtracting it from id3/id4.
// NOTE(review): presumably mult_tmb_chiral_thread -- the function-name line,
// the braces, and the declarations of idir, id1, id2, iv and ic2 are not
// visible in this listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field;
// v1 is the input field.
1935 int itask,
double *v2,
const double *v1)
1937 int Nvcd = m_Nvc * m_Nd;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
1941 int id3 = m_Nvc * 2;
1942 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays, a compiler extension in C++ -- confirm.
1946 double vt1[m_Nvc], vt2[m_Nvc];
1947 double wt1r, wt1i, wt2r, wt2i;
1949 int isite = m_arg[itask].isite;
1951 double *w2 = &v2[Nvcd * isite];
1952 const double *w1 = &v1[Nvcd * isite];
1953 const double *u = m_U->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt0 is used as the loop start, so a task with kt0 == 1 skips it == 0 here.
1955 int kt0 = m_arg[itask].kt0;
1956 int Nxy = m_Nx * m_Ny;
1957 int Nxyz = Nxy * m_Nz;
1959 for (
int it = kt0; it < m_Mt; ++it) {
1960 for (
int iz = 0; iz < m_Mz; ++iz) {
1961 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1962 int is = ixy + Nxy * (iz + m_Nz * it);
// Field and link offsets of the site Nxyz positions before is.
1964 int in = Nvcd * (is - Nxyz);
1965 int ig = m_Ndf * (is - Nxyz);
// vt1 = block id1 - block id3, vt2 = block id2 - block id4 of that site.
1967 for (
int ic = 0; ic < m_Nc; ++ic) {
1968 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1969 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1970 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1971 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
1974 for (
int ic = 0; ic < m_Nc; ++ic) {
1976 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1977 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1978 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1979 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Results are added to blocks id1/id2 and subtracted from blocks id3/id4.
1981 w2[ic2 + id1 + iv] += wt1r;
1982 w2[ic2 + 1 + id1 + iv] += wt1i;
1983 w2[ic2 + id2 + iv] += wt2r;
1984 w2[ic2 + 1 + id2 + iv] += wt2i;
1985 w2[ic2 + id3 + iv] -= wt1r;
1986 w2[ic2 + 1 + id3 + iv] -= wt1i;
1987 w2[ic2 + id4 + iv] -= wt2r;
1988 w2[ic2 + 1 + id4 + iv] -= wt2i;
// Fragment of a per-task kernel that copies v1 into v2 with the m_Nvc-sized
// blocks of every site exchanged pairwise: id1 <-> id3 and id2 <-> id4.
// NOTE(review): presumably gm5_dirac_thread -- the function-name line, the
// braces, and the declarations of id1 and id2 are not visible in this
// listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field;
// v1 is the input field.
1998 int itask,
double *v2,
const double *v1)
2000 int Nvcd = m_Nvc * m_Nd;
2001 int Nxy = m_Nx * m_Ny;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
2005 int id3 = m_Nvc * 2;
2006 int id4 = m_Nvc * 3;
// Both fields are addressed relative to this task's first site.
2008 int isite = m_arg[itask].isite;
2009 double *w2 = &v2[Nvcd * isite];
2010 const double *w1 = &v1[Nvcd * isite];
// Visit m_Mt * m_Mz * Nxy sites; the t stride uses m_Nz, not m_Mz.
2012 for (
int it = 0; it < m_Mt; ++it) {
2013 for (
int iz = 0; iz < m_Mz; ++iz) {
2014 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2015 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Element-wise block exchange; no arithmetic is applied to the values.
2016 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2017 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2018 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2019 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2020 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// Fragment of a per-task kernel that copies v1 into v2, keeping blocks
// id1/id2 of every site unchanged and negating blocks id3/id4.
// NOTE(review): presumably gm5_chiral_thread -- the function-name line, the
// braces, and the declarations of id1 and id2 are not visible in this
// listing; confirm against the file.
// Parameters visible here: itask indexes m_arg; v2 is the output field;
// v1 is the input field.
2030 int itask,
double *v2,
const double *v1)
2032 int Nvcd = m_Nvc * m_Nd;
2033 int Nxy = m_Nx * m_Ny;
// Offsets of the third and fourth m_Nvc-sized blocks inside one site.
2037 int id3 = m_Nvc * 2;
2038 int id4 = m_Nvc * 3;
// Both fields are addressed relative to this task's first site.
2040 int isite = m_arg[itask].isite;
2041 double *w2 = &v2[Nvcd * isite];
2042 const double *w1 = &v1[Nvcd * isite];
// Visit m_Mt * m_Mz * Nxy sites; the t stride uses m_Nz, not m_Mz.
2044 for (
int it = 0; it < m_Mt; ++it) {
2045 for (
int iz = 0; iz < m_Mz; ++iz) {
2046 for (
int ixy = 0; ixy < Nxy; ++ixy) {
2047 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
// Blocks id1/id2 are copied as-is; blocks id3/id4 are copied with sign flipped.
2048 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2049 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2050 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2051 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2052 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_ym2_thread(int, double *, const double *)
std::vector< Channel * > m_fw_recv
void clear_thread(int, double *)
void general(const char *format,...)
void mult_zmb_thread(int, double *, const double *)
std::vector< Channel * > m_fw_send
void gm5_dirac_thread(int, double *, const double *)
void mult_tmb_dirac_thread(int, double *, const double *)
void mult_tm2_dirac_thread(int, double *, const double *)
void mult_tm2_chiral_thread(int, double *, const double *)
void mult_xp1_thread(int, double *, const double *)
void mult_ymb_thread(int, double *, const double *)
void daypx_thread(int, double *, double, const double *)
void mult_zp1_thread(int, double *, const double *)
void mult_zm1_thread(int, double *, const double *)
void mult_ypb_thread(int, double *, const double *)
void mult_tp2_chiral_thread(int, double *, const double *)
void gm5_chiral_thread(int, double *, const double *)
std::vector< Channel * > m_bw_recv
void mult_xp2_thread(int, double *, const double *)
void mult_tp1_chiral_thread(int, double *, const double *)
void mult_tm1_dirac_thread(int, double *, const double *)
void mult_yp1_thread(int, double *, const double *)
static int get_num_threads_available()
returns number of threads (works outside of parallel region).
void mult_xm2_thread(int, double *, const double *)
void mult_tmb_chiral_thread(int, double *, const double *)
void mult_xm1_thread(int, double *, const double *)
void crucial(const char *format,...)
void mult_zp2_thread(int, double *, const double *)
void mult_ym1_thread(int, double *, const double *)
std::vector< Channel * > m_bw_send
void mult_xpb_thread(int, double *, const double *)
void mult_tpb_dirac_thread(int, double *, const double *)
void mult_zm2_thread(int, double *, const double *)
void mult_tm1_chiral_thread(int, double *, const double *)
void mult_tp1_dirac_thread(int, double *, const double *)
void mult_yp2_thread(int, double *, const double *)
Bridge::VerboseLevel m_vl
void mult_zpb_thread(int, double *, const double *)
void mult_tpb_chiral_thread(int, double *, const double *)
void mult_tp2_dirac_thread(int, double *, const double *)
void mult_xmb_thread(int, double *, const double *)
std::vector< mult_arg > m_arg