30 #if defined USE_GROUP_SU3
31 #include "fopr_Wilson_impl_SU3.inc"
32 #elif defined USE_GROUP_SU2
33 #include "fopr_Wilson_impl_SU2.inc"
34 #elif defined USE_GROUP_SU_N
35 #include "fopr_Wilson_impl_SU_N.inc"
87 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
88 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
89 int itask = ith_z + m_Ntask_z * ith_t;
97 if (ith_t == 0)
m_arg[itask].kt0 = 1;
98 if (ith_z == 0)
m_arg[itask].kz0 = 1;
99 if (ith_t == m_Ntask_t - 1)
m_arg[itask].kt1 = 1;
100 if (ith_z == m_Ntask_z - 1)
m_arg[itask].kz1 = 1;
104 m_arg[itask].isite_cpz = ith_t *
m_Mt * Nxy;
105 m_arg[itask].isite_cpt = ith_z *
m_Mz * Nxy;
112 int Nvcd2 = 2 * Nc * Nd / 2;
116 valarray<int> datasize(
m_Ntask);
117 valarray<int> offset_up(
m_Ntask);
118 valarray<int> offset_lw(
m_Ntask);
119 valarray<int> datasize_up(
m_Ntask);
120 valarray<int> datasize_lw(
m_Ntask);
123 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
124 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
127 destid[itask] = itask;
128 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
129 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Ny;
138 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
139 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
142 destid[itask] = itask;
143 offset[itask] =
sizeof(double) * Nvcd2 * isite_cp;
144 datasize[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Mt * m_Nx;
153 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
154 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
155 int itask = ith_z + m_Ntask_z * ith_t;
157 offset_up[itask] = 0;
158 offset_lw[itask] = 0;
159 datasize_up[itask] = 0;
160 datasize_lw[itask] = 0;
162 destid[itask] = (m_Ntask_z - 1) + ith_t * m_Ntask_z;
163 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
164 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
166 if (ith_z == m_Ntask_z - 1) {
168 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_t *
m_Mt *
m_Nx * m_Ny;
169 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mt *
m_Nx * m_Ny;
179 for (
int ith_t = 0; ith_t <
m_Ntask_t; ++ith_t) {
180 for (
int ith_z = 0; ith_z <
m_Ntask_z; ++ith_z) {
181 int itask = ith_z + m_Ntask_z * ith_t;
183 offset_up[itask] = 0;
184 offset_lw[itask] = 0;
185 datasize_up[itask] = 0;
186 datasize_lw[itask] = 0;
188 destid[itask] = ith_z + (m_Ntask_t - 1) * m_Ntask_z;
189 offset_lw[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
190 datasize_lw[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
192 if (ith_t == m_Ntask_t - 1) {
193 destid[itask] = ith_z;
194 offset_up[itask] =
sizeof(double) * Nvcd2 * ith_z *
m_Mz *
m_Nx * m_Ny;
195 datasize_up[itask] =
sizeof(double) * Nvcd2 *
m_Mz *
m_Nx * m_Ny;
207 double *v2,
double fac,
double *v1)
209 int Nvcd = m_Nvc * m_Nd;
210 int Nvxy = Nvcd * m_Nx * m_Ny;
212 int isite = m_arg[itask].isite;
213 double *w2 = &v2[Nvcd * isite];
214 double *w1 = &v1[Nvcd * isite];
216 for (
int it = 0; it < m_Mt; ++it) {
217 for (
int iz = 0; iz < m_Mz; ++iz) {
218 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
219 int iv = ivxy + Nvxy * (iz + m_Nz * it);
220 w2[iv] = fac * w2[iv] + w1[iv];
231 int Nvcd = m_Nvc * m_Nd;
232 int Nvxy = Nvcd * m_Nx * m_Ny;
234 int isite = m_arg[itask].isite;
235 double *w2 = &v2[Nvcd * isite];
237 for (
int it = 0; it < m_Mt; ++it) {
238 for (
int iz = 0; iz < m_Mz; ++iz) {
239 for (
int ivxy = 0; ivxy < Nvxy; ++ivxy) {
240 int iv = ivxy + Nvxy * (iz + m_Nz * it);
250 double *vcp1,
double *v1)
252 int Nvc2 = 2 * m_Nvc;
253 int Nvcd = m_Nvc * m_Nd;
254 int Nvcd2 = Nvcd / 2;
262 double bc2 = m_boundary2[idir];
264 int isite = m_arg[itask].isite;
265 int isite_cp = m_arg[itask].isite_cpx;
269 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
270 double *w1 = &v1[Nvcd * isite];
275 for (
int it = 0; it < m_Mt; ++it) {
276 for (
int iz = 0; iz < m_Mz; ++iz) {
277 for (
int iy = 0; iy < m_Ny; ++iy) {
278 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
279 int is2 = iy + m_Ny * (iz + m_Mz * it);
281 int ix1 = Nvc2 * is2;
282 int ix2 = ix1 + m_Nvc;
284 for (
int ic = 0; ic < m_Nc; ++ic) {
285 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in]);
286 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in]);
287 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in]);
288 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in]);
294 m_bw_send[idir]->start_thread(itask);
299 double *v2,
double *vcp2)
301 int Nvc2 = 2 * m_Nvc;
302 int Nvcd = m_Nvc * m_Nd;
303 int Nvcd2 = Nvcd / 2;
312 double wt1r, wt1i, wt2r, wt2i;
314 int isite = m_arg[itask].isite;
315 int isite_cp = m_arg[itask].isite_cpx;
317 double *w2 = &v2[Nvcd * isite];
320 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
321 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
323 m_bw_recv[idir]->wait_thread(itask);
326 for (
int it = 0; it < m_Mt; ++it) {
327 for (
int iz = 0; iz < m_Mz; ++iz) {
328 for (
int iy = 0; iy < m_Ny; ++iy) {
329 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
330 int is2 = iy + m_Ny * (iz + m_Mz * it);
333 int ix1 = Nvc2 * is2;
334 int ix2 = ix1 + m_Nvc;
336 for (
int ic = 0; ic < m_Nc; ++ic) {
337 int ic2 = ic * m_Nvc;
339 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
340 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
341 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
342 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
344 w2[2 * ic + id1 + iv] += wt1r;
345 w2[2 * ic + 1 + id1 + iv] += wt1i;
346 w2[2 * ic + id2 + iv] += wt2r;
347 w2[2 * ic + 1 + id2 + iv] += wt2i;
348 w2[2 * ic + id3 + iv] += wt2i;
349 w2[2 * ic + 1 + id3 + iv] += -wt2r;
350 w2[2 * ic + id4 + iv] += wt1i;
351 w2[2 * ic + 1 + id4 + iv] += -wt1r;
361 double *v2,
double *v1)
363 int Nvcd = m_Nvc * m_Nd;
372 double vt1[m_Nvc], vt2[m_Nvc];
373 double wt1r, wt1i, wt2r, wt2i;
375 int isite = m_arg[itask].isite;
377 double *w2 = &v2[Nvcd * isite];
378 double *w1 = &v1[Nvcd * isite];
379 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
381 for (
int it = 0; it < m_Mt; ++it) {
382 for (
int iz = 0; iz < m_Mz; ++iz) {
383 for (
int iy = 0; iy < m_Ny; ++iy) {
384 for (
int ix = 0; ix < m_Nx - 1; ++ix) {
385 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
387 int in = Nvcd * (is + 1);
390 for (
int ic = 0; ic < m_Nc; ++ic) {
391 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id4 + in];
392 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id4 + in];
393 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id3 + in];
394 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id3 + in];
397 for (
int ic = 0; ic < m_Nc; ++ic) {
398 int ic2 = ic * m_Nvc;
400 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
401 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
402 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
403 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
405 w2[2 * ic + id1 + iv] += wt1r;
406 w2[2 * ic + 1 + id1 + iv] += wt1i;
407 w2[2 * ic + id2 + iv] += wt2r;
408 w2[2 * ic + 1 + id2 + iv] += wt2i;
409 w2[2 * ic + id3 + iv] += wt2i;
410 w2[2 * ic + 1 + id3 + iv] += -wt2r;
411 w2[2 * ic + id4 + iv] += wt1i;
412 w2[2 * ic + 1 + id4 + iv] += -wt1r;
423 double *vcp1,
double *v1)
425 int Nvc2 = 2 * m_Nvc;
426 int Nvcd = m_Nvc * m_Nd;
427 int Nvcd2 = Nvcd / 2;
436 int isite = m_arg[itask].isite;
437 int isite_cp = m_arg[itask].isite_cpx;
441 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
442 double *w1 = &v1[Nvcd * isite];
443 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
445 double vt1[m_Nvc], vt2[m_Nvc];
449 for (
int it = 0; it < m_Mt; ++it) {
450 for (
int iz = 0; iz < m_Mz; ++iz) {
451 for (
int iy = 0; iy < m_Ny; ++iy) {
452 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
453 int is2 = iy + m_Ny * (iz + m_Mz * it);
456 int ix1 = Nvc2 * is2;
457 int ix2 = ix1 + m_Nvc;
459 for (
int ic = 0; ic < m_Nc; ++ic) {
460 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
461 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
462 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
463 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
466 for (
int ic = 0; ic < m_Nc; ++ic) {
468 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
469 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
470 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
471 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
477 m_fw_send[idir]->start_thread(itask);
482 double *v2,
double *vcp2)
484 int Nvc2 = 2 * m_Nvc;
485 int Nvcd = m_Nvc * m_Nd;
486 int Nvcd2 = Nvcd / 2;
494 double bc2 = m_boundary2[idir];
496 double wt1r, wt1i, wt2r, wt2i;
498 int isite = m_arg[itask].isite;
499 int isite_cp = m_arg[itask].isite_cpx;
501 double *w2 = &v2[Nvcd * isite];
504 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
506 m_fw_recv[idir]->wait_thread(itask);
509 for (
int it = 0; it < m_Mt; ++it) {
510 for (
int iz = 0; iz < m_Mz; ++iz) {
511 for (
int iy = 0; iy < m_Ny; ++iy) {
512 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
513 int is2 = iy + m_Ny * (iz + m_Mz * it);
515 int ix1 = Nvc2 * is2;
516 int ix2 = ix1 + m_Nvc;
518 for (
int ic = 0; ic < m_Nc; ++ic) {
520 int ici = 2 * ic + 1;
521 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
522 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
523 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
524 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
525 w2[icr + id3 + iv] += -bc2 * w1[ici + ix2];
526 w2[ici + id3 + iv] += +bc2 * w1[icr + ix2];
527 w2[icr + id4 + iv] += -bc2 * w1[ici + ix1];
528 w2[ici + id4 + iv] += +bc2 * w1[icr + ix1];
538 double *v2,
double *v1)
540 int Nvcd = m_Nvc * m_Nd;
549 double vt1[m_Nvc], vt2[m_Nvc];
550 double wt1r, wt1i, wt2r, wt2i;
552 int isite = m_arg[itask].isite;
554 double *w2 = &v2[Nvcd * isite];
555 double *w1 = &v1[Nvcd * isite];
556 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
558 for (
int it = 0; it < m_Mt; ++it) {
559 for (
int iz = 0; iz < m_Mz; ++iz) {
560 for (
int iy = 0; iy < m_Ny; ++iy) {
561 for (
int ix = 1; ix < m_Nx; ++ix) {
562 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
564 int in = Nvcd * (is - 1);
565 int ig = m_Ndf * (is - 1);
567 for (
int ic = 0; ic < m_Nc; ++ic) {
568 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id4 + in];
569 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id4 + in];
570 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id3 + in];
571 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id3 + in];
574 for (
int ic = 0; ic < m_Nc; ++ic) {
577 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
578 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
579 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
580 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
582 w2[2 * ic + id1 + iv] += wt1r;
583 w2[2 * ic + 1 + id1 + iv] += wt1i;
584 w2[2 * ic + id2 + iv] += wt2r;
585 w2[2 * ic + 1 + id2 + iv] += wt2i;
586 w2[2 * ic + id3 + iv] += -wt2i;
587 w2[2 * ic + 1 + id3 + iv] += +wt2r;
588 w2[2 * ic + id4 + iv] += -wt1i;
589 w2[2 * ic + 1 + id4 + iv] += +wt1r;
600 double *vcp1,
double *v1)
602 int Nvc2 = 2 * m_Nvc;
603 int Nvcd = m_Nvc * m_Nd;
604 int Nvcd2 = Nvcd / 2;
611 int isite = m_arg[itask].isite;
612 int isite_cp = m_arg[itask].isite_cpy;
615 double bc2 = m_boundary2[idir];
619 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
620 double *w1 = &v1[Nvcd * isite];
625 for (
int it = 0; it < m_Mt; ++it) {
626 for (
int iz = 0; iz < m_Mz; ++iz) {
627 for (
int ix = 0; ix < m_Nx; ++ix) {
628 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
629 int is2 = ix + m_Nx * (iz + m_Mz * it);
631 int ix1 = Nvc2 * is2;
632 int ix2 = ix1 + m_Nvc;
634 for (
int ic = 0; ic < m_Nc; ++ic) {
635 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in]);
636 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in]);
637 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in]);
638 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in]);
644 m_bw_send[idir]->start_thread(itask);
649 double *v2,
double *vcp2)
651 int Nvc2 = 2 * m_Nvc;
652 int Nvcd = m_Nvc * m_Nd;
653 int Nvcd2 = Nvcd / 2;
662 double wt1r, wt1i, wt2r, wt2i;
664 int isite = m_arg[itask].isite;
665 int isite_cp = m_arg[itask].isite_cpy;
667 double *w2 = &v2[Nvcd * isite];
670 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
671 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
673 m_bw_recv[idir]->wait_thread(itask);
676 for (
int it = 0; it < m_Mt; ++it) {
677 for (
int iz = 0; iz < m_Mz; ++iz) {
678 for (
int ix = 0; ix < m_Nx; ++ix) {
679 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
680 int is2 = ix + m_Nx * (iz + m_Mz * it);
683 int ix1 = Nvc2 * is2;
684 int ix2 = ix1 + m_Nvc;
686 for (
int ic = 0; ic < m_Nc; ++ic) {
687 int ic2 = ic * m_Nvc;
689 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
690 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
691 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
692 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
694 w2[2 * ic + id1 + iv] += wt1r;
695 w2[2 * ic + 1 + id1 + iv] += wt1i;
696 w2[2 * ic + id2 + iv] += wt2r;
697 w2[2 * ic + 1 + id2 + iv] += wt2i;
698 w2[2 * ic + id3 + iv] += -wt2r;
699 w2[2 * ic + 1 + id3 + iv] += -wt2i;
700 w2[2 * ic + id4 + iv] += wt1r;
701 w2[2 * ic + 1 + id4 + iv] += wt1i;
711 double *v2,
double *v1)
713 int Nvcd = m_Nvc * m_Nd;
722 double vt1[m_Nvc], vt2[m_Nvc];
723 double wt1r, wt1i, wt2r, wt2i;
725 int isite = m_arg[itask].isite;
727 double *w2 = &v2[Nvcd * isite];
728 double *w1 = &v1[Nvcd * isite];
729 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
731 for (
int it = 0; it < m_Mt; ++it) {
732 for (
int iz = 0; iz < m_Mz; ++iz) {
733 for (
int iy = 0; iy < m_Ny - 1; ++iy) {
734 for (
int ix = 0; ix < m_Nx; ++ix) {
735 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
737 int in = Nvcd * (is + m_Nx);
740 for (
int ic = 0; ic < m_Nc; ++ic) {
741 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id4 + in];
742 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id4 + in];
743 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id3 + in];
744 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id3 + in];
747 for (
int ic = 0; ic < m_Nc; ++ic) {
748 int ic2 = ic * m_Nvc;
750 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
751 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
752 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
753 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
755 w2[2 * ic + id1 + iv] += wt1r;
756 w2[2 * ic + 1 + id1 + iv] += wt1i;
757 w2[2 * ic + id2 + iv] += wt2r;
758 w2[2 * ic + 1 + id2 + iv] += wt2i;
759 w2[2 * ic + id3 + iv] += -wt2r;
760 w2[2 * ic + 1 + id3 + iv] += -wt2i;
761 w2[2 * ic + id4 + iv] += wt1r;
762 w2[2 * ic + 1 + id4 + iv] += wt1i;
773 double *vcp1,
double *v1)
775 int Nvc2 = 2 * m_Nvc;
776 int Nvcd = m_Nvc * m_Nd;
777 int Nvcd2 = Nvcd / 2;
786 int isite = m_arg[itask].isite;
787 int isite_cp = m_arg[itask].isite_cpy;
791 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
793 double *w1 = &v1[Nvcd * isite];
794 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
796 double vt1[m_Nvc], vt2[m_Nvc];
800 for (
int it = 0; it < m_Mt; ++it) {
801 for (
int iz = 0; iz < m_Mz; ++iz) {
802 for (
int ix = 0; ix < m_Nx; ++ix) {
803 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
804 int is2 = ix + m_Nx * (iz + m_Mz * it);
807 int ix1 = Nvc2 * is2;
808 int ix2 = ix1 + m_Nvc;
810 for (
int ic = 0; ic < m_Nc; ++ic) {
811 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
812 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
813 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
814 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
817 for (
int ic = 0; ic < m_Nc; ++ic) {
819 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
820 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
821 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
822 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
828 m_fw_send[idir]->start_thread(itask);
833 double *v2,
double *vcp2)
835 int Nvc2 = 2 * m_Nvc;
836 int Nvcd = m_Nvc * m_Nd;
837 int Nvcd2 = Nvcd / 2;
845 double bc2 = m_boundary2[idir];
847 double wt1r, wt1i, wt2r, wt2i;
849 int isite = m_arg[itask].isite;
850 int isite_cp = m_arg[itask].isite_cpy;
852 double *w2 = &v2[Nvcd * isite];
855 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
857 m_fw_recv[idir]->wait_thread(itask);
860 for (
int it = 0; it < m_Mt; ++it) {
861 for (
int iz = 0; iz < m_Mz; ++iz) {
862 for (
int ix = 0; ix < m_Nx; ++ix) {
863 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
864 int is2 = ix + m_Nx * (iz + m_Mz * it);
866 int ix1 = Nvc2 * is2;
867 int ix2 = ix1 + m_Nvc;
869 for (
int ic = 0; ic < m_Nc; ++ic) {
871 int ici = 2 * ic + 1;
872 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
873 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
874 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
875 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
876 w2[icr + id3 + iv] += bc2 * w1[icr + ix2];
877 w2[ici + id3 + iv] += bc2 * w1[ici + ix2];
878 w2[icr + id4 + iv] += -bc2 * w1[icr + ix1];
879 w2[ici + id4 + iv] += -bc2 * w1[ici + ix1];
889 double *v2,
double *v1)
891 int Nvcd = m_Nvc * m_Nd;
900 double vt1[m_Nvc], vt2[m_Nvc];
901 double wt1r, wt1i, wt2r, wt2i;
903 int isite = m_arg[itask].isite;
905 double *w2 = &v2[Nvcd * isite];
906 double *w1 = &v1[Nvcd * isite];
907 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
909 for (
int it = 0; it < m_Mt; ++it) {
910 for (
int iz = 0; iz < m_Mz; ++iz) {
911 for (
int iy = 1; iy < m_Ny; ++iy) {
912 for (
int ix = 0; ix < m_Nx; ++ix) {
913 int is = ix + m_Nx * (iy + m_Ny * (iz + m_Nz * it));
915 int in = Nvcd * (is - m_Nx);
916 int ig = m_Ndf * (is - m_Nx);
918 for (
int ic = 0; ic < m_Nc; ++ic) {
919 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id4 + in];
920 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id4 + in];
921 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id3 + in];
922 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id3 + in];
925 for (
int ic = 0; ic < m_Nc; ++ic) {
927 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
928 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
929 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
930 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
932 w2[ic2 + id1 + iv] += wt1r;
933 w2[ic2 + 1 + id1 + iv] += wt1i;
934 w2[ic2 + id2 + iv] += wt2r;
935 w2[ic2 + 1 + id2 + iv] += wt2i;
936 w2[ic2 + id3 + iv] += wt2r;
937 w2[ic2 + 1 + id3 + iv] += wt2i;
938 w2[ic2 + id4 + iv] += -wt1r;
939 w2[ic2 + 1 + id4 + iv] += -wt1i;
950 double *vcp1,
double *v1)
952 int Nvc2 = 2 * m_Nvc;
953 int Nvcd = m_Nvc * m_Nd;
954 int Nvcd2 = Nvcd / 2;
961 int isite = m_arg[itask].isite;
962 int isite_cp = m_arg[itask].isite_cpz;
965 double bc2 = m_boundary2[idir];
969 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
970 double *w1 = &v1[Nvcd * isite];
972 if (m_arg[itask].kz0 == 1) {
973 int Nxy = m_Nx * m_Ny;
975 for (
int it = 0; it < m_Mt; ++it) {
976 for (
int ixy = 0; ixy < Nxy; ++ixy) {
977 int is = ixy + Nxy * (iz + m_Nz * it);
978 int is2 = ixy + Nxy * it;
981 int ix1 = Nvc2 * is2;
982 int ix2 = ix1 + m_Nvc;
984 for (
int ic = 0; ic < m_Nc; ++ic) {
985 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in]);
986 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in]);
987 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in]);
988 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in]);
994 m_bw_send[idir]->start_thread(itask);
999 double *v2,
double *vcp2)
1001 int Nvc2 = 2 * m_Nvc;
1002 int Nvcd = m_Nvc * m_Nd;
1003 int Nvcd2 = Nvcd / 2;
1007 int id3 = m_Nvc * 2;
1008 int id4 = m_Nvc * 3;
1012 double wt1r, wt1i, wt2r, wt2i;
1014 int isite = m_arg[itask].isite;
1015 int isite_cp = m_arg[itask].isite_cpz;
1017 double *w2 = &v2[Nvcd * isite];
1020 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1021 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1023 m_bw_recv[idir]->wait_thread(itask);
1025 if (m_arg[itask].kz1 == 1) {
1026 int Nxy = m_Nx * m_Ny;
1028 for (
int it = 0; it < m_Mt; ++it) {
1029 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1030 int is = ixy + Nxy * (iz + m_Nz * it);
1031 int is2 = ixy + Nxy * it;
1033 int ig = m_Ndf * is;
1034 int ix1 = Nvc2 * is2;
1035 int ix2 = ix1 + m_Nvc;
1037 for (
int ic = 0; ic < m_Nc; ++ic) {
1038 int ic2 = ic * m_Nvc;
1040 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1041 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1042 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1043 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1045 w2[2 * ic + id1 + iv] += wt1r;
1046 w2[2 * ic + 1 + id1 + iv] += wt1i;
1047 w2[2 * ic + id2 + iv] += wt2r;
1048 w2[2 * ic + 1 + id2 + iv] += wt2i;
1049 w2[2 * ic + id3 + iv] += wt1i;
1050 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1051 w2[2 * ic + id4 + iv] += -wt2i;
1052 w2[2 * ic + 1 + id4 + iv] += wt2r;
1062 double *v2,
double *v1)
1064 int Nvcd = m_Nvc * m_Nd;
1068 int id3 = m_Nvc * 2;
1069 int id4 = m_Nvc * 3;
1073 double vt1[m_Nvc], vt2[m_Nvc];
1074 double wt1r, wt1i, wt2r, wt2i;
1076 int isite = m_arg[itask].isite;
1078 double *w2 = &v2[Nvcd * isite];
1079 double *w1 = &v1[Nvcd * isite];
1080 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1082 int kz1 = m_arg[itask].kz1;
1083 int Nxy = m_Nx * m_Ny;
1085 for (
int it = 0; it < m_Mt; ++it) {
1086 for (
int iz = 0; iz < m_Mz - kz1; ++iz) {
1087 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1088 int is = ixy + Nxy * (iz + m_Nz * it);
1090 int in = Nvcd * (is + Nxy);
1091 int ig = m_Ndf * is;
1093 for (
int ic = 0; ic < m_Nc; ++ic) {
1094 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + 1 + id3 + in];
1095 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + id3 + in];
1096 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + 1 + id4 + in];
1097 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + id4 + in];
1100 for (
int ic = 0; ic < m_Nc; ++ic) {
1101 int ic2 = ic * m_Nvc;
1103 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1104 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1105 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1106 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1108 w2[2 * ic + id1 + iv] += wt1r;
1109 w2[2 * ic + 1 + id1 + iv] += wt1i;
1110 w2[2 * ic + id2 + iv] += wt2r;
1111 w2[2 * ic + 1 + id2 + iv] += wt2i;
1112 w2[2 * ic + id3 + iv] += wt1i;
1113 w2[2 * ic + 1 + id3 + iv] += -wt1r;
1114 w2[2 * ic + id4 + iv] += -wt2i;
1115 w2[2 * ic + 1 + id4 + iv] += wt2r;
1125 double *vcp1,
double *v1)
1127 int Nvc2 = 2 * m_Nvc;
1128 int Nvcd = m_Nvc * m_Nd;
1129 int Nvcd2 = Nvcd / 2;
1133 int id3 = m_Nvc * 2;
1134 int id4 = m_Nvc * 3;
1138 int isite = m_arg[itask].isite;
1139 int isite_cp = m_arg[itask].isite_cpz;
1143 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1144 double *w1 = &v1[Nvcd * isite];
1145 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1147 double vt1[m_Nvc], vt2[m_Nvc];
1149 if (m_arg[itask].kz1 == 1) {
1150 int Nxy = m_Nx * m_Ny;
1152 for (
int it = 0; it < m_Mt; ++it) {
1153 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1154 int is = ixy + Nxy * (iz + m_Nz * it);
1155 int is2 = ixy + Nxy * it;
1157 int ig = m_Ndf * is;
1158 int ix1 = Nvc2 * is2;
1159 int ix2 = ix1 + m_Nvc;
1161 for (
int ic = 0; ic < m_Nc; ++ic) {
1162 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1163 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1164 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1165 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1168 for (
int ic = 0; ic < m_Nc; ++ic) {
1170 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1171 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1172 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1173 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1179 m_fw_send[idir]->start_thread(itask);
1184 double *v2,
double *vcp2)
1186 int Nvc2 = 2 * m_Nvc;
1187 int Nvcd = m_Nvc * m_Nd;
1188 int Nvcd2 = Nvcd / 2;
1192 int id3 = m_Nvc * 2;
1193 int id4 = m_Nvc * 3;
1196 double bc2 = m_boundary2[idir];
1198 double wt1r, wt1i, wt2r, wt2i;
1200 int isite = m_arg[itask].isite;
1201 int isite_cp = m_arg[itask].isite_cpz;
1203 double *w2 = &v2[Nvcd * isite];
1206 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1208 m_fw_recv[idir]->wait_thread(itask);
1210 if (m_arg[itask].kz0 == 1) {
1211 int Nxy = m_Nx * m_Ny;
1214 for (
int it = 0; it < m_Mt; ++it) {
1215 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1216 int is = ixy + Nxy * (iz + m_Nz * it);
1217 int is2 = ixy + Nxy * it;
1219 int ix1 = Nvc2 * is2;
1220 int ix2 = ix1 + m_Nvc;
1222 for (
int ic = 0; ic < m_Nc; ++ic) {
1224 int ici = 2 * ic + 1;
1225 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1226 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1227 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1228 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1229 w2[icr + id3 + iv] += -bc2 * w1[ici + ix1];
1230 w2[ici + id3 + iv] += bc2 * w1[icr + ix1];
1231 w2[icr + id4 + iv] += bc2 * w1[ici + ix2];
1232 w2[ici + id4 + iv] += -bc2 * w1[icr + ix2];
1242 double *v2,
double *v1)
1244 int Nvcd = m_Nvc * m_Nd;
1248 int id3 = m_Nvc * 2;
1249 int id4 = m_Nvc * 3;
1253 double vt1[m_Nvc], vt2[m_Nvc];
1254 double wt1r, wt1i, wt2r, wt2i;
1256 int isite = m_arg[itask].isite;
1258 double *w2 = &v2[Nvcd * isite];
1259 double *w1 = &v1[Nvcd * isite];
1260 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1262 int kz0 = m_arg[itask].kz0;
1263 int Nxy = m_Nx * m_Ny;
1265 for (
int it = 0; it < m_Mt; ++it) {
1266 for (
int iz = kz0; iz < m_Mz; ++iz) {
1267 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1268 int is = ixy + Nxy * (iz + m_Nz * it);
1270 int in = Nvcd * (is - Nxy);
1271 int ig = m_Ndf * (is - Nxy);
1273 for (
int ic = 0; ic < m_Nc; ++ic) {
1274 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + 1 + id3 + in];
1275 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + id3 + in];
1276 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + 1 + id4 + in];
1277 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + id4 + in];
1280 for (
int ic = 0; ic < m_Nc; ++ic) {
1282 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1283 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1284 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1285 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1287 w2[ic2 + id1 + iv] += wt1r;
1288 w2[ic2 + 1 + id1 + iv] += wt1i;
1289 w2[ic2 + id2 + iv] += wt2r;
1290 w2[ic2 + 1 + id2 + iv] += wt2i;
1291 w2[ic2 + id3 + iv] += -wt1i;
1292 w2[ic2 + 1 + id3 + iv] += wt1r;
1293 w2[ic2 + id4 + iv] += wt2i;
1294 w2[ic2 + 1 + id4 + iv] += -wt2r;
1304 double *vcp1,
double *v1)
1306 int Nvc2 = 2 * m_Nvc;
1307 int Nvcd = m_Nvc * m_Nd;
1308 int Nvcd2 = Nvcd / 2;
1312 int id3 = m_Nvc * 2;
1313 int id4 = m_Nvc * 3;
1315 int isite = m_arg[itask].isite;
1316 int isite_cp = m_arg[itask].isite_cpt;
1319 double bc2 = m_boundary2[idir];
1323 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1324 double *w1 = &v1[Nvcd * isite];
1326 if (m_arg[itask].kt0 == 1) {
1327 int Nxy = m_Nx * m_Ny;
1329 for (
int iz = 0; iz < m_Mz; ++iz) {
1330 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1331 int is = ixy + Nxy * (iz + m_Nz * it);
1332 int is2 = ixy + Nxy * iz;
1335 int ix1 = Nvc2 * is2;
1336 int ix2 = ix1 + m_Nvc;
1338 for (
int ic = 0; ic < m_Nc; ++ic) {
1339 w2[2 * ic + ix1] = 2.0 * bc2 * w1[2 * ic + id3 + in];
1340 w2[2 * ic + 1 + ix1] = 2.0 * bc2 * w1[2 * ic + 1 + id3 + in];
1341 w2[2 * ic + ix2] = 2.0 * bc2 * w1[2 * ic + id4 + in];
1342 w2[2 * ic + 1 + ix2] = 2.0 * bc2 * w1[2 * ic + 1 + id4 + in];
1348 m_bw_send[idir]->start_thread(itask);
1353 double *v2,
double *vcp2)
1355 int Nvc2 = 2 * m_Nvc;
1356 int Nvcd = m_Nvc * m_Nd;
1357 int Nvcd2 = Nvcd / 2;
1361 int id3 = m_Nvc * 2;
1362 int id4 = m_Nvc * 3;
1366 double wt1r, wt1i, wt2r, wt2i;
1368 int isite = m_arg[itask].isite;
1369 int isite_cp = m_arg[itask].isite_cpt;
1371 double *w2 = &v2[Nvcd * isite];
1374 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1375 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1377 m_bw_recv[idir]->wait_thread(itask);
1379 if (m_arg[itask].kt1 == 1) {
1380 int Nxy = m_Nx * m_Ny;
1382 for (
int iz = 0; iz < m_Mz; ++iz) {
1383 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1384 int is = ixy + Nxy * (iz + m_Nz * it);
1385 int is2 = ixy + Nxy * iz;
1387 int ig = m_Ndf * is;
1388 int ix1 = Nvc2 * is2;
1389 int ix2 = ix1 + m_Nvc;
1391 for (
int ic = 0; ic < m_Nc; ++ic) {
1392 int ic2 = ic * m_Nvc;
1394 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1395 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1396 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1397 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
1399 w2[2 * ic + id3 + iv] += wt1r;
1400 w2[2 * ic + 1 + id3 + iv] += wt1i;
1401 w2[2 * ic + id4 + iv] += wt2r;
1402 w2[2 * ic + 1 + id4 + iv] += wt2i;
1412 double *v2,
double *v1)
1414 int Nvcd = m_Nvc * m_Nd;
1418 int id3 = m_Nvc * 2;
1419 int id4 = m_Nvc * 3;
1423 double vt1[m_Nvc], vt2[m_Nvc];
1424 double wt1r, wt1i, wt2r, wt2i;
1426 int isite = m_arg[itask].isite;
1428 double *w2 = &v2[Nvcd * isite];
1429 double *w1 = &v1[Nvcd * isite];
1430 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1432 int kt1 = m_arg[itask].kt1;
1433 int Nxy = m_Nx * m_Ny;
1434 int Nxyz = Nxy * m_Nz;
1436 for (
int it = 0; it < m_Mt - kt1; ++it) {
1437 for (
int iz = 0; iz < m_Mz; ++iz) {
1438 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1439 int is = ixy + Nxy * (iz + m_Nz * it);
1441 int in = Nvcd * (is + Nxyz);
1442 int ig = m_Ndf * is;
1444 for (
int ic = 0; ic < m_Nc; ++ic) {
1445 vt1[2 * ic] = 2.0 * w1[2 * ic + id3 + in];
1446 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id3 + in];
1447 vt2[2 * ic] = 2.0 * w1[2 * ic + id4 + in];
1448 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id4 + in];
1451 for (
int ic = 0; ic < m_Nc; ++ic) {
1452 int ic2 = ic * m_Nvc;
1454 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1455 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1456 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1457 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
1459 w2[2 * ic + id3 + iv] += wt1r;
1460 w2[2 * ic + 1 + id3 + iv] += wt1i;
1461 w2[2 * ic + id4 + iv] += wt2r;
1462 w2[2 * ic + 1 + id4 + iv] += wt2i;
1472 double *vcp1,
double *v1)
1474 int Nvc2 = 2 * m_Nvc;
1475 int Nvcd = m_Nvc * m_Nd;
1476 int Nvcd2 = Nvcd / 2;
1480 int id3 = m_Nvc * 2;
1481 int id4 = m_Nvc * 3;
1485 int isite = m_arg[itask].isite;
1486 int isite_cp = m_arg[itask].isite_cpt;
1490 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1491 double *w1 = &v1[Nvcd * isite];
1492 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
1494 double vt1[m_Nvc], vt2[m_Nvc];
1496 if (m_arg[itask].kt1 == 1) {
1497 int Nxy = m_Nx * m_Ny;
1499 for (
int iz = 0; iz < m_Mz; ++iz) {
1500 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1501 int is = ixy + Nxy * (iz + m_Nz * it);
1502 int is2 = ixy + Nxy * iz;
1504 int ig = m_Ndf * is;
1505 int ix1 = Nvc2 * is2;
1506 int ix2 = ix1 + m_Nvc;
1508 for (
int ic = 0; ic < m_Nc; ++ic) {
1509 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1510 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1511 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1512 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
1515 for (
int ic = 0; ic < m_Nc; ++ic) {
1517 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1518 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1519 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1520 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
1526 m_fw_send[idir]->start_thread(itask);
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, it, iv, icr, w1) are missing from this
// listing. Presumably mult_tm2_dirac_thread(int itask, double *v2,
// double *vcp2) -- confirm against the original file.
//
// Visible behaviour: calls wait_thread(itask) on m_fw_recv[idir]; then, for a
// task with kt0 == 1, adds bc2 times the received buffer blocks ix1/ix2 to
// the id1/id2 components of v2.
1531 double *v2,
double *vcp2)
1533 int Nvc2 = 2 * m_Nvc;
1534 int Nvcd = m_Nvc * m_Nd;
1535 int Nvcd2 = Nvcd / 2;
// id3/id4 are not referenced in the visible part of this function.
1539 int id3 = m_Nvc * 2;
1540 int id4 = m_Nvc * 3;
// Factor applied to everything taken from the receive buffer.
1543 double bc2 = m_boundary2[idir];
// wt1r..wt2i are not referenced in the visible part of this function.
1545 double wt1r, wt1i, wt2r, wt2i;
1547 int isite = m_arg[itask].isite;
1548 int isite_cp = m_arg[itask].isite_cpt;
1550 double *w2 = &v2[Nvcd * isite];
1553 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// presumably blocks until this task's received data is available -- confirm
// against the Channel class.
1555 m_fw_recv[idir]->wait_thread(itask);
// kt0 is set to 1 by the task setup for tasks with ith_t == 0.
1557 if (m_arg[itask].kt0 == 1) {
1558 int Nxy = m_Nx * m_Ny;
1560 for (
int iz = 0; iz < m_Mz; ++iz) {
1561 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1562 int is = ixy + Nxy * (iz + m_Nz * it);
1563 int is2 = ixy + Nxy * iz;
1565 int ix1 = Nvc2 * is2;
1566 int ix2 = ix1 + m_Nvc;
// Accumulate real (icr) and imaginary (ici) parts; icr is declared on a
// line missing from this listing.
1568 for (
int ic = 0; ic < m_Nc; ++ic) {
1570 int ici = 2 * ic + 1;
1571 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1572 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1573 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1574 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, iv, ic2) are missing from this listing.
// Presumably mult_tmb_dirac_thread(int itask, double *v2, double *v1) --
// confirm against the original file.
//
// Visible behaviour: for time slices it = kt0 .. m_Mt-1 of the task's
// sub-volume, reads v1 and u at the site one Nxyz stride earlier, forms twice
// the id1/id2 components, passes them to mult_udagv_r/_i and accumulates the
// results into the id1/id2 components of v2.
1584 double *v2,
double *v1)
1586 int Nvcd = m_Nvc * m_Nd;
// id3/id4 are not referenced in the visible part of this function.
1590 int id3 = m_Nvc * 2;
1591 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays (a compiler extension in C++) -- confirm.
1595 double vt1[m_Nvc], vt2[m_Nvc];
1596 double wt1r, wt1i, wt2r, wt2i;
1598 int isite = m_arg[itask].isite;
1600 double *w2 = &v2[Nvcd * isite];
1601 double *w1 = &v1[Nvcd * isite];
1602 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt0 is 1 for tasks with ith_t == 0 (task setup), so those tasks start the
// loop below at it = 1.
1604 int kt0 = m_arg[itask].kt0;
1605 int Nxy = m_Nx * m_Ny;
1606 int Nxyz = Nxy * m_Nz;
1608 for (
int it = kt0; it < m_Mt; ++it) {
1609 for (
int iz = 0; iz < m_Mz; ++iz) {
1610 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1611 int is = ixy + Nxy * (iz + m_Nz * it);
// Input spinor and link are both taken from site is - Nxyz (it - 1).
1613 int in = Nvcd * (is - Nxyz);
1614 int ig = m_Ndf * (is - Nxyz);
1616 for (
int ic = 0; ic < m_Nc; ++ic) {
1617 vt1[2 * ic] = 2.0 * w1[2 * ic + id1 + in];
1618 vt1[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id1 + in];
1619 vt2[2 * ic] = 2.0 * w1[2 * ic + id2 + in];
1620 vt2[2 * ic + 1] = 2.0 * w1[2 * ic + 1 + id2 + in];
// ic2 is declared on a line missing from this listing.
1623 for (
int ic = 0; ic < m_Nc; ++ic) {
1625 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1626 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1627 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1628 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
1630 w2[ic2 + id1 + iv] += wt1r;
1631 w2[ic2 + 1 + id1 + iv] += wt1i;
1632 w2[ic2 + id2 + iv] += wt2r;
1633 w2[ic2 + 1 + id2 + iv] += wt2i;
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, it, in, w2) are missing from this
// listing. Presumably mult_tp1_chiral_thread(int itask, double *vcp1,
// double *v1) -- confirm against the original file.
//
// Visible behaviour: for a task with kt0 == 1, writes
// bc2 * (component id1 + component id3) and bc2 * (id2 + id4) of v1 into the
// m_bw_send[idir] buffer blocks ix1/ix2. The last visible statement is
// start_thread(itask) on that channel.
1643 double *vcp1,
double *v1)
// Nvc2: doubles per site in the packed buffer; Nvcd: per-site stride of v1;
// Nvcd2: half of Nvcd.
1645 int Nvc2 = 2 * m_Nvc;
1646 int Nvcd = m_Nvc * m_Nd;
1647 int Nvcd2 = Nvcd / 2;
1651 int id3 = m_Nvc * 2;
1652 int id4 = m_Nvc * 3;
1654 int isite = m_arg[itask].isite;
1655 int isite_cp = m_arg[itask].isite_cpt;
// Factor applied to the data before it is stored in the send buffer.
1658 double bc2 = m_boundary2[idir];
1662 = (
double *)m_bw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1663 double *w1 = &v1[Nvcd * isite];
// kt0 is set to 1 by the task setup for tasks with ith_t == 0.
1665 if (m_arg[itask].kt0 == 1) {
1666 int Nxy = m_Nx * m_Ny;
1668 for (
int iz = 0; iz < m_Mz; ++iz) {
1669 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1670 int is = ixy + Nxy * (iz + m_Nz * it);
1671 int is2 = ixy + Nxy * iz;
1674 int ix1 = Nvc2 * is2;
1675 int ix2 = ix1 + m_Nvc;
// Sum of the (id1, id3) and (id2, id4) component pairs, scaled by bc2.
1677 for (
int ic = 0; ic < m_Nc; ++ic) {
1678 w2[2 * ic + ix1] = bc2 * (w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in]);
1679 w2[2 * ic + 1 + ix1] = bc2 * (w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in]);
1680 w2[2 * ic + ix2] = bc2 * (w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in]);
1681 w2[2 * ic + 1 + ix2] = bc2 * (w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in]);
// presumably hands this task's part of the buffer to the channel -- confirm
// against the Channel class.
1687 m_bw_send[idir]->start_thread(itask);
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, it, iv, w1) are missing from this
// listing. Presumably mult_tp2_chiral_thread(int itask, double *v2,
// double *vcp2) -- confirm against the original file.
//
// Visible behaviour: calls wait_thread(itask) on m_bw_recv[idir]; then, for a
// task with kt1 == 1, passes the received buffer blocks ix1/ix2 with the link
// data u to mult_uv_r/_i and adds the results to all four component blocks
// (id1..id4) of v2.
1692 double *v2,
double *vcp2)
1694 int Nvc2 = 2 * m_Nvc;
1695 int Nvcd = m_Nvc * m_Nd;
1696 int Nvcd2 = Nvcd / 2;
1700 int id3 = m_Nvc * 2;
1701 int id4 = m_Nvc * 3;
1705 double wt1r, wt1i, wt2r, wt2i;
1707 int isite = m_arg[itask].isite;
1708 int isite_cp = m_arg[itask].isite_cpt;
1710 double *w2 = &v2[Nvcd * isite];
1713 = (
double *)m_bw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1714 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
// presumably blocks until this task's received data is available -- confirm
// against the Channel class.
1716 m_bw_recv[idir]->wait_thread(itask);
// kt1 is set to 1 by the task setup for tasks with ith_t == m_Ntask_t - 1.
1718 if (m_arg[itask].kt1 == 1) {
1719 int Nxy = m_Nx * m_Ny;
1721 for (
int iz = 0; iz < m_Mz; ++iz) {
1722 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1723 int is = ixy + Nxy * (iz + m_Nz * it);
1724 int is2 = ixy + Nxy * iz;
1726 int ig = m_Ndf * is;
1727 int ix1 = Nvc2 * is2;
1728 int ix2 = ix1 + m_Nvc;
1730 for (
int ic = 0; ic < m_Nc; ++ic) {
// Offset ic * m_Nvc into the link data at this site.
1731 int ic2 = ic * m_Nvc;
1733 wt1r = mult_uv_r(&u[ic2 + ig], &w1[ix1], m_Nc);
1734 wt1i = mult_uv_i(&u[ic2 + ig], &w1[ix1], m_Nc);
1735 wt2r = mult_uv_r(&u[ic2 + ig], &w1[ix2], m_Nc);
1736 wt2i = mult_uv_i(&u[ic2 + ig], &w1[ix2], m_Nc);
// The same result is added to block pairs (id1, id3) and (id2, id4).
1738 w2[2 * ic + id1 + iv] += wt1r;
1739 w2[2 * ic + 1 + id1 + iv] += wt1i;
1740 w2[2 * ic + id2 + iv] += wt2r;
1741 w2[2 * ic + 1 + id2 + iv] += wt2i;
1742 w2[2 * ic + id3 + iv] += wt1r;
1743 w2[2 * ic + 1 + id3 + iv] += wt1i;
1744 w2[2 * ic + id4 + iv] += wt2r;
1745 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, iv) are missing from this listing.
// Presumably mult_tpb_chiral_thread(int itask, double *v2, double *v1) --
// confirm against the original file.
//
// Visible behaviour: for time slices it = 0 .. m_Mt-kt1-1 of the task's
// sub-volume, reads v1 at the site one Nxyz stride later, sums the component
// pairs (id1, id3) and (id2, id4), passes them with the link at the current
// site to mult_uv_r/_i and adds the results to all four component blocks of
// v2.
1755 double *v2,
double *v1)
1757 int Nvcd = m_Nvc * m_Nd;
1761 int id3 = m_Nvc * 2;
1762 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays (a compiler extension in C++) -- confirm.
1766 double vt1[m_Nvc], vt2[m_Nvc];
1767 double wt1r, wt1i, wt2r, wt2i;
1769 int isite = m_arg[itask].isite;
1771 double *w2 = &v2[Nvcd * isite];
1772 double *w1 = &v1[Nvcd * isite];
1773 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt1 is 1 for tasks with ith_t == m_Ntask_t - 1 (task setup), so those
// tasks stop the loop below one time slice early.
1775 int kt1 = m_arg[itask].kt1;
1776 int Nxy = m_Nx * m_Ny;
1777 int Nxyz = Nxy * m_Nz;
1779 for (
int it = 0; it < m_Mt - kt1; ++it) {
1780 for (
int iz = 0; iz < m_Mz; ++iz) {
1781 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1782 int is = ixy + Nxy * (iz + m_Nz * it);
// Input spinor comes from site is + Nxyz (it + 1); the link from site is.
1784 int in = Nvcd * (is + Nxyz);
1785 int ig = m_Ndf * is;
1787 for (
int ic = 0; ic < m_Nc; ++ic) {
1788 vt1[2 * ic] = w1[2 * ic + id1 + in] + w1[2 * ic + id3 + in];
1789 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] + w1[2 * ic + 1 + id3 + in];
1790 vt2[2 * ic] = w1[2 * ic + id2 + in] + w1[2 * ic + id4 + in];
1791 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] + w1[2 * ic + 1 + id4 + in];
1794 for (
int ic = 0; ic < m_Nc; ++ic) {
1795 int ic2 = ic * m_Nvc;
1797 wt1r = mult_uv_r(&u[ic2 + ig], vt1, m_Nc);
1798 wt1i = mult_uv_i(&u[ic2 + ig], vt1, m_Nc);
1799 wt2r = mult_uv_r(&u[ic2 + ig], vt2, m_Nc);
1800 wt2i = mult_uv_i(&u[ic2 + ig], vt2, m_Nc);
// The same result is added to block pairs (id1, id3) and (id2, id4).
1802 w2[2 * ic + id1 + iv] += wt1r;
1803 w2[2 * ic + 1 + id1 + iv] += wt1i;
1804 w2[2 * ic + id2 + iv] += wt2r;
1805 w2[2 * ic + 1 + id2 + iv] += wt2i;
1806 w2[2 * ic + id3 + iv] += wt1r;
1807 w2[2 * ic + 1 + id3 + iv] += wt1i;
1808 w2[2 * ic + id4 + iv] += wt2r;
1809 w2[2 * ic + 1 + id4 + iv] += wt2i;
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, it, in, icr, w2) are missing from this
// listing. Presumably mult_tm1_chiral_thread(int itask, double *vcp1,
// double *v1) -- confirm against the original file.
//
// Visible behaviour: for a task with kt1 == 1, forms the differences
// (id1 - id3) and (id2 - id4) of the v1 components, passes them with the link
// data u to mult_udagv_r/_i and stores the results in the m_fw_send[idir]
// buffer blocks ix1/ix2. The last visible statement is start_thread(itask)
// on that channel.
1819 double *vcp1,
double *v1)
1821 int Nvc2 = 2 * m_Nvc;
1822 int Nvcd = m_Nvc * m_Nd;
1823 int Nvcd2 = Nvcd / 2;
1827 int id3 = m_Nvc * 2;
1828 int id4 = m_Nvc * 3;
1832 int isite = m_arg[itask].isite;
1833 int isite_cp = m_arg[itask].isite_cpt;
1837 = (
double *)m_fw_send[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
1838 double *w1 = &v1[Nvcd * isite];
1839 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays (a compiler extension in C++) -- confirm.
1841 double vt1[m_Nvc], vt2[m_Nvc];
// kt1 is set to 1 by the task setup for tasks with ith_t == m_Ntask_t - 1.
1843 if (m_arg[itask].kt1 == 1) {
1844 int Nxy = m_Nx * m_Ny;
1846 for (
int iz = 0; iz < m_Mz; ++iz) {
1847 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1848 int is = ixy + Nxy * (iz + m_Nz * it);
1849 int is2 = ixy + Nxy * iz;
1851 int ig = m_Ndf * is;
1852 int ix1 = Nvc2 * is2;
1853 int ix2 = ix1 + m_Nvc;
// Difference of the (id1, id3) and (id2, id4) component pairs.
1855 for (
int ic = 0; ic < m_Nc; ++ic) {
1856 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1857 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1858 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1859 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// icr is declared on a line missing from this listing.
1862 for (
int ic = 0; ic < m_Nc; ++ic) {
1864 w2[icr + ix1] = mult_udagv_r(&u[icr + ig], vt1, m_Nc);
1865 w2[icr + 1 + ix1] = mult_udagv_i(&u[icr + ig], vt1, m_Nc);
1866 w2[icr + ix2] = mult_udagv_r(&u[icr + ig], vt2, m_Nc);
1867 w2[icr + 1 + ix2] = mult_udagv_i(&u[icr + ig], vt2, m_Nc);
// presumably hands this task's part of the buffer to the channel -- confirm
// against the Channel class.
1873 m_fw_send[idir]->start_thread(itask);
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, it, iv, icr, w1) are missing from this
// listing. Presumably mult_tm2_chiral_thread(int itask, double *v2,
// double *vcp2) -- confirm against the original file.
//
// Visible behaviour: calls wait_thread(itask) on m_fw_recv[idir]; then, for a
// task with kt0 == 1, adds bc2 times the received buffer blocks ix1/ix2 to
// the id1/id2 components of v2 and subtracts the same values from the
// id3/id4 components.
1878 double *v2,
double *vcp2)
1880 int Nvc2 = 2 * m_Nvc;
1881 int Nvcd = m_Nvc * m_Nd;
1882 int Nvcd2 = Nvcd / 2;
1886 int id3 = m_Nvc * 2;
1887 int id4 = m_Nvc * 3;
// Factor applied to everything taken from the receive buffer.
1890 double bc2 = m_boundary2[idir];
// wt1r..wt2i are not referenced in the visible part of this function.
1892 double wt1r, wt1i, wt2r, wt2i;
1894 int isite = m_arg[itask].isite;
1895 int isite_cp = m_arg[itask].isite_cpt;
1897 double *w2 = &v2[Nvcd * isite];
1900 = (
double *)m_fw_recv[idir]->ptr(
sizeof(
double) * Nvcd2 * isite_cp);
// presumably blocks until this task's received data is available -- confirm
// against the Channel class.
1902 m_fw_recv[idir]->wait_thread(itask);
// kt0 is set to 1 by the task setup for tasks with ith_t == 0.
1904 if (m_arg[itask].kt0 == 1) {
1905 int Nxy = m_Nx * m_Ny;
1907 for (
int iz = 0; iz < m_Mz; ++iz) {
1908 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1909 int is = ixy + Nxy * (iz + m_Nz * it);
1910 int is2 = ixy + Nxy * iz;
1912 int ix1 = Nvc2 * is2;
1913 int ix2 = ix1 + m_Nvc;
// icr is declared on a line missing from this listing.
1915 for (
int ic = 0; ic < m_Nc; ++ic) {
1917 int ici = 2 * ic + 1;
1918 w2[icr + id1 + iv] += bc2 * w1[icr + ix1];
1919 w2[ici + id1 + iv] += bc2 * w1[ici + ix1];
1920 w2[icr + id2 + iv] += bc2 * w1[icr + ix2];
1921 w2[ici + id2 + iv] += bc2 * w1[ici + ix2];
1922 w2[icr + id3 + iv] -= bc2 * w1[icr + ix1];
1923 w2[ici + id3 + iv] -= bc2 * w1[ici + ix1];
1924 w2[icr + id4 + iv] -= bc2 * w1[icr + ix2];
1925 w2[ici + id4 + iv] -= bc2 * w1[ici + ix2];
// NOTE(review): function name, leading parameter(s), braces and several
// declarations (e.g. idir, id1, id2, iv, ic2) are missing from this listing.
// Presumably mult_tmb_chiral_thread(int itask, double *v2, double *v1) --
// confirm against the original file.
//
// Visible behaviour: for time slices it = kt0 .. m_Mt-1 of the task's
// sub-volume, reads v1 and u at the site one Nxyz stride earlier, forms the
// differences (id1 - id3) and (id2 - id4), passes them to mult_udagv_r/_i,
// adds the results to the id1/id2 components of v2 and subtracts them from
// the id3/id4 components.
1935 double *v2,
double *v1)
1937 int Nvcd = m_Nvc * m_Nd;
1941 int id3 = m_Nvc * 2;
1942 int id4 = m_Nvc * 3;
// NOTE(review): if m_Nvc is not a compile-time constant these are
// variable-length arrays (a compiler extension in C++) -- confirm.
1946 double vt1[m_Nvc], vt2[m_Nvc];
1947 double wt1r, wt1i, wt2r, wt2i;
1949 int isite = m_arg[itask].isite;
1951 double *w2 = &v2[Nvcd * isite];
1952 double *w1 = &v1[Nvcd * isite];
1953 double *u =
const_cast<Field_G *
>(m_U)->ptr(m_Ndf * (isite + idir * m_Nvol));
// kt0 is 1 for tasks with ith_t == 0 (task setup), so those tasks start the
// loop below at it = 1.
1955 int kt0 = m_arg[itask].kt0;
1956 int Nxy = m_Nx * m_Ny;
1957 int Nxyz = Nxy * m_Nz;
1959 for (
int it = kt0; it < m_Mt; ++it) {
1960 for (
int iz = 0; iz < m_Mz; ++iz) {
1961 for (
int ixy = 0; ixy < Nxy; ++ixy) {
1962 int is = ixy + Nxy * (iz + m_Nz * it);
// Input spinor and link are both taken from site is - Nxyz (it - 1).
1964 int in = Nvcd * (is - Nxyz);
1965 int ig = m_Ndf * (is - Nxyz);
1967 for (
int ic = 0; ic < m_Nc; ++ic) {
1968 vt1[2 * ic] = w1[2 * ic + id1 + in] - w1[2 * ic + id3 + in];
1969 vt1[2 * ic + 1] = w1[2 * ic + 1 + id1 + in] - w1[2 * ic + 1 + id3 + in];
1970 vt2[2 * ic] = w1[2 * ic + id2 + in] - w1[2 * ic + id4 + in];
1971 vt2[2 * ic + 1] = w1[2 * ic + 1 + id2 + in] - w1[2 * ic + 1 + id4 + in];
// ic2 is declared on a line missing from this listing.
1974 for (
int ic = 0; ic < m_Nc; ++ic) {
1976 wt1r = mult_udagv_r(&u[ic2 + ig], vt1, m_Nc);
1977 wt1i = mult_udagv_i(&u[ic2 + ig], vt1, m_Nc);
1978 wt2r = mult_udagv_r(&u[ic2 + ig], vt2, m_Nc);
1979 wt2i = mult_udagv_i(&u[ic2 + ig], vt2, m_Nc);
// Added to blocks id1/id2, subtracted from blocks id3/id4.
1981 w2[ic2 + id1 + iv] += wt1r;
1982 w2[ic2 + 1 + id1 + iv] += wt1i;
1983 w2[ic2 + id2 + iv] += wt2r;
1984 w2[ic2 + 1 + id2 + iv] += wt2i;
1985 w2[ic2 + id3 + iv] -= wt1r;
1986 w2[ic2 + 1 + id3 + iv] -= wt1i;
1987 w2[ic2 + id4 + iv] -= wt2r;
1988 w2[ic2 + 1 + id4 + iv] -= wt2i;
// NOTE(review): function name, leading parameter(s), braces and the
// declarations of id1/id2 are missing from this listing. Presumably
// gm5_dirac_thread(int itask, double *v2, double *v1) -- confirm against the
// original file.
//
// Visible behaviour: for every site of the task's sub-volume, copies v1 into
// v2 with the component blocks exchanged pairwise: id1 <-> id3 and
// id2 <-> id4.
1998 double *v2,
double *v1)
2000 int Nvcd = m_Nvc * m_Nd;
2001 int Nxy = m_Nx * m_Ny;
2005 int id3 = m_Nvc * 2;
2006 int id4 = m_Nvc * 3;
// Both fields are addressed relative to the task's first site.
2008 int isite = m_arg[itask].isite;
2009 double *w2 = &v2[Nvcd * isite];
2010 double *w1 = &v1[Nvcd * isite];
2012 for (
int it = 0; it < m_Mt; ++it) {
2013 for (
int iz = 0; iz < m_Mz; ++iz) {
2014 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of this site's Nvcd doubles within the task's sub-volume.
2015 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2016 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2017 w2[ivc + id1 + iv] = w1[ivc + id3 + iv];
2018 w2[ivc + id2 + iv] = w1[ivc + id4 + iv];
2019 w2[ivc + id3 + iv] = w1[ivc + id1 + iv];
2020 w2[ivc + id4 + iv] = w1[ivc + id2 + iv];
// NOTE(review): function name, leading parameter(s), braces and the
// declarations of id1/id2 are missing from this listing. Presumably
// gm5_chiral_thread(int itask, double *v2, double *v1) -- confirm against the
// original file.
//
// Visible behaviour: for every site of the task's sub-volume, copies the
// id1/id2 component blocks of v1 to v2 unchanged and the id3/id4 blocks with
// the sign flipped.
2030 double *v2,
double *v1)
2032 int Nvcd = m_Nvc * m_Nd;
2033 int Nxy = m_Nx * m_Ny;
2037 int id3 = m_Nvc * 2;
2038 int id4 = m_Nvc * 3;
// Both fields are addressed relative to the task's first site.
2040 int isite = m_arg[itask].isite;
2041 double *w2 = &v2[Nvcd * isite];
2042 double *w1 = &v1[Nvcd * isite];
2044 for (
int it = 0; it < m_Mt; ++it) {
2045 for (
int iz = 0; iz < m_Mz; ++iz) {
2046 for (
int ixy = 0; ixy < Nxy; ++ixy) {
// Offset of this site's Nvcd doubles within the task's sub-volume.
2047 int iv = Nvcd * (ixy + Nxy * (iz + m_Nz * it));
2048 for (
int ivc = 0; ivc < m_Nvc; ++ivc) {
2049 w2[ivc + id1 + iv] = w1[ivc + id1 + iv];
2050 w2[ivc + id2 + iv] = w1[ivc + id2 + iv];
2051 w2[ivc + id3 + iv] = -w1[ivc + id3 + iv];
2052 w2[ivc + id4 + iv] = -w1[ivc + id4 + iv];
void mult_tpb_dirac_thread(int, double *, double *)
void mult_tm1_chiral_thread(int, double *, double *)
void general(const char *format,...)
void mult_xp1_thread(int, double *, double *)
void daypx_thread(int, double *, double, double *)
void mult_xm2_thread(int, double *, double *)
void mult_zm2_thread(int, double *, double *)
void mult_xp2_thread(int, double *, double *)
std::valarray< Channel * > m_fw_recv
void mult_ymb_thread(int, double *, double *)
std::valarray< Channel * > m_bw_send
void mult_tm2_chiral_thread(int, double *, double *)
void gm5_dirac_thread(int, double *, double *)
void mult_zmb_thread(int, double *, double *)
void mult_tm2_dirac_thread(int, double *, double *)
std::valarray< Channel * > m_bw_recv
void mult_zp1_thread(int, double *, double *)
void mult_ym1_thread(int, double *, double *)
void mult_yp2_thread(int, double *, double *)
void mult_zp2_thread(int, double *, double *)
void mult_tm1_dirac_thread(int, double *, double *)
void mult_tp1_chiral_thread(int, double *, double *)
void mult_xmb_thread(int, double *, double *)
Bridge::VerboseLevel m_vl
void mult_xm1_thread(int, double *, double *)
static int get_num_threads_available()
Returns the number of threads (also works outside of a parallel region).
void mult_tp1_dirac_thread(int, double *, double *)
void clear_thread(int, double *)
void mult_zm1_thread(int, double *, double *)
void mult_xpb_thread(int, double *, double *)
void mult_tmb_dirac_thread(int, double *, double *)
void mult_tp2_chiral_thread(int, double *, double *)
void mult_ypb_thread(int, double *, double *)
void mult_tp2_dirac_thread(int, double *, double *)
void gm5_chiral_thread(int, double *, double *)
void mult_zpb_thread(int, double *, double *)
void mult_ym2_thread(int, double *, double *)
void mult_tpb_chiral_thread(int, double *, double *)
void mult_yp1_thread(int, double *, double *)
void mult_tmb_chiral_thread(int, double *, double *)
valarray< mult_arg > m_arg
std::valarray< Channel * > m_fw_send