#if defined USE_GROUP_SU3
#elif defined USE_GROUP_SU2
#elif defined USE_GROUP_SU_N
double kappa_s, kappa_t, cSW_s, cSW_t;
err += params.fetch_double("hopping_parameter_spatial", kappa_s);
err += params.fetch_double("hopping_parameter_temporal", kappa_t);
err += params.fetch_double("clover_coefficient_spatial", cSW_s);
err += params.fetch_double("clover_coefficient_temporal", cSW_t);
const double cSW_s, const double cSW_t,
const std::vector<int> bc)
for (int mu = 0; mu < m_Ndim; ++mu) {

assert(bc.size() == m_Ndim);
} else if (m_repr == "Chiral") {
(this->*m_gm5)(v, f);
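// gamma_5 multiplication, Dirac representation (dispatched via m_gm5 above):
// the upper two and lower two spinor components, addressed by (id1, id2) and
// (id3, id4), are swapped at every site.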
const double *v1 = f.ptr(0);
double       *v2 = w.ptr(0);

const int id3 = Nvc * 2;
const int id4 = Nvc * 3;

const int is = m_Nvol * i_thread / Nthread;
const int ns = m_Nvol * (i_thread + 1) / Nthread - is;

for (int site = is; site < is + ns; ++site) {
  for (int icc = 0; icc < Nvc; icc++) {
    int in = Nvc * Nd * site;

    v2[icc + id1 + in] = v1[icc + id3 + in];
    v2[icc + id2 + in] = v1[icc + id4 + in];
    v2[icc + id3 + in] = v1[icc + id1 + in];
    v2[icc + id4 + in] = v1[icc + id2 + in];
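// gamma_5 multiplication, chiral representation: the upper two spinor
// components are copied unchanged and the lower two change sign.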
const double *v1 = f.ptr(0);
double       *v2 = w.ptr(0);

const int id3 = Nvc * 2;
const int id4 = Nvc * 3;

const int is = m_Nvol * i_thread / Nthread;
const int ns = m_Nvol * (i_thread + 1) / Nthread - is;

for (int site = is; site < is + ns; ++site) {
  for (int icc = 0; icc < Nvc; icc++) {
    int in = Nvc * Nd * site;

    v2[icc + id1 + in] = v1[icc + id1 + in];
    v2[icc + id2 + in] = v1[icc + id2 + in];
    v2[icc + id3 + in] = -v1[icc + id3 + in];
    v2[icc + id4 + in] = -v1[icc + id4 + in];
const int mu, const int nu)
(this->*m_csw)(v, w);
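// Clover-term multiplication, Dirac representation (dispatched via m_csw).
// Judging from the weights, Bx, By, Bz hold the field-strength components in
// the spatial planes and enter with kappa_cSW_s, while Ex, Ey, Ez hold the
// temporal-plane components and enter with kappa_cSW_t; each block in the
// site loop below adds one sigma_{mu,nu} F_{mu,nu} contribution.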
assert(w.nex() == 1);

const int Nvc = 2 * Nc;
const int Ndf = 2 * Nc * Nc;

const int Nvol = w.nvol();

const int id3 = Nvc * 2;
const int id4 = Nvc * 3;

const double *w2 = w.ptr(0);
double       *v2 = v.ptr(0);
const int is = m_Nvol * i_thread / Nthread;
const int ns = m_Nvol * (i_thread + 1) / Nthread - is;

for (int site = is; site < is + ns; ++site) {
  int iv = Nvc * Nd * site;

  for (int ic = 0; ic < Nc; ++ic) {
    int ic_g = ic * Nvc + ig;

    v2[ic_r + id1 + iv] = 0.0;
    v2[ic_i + id1 + iv] = 0.0;
    v2[ic_r + id2 + iv] = 0.0;
    v2[ic_i + id2 + iv] = 0.0;

    v2[ic_r + id3 + iv] = 0.0;
    v2[ic_i + id3 + iv] = 0.0;
    v2[ic_r + id4 + iv] = 0.0;
    v2[ic_i + id4 + iv] = 0.0;

    // Bx term (spatial weight kappa_cSW_s)
    v2[ic_r + id1 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id3 + iv], Nc);

    // By term
    v2[ic_r + id1 + iv] += kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] += kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id3 + iv], Nc);

    // Bz term
    v2[ic_r + id1 + iv] -= kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id1 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id2 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id3 + iv], Nc);
    v2[ic_r + id4 + iv] += kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id4 + iv], Nc);

    // Ex term (temporal weight kappa_cSW_t)
    v2[ic_r + id1 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] += kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id3 + iv], Nc);

    // Ey term
    v2[ic_r + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] += kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id3 + iv], Nc);

    // Ez term
    v2[ic_r + id1 + iv] += kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id1 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id2 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id3 + iv], Nc);
    v2[ic_r + id4 + iv] += kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id4 + iv], Nc);
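// Chiral-representation counterpart of the clover multiplication above: the
// spatial (Bx, By, Bz) blocks are identical, while the temporal (Ex, Ey, Ez)
// blocks couple the upper (id1, id2) and lower (id3, id4) spinor components,
// reflecting the chiral gamma-matrix basis.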
assert(w.nex() == 1);

const int Nvc = 2 * Nc;
const int Ndf = 2 * Nc * Nc;

const int Nvol = w.nvol();

const int id3 = Nvc * 2;
const int id4 = Nvc * 3;

const double *w2 = w.ptr(0);
double       *v2 = v.ptr(0);
const int is = m_Nvol * i_thread / Nthread;
const int ns = m_Nvol * (i_thread + 1) / Nthread - is;

for (int site = is; site < is + ns; ++site) {
  int iv = Nvc * Nd * site;

  for (int ic = 0; ic < Nc; ++ic) {
    int ic_g = ic * Nvc + ig;

    v2[ic_r + id1 + iv] = 0.0;
    v2[ic_i + id1 + iv] = 0.0;
    v2[ic_r + id2 + iv] = 0.0;
    v2[ic_i + id2 + iv] = 0.0;

    v2[ic_r + id3 + iv] = 0.0;
    v2[ic_i + id3 + iv] = 0.0;
    v2[ic_r + id4 + iv] = 0.0;
    v2[ic_i + id4 + iv] = 0.0;

    // Bx term (spatial weight kappa_cSW_s)
    v2[ic_r + id1 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_s * mult_uv_i(&Bx[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] += kappa_cSW_s * mult_uv_r(&Bx[ic_g], &w2[id3 + iv], Nc);

    // By term
    v2[ic_r + id1 + iv] += kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id1 + iv], Nc);

    v2[ic_r + id3 + iv] += kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_s * mult_uv_r(&By[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_s * mult_uv_i(&By[ic_g], &w2[id3 + iv], Nc);

    // Bz term
    v2[ic_r + id1 + iv] -= kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id1 + iv] += kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id1 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id2 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id3 + iv] += kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id3 + iv], Nc);
    v2[ic_r + id4 + iv] += kappa_cSW_s * mult_uv_i(&Bz[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_s * mult_uv_r(&Bz[ic_g], &w2[id4 + iv], Nc);

    // Ex term (temporal weight kappa_cSW_t)
    v2[ic_r + id1 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id2 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id3 + iv], Nc);

    v2[ic_r + id3 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id3 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id4 + iv] += kappa_cSW_t * mult_uv_i(&Ex[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id4 + iv] -= kappa_cSW_t * mult_uv_r(&Ex[ic_g], &w2[id1 + iv], Nc);

    // Ey term
    v2[ic_r + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id4 + iv], Nc);
    v2[ic_r + id2 + iv] += kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id3 + iv], Nc);

    v2[ic_r + id3 + iv] -= kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id3 + iv] -= kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id2 + iv], Nc);
    v2[ic_r + id4 + iv] += kappa_cSW_t * mult_uv_r(&Ey[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id4 + iv] += kappa_cSW_t * mult_uv_i(&Ey[ic_g], &w2[id1 + iv], Nc);

    // Ez term
    v2[ic_r + id1 + iv] += kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id3 + iv], Nc);
    v2[ic_i + id1 + iv] -= kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id3 + iv], Nc);
    v2[ic_r + id2 + iv] -= kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id4 + iv], Nc);
    v2[ic_i + id2 + iv] += kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id4 + iv], Nc);

    v2[ic_r + id3 + iv] += kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id1 + iv], Nc);
    v2[ic_i + id3 + iv] -= kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id1 + iv], Nc);
    v2[ic_r + id4 + iv] -= kappa_cSW_t * mult_uv_i(&Ez[ic_g], &w2[id2 + iv], Nc);
    v2[ic_i + id4 + iv] += kappa_cSW_t * mult_uv_r(&Ez[ic_g], &w2[id2 + iv], Nc);
const int mu, const int nu)
assert(Nthread == 1);
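// Total Gflop for one application: per-site flop count times the global
// lattice volume (local volume Nvol times the node count NPE).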
const double gflop = flop_site * (Nvol * (NPE / 1.0e+9));