// Fragment of a templated definition initialised with the string
// "Fprop_alt_Standard_eo". The declarator is not visible in this view;
// presumably this is the class_name member that the log messages below
// print via class_name.c_str() -- TODO confirm.
10 template<
typename AFIELD>
12 =
"Fprop_alt_Standard_eo";
// Setup path without link smearing (signature not visible in this view).
// Visible steps:
//   1. log that setup is starting,
//   2. read "fermion_type" from params_fopr and build m_fopr via AltFopr::New,
//   3. read "solver_type" from params_solver, build m_solver on top of m_fopr
//      via AltSolver::New and hand it params_solver,
//   4. log that setup finished.
14 template<
typename AFIELD>
20 vout.
general(m_vl,
"%s: being setup (without link smearing).\n",
// NOTE(review): fopr_type may be modified in elided lines before it is
// passed to the factory -- confirm against the full file.
26 string fopr_type = params_fopr.
get_string(
"fermion_type");
29 m_fopr = AltFopr::New(fopr_type, params_fopr);
35 string solver_type = params_solver.
get_string(
"solver_type");
// The solver is constructed around the fermion operator created above.
36 m_solver = AltSolver::New(solver_type, m_fopr);
37 m_solver->set_parameters(params_solver);
41 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Setup path with link smearing (signature not visible in this view).
// Visible steps:
//   1. log that setup is starting,
//   2. store dr_smear in m_dr_smear,
//   3. read "fermion_type" from params_fopr and build m_kernel via AltFopr::New,
//   4. read "solver_type" from params_solver, build m_solver on top of m_fopr
//      via AltSolver::New and hand it params_solver,
//   5. log that setup finished.
46 template<
typename AFIELD>
53 vout.
general(m_vl,
"%s: being setup (with link smearing).\n",
59 m_dr_smear = dr_smear;
61 string fopr_type = params_fopr.
get_string(
"fermion_type");
// Unlike the non-smearing path, the factory result goes to m_kernel here.
63 m_kernel = AltFopr::New(fopr_type, params_fopr);
68 string solver_type = params_solver.
get_string(
"solver_type");
// NOTE(review): the solver is built from m_fopr, but no assignment to m_fopr
// is visible in this block. Presumably m_fopr is created from m_kernel and
// m_dr_smear in the elided lines between 63 and 68 -- verify, otherwise the
// solver would receive an unset pointer.
69 m_solver = AltSolver::New(solver_type, m_fopr);
70 m_solver->set_parameters(params_solver);
74 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Cleanup fragment (signature not visible in this view): releases m_kernel
// if it is set.
// NOTE(review): `delete` on a null pointer is a no-op in C++, so the guard is
// not required for safety. Release of m_fopr / m_solver is not visible here;
// presumably it happens in the elided lines -- TODO confirm.
79 template<
typename AFIELD>
84 if (m_kernel != 0)
delete m_kernel;
// Forwards the configuration object U to the fermion operator
// (signature not visible in this view).
89 template<
typename AFIELD>
92 m_fopr->set_config(U);
// Inversion entry point (leading parameters not visible in this view).
// Dispatches on m_mode:
//   "D"     or "D_prec"      -> invert_D(xq, b, nconv, diff)
//   "DdagD" or "DdagD_prec"  -> invert_DdagD(xq, b, nconv, diff)
// nconv and diff are passed through by reference to the selected routine.
97 template<
typename AFIELD>
99 int& nconv,
double& diff)
101 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
104 if ((m_mode ==
"D") || (m_mode ==
"D_prec")) {
105 invert_D(xq, b, nconv, diff);
106 }
else if ((m_mode ==
"DdagD") || (m_mode ==
"DdagD_prec")) {
107 invert_DdagD(xq, b, nconv, diff);
// The remaining visible line holds the arguments (class name and the
// offending mode string) of a message for any other m_mode value; the call
// itself is elided.
110 class_name.c_str(), m_mode.c_str());
// Solve routine used for the "D" modes of the first invert overload
// (leading parameters not visible in this view). Visible flow:
//   convert b -> abq (when the operator needs it), split abq into two
//   half-size fields, call invert_De, merge the two half-size solutions
//   into axq, convert back into xq, then accumulate timing and flop counters.
117 template<
typename AFIELD>
119 int& nconv,
double& diff)
// field_nvol() of the operator is taken as the half-size volume (nvol2);
// full-size work fields use twice that.
129 int nin = m_fopr->field_nin();
130 int nvol2 = m_fopr->field_nvol();
131 int nex = m_fopr->field_nex();
132 int nvol = 2 * nvol2;
// Full-size work fields for the solution (axq) and source (abq).
134 AFIELD axq(nin, nvol, nex);
135 AFIELD abq(nin, nvol, nex);
// Half-size source (be, bo) and solution (xe, xo) fields; the names and the
// use of index_eo suggest an even/odd site split -- presumably, TODO confirm.
137 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
138 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
// NOTE(review): the branch taken when needs_convert() is false is elided.
145 if (m_fopr->needs_convert()) {
147 m_fopr->convert(abq, b);
153 index_eo.split(be, bo, abq);
156 invert_De(xe, xo, be, bo, nconv, diff);
160 index_eo.merge(axq, xe, xo);
163 if (m_fopr->needs_convert()) {
164 m_fopr->reverse(xq, axq);
// Performance bookkeeping consumed by the reporting routine.
171 m_elapsed_time += m_timer.elapsed_sec();
172 m_flop_count += m_solver->flop_count();
// Solve routine used for the "DdagD" modes of the first invert overload
// (leading parameters not visible in this view). Visible flow:
//   convert b -> abq (when needed), split into half-size fields, apply
//   mult_gm5 to both halves, call invert_De, apply mult_gm5 to both result
//   halves, call invert_De again, merge into axq and convert back into xq.
177 template<
typename AFIELD>
179 int& nconv,
double& diff)
189 int nin = m_fopr->field_nin();
190 int nvol2 = m_fopr->field_nvol();
191 int nex = m_fopr->field_nex();
192 int nvol = 2 * nvol2;
// Full-size work fields.
194 AFIELD axq(nin, nvol, nex);
195 AFIELD abq(nin, nvol, nex);
// Half-size sources, solutions and two scratch fields (y1, y2).
197 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
198 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
199 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
206 if (m_fopr->needs_convert()) {
208 m_fopr->convert(abq, b);
215 index_eo.split(be, bo, abq);
// First stage: mult_gm5 on the split source, then solve.
218 m_fopr->mult_gm5(y1, be);
219 m_fopr->mult_gm5(y2, bo);
// NOTE(review): both solves write to nconv1/diff1, whose declarations are
// elided; how nconv and diff are filled from them is not visible here --
// verify that the first solve's values are not silently overwritten.
224 invert_De(xe, xo, y1, y2, nconv1, diff1);
// Second stage: mult_gm5 on the intermediate solution, then solve again.
231 m_fopr->mult_gm5(y1, xe);
232 m_fopr->mult_gm5(y2, xo);
235 invert_De(xe, xo, y1, y2, nconv1, diff1);
242 index_eo.merge(axq, xe, xo);
245 if (m_fopr->needs_convert()) {
246 m_fopr->reverse(xq, axq);
// Second inversion entry point (leading parameters not visible in this
// view). Dispatches on m_mode to invert_D or invert_DdagD, passing nconv and
// diff through by reference.
// NOTE(review): the visible branch accepts only "DdagD", whereas the first
// invert overload also accepts "DdagD_prec"; the condition guarding the
// invert_D call is elided. Confirm the narrower set of modes is intended.
255 template<
typename AFIELD>
257 int& nconv,
double& diff)
259 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
263 invert_D(xq, b, nconv, diff);
264 }
else if (m_mode ==
"DdagD") {
265 invert_DdagD(xq, b, nconv, diff);
// Arguments (class name and mode string) of a message for any other m_mode
// value; the call itself is elided.
268 class_name.c_str(), m_mode.c_str());
// Solve routine for the "D" path of the second invert overload (leading
// parameters not visible in this view). No layout conversion is visible:
// b is split directly into half-size fields, invert_De is called, and the
// result is merged directly into xq. Timing and flop counters are then
// accumulated.
275 template<
typename AFIELD>
277 int& nconv,
double& diff)
287 int nin = m_fopr->field_nin();
288 int nvol2 = m_fopr->field_nvol();
289 int nex = m_fopr->field_nex();
// Half-size source (be, bo) and solution (xe, xo) fields.
291 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
292 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
298 index_eo.split(be, bo, b);
301 invert_De(xe, xo, be, bo, nconv, diff);
305 index_eo.merge(xq, xe, xo);
// Performance bookkeeping consumed by the reporting routine.
309 m_elapsed_time += m_timer.elapsed_sec();
310 m_flop_count += m_solver->flop_count();
// Solve routine for the "DdagD" path of the second invert overload (leading
// parameters not visible in this view). Visible flow: split b into half-size
// fields, apply mult_gm5, call invert_De, apply mult_gm5 to the result, call
// invert_De again, merge into xq, then accumulate timing and flop counters.
315 template<
typename AFIELD>
317 int& nconv,
double& diff)
327 int nin = m_fopr->field_nin();
328 int nvol2 = m_fopr->field_nvol();
329 int nex = m_fopr->field_nex();
// Half-size sources, solutions and two scratch fields (y1, y2).
331 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
332 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
333 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
339 index_eo.split(be, bo, b);
// First stage: mult_gm5 on the split source, then solve.
342 m_fopr->mult_gm5(y1, be);
343 m_fopr->mult_gm5(y2, bo);
// NOTE(review): both solves write to nconv1/diff1, whose declarations are
// elided; how nconv and diff are filled from them is not visible here.
348 invert_De(xe, xo, y1, y2, nconv1, diff1);
// Second stage: mult_gm5 on the intermediate solution, then solve again.
355 m_fopr->mult_gm5(y1, xe);
356 m_fopr->mult_gm5(y2, xo);
359 invert_De(xe, xo, y1, y2, nconv1, diff1);
366 index_eo.merge(xq, xe, xo);
// Performance bookkeeping consumed by the reporting routine.
370 m_elapsed_time += m_timer.elapsed_sec();
371 m_flop_count += m_solver->flop_count();
// Core half-size solve called by the invert_D / invert_DdagD routines as
// invert_De(xe, xo, be, bo, nconv, diff) (leading parameters not visible in
// this view). Visible sequence of operator applications:
//   y1 <- "Doo_inv" applied to bo
//   y2 <- "Deo"     applied to y1
//   y1 <- "Dee_inv" applied to be
//   solve with the operator in mode "D" for xe, given y1
//   normalize_fprop(xe)
//   y1 <- "Doe"     applied to xe
//   xo <- "Doo_inv" applied to y1
// NOTE(review): the linear-algebra steps that combine these intermediates
// are elided, so the list above is not the complete algorithm.
376 template<
typename AFIELD>
379 int& nconv,
double& diff)
381 int nin = m_fopr->field_nin();
382 int nvol2 = m_fopr->field_nvol();
383 int nex = m_fopr->field_nex();
// NOTE(review): nvol is not used in any visible line of this block; it may
// be an unused local -- confirm against the full file.
384 int nvol = 2 * nvol2;
// Half-size scratch fields.
386 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
391 m_fopr->mult(y1, bo,
"Doo_inv");
394 m_fopr->mult(y2, y1,
"Deo");
400 m_fopr->mult(y1, be,
"Dee_inv");
// Put the operator into mode "D" before handing the system to the solver.
403 m_fopr->set_mode(
"D");
409 m_solver->solve(xe, y1, nconv, diff2);
412 m_fopr->normalize_fprop(xe);
414 m_fopr->mult(y1, xe,
"Doe");
420 m_fopr->mult(xo, y1,
"Doo_inv");
// diff2 (declared in an elided line) is converted to double for the caller.
424 diff = double(diff2);
// Returns the flop count reported by the solver (signature not visible in
// this view).
429 template<
typename AFIELD>
432 return m_solver->flop_count();
// Resets the accumulated performance counters (signature not visible in this
// view). Only the reset of m_elapsed_time is visible; m_flop_count is
// presumably cleared in an elided line -- TODO confirm.
437 template<
typename AFIELD>
441 m_elapsed_time = 0.0;
// Prints the accumulated solver performance (signature not visible in this
// view): elapsed time, total flop count, and the resulting GFlops rate.
446 template<
typename AFIELD>
// NOTE(review): m_elapsed_time is reset to 0.0 by the reset routine, so if
// this runs before any timed inversion the division below is by zero and the
// reported rate is inf/NaN. No guard is visible in this view -- verify.
449 double flops = m_flop_count / m_elapsed_time;
450 double gflops = flops * 1.0e-9;
453 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
454 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
455 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
456 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Benchmarks the fermion operator's mult() in the requested mode (remaining
// parameters not visible in this view). Visible flow: allocate two work
// fields, remember the operator's current mode and switch to `mode`, apply
// mult back and forth Nrepeat times, compute the flop rate, print a report,
// and restore the previous mode.
461 template<
typename AFIELD>
463 const std::string mode,
466 int nin = m_fopr->field_nin();
467 int nvol = m_fopr->field_nvol();
468 int nex = m_fopr->field_nex();
// NOTE(review): no initialisation of abq is visible before it is used as the
// input of the first mult; presumably it is filled in an elided line -- verify.
470 AFIELD axq(nin, nvol, nex), abq(nin, nvol, nex);
// Local timer owned by unique_ptr; its start/stop calls are elided.
474 unique_ptr<Timer> timer(
new Timer);
// Save the current mode so it can be restored after the benchmark.
476 std::string mode_prev = m_fopr->get_mode();
477 m_fopr->set_mode(mode);
// Each iteration performs two operator applications (abq -> axq -> abq).
483 for (
int i = 0; i < Nrepeat; ++i) {
484 m_fopr->mult(axq, abq);
485 m_fopr->mult(abq, axq);
// Total work = flops per application * 2 applications * Nrepeat iterations.
491 double flop_fopr = m_fopr->flop_count();
492 double flop_total = flop_fopr * double(2 * Nrepeat);
// NOTE(review): if elapsed_time is zero (e.g. Nrepeat == 0 or a very coarse
// timer) the division below yields inf/NaN; no guard is visible.
494 double elapsed_time = timer->elapsed_sec();
495 double flops = flop_total / elapsed_time;
496 double gflops = flops * 1.0e-9;
499 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
500 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
501 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
502 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
503 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
504 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Restore the mode that was active before the benchmark.
506 m_fopr->set_mode(mode_prev);