// Fragment of a template-scope definition that is initialized with the
// string "Fprop_alt_Standard_eo". The declarator line is not visible here;
// presumably this is the static `class_name` member that the logging calls
// in this file print via class_name.c_str() -- TODO confirm.
10 template<
typename AFIELD>
12 =
"Fprop_alt_Standard_eo";
// Setup fragment for the variant WITHOUT link smearing (per the log text).
// The function name and parameter list are not visible in this extract.
// Visible steps:
//   1. log the start of setup,
//   2. read "fermion_type" from params_fopr and test its "_eo" suffix,
//   3. build the fermion operator m_fopr through AltFopr::New,
//   4. read "solver_type" from params_solver, build m_solver on top of
//      m_fopr through AltSolver::New and pass it the solver parameters,
//   5. log the end of setup.
14 template<
typename AFIELD>
20 vout.
general(m_vl,
"%s: being setup (without link smearing).\n",
27 string fopr_type = params_fopr.
get_string(
"fermion_type");
// True when the last three characters of fopr_type differ from "_eo"; the
// action taken in that case is not visible here.
// NOTE(review): assuming `string` is std::string, fopr_type.size()-3 wraps
// around for names shorter than three characters and substr() would then
// throw std::out_of_range -- confirm such names cannot reach this point.
28 if(fopr_type.substr(fopr_type.size()-3 ,3) !=
"_eo")
31 m_fopr = AltFopr::New(fopr_type, params_fopr);
37 string solver_type = params_solver.
get_string(
"solver_type");
38 m_solver = AltSolver::New(solver_type, m_fopr);
39 m_solver->set_parameters(params_solver);
44 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Setup fragment for the variant WITH link smearing (per the log text).
// The function name and parameter list are not visible in this extract.
// Visible steps: log the start, store dr_smear in m_dr_smear, read
// "fermion_type" and test its "_eo" suffix, build m_kernel through
// AltFopr::New, build m_solver from "solver_type" and m_fopr, hand the
// solver its parameters, and log the end.
49 template<
typename AFIELD>
57 vout.
general(m_vl,
"%s: being setup (with link smearing).\n",
64 m_dr_smear = dr_smear;
66 string fopr_type = params_fopr.
get_string(
"fermion_type");
// Same suffix test as in the non-smeared setup; the branch body is not
// visible. NOTE(review): assuming `string` is std::string, size()-3 wraps
// around for names shorter than three characters -- confirm.
67 if(fopr_type.substr(fopr_type.size()-3 ,3) !=
"_eo")
// Here the operator created from the parameters is stored in m_kernel,
// not in m_fopr.
70 m_kernel = AltFopr::New(fopr_type, params_fopr);
75 string solver_type = params_solver.
get_string(
"solver_type");
// NOTE(review): the solver is built on m_fopr, but no assignment to m_fopr
// is visible in this fragment -- confirm that m_fopr is set (presumably
// from m_kernel and m_dr_smear) in the lines missing before this call.
76 m_solver = AltSolver::New(solver_type, m_fopr);
77 m_solver->set_parameters(params_solver);
82 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Cleanup fragment: releases the object pointed to by m_kernel when the
// pointer is non-null. The enclosing function's name is not visible here,
// and neither is any release of m_fopr or m_solver.
87 template<
typename AFIELD>
92 if (m_kernel != 0)
delete m_kernel;
// Fragment that forwards the configuration object U to the fermion
// operator via m_fopr->set_config(U). The signature (and the type of U)
// is not visible in this extract.
97 template<
typename AFIELD>
100 m_fopr->set_config(U);
// Top-level solve entry point (logs "invert is called"); the leading part
// of the parameter list is not visible. nconv and diff are output
// references that are passed on to the selected worker routine.
//
// Dispatch on m_mode, as visible below:
//   "D" / "D_prec"         -> invert_D(xq, b, nconv, diff)
//   "DdagD" / "DdagD_prec" -> invert_DdagD(xq, b, nconv, diff)
//   "D_even"               -> m_fopr->set_mode("D"),     then invert_De
//   "Ddag_even"            -> m_fopr->set_mode("Ddag"),  then invert_De
//   "DdagD_even"           -> m_fopr->set_mode("DdagD"), then invert_De
// The final visible line passes class_name and m_mode to a call whose start
// is missing -- presumably the message for an unsupported mode (confirm).
105 template<
typename AFIELD>
107 int& nconv,
double& diff)
109 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
112 if ((m_mode ==
"D") || (m_mode ==
"D_prec")) {
113 invert_D(xq, b, nconv, diff);
114 }
else if ((m_mode ==
"DdagD") || (m_mode ==
"DdagD_prec")) {
115 invert_DdagD(xq, b, nconv, diff);
116 }
else if (m_mode ==
"D_even") {
117 m_fopr->set_mode(
"D");
118 invert_De(xq, b, nconv, diff);
119 }
else if (m_mode ==
"Ddag_even") {
120 m_fopr->set_mode(
"Ddag");
121 invert_De(xq, b, nconv, diff);
122 }
else if (m_mode ==
"DdagD_even") {
123 m_fopr->set_mode(
"DdagD");
124 invert_De(xq, b, nconv, diff);
127 class_name.c_str(), m_mode.c_str());
// Second solve entry point that also logs "invert is called"; presumably an
// overload of the one above for a different field type (the parameter types
// are not visible -- confirm). nconv and diff are output references.
//
// The condition guarding the first branch (the invert_D call) is missing
// from this extract. The remaining visible branches dispatch on m_mode:
//   "DdagD"      -> invert_DdagD(xq, b, nconv, diff)
//   "D_even"     -> m_fopr->set_mode("D"),     then invert_De
//   "Ddag_even"  -> m_fopr->set_mode("Ddag"),  then invert_De
//   "DdagD_even" -> m_fopr->set_mode("DdagD"), then invert_De
// NOTE(review): unlike the other entry point, no "DdagD_prec" alternative is
// visible here -- confirm whether that difference is intended.
// The final visible line passes class_name and m_mode to a call whose start
// is missing -- presumably the message for an unsupported mode (confirm).
134 template<
typename AFIELD>
136 int& nconv,
double& diff)
138 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
142 invert_D(xq, b, nconv, diff);
143 }
else if (m_mode ==
"DdagD") {
144 invert_DdagD(xq, b, nconv, diff);
145 }
else if (m_mode ==
"D_even") {
146 m_fopr->set_mode(
"D");
147 invert_De(xq, b, nconv, diff);
148 }
else if (m_mode ==
"Ddag_even") {
149 m_fopr->set_mode(
"Ddag");
150 invert_De(xq, b, nconv, diff);
151 }
else if (m_mode ==
"DdagD_even") {
152 m_fopr->set_mode(
"DdagD");
153 invert_De(xq, b, nconv, diff);
156 class_name.c_str(), m_mode.c_str());
// Solve on a full-size field with optional format conversion; presumably
// the invert_D worker called from the dispatcher (name not visible here).
// nconv and diff are output references filled by the inner invert_De call.
163 template<
typename AFIELD>
165 int& nconv,
double& diff)
// Field geometry is taken from the operator. field_nvol() is treated as the
// half volume here: the full-size work fields use nvol = 2 * nvol2.
173 int nin = m_fopr->field_nin();
174 int nvol2 = m_fopr->field_nvol();
175 int nex = m_fopr->field_nex();
176 int nvol = 2 * nvol2;
// Full-size work copies of the solution (axq) and the source (abq).
178 AFIELD axq(nin, nvol, nex);
179 AFIELD abq(nin, nvol, nex);
// Half-size source (be, bo) and solution (xe, xo) fields; presumably the
// even- and odd-site parts -- confirm.
181 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
182 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
// When the operator requires it, b is converted into abq; the alternative
// branch is not visible in this extract.
189 if (m_fopr->needs_convert()) {
191 m_fopr->convert(abq, b);
// Split the source into its two halves, solve, and report the residual.
197 index_eo.split(be, bo, abq);
200 invert_De(xe, xo, be, bo, nconv, diff);
202 vout.
detailed(m_vl,
"%s: diff = %e\n", class_name.c_str(), diff);
// Recombine the two halves and convert back into the caller's field xq.
206 index_eo.merge(axq, xe, xo);
209 if (m_fopr->needs_convert()) {
210 m_fopr->reverse(xq, axq);
// Accumulate statistics used by the performance report; the point where
// m_timer is started is not visible here.
217 m_elapsed_time += m_timer.elapsed_sec();
218 m_flop_count += m_solver->flop_count();
// Two-stage solve on a full-size field with optional format conversion;
// presumably the invert_DdagD worker called from the dispatcher (name not
// visible here). The visible code first calls invert_De_dag and then feeds
// its result into invert_De.
223 template<
typename AFIELD>
225 int& nconv,
double& diff)
// field_nvol() is treated as the half volume: nvol = 2 * nvol2.
235 int nin = m_fopr->field_nin();
236 int nvol2 = m_fopr->field_nvol();
237 int nex = m_fopr->field_nex();
238 int nvol = 2 * nvol2;
// Full-size work copies of the solution (axq) and the source (abq).
240 AFIELD axq(nin, nvol, nex);
241 AFIELD abq(nin, nvol, nex);
// Half-size source (be, bo), solution (xe, xo) and intermediate (y1, y2)
// fields.
243 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
244 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
245 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
// When the operator requires it, b is converted into abq; the alternative
// branch is not visible in this extract.
252 if (m_fopr->needs_convert()) {
254 m_fopr->convert(abq, b);
261 index_eo.split(be, bo, abq);
// First stage: (be, bo) -> (y1, y2).
// NOTE(review): nconv1 and diff1 are declared in lines not visible here and
// are passed to BOTH stages, so the second call overwrites the first
// stage's values; how nconv and diff are finally set is also not visible
// -- confirm the intended accounting.
266 invert_De_dag(y1, y2, be, bo, nconv1, diff1);
// Second stage: (y1, y2) -> (xe, xo).
271 invert_De(xe, xo, y1, y2, nconv1, diff1);
// Recombine the two halves and convert back into the caller's field xq.
278 index_eo.merge(axq, xe, xo);
281 if (m_fopr->needs_convert()) {
282 m_fopr->reverse(xq, axq);
// Accumulate statistics used by the performance report.
289 m_elapsed_time += m_timer.elapsed_sec();
290 m_flop_count += m_solver->flop_count();
// Solve on a full-size field WITHOUT format conversion: b is split
// directly and the result is merged directly into xq. Presumably the
// invert_D overload for the native field type (signature not visible --
// confirm). nconv and diff are filled by the inner invert_De call.
295 template<
typename AFIELD>
297 int& nconv,
double& diff)
307 int nin = m_fopr->field_nin();
308 int nvol2 = m_fopr->field_nvol();
309 int nex = m_fopr->field_nex();
// Half-size source (be, bo) and solution (xe, xo) fields.
311 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
312 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
// Split, solve on the halves, and merge back into xq.
318 index_eo.split(be, bo, b);
322 invert_De(xe, xo, be, bo, nconv, diff);
326 index_eo.merge(xq, xe, xo);
// Accumulate statistics used by the performance report.
330 m_elapsed_time += m_timer.elapsed_sec();
331 m_flop_count += m_solver->flop_count();
// Solve on a single half-size field with optional format conversion;
// presumably the four-argument invert_De reached from the "*_even" modes
// of the dispatcher (signature not visible -- confirm).
336 template<
typename AFIELD>
338 int& nconv,
double& diff)
348 int nin = m_fopr->field_nin();
349 int nvol2 = m_fopr->field_nvol();
350 int nex = m_fopr->field_nex();
// Both work fields are half-size here (nvol2), unlike the full-size
// conversion buffers used by the full-field workers.
352 AFIELD abq(nin, nvol2, nex);
353 AFIELD axq(nin, nvol2, nex);
// When the operator requires it, b is converted into abq; the alternative
// branch is not visible in this extract.
359 if (m_fopr->needs_convert()) {
361 m_fopr->convert(abq, b);
// Solve on the converted field, then convert the result back into xq.
368 invert_De(axq, abq, nconv, diff);
372 if (m_fopr->needs_convert()) {
373 m_fopr->reverse(xq, axq);
// Accumulate statistics used by the performance report.
380 m_elapsed_time += m_timer.elapsed_sec();
381 m_flop_count += m_solver->flop_count();
// Two-stage solve on a full-size field WITHOUT format conversion: b is
// split directly, invert_De_dag and invert_De are applied in sequence, and
// the result is merged directly into xq. Presumably the invert_DdagD
// overload for the native field type (signature not visible -- confirm).
386 template<
typename AFIELD>
388 int& nconv,
double& diff)
398 int nin = m_fopr->field_nin();
399 int nvol2 = m_fopr->field_nvol();
400 int nex = m_fopr->field_nex();
// Half-size source (be, bo), solution (xe, xo) and intermediate (y1, y2)
// fields.
402 AFIELD be(nin, nvol2, nex), bo(nin, nvol2, nex);
403 AFIELD xe(nin, nvol2, nex), xo(nin, nvol2, nex);
404 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
410 index_eo.split(be, bo, b);
// NOTE(review): nconv1 and diff1 are declared in lines not visible here and
// are shared by both stages, so the second call overwrites the first
// stage's values; how nconv and diff are finally set is not visible
// -- confirm the intended accounting.
415 invert_De_dag(y1, y2, be, bo, nconv1, diff1);
420 invert_De(xe, xo, y1, y2, nconv1, diff1);
427 index_eo.merge(xq, xe, xo);
// Accumulate statistics used by the performance report.
431 m_elapsed_time += m_timer.elapsed_sec();
432 m_flop_count += m_solver->flop_count();
// Core solve on split fields; presumably the six-argument
// invert_De(xe, xo, be, bo, nconv, diff) used by the workers above
// (signature not visible -- confirm). Several original lines between the
// visible statements are missing, so the way intermediate results are
// combined cannot be read from this extract.
437 template<
typename AFIELD>
440 int& nconv,
double& diff)
442 int nin = m_fopr->field_nin();
443 int nvol2 = m_fopr->field_nvol();
444 int nex = m_fopr->field_nex();
// NOTE(review): nvol is not used in any line visible in this fragment.
445 int nvol = 2 * nvol2;
// Half-size scratch fields.
447 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
// Source preparation: y1 = "Doo_inv" applied to bo, y2 = "Deo" applied to
// y1, then y1 is overwritten with "Dee_inv" applied to be. How y2 enters
// the right-hand side is in lines not visible here.
454 m_fopr->mult(y1, bo,
"Doo_inv");
456 m_fopr->mult(y2, y1,
"Deo");
462 m_fopr->mult(y1, be,
"Dee_inv");
// Solve for xe with the operator in mode "D", using y1 as the source;
// diff2 is declared in a line not visible here.
466 m_fopr->set_mode(
"D");
467 m_solver->solve(xe, y1, nconv, diff2);
470 m_fopr->normalize_fprop(xe);
// Build xo from xe: y1 = "Doe" applied to xe, then xo = "Doo_inv" applied
// to y1; any combination with bo happens in lines not visible here.
472 m_fopr->mult(y1, xe,
"Doe");
478 m_fopr->mult(xo, y1,
"Doo_inv");
// Report the solver's residual measure to the caller as a double.
482 diff = double(diff2);
// Core solve on split fields using the operator's mult_dag variants; the
// log text identifies it as invert_De_dag with six arguments. The
// signature is not visible, and several original lines between the visible
// statements are missing, so the way intermediate results are combined
// cannot be read from this extract.
492 template<
typename AFIELD>
495 int& nconv,
double& diff)
497 int nin = m_fopr->field_nin();
498 int nvol2 = m_fopr->field_nvol();
499 int nex = m_fopr->field_nex();
// NOTE(review): nvol is not used in any line visible in this fragment.
500 int nvol = 2 * nvol2;
// Half-size scratch fields.
502 AFIELD y1(nin, nvol2, nex), y2(nin, nvol2, nex);
504 vout.
detailed(m_vl,
"invert_De_dag(AFIELD)(6arg) start.\n");
// y1 = mult_dag "Doo_inv" on bo, then y2 = mult_dag "Deo" on y1.
509 m_fopr->mult_dag(y1, bo,
"Doo_inv");
511 m_fopr->mult_dag(y2, y1,
"Deo");
// Solve with the operator in mode "Ddag". y2 is the solution argument and
// be the source; NOTE(review): the y2 computed just above is therefore
// overwritten unless the missing lines consume it first -- confirm.
517 m_fopr->set_mode(
"Ddag");
518 m_solver->solve(y2, be, nconv, diff2);
521 m_fopr->normalize_fprop(y2);
// xe = mult_dag "Dee_inv" on the solver result.
523 m_fopr->mult_dag(xe, y2,
"Dee_inv");
// Build xo: y1 = mult_dag "Doe" on xe, then xo = mult_dag "Doo_inv" on y1;
// any combination with bo happens in lines not visible here.
526 m_fopr->mult_dag(y1, xe,
"Doe");
532 m_fopr->mult_dag(xo, y1,
"Doo_inv");
// Report the solver's residual measure to the caller as a double and log it.
536 diff = double(diff2);
540 vout.
detailed(m_vl,
"diff(invert_De_dag) = %e\n", diff);
// Direct solve on a single field: hands be to m_solver->solve with xe as
// the solution and returns the solver's residual measure through diff.
// Presumably the four-argument invert_De for the native field type
// (signature not visible -- confirm). diff2 is declared in a line not
// visible here; the operator mode is whatever the caller set beforehand.
546 template<
typename AFIELD>
549 int& nconv,
double& diff)
557 m_solver->solve(xe, be, nconv, diff2);
560 diff = double(diff2);
// Fragment that returns the flop count reported by the solver object
// (m_solver->flop_count()). The function name and return type are not
// visible in this extract.
566 template<
typename AFIELD>
569 return m_solver->flop_count();
// Fragment that resets the accumulated elapsed time to zero. The function
// name is not visible; whether m_flop_count is reset as well is in lines
// missing from this extract.
574 template<
typename AFIELD>
578 m_elapsed_time = 0.0;
// Prints the accumulated solver statistics (elapsed time, total flop count
// and the derived GFlops rate) at the `general` verbosity level. The
// function name is not visible in this extract.
583 template<
typename AFIELD>
// NOTE(review): m_elapsed_time is used as the divisor without a check. If
// it is still 0.0 (for example when no solve has been timed yet), the
// result is inf or NaN and is printed as such -- consider guarding.
586 double flops = m_flop_count / m_elapsed_time;
587 double gflops = flops * 1.0e-9;
590 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
591 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
592 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
593 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Benchmarks the operator's mult() in a given mode and prints the measured
// rate at the `general` verbosity level. Only the `mode` parameter is
// visible; Nrepeat is declared in a line missing from this extract
// (presumably the second parameter -- confirm).
598 template<
typename AFIELD>
600 const std::string mode,
603 int nin = m_fopr->field_nin();
604 int nvol = m_fopr->field_nvol();
605 int nex = m_fopr->field_nex();
// Work fields sized from the operator's own geometry.
// NOTE(review): no initialization of abq is visible before the first mult
// reads it -- confirm it happens in the missing lines.
607 AFIELD axq(nin, nvol, nex), abq(nin, nvol, nex);
// Local timer; the point where it is started is not visible here.
611 unique_ptr<Timer> timer(
new Timer);
// Remember the operator's current mode so it can be restored at the end.
613 std::string mode_prev = m_fopr->get_mode();
614 m_fopr->set_mode(mode);
// Each iteration applies the operator twice (abq -> axq -> abq), hence the
// factor 2 * Nrepeat in the flop total below.
620 for (
int i = 0; i < Nrepeat; ++i) {
621 m_fopr->mult(axq, abq);
622 m_fopr->mult(abq, axq);
628 double flop_fopr = m_fopr->flop_count();
629 double flop_total = flop_fopr * double(2 * Nrepeat);
// NOTE(review): elapsed_time is used as the divisor without a check; a zero
// reading would print inf or NaN.
631 double elapsed_time = timer->elapsed_sec();
632 double flops = flop_total / elapsed_time;
633 double gflops = flops * 1.0e-9;
636 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
637 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
638 vout.
general(m_vl,
" Number of mult = %18d\n", 2 * Nrepeat);
639 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
640 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
641 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
642 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Put the operator back into the mode it had on entry.
644 m_fopr->set_mode(mode_prev);