10 template<
typename AFIELD_d,
typename AFIELD_f>
12 =
"Fprop_alt_Standard_eo_Richardson";
14 template<
typename AFIELD_d,
typename AFIELD_f>
20 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
22 string fopr_type = params_fopr.
get_string(
"fermion_type");
35 int max_iter = params_solver.
get_int(
"maximum_number_of_iteration");
36 int max_restart = params_solver.
get_int(
"maximum_number_of_restart");
37 params_solver2.
set_int(
"maximum_number_of_iteration", max_iter * max_restart);
38 params_solver2.
set_int(
"maximum_number_of_restart", 1);
39 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
40 string str_vlevel = params_solver.
get_string(
"verbose_level");
41 params_solver2.
set_string(
"verbose_level", str_vlevel);
42 params_solver2.
set_string(
"initial_guess_mode",
"ZERO");
44 string solver_type = params_solver.
get_string(
"solver_type");
46 m_solver_prec->set_parameters(params_solver2);
53 m_solver->set_parameters(params_solver);
56 if (solver_type ==
"CGNR") {
62 int nin = m_fopr_d->field_nin();
63 int nvol2 = m_fopr_d->field_nvol();
64 int nex = m_fopr_d->field_nex();
67 m_axq.reset(nin, nvol, nex);
68 m_abq.reset(nin, nvol, nex);
70 m_be.reset(nin, nvol2, nex);
71 m_bo.reset(nin, nvol2, nex);
72 m_xe.reset(nin, nvol2, nex);
73 m_xo.reset(nin, nvol2, nex);
80 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
85 template<
typename AFIELD_d,
typename AFIELD_f>
92 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
94 m_dr_smear = dr_smear;
96 string fopr_type = params_fopr.
get_string(
"fermion_type");
107 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
108 params_solver2.
set_int(
"maximum_number_of_restart", 1);
109 params_solver2.
set_double(
"convergence_criterion_squared", 2.5e-8);
110 string str_vlevel = params_solver.
get_string(
"verbose_level");
111 params_solver2.
set_string(
"verbose_level", str_vlevel);
112 params_solver2.
set_string(
"initial_guess_mode",
"ZERO");
114 string solver_type = params_solver.
get_string(
"solver_type");
116 m_solver_prec->set_parameters(params_solver2);
123 m_solver->set_parameters(params_solver);
126 if (solver_type ==
"CGNR") {
131 int nin = m_fopr_d->field_nin();
132 int nvol2 = m_fopr_d->field_nvol();
133 int nex = m_fopr_d->field_nex();
134 int nvol = 2 * nvol2;
136 m_axq.reset(nin, nvol, nex);
137 m_abq.reset(nin, nvol, nex);
139 m_be.reset(nin, nvol2, nex);
140 m_bo.reset(nin, nvol2, nex);
141 m_xe.reset(nin, nvol2, nex);
142 m_xo.reset(nin, nvol2, nex);
148 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
155 template<
typename AFIELD_d,
typename AFIELD_f>
160 delete m_solver_prec;
163 if (m_kernel_d != 0)
delete m_kernel_d;
164 if (m_kernel_f != 0)
delete m_kernel_f;
169 template<
typename AFIELD_d,
typename AFIELD_f>
172 m_fopr_d->set_config(U);
173 m_fopr_f->set_config(U);
178 template<
typename AFIELD_d,
typename AFIELD_f>
180 int& nconv,
double& diff)
182 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
186 invert_D(xq, b, nconv, diff);
187 }
else if (m_mode ==
"DdagD") {
188 invert_DdagD(xq, b, nconv, diff);
189 }
else if (m_mode ==
"DdagD_even") {
191 vout.
crucial(m_vl,
"%s: unsupported mode: %s (not yet ready for Field)\n",
192 class_name.c_str(), m_mode.c_str());
195 class_name.c_str(), m_mode.c_str());
202 template<
typename AFIELD_d,
typename AFIELD_f>
204 int& nconv,
double& diff)
206 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
210 invert_D(xq, b, nconv, diff);
211 }
else if (m_mode ==
"DdagD") {
212 invert_DdagD(xq, b, nconv, diff);
213 }
else if (m_mode ==
"DdagD_even") {
214 invert_DdagD_even(xq, b, nconv, diff);
217 class_name.c_str(), m_mode.c_str());
224 template<
typename AFIELD_d,
typename AFIELD_f>
227 int& nconv,
double& diff)
239 if (m_fopr_d->needs_convert()) {
241 m_fopr_d->convert(m_abq, b);
249 invert_D(m_axq, m_abq, nconv, diff);
252 if (m_fopr_d->needs_convert()) {
253 m_fopr_d->reverse(xq, m_axq);
267 template<
typename AFIELD_d,
typename AFIELD_f>
270 int& nconv,
double& diff)
279 if (m_fopr_d->needs_convert()) {
281 m_fopr_d->convert(m_abq, b);
288 invert_DdagD(m_axq, m_abq, nconv, diff);
292 if (m_fopr_d->needs_convert()) {
293 m_fopr_d->reverse(xq, m_axq);
304 template<
typename AFIELD_d,
typename AFIELD_f>
307 int& nconv,
double& diff)
321 index_eo.split(m_be, m_bo, b);
324 invert_De(m_xe, m_xo, m_be, m_bo, nconv, diff);
327 index_eo.merge(xq, m_xe, m_xo);
332 m_elapsed_time += m_timer.elapsed_sec();
333 m_flop_count += m_solver->flop_count();
339 template<
typename AFIELD_d,
typename AFIELD_f>
342 int& nconv,
double& diff)
356 index_eo.split(m_xe, m_xo, b);
361 invert_De_dag(m_be, m_bo, m_xe, m_xo, nconv1, diff1);
366 invert_De(m_xe, m_xo, m_be, m_bo, nconv2, diff2);
372 index_eo.merge(xq, m_xe, m_xo);
377 m_elapsed_time += m_timer.elapsed_sec();
378 m_flop_count += m_solver->flop_count();
387 template<
typename AFIELD_d,
typename AFIELD_f>
391 int& nconv,
double& diff)
396 m_fopr_d->mult(xo, bo,
"Doo_inv");
399 m_fopr_d->mult(xe, xo,
"Deo");
405 m_fopr_d->mult(be, xe,
"Dee_inv");
408 m_fopr_d->set_mode(
"Ddag");
409 m_fopr_d->mult(xo, be);
410 m_fopr_d->set_mode(
"DdagD");
411 m_fopr_f->set_mode(
"DdagD");
412 m_solver->solve(xe, xo, nconv, diff);
414 m_fopr_d->set_mode(
"D");
415 m_fopr_f->set_mode(
"D");
416 m_solver->solve(xe, be, nconv, diff);
420 m_fopr_d->mult(xo, xe,
"Doe");
426 m_fopr_d->mult(xo, bo,
"Doo_inv");
432 template<
typename AFIELD_d,
typename AFIELD_f>
436 int& nconv,
double& diff)
441 m_fopr_d->mult_dag(xo, bo,
"Doo_inv");
444 m_fopr_d->mult_dag(xe, xo,
"Deo");
451 m_fopr_d->set_mode(
"DdagD");
452 m_fopr_f->set_mode(
"DdagD");
453 m_solver->solve(xe, be, nconv, diff);
454 m_fopr_d->set_mode(
"D");
455 m_fopr_d->mult(xo, xe);
457 m_fopr_d->set_mode(
"Ddag");
458 m_fopr_f->set_mode(
"Ddag");
459 m_solver->solve(xo, be, nconv, diff);
462 m_fopr_d->mult_dag(xe, xo,
"Dee_inv");
465 m_fopr_d->mult_dag(xo, xe,
"Doe");
471 m_fopr_d->mult_dag(xo, bo,
"Doo_inv");
479 template<
typename AFIELD_d,
typename AFIELD_f>
482 int& nconv,
double& diff)
490 m_fopr_d->set_mode(
"DdagD");
491 m_fopr_f->set_mode(
"DdagD");
495 m_solver->solve(xq, b, nconv, diff);
499 m_elapsed_time += m_timer.elapsed_sec();
500 m_flop_count += m_solver->flop_count();
505 template<
typename AFIELD_d,
typename AFIELD_f>
508 return m_solver->flop_count();
513 template<
typename AFIELD_d,
typename AFIELD_f>
517 m_elapsed_time = 0.0;
522 template<
typename AFIELD_d,
typename AFIELD_f>
525 double flops = m_flop_count / m_elapsed_time;
526 double gflops = flops * 1.0e-9;
529 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
530 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
531 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
532 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
537 template<
typename AFIELD_d,
typename AFIELD_f>
539 const std::string mode,
542 int nin = m_fopr_d->field_nin();
543 int nvol = m_fopr_d->field_nvol();
544 int nex = m_fopr_d->field_nex();
546 unique_ptr<Timer> timer(
new Timer);
548 std::string mode_prev_d = m_fopr_d->get_mode();
549 std::string mode_prev_f = m_fopr_f->get_mode();
551 m_fopr_d->set_mode(mode);
552 m_fopr_f->set_mode(mode);
568 for (
int i = 0; i < Nrepeat; ++i) {
569 m_fopr_d->mult(y2, y1);
570 m_fopr_d->mult(y1, y2);
577 double flop_fopr = m_fopr_d->flop_count();
578 double flop_total = flop_fopr * double(2 * Nrepeat);
580 double elapsed_time = timer->elapsed_sec();
581 double flops = flop_total / elapsed_time;
582 double gflops = flops * 1.0e-9;
585 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
586 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
588 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
589 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
590 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
591 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
597 int nin = m_fopr_f->field_nin();
598 int nvol2 = m_fopr_f->field_nvol();
599 int nex = m_fopr_f->field_nex();
600 y1f.reset(nin, nvol2, nex);
601 y2f.reset(nin, nvol2, nex);
614 for (
int i = 0; i < Nrepeat; ++i) {
615 m_fopr_f->mult(y2f, y1f);
616 m_fopr_f->mult(y1f, y2f);
623 flop_fopr = m_fopr_f->flop_count();
624 flop_total = flop_fopr * double(2 * Nrepeat);
626 elapsed_time = timer->elapsed_sec();
627 flops = flop_total / elapsed_time;
628 gflops = flops * 1.0e-9;
631 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
632 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
633 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
634 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
636 m_fopr_d->set_mode(mode_prev_d);
637 m_fopr_f->set_mode(mode_prev_f);