10 template<
typename AFIELD_d,
typename AFIELD_f>
12 =
"Fprop_alt_Standard_eo_Richardson";
14 template<
typename AFIELD_d,
typename AFIELD_f>
20 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
22 string fopr_type = params_fopr.
get_string(
"fermion_type");
35 int max_iter = params_solver.
get_int(
"maximum_number_of_iteration");
36 int max_restart = params_solver.
get_int(
"maximum_number_of_restart");
37 params_solver2.
set_int(
"maximum_number_of_iteration", max_iter * max_restart);
38 params_solver2.
set_int(
"maximum_number_of_restart", 1);
39 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
40 string str_vlevel = params_solver.
get_string(
"verbose_level");
41 params_solver2.
set_string(
"verbose_level", str_vlevel);
42 params_solver2.
set_string(
"initial_guess_mode",
"ZERO");
44 string solver_type = params_solver.
get_string(
"solver_type");
46 m_solver_prec->set_parameters(params_solver2);
53 m_solver->set_parameters(params_solver);
56 if (solver_type ==
"CGNR") {
62 int nin = m_fopr_d->field_nin();
63 int nvol2 = m_fopr_d->field_nvol();
64 int nex = m_fopr_d->field_nex();
67 m_axq.reset(nin, nvol, nex);
68 m_abq.reset(nin, nvol, nex);
70 m_be.reset(nin, nvol2, nex);
71 m_bo.reset(nin, nvol2, nex);
72 m_xe.reset(nin, nvol2, nex);
73 m_xo.reset(nin, nvol2, nex);
74 m_y1.reset(nin, nvol2, nex);
75 m_y2.reset(nin, nvol2, nex);
76 m_y1f.reset(nin, nvol2, nex);
77 m_y2f.reset(nin, nvol2, nex);
80 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
85 template<
typename AFIELD_d,
typename AFIELD_f>
92 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
94 m_dr_smear = dr_smear;
96 string fopr_type = params_fopr.
get_string(
"fermion_type");
107 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
108 params_solver2.
set_int(
"maximum_number_of_restart", 1);
109 params_solver2.
set_double(
"convergence_criterion_squared", 2.5e-8);
110 string str_vlevel = params_solver.
get_string(
"verbose_level");
111 params_solver2.
set_string(
"verbose_level", str_vlevel);
112 params_solver2.
set_string(
"initial_guess_mode",
"ZERO");
114 string solver_type = params_solver.
get_string(
"solver_type");
116 m_solver_prec->set_parameters(params_solver2);
123 m_solver->set_parameters(params_solver);
126 if (solver_type ==
"CGNR") {
131 int nin = m_fopr_d->field_nin();
132 int nvol2 = m_fopr_d->field_nvol();
133 int nex = m_fopr_d->field_nex();
134 int nvol = 2 * nvol2;
136 m_axq.reset(nin, nvol, nex);
137 m_abq.reset(nin, nvol, nex);
139 m_be.reset(nin, nvol2, nex);
140 m_bo.reset(nin, nvol2, nex);
141 m_xe.reset(nin, nvol2, nex);
142 m_xo.reset(nin, nvol2, nex);
143 m_y1.reset(nin, nvol2, nex);
144 m_y2.reset(nin, nvol2, nex);
145 m_y1f.reset(nin, nvol2, nex);
146 m_y2f.reset(nin, nvol2, nex);
148 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
155 template<
typename AFIELD_d,
typename AFIELD_f>
160 delete m_solver_prec;
163 if (m_kernel_d != 0)
delete m_kernel_d;
164 if (m_kernel_f != 0)
delete m_kernel_f;
169 template<
typename AFIELD_d,
typename AFIELD_f>
172 m_fopr_d->set_config(U);
173 m_fopr_f->set_config(U);
178 template<
typename AFIELD_d,
typename AFIELD_f>
180 int& nconv,
double& diff)
182 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
186 invert_D(xq, b, nconv, diff);
187 }
else if (m_mode ==
"DdagD") {
188 invert_DdagD(xq, b, nconv, diff);
191 class_name.c_str(), m_mode.c_str());
198 template<
typename AFIELD_d,
typename AFIELD_f>
200 int& nconv,
double& diff)
202 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
206 invert_D(xq, b, nconv, diff);
207 }
else if (m_mode ==
"DdagD") {
208 invert_DdagD(xq, b, nconv, diff);
211 class_name.c_str(), m_mode.c_str());
218 template<
typename AFIELD_d,
typename AFIELD_f>
221 int& nconv,
double& diff)
233 if (m_fopr_d->needs_convert()) {
235 m_fopr_d->convert(m_abq, b);
243 invert_D(m_axq, m_abq, nconv, diff);
246 if (m_fopr_d->needs_convert()) {
247 m_fopr_d->reverse(xq, m_axq);
261 template<
typename AFIELD_d,
typename AFIELD_f>
264 int& nconv,
double& diff)
273 if (m_fopr_d->needs_convert()) {
275 m_fopr_d->convert(m_abq, b);
283 invert_DdagD(m_axq, m_abq, nconv, diff);
286 if (m_fopr_d->needs_convert()) {
287 m_fopr_d->reverse(xq, m_axq);
300 template<
typename AFIELD_d,
typename AFIELD_f>
303 int& nconv,
double& diff)
317 index_eo.split(m_be, m_bo, b);
320 invert_De(m_xe, m_xo, m_be, m_bo, nconv, diff);
323 index_eo.merge(xq, m_xe, m_xo);
328 m_elapsed_time += m_timer.elapsed_sec();
329 m_flop_count += m_solver->flop_count();
335 template<
typename AFIELD_d,
typename AFIELD_f>
338 int& nconv,
double& diff)
350 index_eo.split(m_be, m_bo, b);
353 m_fopr_d->mult_gm5(m_y1, m_be);
354 m_fopr_d->mult_gm5(m_y2, m_bo);
358 invert_De(m_xe, m_xo, m_y1, m_y2, nconv1, diff1);
364 m_fopr_d->mult_gm5(m_y1, m_xe);
365 m_fopr_d->mult_gm5(m_y2, m_xo);
367 invert_De(m_xe, m_xo, m_y1, m_y2, nconv1, diff1);
372 index_eo.merge(xq, m_xe, m_xo);
377 m_elapsed_time += m_timer.elapsed_sec();
378 m_flop_count += m_solver->flop_count();
384 template<
typename AFIELD_d,
typename AFIELD_f>
388 int& nconv,
double& diff)
393 m_fopr_d->mult(m_y1, bo,
"Doo_inv");
396 m_fopr_d->mult(m_y2, m_y1,
"Deo");
402 m_fopr_d->mult(m_y1, be,
"Dee_inv");
405 m_fopr_d->set_mode(
"Ddag");
406 m_fopr_d->mult(m_y2, m_y1);
407 m_fopr_d->set_mode(
"DdagD");
408 m_fopr_f->set_mode(
"DdagD");
409 m_solver->solve(xe, m_y2, nconv, diff);
411 m_fopr_d->set_mode(
"D");
412 m_fopr_f->set_mode(
"D");
413 m_solver->solve(xe, m_y1, nconv, diff);
417 m_fopr_d->mult(m_y1, xe,
"Doe");
423 m_fopr_d->mult(xo, m_y1,
"Doo_inv");
429 template<
typename AFIELD_d,
typename AFIELD_f>
432 return m_solver->flop_count();
437 template<
typename AFIELD_d,
typename AFIELD_f>
441 m_elapsed_time = 0.0;
446 template<
typename AFIELD_d,
typename AFIELD_f>
449 double flops = m_flop_count / m_elapsed_time;
450 double gflops = flops * 1.0e-9;
453 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
454 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
455 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
456 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
461 template<
typename AFIELD_d,
typename AFIELD_f>
463 const std::string mode,
466 int nin = m_fopr_d->field_nin();
467 int nvol = m_fopr_d->field_nvol();
468 int nex = m_fopr_d->field_nex();
470 unique_ptr<Timer> timer(
new Timer);
472 std::string mode_prev_d = m_fopr_d->get_mode();
473 std::string mode_prev_f = m_fopr_f->get_mode();
475 m_fopr_d->set_mode(mode);
476 m_fopr_f->set_mode(mode);
490 for (
int i = 0; i < Nrepeat; ++i) {
491 m_fopr_d->mult(m_y2, m_y1);
492 m_fopr_d->mult(m_y1, m_y2);
499 double flop_fopr = m_fopr_d->flop_count();
500 double flop_total = flop_fopr * double(2 * Nrepeat);
502 double elapsed_time = timer->elapsed_sec();
503 double flops = flop_total / elapsed_time;
504 double gflops = flops * 1.0e-9;
507 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
508 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
510 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
511 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
512 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
513 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
528 for (
int i = 0; i < Nrepeat; ++i) {
529 m_fopr_f->mult(m_y2f, m_y1f);
530 m_fopr_f->mult(m_y1f, m_y2f);
537 flop_fopr = m_fopr_f->flop_count();
538 flop_total = flop_fopr * double(2 * Nrepeat);
540 elapsed_time = timer->elapsed_sec();
541 flops = flop_total / elapsed_time;
542 gflops = flops * 1.0e-9;
545 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
546 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
547 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
548 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
550 m_fopr_d->set_mode(mode_prev_d);
551 m_fopr_f->set_mode(mode_prev_f);