10 template<
typename AFIELD_d,
typename AFIELD_f>
12 =
"Fprop_alt_Standard_eo_Mixedprec";
14 template<
typename AFIELD_d,
typename AFIELD_f>
20 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
22 string fopr_type = params_fopr.
get_string(
"fermion_type");
33 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
34 params_solver2.
set_int(
"maximum_number_of_restart", 1);
35 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
37 string str_vlevel = params_solver.
get_string(
"verbose_level");
38 params_solver2.
set_string(
"verbose_level", str_vlevel);
40 string solver_type = params_solver.
get_string(
"solver_type");
42 m_solver_prec->set_parameters(params_solver2);
51 m_solver->set_parameters(params_solver);
55 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
60 template<
typename AFIELD_d,
typename AFIELD_f>
67 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
69 m_dr_smear = dr_smear;
71 string fopr_type = params_fopr.
get_string(
"fermion_type");
82 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
83 params_solver2.
set_int(
"maximum_number_of_restart", 1);
84 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
85 string str_vlevel = params_solver.
get_string(
"verbose_level");
86 params_solver2.
set_string(
"verbose_level", str_vlevel);
88 string solver_type = params_solver.
get_string(
"solver_type");
90 m_solver_prec->set_parameters(params_solver2);
97 m_solver->set_parameters(params_solver);
101 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
106 template<
typename AFIELD_d,
typename AFIELD_f>
111 delete m_solver_prec;
114 if (m_kernel_d != 0)
delete m_kernel_d;
115 if (m_kernel_f != 0)
delete m_kernel_f;
120 template<
typename AFIELD_d,
typename AFIELD_f>
123 m_fopr_d->set_config(U);
124 m_fopr_f->set_config(U);
129 template<
typename AFIELD_d,
typename AFIELD_f>
131 int& nconv,
double& diff)
133 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
137 invert_D(xq, b, nconv, diff);
138 }
else if (m_mode ==
"DdagD") {
139 invert_DdagD(xq, b, nconv, diff);
142 class_name.c_str(), m_mode.c_str());
149 template<
typename AFIELD_d,
typename AFIELD_f>
151 int& nconv,
double& diff)
153 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
157 invert_D(xq, b, nconv, diff);
158 }
else if (m_mode ==
"DdagD") {
159 invert_DdagD(xq, b, nconv, diff);
162 class_name.c_str(), m_mode.c_str());
169 template<
typename AFIELD_d,
typename AFIELD_f>
172 int& nconv,
double& diff)
181 int nin = m_fopr_d->field_nin();
182 int nvol2 = m_fopr_d->field_nvol();
183 int nex = m_fopr_d->field_nex();
184 int nvol = 2 * nvol2;
193 if (m_fopr_d->needs_convert()) {
194 m_fopr_d->convert(abq, b);
201 invert_D(axq, abq, nconv, diff);
205 if (m_fopr_d->needs_convert()) {
206 m_fopr_d->reverse(xq, axq);
220 template<
typename AFIELD_d,
typename AFIELD_f>
223 int& nconv,
double& diff)
225 int nin = m_fopr_d->field_nin();
226 int nvol2 = m_fopr_d->field_nvol();
227 int nex = m_fopr_d->field_nex();
228 int nvol = 2 * nvol2;
237 if (m_fopr_d->needs_convert()) {
238 m_fopr_d->convert(abq, b);
245 invert_DdagD(axq, abq, nconv, diff);
249 if (m_fopr_d->needs_convert()) {
250 m_fopr_d->reverse(xq, axq);
259 template<
typename AFIELD_d,
typename AFIELD_f>
262 int& nconv,
double& diff)
270 int nin = m_fopr_d->field_nin();
271 int nvol2 = m_fopr_d->field_nvol();
272 int nex = m_fopr_d->field_nex();
273 int nvol = 2 * nvol2;
275 AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
276 AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
277 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
283 index_eo.split(be, bo, b);
286 invert_De(xe, xo, be, bo, nconv, diff);
290 index_eo.merge(xq, xe, xo);
294 m_elapsed_time += m_timer.elapsed_sec();
295 m_flop_count += m_solver->flop_count();
300 template<
typename AFIELD_d,
typename AFIELD_f>
303 int& nconv,
double& diff)
311 int nin = m_fopr_d->field_nin();
312 int nvol2 = m_fopr_d->field_nvol();
313 int nex = m_fopr_d->field_nex();
315 AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
316 AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
317 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
323 index_eo.split(be, bo, b);
328 invert_De_dag(y1, y2, be, bo, nconv1, diff1);
333 invert_De(xe, xo, y1, y2, nconv1, diff1);
340 index_eo.merge(xq, xe, xo);
344 m_elapsed_time += m_timer.elapsed_sec();
345 m_flop_count += m_solver->flop_count();
350 template<
typename AFIELD_d,
typename AFIELD_f>
354 int& nconv,
double& diff)
356 int nin = m_fopr_d->field_nin();
357 int nvol2 = m_fopr_d->field_nvol();
358 int nex = m_fopr_d->field_nex();
359 int nvol = 2 * nvol2;
361 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
366 m_fopr_d->mult(y1, bo,
"Doo_inv");
369 m_fopr_d->mult(y2, y1,
"Deo");
375 m_fopr_d->mult(y1, be,
"Dee_inv");
378 m_fopr_d->set_mode(
"D");
379 m_fopr_f->set_mode(
"D");
383 m_solver->solve(xe, y1, nconv, diff);
386 m_fopr_d->mult(y1, xe,
"Doe");
392 m_fopr_d->mult(xo, y1,
"Doo_inv");
399 template<
typename AFIELD_d,
typename AFIELD_f>
403 int& nconv,
double& diff)
405 int nin = m_fopr_d->field_nin();
406 int nvol2 = m_fopr_d->field_nvol();
407 int nex = m_fopr_d->field_nex();
408 int nvol = 2 * nvol2;
410 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
412 vout.
detailed(m_vl,
"invert_De_dag(AFIELD)(6arg) start.\n");
417 m_fopr_d->mult_dag(y1, bo,
"Doo_inv");
419 m_fopr_d->mult_dag(y2, y1,
"Deo");
425 m_fopr_d->set_mode(
"Ddag");
426 m_fopr_f->set_mode(
"Ddag");
427 m_solver->solve(y2, be, nconv, diff2);
430 m_fopr_d->mult_dag(xe, y2,
"Dee_inv");
433 m_fopr_d->mult_dag(y1, xe,
"Doe");
439 m_fopr_d->mult_dag(xo, y1,
"Doo_inv");
443 diff = double(diff2);
447 vout.
detailed(m_vl,
"diff(invert_De_dag) = %e\n", diff);
453 template<
typename AFIELD_d,
typename AFIELD_f>
456 return m_solver->flop_count();
461 template<
typename AFIELD_d,
typename AFIELD_f>
465 m_elapsed_time = 0.0;
470 template<
typename AFIELD_d,
typename AFIELD_f>
473 double flops = m_flop_count / m_elapsed_time;
474 double gflops = flops * 1.0e-9;
477 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
478 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
479 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
480 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
485 template<
typename AFIELD_d,
typename AFIELD_f>
487 const std::string mode,
490 int nin = m_fopr_d->field_nin();
491 int nvol = m_fopr_d->field_nvol();
492 int nex = m_fopr_d->field_nex();
494 unique_ptr<Timer> timer(
new Timer);
496 std::string mode_prev_d = m_fopr_d->get_mode();
497 std::string mode_prev_f = m_fopr_f->get_mode();
499 m_fopr_d->set_mode(mode);
500 m_fopr_f->set_mode(mode);
503 AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
511 for (
int i = 0; i < Nrepeat; ++i) {
512 m_fopr_d->mult(axq, abq);
513 m_fopr_d->mult(abq, axq);
520 double flop_fopr = m_fopr_d->flop_count();
521 double flop_total = flop_fopr * double(2 * Nrepeat);
523 double elapsed_time = timer->elapsed_sec();
524 double flops = flop_total / elapsed_time;
525 double gflops = flops * 1.0e-9;
528 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
529 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
531 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
532 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
533 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
534 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
537 AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
546 for (
int i = 0; i < Nrepeat; ++i) {
547 m_fopr_f->mult(axq, abq);
548 m_fopr_f->mult(abq, axq);
555 flop_fopr = m_fopr_f->flop_count();
556 flop_total = flop_fopr * double(2 * Nrepeat);
558 elapsed_time = timer->elapsed_sec();
559 flops = flop_total / elapsed_time;
560 gflops = flops * 1.0e-9;
563 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
564 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
565 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
566 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
568 m_fopr_d->set_mode(mode_prev_d);
569 m_fopr_f->set_mode(mode_prev_f);