10 template<
typename AFIELD_d,
typename AFIELD_f>
12 =
"Fprop_alt_Standard_eo_Mixedprec";
14 template<
typename AFIELD_d,
typename AFIELD_f>
20 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
22 string fopr_type = params_fopr.
get_string(
"fermion_type");
33 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
34 params_solver2.
set_int(
"maximum_number_of_restart", 1);
35 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
37 string str_vlevel = params_solver.
get_string(
"verbose_level");
38 params_solver2.
set_string(
"verbose_level", str_vlevel);
40 string solver_type = params_solver.
get_string(
"solver_type");
42 m_solver_prec->set_parameters(params_solver2);
51 m_solver->set_parameters(params_solver);
55 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
60 template<
typename AFIELD_d,
typename AFIELD_f>
67 vout.
general(m_vl,
"%s: being setup.\n", class_name.c_str());
69 m_dr_smear = dr_smear;
71 string fopr_type = params_fopr.
get_string(
"fermion_type");
82 params_solver2.
set_int(
"maximum_number_of_iteration", 200);
83 params_solver2.
set_int(
"maximum_number_of_restart", 1);
84 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-8);
85 string str_vlevel = params_solver.
get_string(
"verbose_level");
86 params_solver2.
set_string(
"verbose_level", str_vlevel);
88 string solver_type = params_solver.
get_string(
"solver_type");
90 m_solver_prec->set_parameters(params_solver2);
97 m_solver->set_parameters(params_solver);
101 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
106 template<
typename AFIELD_d,
typename AFIELD_f>
111 delete m_solver_prec;
114 if (m_kernel_d != 0)
delete m_kernel_d;
115 if (m_kernel_f != 0)
delete m_kernel_f;
120 template<
typename AFIELD_d,
typename AFIELD_f>
123 m_fopr_d->set_config(U);
124 m_fopr_f->set_config(U);
129 template<
typename AFIELD_d,
typename AFIELD_f>
131 int& nconv,
double& diff)
133 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
137 invert_D(xq, b, nconv, diff);
138 }
else if (m_mode ==
"DdagD") {
139 invert_DdagD(xq, b, nconv, diff);
142 class_name.c_str(), m_mode.c_str());
149 template<
typename AFIELD_d,
typename AFIELD_f>
151 int& nconv,
double& diff)
153 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
157 invert_D(xq, b, nconv, diff);
158 }
else if (m_mode ==
"DdagD") {
159 invert_DdagD(xq, b, nconv, diff);
162 class_name.c_str(), m_mode.c_str());
169 template<
typename AFIELD_d,
typename AFIELD_f>
172 int& nconv,
double& diff)
181 int nin = m_fopr_d->field_nin();
182 int nvol2 = m_fopr_d->field_nvol();
183 int nex = m_fopr_d->field_nex();
184 int nvol = 2 * nvol2;
193 if (m_fopr_d->needs_convert()) {
194 m_fopr_d->convert(abq, b);
201 invert_D(axq, abq, nconv, diff);
205 if (m_fopr_d->needs_convert()) {
206 m_fopr_d->reverse(xq, axq);
220 template<
typename AFIELD_d,
typename AFIELD_f>
223 int& nconv,
double& diff)
225 int nin = m_fopr_d->field_nin();
226 int nvol2 = m_fopr_d->field_nvol();
227 int nex = m_fopr_d->field_nex();
228 int nvol = 2 * nvol2;
237 if (m_fopr_d->needs_convert()) {
238 m_fopr_d->convert(abq, b);
246 invert_DdagD(axq, abq, nconv, diff);
250 if (m_fopr_d->needs_convert()) {
251 m_fopr_d->reverse(xq, axq);
260 template<
typename AFIELD_d,
typename AFIELD_f>
263 int& nconv,
double& diff)
271 int nin = m_fopr_d->field_nin();
272 int nvol2 = m_fopr_d->field_nvol();
273 int nex = m_fopr_d->field_nex();
274 int nvol = 2 * nvol2;
276 AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
277 AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
278 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
284 index_eo.split(be, bo, b);
287 invert_De(xe, xo, be, bo, nconv, diff);
291 index_eo.merge(xq, xe, xo);
295 m_elapsed_time += m_timer.elapsed_sec();
296 m_flop_count += m_solver->flop_count();
301 template<
typename AFIELD_d,
typename AFIELD_f>
304 int& nconv,
double& diff)
312 int nin = m_fopr_d->field_nin();
313 int nvol2 = m_fopr_d->field_nvol();
314 int nex = m_fopr_d->field_nex();
316 AFIELD_d be(nin, nvol2, nex), bo(nin, nvol2, nex);
317 AFIELD_d xe(nin, nvol2, nex), xo(nin, nvol2, nex);
318 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
324 index_eo.split(be, bo, b);
327 m_fopr_d->mult_gm5(y1, be);
328 m_fopr_d->mult_gm5(y2, bo);
333 invert_De(xe, xo, y1, y2, nconv1, diff1);
340 m_fopr_d->mult_gm5(y1, xe);
341 m_fopr_d->mult_gm5(y2, xo);
344 invert_De(xe, xo, y1, y2, nconv1, diff1);
351 index_eo.merge(xq, xe, xo);
355 m_elapsed_time += m_timer.elapsed_sec();
356 m_flop_count += m_solver->flop_count();
361 template<
typename AFIELD_d,
typename AFIELD_f>
365 int& nconv,
double& diff)
367 int nin = m_fopr_d->field_nin();
368 int nvol2 = m_fopr_d->field_nvol();
369 int nex = m_fopr_d->field_nex();
370 int nvol = 2 * nvol2;
372 AFIELD_d y1(nin, nvol2, nex), y2(nin, nvol2, nex);
377 m_fopr_d->mult(y1, bo,
"Doo_inv");
380 m_fopr_d->mult(y2, y1,
"Deo");
386 m_fopr_d->mult(y1, be,
"Dee_inv");
389 m_fopr_d->set_mode(
"D");
390 m_fopr_f->set_mode(
"D");
394 m_solver->solve(xe, y1, nconv, diff);
397 m_fopr_d->mult(y1, xe,
"Doe");
403 m_fopr_d->mult(xo, y1,
"Doo_inv");
410 template<
typename AFIELD_d,
typename AFIELD_f>
413 return m_solver->flop_count();
418 template<
typename AFIELD_d,
typename AFIELD_f>
422 m_elapsed_time = 0.0;
427 template<
typename AFIELD_d,
typename AFIELD_f>
430 double flops = m_flop_count / m_elapsed_time;
431 double gflops = flops * 1.0e-9;
434 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
435 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
436 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
437 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
442 template<
typename AFIELD_d,
typename AFIELD_f>
444 const std::string mode,
447 int nin = m_fopr_d->field_nin();
448 int nvol = m_fopr_d->field_nvol();
449 int nex = m_fopr_d->field_nex();
451 unique_ptr<Timer> timer(
new Timer);
453 std::string mode_prev_d = m_fopr_d->get_mode();
454 std::string mode_prev_f = m_fopr_f->get_mode();
456 m_fopr_d->set_mode(mode);
457 m_fopr_f->set_mode(mode);
460 AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
468 for (
int i = 0; i < Nrepeat; ++i) {
469 m_fopr_d->mult(axq, abq);
470 m_fopr_d->mult(abq, axq);
477 double flop_fopr = m_fopr_d->flop_count();
478 double flop_total = flop_fopr * double(2 * Nrepeat);
480 double elapsed_time = timer->elapsed_sec();
481 double flops = flop_total / elapsed_time;
482 double gflops = flops * 1.0e-9;
485 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
486 vout.
general(m_vl,
" mult mode = %s\n", mode.c_str());
488 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
489 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
490 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
491 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
494 AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
503 for (
int i = 0; i < Nrepeat; ++i) {
504 m_fopr_f->mult(axq, abq);
505 m_fopr_f->mult(abq, axq);
512 flop_fopr = m_fopr_f->flop_count();
513 flop_total = flop_fopr * double(2 * Nrepeat);
515 elapsed_time = timer->elapsed_sec();
516 flops = flop_total / elapsed_time;
517 gflops = flops * 1.0e-9;
520 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
521 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
522 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
523 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
525 m_fopr_d->set_mode(mode_prev_d);
526 m_fopr_f->set_mode(mode_prev_f);