// Out-of-class definition of a templated static member initialized with the
// string literal "Fprop_alt_Standard_lex_Mixedprec".
// NOTE(review): the declarator line is not visible in this excerpt;
// presumably this is the `class_name` member that the log messages below
// print via class_name.c_str() -- confirm against the full file.
15 template<
typename AFIELD_d,
typename AFIELD_f>
17 =
"Fprop_alt_Standard_lex_Mixedprec";
// Setup routine for the variant without link smearing (per its own log
// message). Visible steps: read "fermion_type" from params_fopr, override
// "convergence_criterion_squared" in params_solver2, read "solver_type" from
// params_solver, hand the parameter sets to the two solvers, log completion.
// NOTE(review): the signature and the construction of the operators/solvers
// are not visible in this excerpt (original line numbers skip).
20 template<
typename AFIELD_d,
typename AFIELD_f>
// Announce the start of setup at "general" verbosity.
26 vout.
general(m_vl,
"%s: being setup (without link smearing).\n",
// Fermion operator type name taken from the operator parameter set.
31 string fopr_type = params_fopr.
get_string(
"fermion_type");
// params_solver2 gets a fixed squared convergence criterion of 1.0e-14.
// NOTE(review): presumably params_solver2 is a copy of params_solver used for
// the inner (m_solver_prec) solver -- its origin is not visible here; confirm.
41 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-14);
43 string solver_type = params_solver.
get_string(
"solver_type");
// m_solver_prec receives the modified set; m_solver the caller's original set.
45 m_solver_prec->set_parameters(params_solver2);
53 m_solver->set_parameters(params_solver);
57 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Setup routine for the variant with link smearing (per its own log message).
// Same visible sequence as the non-smearing setup, plus storing the smearing
// object passed in as `dr_smear` in m_dr_smear.
// NOTE(review): the signature and the construction of the operators/solvers
// are not visible in this excerpt (original line numbers skip).
62 template<
typename AFIELD_d,
typename AFIELD_f>
// Announce the start of setup at "general" verbosity.
69 vout.
general(m_vl,
"%s: being setup (with link smearing).\n",
// Keep the caller-supplied smearing handle in a member.
// NOTE(review): ownership of dr_smear cannot be determined from here.
72 m_dr_smear = dr_smear;
// Fermion operator type name taken from the operator parameter set.
74 string fopr_type = params_fopr.
get_string(
"fermion_type");
// params_solver2 gets a fixed squared convergence criterion of 1.0e-14.
// NOTE(review): presumably params_solver2 is a copy of params_solver used for
// the inner (m_solver_prec) solver -- its origin is not visible here; confirm.
84 params_solver2.
set_double(
"convergence_criterion_squared", 1.0e-14);
86 string solver_type = params_solver.
get_string(
"solver_type");
// m_solver_prec receives the modified set; m_solver the caller's original set.
88 m_solver_prec->set_parameters(params_solver2);
95 m_solver->set_parameters(params_solver);
99 vout.
general(m_vl,
"%s: setup finished.\n", class_name.c_str());
// Cleanup routine: releases m_solver_prec and the two kernel objects
// m_kernel_d and m_kernel_f.
// NOTE(review): other deletions (original lines 105-111) are not visible in
// this excerpt; the function name is also not visible.
104 template<
typename AFIELD_d,
typename AFIELD_f>
109 delete m_solver_prec;
// The `!= 0` guards are redundant (deleting a null pointer is a no-op in C++)
// but harmless.
112 if (m_kernel_d != 0)
delete m_kernel_d;
113 if (m_kernel_f != 0)
delete m_kernel_f;
// Forwards the configuration object U to both fermion operators (m_fopr_d and
// m_fopr_f) so they are set from the same U.
// NOTE(review): the signature is not visible here; any handling of link
// smearing before these calls cannot be confirmed from this excerpt.
118 template<
typename AFIELD_d,
typename AFIELD_f>
121 m_fopr_d->set_config(U);
122 m_fopr_f->set_config(U);
// Dispatching entry point: solves for xq given source b, choosing the solve
// routine from m_mode. nconv and diff are output references that are passed
// straight through to the selected routine.
// NOTE(review): the leading parameters and the first branch condition
// (presumably m_mode == "D") are not visible in this excerpt.
127 template<
typename AFIELD_d,
typename AFIELD_f>
129 int& nconv,
double& diff)
// Trace message emitted only at "paranoiac" verbosity.
131 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
135 invert_D(xq, b, nconv, diff);
136 }
else if (m_mode ==
"DdagD") {
137 invert_DdagD(xq, b, nconv, diff);
// Arguments of a call whose head is not visible; presumably the
// unsupported-mode diagnostic, since it formats class_name and m_mode.
140 class_name.c_str(), m_mode.c_str());
// Solve with both operators set to mode "D", converting the source/solution
// through m_fopr_d when the operator reports needs_convert().
// Outputs: nconv and diff (diff is assigned from diff2 after the solve).
// NOTE(review): the function name is inferred from the set_mode("D") calls and
// the dispatcher; the declarations of axq, abq and diff2, the timer start and
// the else-branches of the conversions are not visible in this excerpt.
147 template<
typename AFIELD_d,
typename AFIELD_f>
149 int& nconv,
double& diff)
// Field dimensions taken from the double-precision operator.
154 int nin = m_fopr_d->field_nin();
155 int nvol = m_fopr_d->field_nvol();
156 int nex = m_fopr_d->field_nex();
// Source b is converted into abq when the operator requires conversion.
165 if (m_fopr_d->needs_convert()) {
166 m_fopr_d->convert(abq, b);
// Both operators are switched to the same mode before solving.
172 m_fopr_d->set_mode(
"D");
173 m_fopr_f->set_mode(
"D");
179 m_solver->solve(axq, abq, nconv, diff2);
// Solution axq is mapped back into xq via reverse() when conversion applies.
182 if (m_fopr_d->needs_convert()) {
183 m_fopr_d->reverse(xq, axq);
// diff2 is explicitly cast to double for the caller-facing output.
188 diff = double(diff2);
// Accumulate timing and flop statistics used by the performance report.
191 m_elapsed_time += m_timer.elapsed_sec();
192 m_flop_count += m_solver->flop_count();
// Solve with both operators set to mode "DdagD", converting the
// source/solution through m_fopr_d when the operator reports needs_convert().
// Outputs: nconv and diff (diff is assigned from diff2 after the solve).
// NOTE(review): the function name is inferred from the set_mode("DdagD") calls
// and the dispatcher; the declarations of axq, abq and diff2, the timer start
// and the else-branches of the conversions are not visible in this excerpt.
197 template<
typename AFIELD_d,
typename AFIELD_f>
200 int& nconv,
double& diff)
// Field dimensions taken from the double-precision operator.
205 int nin = m_fopr_d->field_nin();
206 int nvol = m_fopr_d->field_nvol();
207 int nex = m_fopr_d->field_nex();
// Source b is converted into abq when the operator requires conversion.
216 if (m_fopr_d->needs_convert()) {
217 m_fopr_d->convert(abq, b);
// Both operators are switched to the same mode before solving.
225 m_fopr_d->set_mode(
"DdagD");
226 m_fopr_f->set_mode(
"DdagD");
230 m_solver->solve(axq, abq, nconv, diff2);
// Solution axq is mapped back into xq via reverse() when conversion applies.
232 if (m_fopr_d->needs_convert()) {
233 m_fopr_d->reverse(xq, axq);
// diff2 is explicitly cast to double for the caller-facing output.
239 diff = double(diff2);
// Accumulate timing and flop statistics used by the performance report.
242 m_elapsed_time += m_timer.elapsed_sec();
243 m_flop_count += m_solver->flop_count();
// Second dispatching entry point with the same visible structure as the first:
// logs at "paranoiac" level, then selects invert_D or invert_DdagD by m_mode.
// NOTE(review): the leading parameters are not visible; presumably this
// overload takes a different field type for xq/b than the first one (the
// routines below call solve(xq, b, ...) directly without conversion) -- confirm.
248 template<
typename AFIELD_d,
typename AFIELD_f>
251 int& nconv,
double& diff)
// Trace message emitted only at "paranoiac" verbosity.
253 vout.
paranoiac(m_vl,
"%s: invert is called.\n", class_name.c_str());
257 invert_D(xq, b, nconv, diff);
258 }
else if (m_mode ==
"DdagD") {
259 invert_DdagD(xq, b, nconv, diff);
// Arguments of a call whose head is not visible; presumably the
// unsupported-mode diagnostic, since it formats class_name and m_mode.
262 class_name.c_str(), m_mode.c_str());
// Solve with both operators set to mode "D", passing xq and b to the solver
// directly (no convert()/reverse() calls are visible in this variant).
// Outputs: nconv and diff (diff is assigned from diff2 after the solve).
// NOTE(review): the function name is inferred; the declaration of diff2 and
// the timer start are not visible in this excerpt.
269 template<
typename AFIELD_d,
typename AFIELD_f>
272 int& nconv,
double& diff)
// Field dimensions taken from the double-precision operator.
// NOTE(review): no use of nin/nvol/nex is visible in this fragment.
277 int nin = m_fopr_d->field_nin();
278 int nvol = m_fopr_d->field_nvol();
279 int nex = m_fopr_d->field_nex();
// Both operators are switched to the same mode before solving.
281 m_fopr_d->set_mode(
"D");
282 m_fopr_f->set_mode(
"D");
288 m_solver->solve(xq, b, nconv, diff2);
// diff2 is explicitly cast to double for the caller-facing output.
291 diff = double(diff2);
// Accumulate timing and flop statistics used by the performance report.
294 m_elapsed_time += m_timer.elapsed_sec();
295 m_flop_count += m_solver->flop_count();
// Solve with both operators set to mode "DdagD", passing xq and b to the
// solver directly (no convert()/reverse() calls are visible in this variant).
// Outputs: nconv and diff (diff is assigned from diff2 after the solve).
// NOTE(review): the function name is inferred; the declaration of diff2 and
// the timer start are not visible in this excerpt.
300 template<
typename AFIELD_d,
typename AFIELD_f>
303 int& nconv,
double& diff)
// Field dimensions taken from the double-precision operator.
// NOTE(review): no use of nin/nvol/nex is visible in this fragment.
308 int nin = m_fopr_d->field_nin();
309 int nvol = m_fopr_d->field_nvol();
310 int nex = m_fopr_d->field_nex();
// Both operators are switched to the same mode before solving.
312 m_fopr_d->set_mode(
"DdagD");
313 m_fopr_f->set_mode(
"DdagD");
319 m_solver->solve(xq, b, nconv, diff2);
// diff2 is explicitly cast to double for the caller-facing output.
322 diff = double(diff2);
// Accumulate timing and flop statistics used by the performance report.
325 m_elapsed_time += m_timer.elapsed_sec();
326 m_flop_count += m_solver->flop_count();
// Returns whatever m_solver->flop_count() reports. This is the solver's own
// count, not the accumulated m_flop_count member.
// NOTE(review): the signature (name, return type) is not visible here.
331 template<
typename AFIELD_d,
typename AFIELD_f>
334 return m_solver->flop_count();
// Resets the accumulated elapsed time to zero.
// NOTE(review): the function name and any other resets (e.g. of m_flop_count
// on original lines 340-342) are not visible in this excerpt.
339 template<
typename AFIELD_d,
typename AFIELD_f>
343 m_elapsed_time = 0.0;
// Prints the accumulated solver statistics at "general" verbosity: elapsed
// time, total flop count, and the derived rate in GFlops.
// NOTE(review): the function name is not visible in this excerpt.
348 template<
typename AFIELD_d,
typename AFIELD_f>
// NOTE(review): no guard against m_elapsed_time == 0 is visible; if this runs
// right after a reset the quotient is not finite -- confirm whether the
// missing lines handle that case.
351 double flops = m_flop_count / m_elapsed_time;
// Convert flop/s to GFlop/s.
352 double gflops = flops * 1.0e-9;
355 vout.
general(m_vl,
"%s: solver performance:\n", class_name.c_str());
356 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", m_elapsed_time);
357 vout.
general(m_vl,
" Flop(total) = %18.0f\n", m_flop_count);
358 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Benchmarks the operator mult() in the requested mode: first with m_fopr_d on
// AFIELD_d fields, then with m_fopr_f on AFIELD_f fields, printing elapsed
// time, flops per mult, total flops and GFlops for each. The operators' modes
// are saved up front and restored at the end.
// NOTE(review): the function name, the Nrepeat parameter declaration, field
// initialization, and the timer start/stop calls are not visible in this
// excerpt (original line numbers skip).
363 template<
typename AFIELD_d,
typename AFIELD_f>
365 const std::string mode,
// Field dimensions taken from the double-precision operator; also reused for
// the AFIELD_f fields below.
368 int nin = m_fopr_d->field_nin();
369 int nvol = m_fopr_d->field_nvol();
370 int nex = m_fopr_d->field_nex();
// Local timer, separate from the member m_timer used by the solve routines.
372 unique_ptr<Timer> timer(
new Timer);
// Remember the current modes so they can be restored after the benchmark.
374 std::string mode_prev_d = m_fopr_d->get_mode();
375 std::string mode_prev_f = m_fopr_f->get_mode();
377 m_fopr_d->set_mode(mode);
378 m_fopr_f->set_mode(mode);
// --- first benchmark: m_fopr_d on AFIELD_d fields ---
// NOTE(review): axq/abq are declared again below with type AFIELD_f, so the
// two benchmarks presumably live in separate brace scopes not visible here.
381 AFIELD_d axq(nin, nvol, nex), abq(nin, nvol, nex);
// Each iteration applies the operator twice, ping-ponging between the fields.
389 for (
int i = 0; i < Nrepeat; ++i) {
390 m_fopr_d->mult(axq, abq);
391 m_fopr_d->mult(abq, axq);
// Two mult() calls per iteration, hence the factor 2 * Nrepeat.
398 double flop_fopr = m_fopr_d->flop_count();
399 double flop_total = flop_fopr * double(2 * Nrepeat);
401 double elapsed_time = timer->elapsed_sec();
402 double flops = flop_total / elapsed_time;
403 double gflops = flops * 1.0e-9;
406 vout.
general(m_vl,
"%s: mult performance:\n", class_name.c_str());
408 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
409 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
410 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
411 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// --- second benchmark: m_fopr_f on AFIELD_f fields ---
414 AFIELD_f axq(nin, nvol, nex), abq(nin, nvol, nex);
423 for (
int i = 0; i < Nrepeat; ++i) {
424 m_fopr_f->mult(axq, abq);
425 m_fopr_f->mult(abq, axq);
// The double-typed accumulators from the first benchmark are reused.
432 flop_fopr = m_fopr_f->flop_count();
433 flop_total = flop_fopr * double(2 * Nrepeat);
435 elapsed_time = timer->elapsed_sec();
436 flops = flop_total / elapsed_time;
437 gflops = flops * 1.0e-9;
440 vout.
general(m_vl,
" Elapsed time = %14.6f sec\n", elapsed_time);
441 vout.
general(m_vl,
" Flop(Fopr) = %18.0f\n", flop_fopr);
442 vout.
general(m_vl,
" Flop(total) = %18.0f\n", flop_total);
443 vout.
general(m_vl,
" Performance = %11.3f GFlops\n", gflops);
// Restore the modes that were active before the benchmark.
445 m_fopr_d->set_mode(mode_prev_d);
446 m_fopr_f->set_mode(mode_prev_f);