Bridge++  Ver. 2.0.2
aprecond_MG-tmpl.h
Go to the documentation of this file.
1 
10 #include <cassert>
13 
14 
15 template<typename AFIELD, typename AFIELD2>
16 const std::string APrecond_MG<AFIELD, AFIELD2>::class_name = "APrecond_MG";
17 
18 //====================================================================
19 namespace {
20 #ifndef AFILED_HAS_SUB
21  template<typename AFIELD>
22  inline void sub(AFIELD& v, const AFIELD& w)
23  {
24  typedef typename AFIELD::real_t real_t;
25  axpy(v, (real_t)-1.0, w);
26  }
27 #endif
28 
29 #ifndef AFILED_HAS_ADD
30  template<typename AFIELD>
31  inline void add(AFIELD& v, const AFIELD& w)
32  {
33  typedef typename AFIELD::real_t real_t;
34  axpy(v, (real_t)1.0, w);
35  }
36 #endif
37 
38  static inline int get_flop_restrict_site(int ncol) // ncol=2*nvec; spin proj + mult
39  {
40  return ncol * (2 * 6 + 8 * 12);
41  }
42 
43 
44  static inline int get_flop_prolong_site(int ncol) // ncol=2*nvec; spin proj + mult
45  {
46  return ncol * (2 * 6 + 8 * 12);
47  }
48 
49 
50  template<typename INDEX, typename AFIELD>
51  void convert(const INDEX&, AFIELD& w, const INDEX&, const AFIELD& v)
52  {
53  copy(w, v);
54  }
55 }
56 
57 //====================================================================
58 template<typename AFIELD, typename AFIELD2>
60 {
62 
63  reset_flop_count();
64 }
65 
66 
67 //====================================================================
68 template<typename AFIELD, typename AFIELD2>
70 {
72  m_coarse_solver = solver;
73  int nin = solver->get_fopr()->field_nin();
74  int nvol = solver->get_fopr()->field_nvol();
75  int nex = solver->get_fopr()->field_nex();
76  m_coarse_v.reset(nin, nvol, nex);
77  m_coarse_w.reset(nin, nvol, nex);
78  m_coarse_ncol = nin / 2;
79 }
80 
81 
82 //====================================================================
83 template<typename AFIELD, typename AFIELD2>
85 {
87  m_smoother = solver;
88  int nin = solver->get_fopr()->field_nin();
89  int nvol = solver->get_fopr()->field_nvol();
90  int nex = solver->get_fopr()->field_nex();
91  m_fine_w.reset(nin, nvol, nex);
92  m_fine_v.reset(nin, nvol, nex);
93  m_fine_r.reset(nin, nvol, nex);
94  m_fine_tmp.reset(nin, nvol, nex);
95 }
96 
97 
98 //====================================================================
99 template<typename AFIELD, typename AFIELD2>
101 {
102  // ThreadManager::assert_single_thread(class_name);
103 }
104 
105 
106 //====================================================================
107 template<typename AFIELD, typename AFIELD2>
109 {
110  int nin = v.nin();
111  int nvol = v.nvol();
112  int nex = v.nex();
113  assert(w.check_size(nin, nvol, nex));
114  assert(m_fine_v.check_size(nin, nvol, nex));
115  assert(m_fine_w.check_size(nin, nvol, nex));
116 
117  Timer timer;
118  Timer timer_each;
119  timer.start();
120  // want to solve |v> =Dinv |w>
121 
122 #pragma omp master
123  {
124  m_coarse_solver->set_init_mode(ASolver<AFIELD2>::InitialGuess::ZERO);
125  }
126 
127 #pragma omp barrier
128  timer_each.reset();
129  timer_each.start();
130  double time0 = 0.0;
131  // use the normalized source
132  double norm2_in = norm2(w);
133  double scale_in = sqrt(norm2_in);
134  copy(v, w);
135  v.scal(1.0 / scale_in);
136  timer_each.stop();
137  double time1 = timer_each.elapsed_sec();
138  double time_double = time1 - time0;
139  time0 = time1;
140 
141  // float <- double on fine lattice assuming lexical indexing
142  timer_each.start();
144  AIndex_lex<real_t, IMPL2> index_f; // assumes float
145  convert(index_f, m_fine_w, index_d, v); // if AFILED==AFIELD2, calls the local version
146  timer_each.stop();
147  time1 = timer_each.elapsed_sec();
148  double time_d2f = time1 - time0;
149  time0 = time1;
150 
151  // single precision part
152  // timer_each.start();
153  mult_single(m_fine_v, m_fine_w);
154  // timer_each.stop();
155  // time1=timer_each.elapsed_sec();
156  // double time_mult_single=time1-time0; time0=time1;// measured in mult_sinle
157 
158  // double <- float
159  timer_each.start();
160  convert(index_d, v, index_f, m_fine_v); // if AFILED==AFIELD2, calls the local version
161  timer_each.stop();
162  time1 = timer_each.elapsed_sec();
163  double time_f2d = time1 - time0;
164  time0 = time1;
165 
166  timer_each.start();
167  v.scal(scale_in); // recover the normalization
168  timer_each.stop();
169  time1 = timer_each.elapsed_sec();
170  time_double += (time1 - time0);
171  time0 = time1;
172 
173 #pragma omp master
174  { // flop counting
175  const size_t Lvol = CommonParameters::Lvol();
176  const double flop_other_double = 4 * 24 * static_cast<double>(Lvol);
177  m_accum_flop_double += flop_other_double;
178  }
179 #pragma omp barrier
180 
181  timer.stop();
182 
183 #pragma omp master
184  {
185  double elapsed_time = timer.elapsed_sec();
186  // vout.general(" time for mult in %s = %e sec\n",
187  // class_name.c_str(), elapsed_time);
188  ++m_num_mult_called;
189  m_time_mult_total += elapsed_time;
190  m_time_d2f += time_d2f;
191  m_time_f2d += time_f2d;
192  m_time_double += time_double;
193  }
194 }
195 
196 
197 //====================================================================
198 template<typename AFIELD, typename AFIELD2>
200  const AFIELD2& w)
201 {
202  // want to solve |v> =Dinv |w>
203  // float [pre]
204  // guess: |v> = |w>
205  // |r> = |w> - D|v> = |v> -D|v>
206  // one more time
207  //
208  //
209  // float: coarse
210  // |ww> = Dinv[coarse]|r>
211  // |v> := |v> + |ww>
212  // float: calc res.
213  // |r> = |w> - D|v>
214  // float: smoother
215  // |ww> = Dinv[smoother]|rr>
216  // |v> := |v> + |ww>
217 
218  //initial barrier --- no need as m_afoprF has a barrier
219  //#pragma omp barrier
220 
221  Timer timer;
222  double time0 = 0.0;
223 
224  // |v> = |w>
225  // -|r> = |w> - D|v>
226  timer.start();
227  m_afoprF->mult(m_fine_r, w);
228  copy(v, w);
229  sub(m_fine_r, w);
230  timer.stop();
231  double time1 = timer.elapsed_sec();
232  double time_residual = time1 - time0;
233  time0 = time1;
234 
235  // restriction: corase vector <- fine vector
236  timer.start();
237  m_multigrid->make_coarse_vector(m_coarse_v, m_fine_r);
238 #pragma omp barrier
239  timer.stop();
240  time1 = timer.elapsed_sec();
241  double time_restriction = time1 - time0;
242  time0 = time1;
243  // vout.general(" time for restriction = %e sec\n", time_restriction);
244 
245  // coarse solver: coarse_w = (Dinv) coarse_v
246  timer.start();
247  int coarse_nconv = -1;
248  real_t coarse_diff = -1.0;
249 
250  m_coarse_solver->solve(m_coarse_w, m_coarse_v, coarse_nconv, coarse_diff);
251  vout.detailed("%s: coarse: nconv = %d diff = %e\n",
252  class_name.c_str(), coarse_nconv, coarse_diff);
253  timer.stop();
254  time1 = timer.elapsed_sec();
255  double time_coarse_solver = time1 - time0;
256  time0 = time1;
257  // vout.general(" time for coarse solver = %e sec\n",
258  // time_coarse_solver);
259 
260  // prolong the solution to the fine grid
261  // coarse_w => fine_tmp
262  timer.start();
263  m_multigrid->make_fine_vector(m_fine_tmp, m_coarse_w);
264  timer.stop();
265  time1 = timer.elapsed_sec();
266  double time_prolongation = time1 - time0;
267  time0 = time1;
268  // vout.general(" time for prologation = %e sec\n", time_prolongation);
269 
270  // update solution and calculate residual vector
271  // -|v> := |w> - D |v>
272  timer.start();
273  sub(v, m_fine_tmp);
274  m_afoprF->mult(m_fine_r, v);
275  sub(m_fine_r, w);
276 #ifdef DEBUG
277  {
278  double r2 = m_fine_r.norm2();
279  vout.general("%s: after the coarse solver, r2=%15.7e\n", class_name.c_str(), r2);
280  }
281 #endif
282  timer.stop();
283  time1 = timer.elapsed_sec();
284  time_residual += (time1 - time0);
285  time0 = time1;
286 
287  // call smoother
288  // |tmp> = Dinv[fine] |v> smoother
289  // |w> = |w> + |tmp> update solution
290  timer.start();
291 #pragma omp barrier
292 
293  int smoother_nconv = -1;
294  real_t smoother_diff = -1.0;
295  m_smoother->solve(m_fine_tmp, m_fine_r, smoother_nconv, smoother_diff);
296 
297  vout.detailed("%s: smoother: nconv = %d diff = %e\n",
298  class_name.c_str(), smoother_nconv, smoother_diff);
299  timer.stop();
300  time1 = timer.elapsed_sec();
301  double time_smoother = time1 - time0;
302  time0 = time1;
303  // vout.general(" time for smoother = %e sec\n", time_smoother);
304 
305  // update the solution
306  timer.start();
307  sub(v, m_fine_tmp);
308 #ifdef DEBUG
309  {
310  m_afoprF->mult(m_fine_r, v);
311  sub(m_fine_r, w);
312  double r2 = m_fine_r.norm2();
313  vout.general("%s: after the smoother, r2=%15.7e\n", class_name.c_str(), r2);
314  }
315 #endif
316  timer.stop();
317  time1 = timer.elapsed_sec();
318  time_residual += (time1 - time0);
319  time0 = time1;
320  double time_single_total = time1;
321  // double elapsed_time = time1;
322  // vout.general(" time for mult_single (total) = %e sec\n",
323  // elapsed_time);
324 
325  update_flop_count();
326 
327 #pragma omp master
328  {
329  m_time_restriction += time_restriction;
330  m_time_coarse_solver += time_coarse_solver;
331  m_time_prolongation += time_prolongation;
332  m_time_smoother += time_smoother;
333  m_time_residual += time_residual;
334  m_time_mult_single_total += time_single_total;
335  ++m_num_mult_single_called;
336  }
337 
338 #pragma omp barrier
339 }
340 
341 
342 //====================================================================
343 template<typename AFIELD, typename AFIELD2>
345  const AFIELD2& w)
346 {
347  // m_coarse_solver->set_init_mode(ASolver<AFIELD2>::InitialGuess::ZERO);
348  mult_single(v, w);
349 #pragma omp barrier
350 }
351 
352 
353 //====================================================================
354 template<typename AFIELD, typename AFIELD2>
356 {
357 #pragma omp master
358  {
359  const size_t Lvol = CommonParameters::Lvol();
360 
361  const double flop_restrict_site = get_flop_restrict_site(m_coarse_ncol);
362  const double flop_prolong_site = get_flop_prolong_site(m_coarse_ncol);
363  const double flop_other_float
364  = m_afoprF->flop_count()
365  + (flop_restrict_site + flop_prolong_site + 4 * 24)
366  * static_cast<double>(Lvol)
367  + m_afoprF->flop_count();
368  double tmp = 0.0;
369  m_accum_flop_coarse += m_coarse_solver->flop_count();
370  m_accum_flop_smoother += m_smoother->flop_count();
371  m_accum_flop_other += flop_other_float;
372  tmp += m_coarse_solver->flop_count();
373  tmp += m_smoother->flop_count();
374  tmp += flop_other_float;
375  m_accum_flop_float += tmp;
376  m_accum_flop_restrict += flop_restrict_site * static_cast<double>(Lvol);
377  m_accum_flop_prolong += flop_prolong_site * static_cast<double>(Lvol);
378  vout.general("update_flop_count: flop_other_float=%e, m_accum_flop_float=%e, tmp=%e\n",
379  flop_other_float, m_accum_flop_float, tmp);
380  //#pragma omp flush(m_accum_flop_float, m_accum_flop_other, m_accum_flop_smoother, m_accum_flop_coarse)
381  }
382 }
383 
384 
385 //====================================================================
386 template<typename AFIELD, typename AFIELD2>
388 {
389  m_accum_flop_coarse = 0.0;
390  m_accum_flop_smoother = 0.0;
391  m_accum_flop_other = 0.0;
392  m_accum_flop_float = 0.0;
393  m_accum_flop_double = 0.0;
394 
395  m_time_f2d = 0.0;
396  m_time_d2f = 0.0;
397  m_time_double = 0.0;
398  m_time_residual = 0.0;
399  m_time_restriction = 0.0;
400  m_time_coarse_solver = 0.0;
401  m_time_smoother = 0.0;
402  m_time_prolongation = 0.0;
403  m_time_mult_total = 0.0;
404  m_time_mult_single_total = 0.0;
405 
406  m_num_mult_called = 0;
407  m_num_mult_single_called = 0;
408 }
409 
410 
411 //====================================================================
412 template<typename AFIELD, typename AFIELD2>
414 {
415  vout.general("%s: time budget\n", class_name.c_str());
416  vout.general("Elapsed time: restriction : total %14.6e\n", m_time_restriction);
417  vout.general("Elapsed time: coarse solver : total %14.6e\n", m_time_coarse_solver);
418  vout.general("Elapsed time: prolongation : total %14.6e\n", m_time_prolongation);
419  vout.general("Elapsed time: smoother : total %14.6e\n", m_time_smoother);
420  vout.general("Elapsed time: resudial(+lin.alg) : total %14.6e\n", m_time_residual);
421  vout.general("Elapsed time: convert(f2d) : total %14.6e\n", m_time_f2d);
422  vout.general("Elapsed time: convert(d2f) : total %14.6e\n", m_time_d2f);
423  vout.general("Elapsed time: double : total %14.6e\n", m_time_double);
424  vout.general("Elapsed time: mult_single(total) : total %14.6e\n", m_time_mult_single_total);
425  vout.general("Elapsed time: mult (total) : total %14.6e\n", m_time_mult_total);
426  vout.general(" number of mult call : %d\n", m_num_mult_called);
427 
428  double flop_restrict = get_flop_restrict_site(m_coarse_ncol)
429  * m_num_mult_single_called * static_cast<double>(CommonParameters::Lvol());
430  double flop_prolong = get_flop_prolong_site(m_coarse_ncol)
431  * m_num_mult_single_called * static_cast<double>(CommonParameters::Lvol());
432 
433  vout.general(" Flops: smoother (float) : %14.6e GFlop/s\n", flop_count_smoother() / m_time_smoother * 1.0e-9);
434  vout.general(" Flops: coarse solver (float) : %14.6e GFlop/s\n", flop_count_coarse() / m_time_coarse_solver * 1.0e-9);
435  vout.general(" Flops: float total : %14.6e GFlop/s\n", m_accum_flop_float / m_time_mult_single_total * 1.0e-9);
436  vout.general(" Flops: restrict (float) : %14.6e GFlop/s\n", flop_restrict / m_time_restriction * 1.0e-9);
437  vout.general(" Flops: prolong (float) : %14.6e GFlop/s\n", flop_prolong / m_time_prolongation * 1.0e-9);
438 }
439 
440 
441 //============================================================END=====
aprecond_MG.h
Multigrid preconditionor (SIMD version)
CommonParameters::Lvol
static long_t Lvol()
Definition: commonParameters.h:95
ASolver< AFIELD2 >
AIndex_lex
Definition: aindex_lex_base.h:17
convert
void convert(INDEX &index, AFIELD &v, const Field &w)
Definition: afield-inc.h:129
Field::scal
friend void scal(Field &x, const double a)
scal(x, a): x = a * x
Definition: field.cpp:261
Bridge::BridgeIO::detailed
void detailed(const char *format,...)
Definition: bridgeIO.cpp:219
Field::nex
int nex() const
Definition: field.h:128
Field::check_size
bool check_size(const int nin, const int nvol, const int nex) const
checking size parameters. [23 May 2016 H.Matsufuru]
Definition: field.h:135
APrecond_MG::real_t
AFIELD2::real_t real_t
Definition: aprecond_MG.h:43
axpy
void axpy(Field &y, const double a, const Field &x)
axpy(y, a, x): y := a * x + y
Definition: field.cpp:380
APrecond_MG::mult_single
void mult_single(AFIELD2 &, const AFIELD2 &)
Definition: aprecond_MG-tmpl.h:199
real_t
double real_t
Definition: bridgeQXS_Clover_coarse_double.cpp:16
norm2
REALTYPE norm2(const AField< REALTYPE, QXS > &v)
Definition: afield-inc.h:453
Field::nin
int nin() const
Definition: field.h:126
Timer
Definition: timer.h:31
copy
void copy(Field &y, const Field &x)
copy(y, x): y = x
Definition: field.cpp:212
Field::real_t
double real_t
Definition: field.h:51
Timer::start
void start()
Definition: timer.cpp:44
APrecond_MG::tidyup
void tidyup()
Definition: aprecond_MG-tmpl.h:100
APrecond_MG::mult
void mult(AFIELD &, const AFIELD &)
Definition: aprecond_MG-tmpl.h:108
threadManager.h
APrecond_MG::set_smoother
void set_smoother(ASolver< AFIELD2 > *solver)
Definition: aprecond_MG-tmpl.h:84
Field::nvol
int nvol() const
Definition: field.h:127
Test_Solver_Wilson::solver
int solver(const std::string &)
Definition: test_Solver_Wilson.cpp:134
APrecond_MG::reset_flop_count
void reset_flop_count()
Definition: aprecond_MG-tmpl.h:387
APrecond_MG::init
void init()
Definition: aprecond_MG-tmpl.h:59
APrecond_MG::report_timer
void report_timer()
Definition: aprecond_MG-tmpl.h:413
APrecond_MG::set_coarse_solver
void set_coarse_solver(ASolver< AFIELD2 > *solver)
Definition: aprecond_MG-tmpl.h:69
APrecond_MG::update_flop_count
void update_flop_count()
Definition: aprecond_MG-tmpl.h:355
APrecond_MG
Definition: aprecond_MG.h:39
APrecond_MG::mult_as_setup
void mult_as_setup(AFIELD2 &, const AFIELD2 &)
Definition: aprecond_MG-tmpl.h:344
Field
Container of Field-type object.
Definition: field.h:46
Timer::elapsed_sec
double elapsed_sec() const
Definition: timer.cpp:107
Bridge::BridgeIO::general
void general(const char *format,...)
Definition: bridgeIO.cpp:200
Timer::stop
void stop()
Definition: timer.cpp:69
ThreadManager::assert_single_thread
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
Definition: threadManager.cpp:372
Bridge::vout
BridgeIO vout
Definition: bridgeIO.cpp:512
Timer::reset
void reset()
Definition: timer.cpp:97