Bridge++  Version 1.6.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
threadManager_OpenMP.cpp
Go to the documentation of this file.
1 
15 
16 #include <omp.h>
17 
19 #include "IO/bridgeIO.h"
20 using Bridge::vout;
21 
22 namespace ThreadManager_OpenMP_Reduce {
23  template<typename REALTYPE>
24  void sum_global(REALTYPE *a,
25  const int num,
26  std::vector<REALTYPE>& array_reduction,
27  const int each_buf_size,
28  const int i_thread, const int Nthread)
29  {
30  typedef REALTYPE real_t;
31  int remaining = num;
32  std::vector<real_t> sum;
33 #ifdef LIB_CPP11
34  real_t *psum = nullptr;
35 #else
36  real_t *psum = NULL;
37 #endif
38 
39 #pragma omp master
40  {
41  sum.resize(num);
42  for (int i = 0; i < num; i++) {
43  sum[i] = 0;
44  }
45  psum = &sum[0];
46  }
47 
48  real_t *pa = a;
49  while (remaining > 0) // sum over threads; shared buffer size is each_buf_Size
50  {
51  const int n = (remaining < each_buf_size)?num:each_buf_size;
52  for (int i = 0; i < n; i++) {
53  array_reduction[i_thread * each_buf_size + i] = pa[i];
54  }
55 #pragma omp barrier
56 #pragma omp master
57  {
58  for (int i = 0; i < Nthread; i++) {
59  for (int j = 0; j < n; j++) {
60  psum[j] += array_reduction[i * each_buf_size + j];
61  }
62  }
63  psum += each_buf_size;
64  } // master
65  pa += each_buf_size;
66  remaining -= each_buf_size;
67 #pragma omp barrier
68  } // sum over threads, done
69 
70 #pragma omp master
71  {
72  Communicator::reduce_sum(num, a, &sum[0], 0);
73  } // a in the master threads knows the global sum
74 
75  remaining = num;
76  pa = a;
77  const int total_buf_size = each_buf_size * Nthread;
78  while (remaining > 0) // distributes the sum to each thread
79  {
80  const int n = (remaining < total_buf_size)?num:total_buf_size;
81 #pragma omp master
82  {
83  for (int i = 0; i < n; i++) { // copy to the common buffer
84  array_reduction[i] = pa[i];
85  }
86  } // master
87 
88  // ensures to read updated m_darray_reduction
89 #pragma omp barrier
90  //#ifdef NECSX
91  //#pragma omp flush
92  //#else
93  //if(sizeof(real_t)==4){
94  //#pragma omp flush (ThreadManager_OpenMP::m_darray_reductionF)
95  // } else {
96  //#pragma omp flush (ThreadManager_OpenMP::m_darray_reduction)
97  // }
98  //#endif
99  for (int i = 0; i < n; i++) { // copy from the common buffer
100  pa[i] = array_reduction[i];
101  }
102  pa += total_buf_size;
103  remaining -= total_buf_size;
104 #pragma omp barrier
105  } // distributes the sum to each thread, done
106  }
107 } // ThreadManager_OpenMP_Reduce
108 
109 //====================================================================
110 // initialization of static member variables.
111 
114 std::vector<dcomplex> ThreadManager_OpenMP::m_darray_reductionDC(0);
115 std::vector<double> ThreadManager_OpenMP::m_darray_reduction(0);
116 std::vector<float> ThreadManager_OpenMP::m_darray_reductionF(0);
117 
118 const std::string ThreadManager_OpenMP::class_name = "ThreadManager_OpenMP";
119 
120 //====================================================================
// Determines the number of OpenMP threads and stores it in m_Nthread.
// Nthread: requested number of threads; 0 means "take what the OpenMP
// runtime provides".
// NOTE(review): the embedded listing numbers jump from 122 to 124 and from
// 150 to 154, so statements of this function appear to be missing from this
// view -- confirm against the real source before editing.
121 void ThreadManager_OpenMP::init(int Nthread)
122 {
124 
125  vout.general(m_vl, "%s: initialization\n", class_name.c_str());
126 
127  int Nthread_env = 0;
128 
// Open a parallel region only to ask the runtime how many threads it starts;
// thread 0 alone writes the result.
129 #pragma omp parallel
130  {
131  if (omp_get_thread_num() == 0) {
132  Nthread_env = omp_get_num_threads();
133  }
134  }
135 
136 
// Accept the runtime's thread count when it matches the request or when no
// explicit request was made (Nthread == 0).
137  if ((Nthread == Nthread_env) || (Nthread == 0)) {
138  m_Nthread = Nthread_env;
139  } else {
// Otherwise the explicit request wins: warn and override the runtime setting.
// NOTE(review): unlike the following messages, the first warning string has
// no trailing "\n" -- looks unintended, confirm.
140  vout.general(m_vl, "Warning at %s: Nthread(env) /= Nthread(input)", class_name.c_str());
141  vout.general(m_vl, " Number of threads(env) = %d\n", Nthread_env);
142  vout.general(m_vl, " Number of threads(input) = %d\n", Nthread);
143  vout.general(m_vl, " reset Nthread = Nthread(input).\n");
144 
145  omp_set_num_threads(Nthread);
146  m_Nthread = Nthread;
147  }
148 
149  vout.general(m_vl, " Number of threads = %d\n", m_Nthread);
150 
154 }
155 
156 
157 //====================================================================
159 {
160  vout.paranoiac(m_vl, "%s: finalize.\n", class_name.c_str());
161 }
162 
163 
164 //====================================================================
166 {
167  return omp_get_num_threads();
168 }
169 
170 
171 //====================================================================
173 {
174  return omp_get_thread_num();
175 }
176 
177 
178 //====================================================================
180 {
181  int Nthread = get_num_threads();
182 
183  barrier(Nthread);
184 }
185 
186 
187 //====================================================================
189 {
190 #pragma omp barrier
191 }
192 
193 
194 //====================================================================
196 {
197 #pragma omp barrier
198 #pragma omp master
199  {
201  }
202 #pragma omp barrier
203 }
204 
205 
206 //====================================================================
208  const int i_thread, const int Nthread)
209 {
212  i_thread, Nthread);
213 }
214 
215 
216 //====================================================================
218  const int num,
219  const int i_thread, const int Nthread)
220 {
223  i_thread, Nthread);
224 }
225 
226 
227 //====================================================================
229  const int i_thread, const int Nthread)
230 {
233  i_thread, Nthread);
234 }
235 
236 
237 //====================================================================
239  const int num,
240  const int i_thread, const int Nthread)
241 {
244  i_thread, Nthread);
245 }
246 
247 
248 //====================================================================
250  const int i_thread, const int Nthread)
251 {
254  i_thread, Nthread);
255 }
256 
257 
258 //====================================================================
260  const int num,
261  const int i_thread, const int Nthread)
262 {
265  i_thread, Nthread);
266 }
267 
268 
269 //====================================================================
270 void ThreadManager_OpenMP::assert_single_thread(const std::string& name)
271 {
272  int Nthread = get_num_threads();
273 
274  if (Nthread != 1) {
275  vout.crucial(m_vl, "\n");
276  vout.crucial(m_vl, "##### Caution #####\n");
277  vout.crucial(m_vl, "Single-thread %s is called in parallel region.\n", name.c_str());
278  vout.crucial(m_vl, "Current number of thread = %d.\n", Nthread);
279 
280  exit(EXIT_FAILURE);
281  }
282 }
283 
284 
285 //====================================================================
286 //============================================================END=====
static int m_Nthread
number of threads.
static const int each_buf_size
reduction buffer size for each thread (double)
BridgeIO vout
Definition: bridgeIO.cpp:503
static int get_num_threads()
returns available number of threads.
static const int each_buf_sizeDC
reduction buffer size for each thread (dcomplex)
void general(const char *format,...)
Definition: bridgeIO.cpp:197
static std::vector< float > m_darray_reductionF
static Bridge::VerboseLevel Vlevel()
static int reduce_sum(int count, dcomplex *recv_buf, dcomplex *send_buf, int pattern=0)
make a global sum of an array of dcomplex over the communicator. pattern specifies the dimensions to ...
static void wait()
barrier among threads inside a node.
static int get_thread_id()
returns thread id.
static void init(int Nthread)
setup: called in main only once.
static std::vector< dcomplex > m_darray_reductionDC
static const std::string class_name
static void barrier(const int Nthread)
barrier among threads inside a node.
static void sync_barrier_all()
barrier among all the threads and nodes.
static void finalize()
finalization.
void paranoiac(const char *format,...)
Definition: bridgeIO.cpp:235
static const int each_buf_sizeF
reduction buffer size for each thread (float)
void crucial(const char *format,...)
Definition: bridgeIO.cpp:178
static void reduce_sum_global(dcomplex &value, const int i_thread, const int Nthread)
global reduction with summation: dcomplex values are assumed thread local.
static Bridge::VerboseLevel m_vl
verbose level.
void sum_global(REALTYPE *a, const int num, std::vector< REALTYPE > &array_reduction, const int each_buf_size, const int i_thread, const int Nthread)
static std::vector< double > m_darray_reduction
VerboseLevel
Definition: bridgeIO.h:42
static int sync()
synchronize within small world.
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.