Go to the documentation of this file.
27 template<
typename REALTYPE>
30 std::vector<REALTYPE>& array_reduction,
31 const int each_buf_size,
32 const int ith,
const int nth)
36 std::vector<real_t> sum;
42 for (
int i = 0; i < num; i++) {
51 const int n = (remaining < each_buf_size) ? remaining : each_buf_size;
52 for (
int i = 0; i < n; ++i) {
53 array_reduction[ith * each_buf_size + i] = pa[i];
58 for (
int i = 0; i < nth; ++i) {
59 for (
int j = 0; j < n; ++j) {
60 psum[j] += array_reduction[i * each_buf_size + j];
63 psum += each_buf_size;
66 remaining -= each_buf_size;
77 const int total_buf_size = each_buf_size * nth;
80 const int n = (remaining < total_buf_size) ? remaining : total_buf_size;
83 for (
int i = 0; i < n; ++i) {
84 array_reduction[i] = pa[i];
99 for (
int i = 0; i < n; ++i) {
100 pa[i] = array_reduction[i];
102 pa += total_buf_size;
103 remaining -= total_buf_size;
110 template<
typename REALTYPE>
113 std::vector<REALTYPE>& array_reduction,
114 const int each_buf_size,
115 const int ith,
const int nth)
119 std::vector<real_t> vmax;
125 for (
int i = 0; i < num; ++i) {
132 while (remaining > 0)
134 const int n = (remaining < each_buf_size) ? remaining : each_buf_size;
135 for (
int i = 0; i < n; ++i) {
136 array_reduction[ith * each_buf_size + i] = pa[i];
141 for (
int i = 0; i < nth; ++i) {
142 for (
int j = 0; j < n; ++j) {
144 if (array_reduction[i * each_buf_size + j] > pmax[j]) {
145 pmax[j] = array_reduction[i * each_buf_size + j];
148 pmax[j] = std::max(pmax[j], array_reduction[i * each_buf_size + j]);
152 pmax += each_buf_size;
155 remaining -= each_buf_size;
166 const int total_buf_size = each_buf_size * nth;
167 while (remaining > 0)
169 const int n = (remaining < total_buf_size) ? remaining : total_buf_size;
172 for (
int i = 0; i < n; ++i) {
173 array_reduction[i] = pa[i];
180 for (
int i = 0; i < n; ++i) {
181 pa[i] = array_reduction[i];
183 pa += total_buf_size;
184 remaining -= total_buf_size;
212 if (omp_get_thread_num() == 0) {
213 Nthread_env = omp_get_num_threads();
218 if ((Nthread == Nthread_env) || (Nthread == 0)) {
226 omp_set_num_threads(Nthread);
248 return omp_get_num_threads();
255 return omp_get_thread_num();
289 const int ith,
const int nth)
300 const int ith,
const int nth)
310 const int ith,
const int nth)
321 const int ith,
const int nth)
331 const int ith,
const int nth)
342 const int ith,
const int nth)
352 const int ith,
const int nth)
363 const int ith,
const int nth)
static int sync()
synchronize within small world.
static const int each_buf_size
reduction buffer size for each thread (double)
static int m_Nthread
number of threads.
static int get_num_threads()
returns available number of threads.
static int reduce_max(int count, double *recv_buf, double *send_buf, int pattern=0)
find a global maximum of an array of double over the communicator. pattern specifies the dimensions to be reduced.
static std::vector< double > m_darray_reduction
static void barrier(const int Nthread)
barrier among threads inside a node.
void paranoiac(const char *format,...)
void max_global(REALTYPE *a, const int num, std::vector< REALTYPE > &array_reduction, const int each_buf_size, const int ith, const int nth)
static const int each_buf_sizeF
reduction buffer size for each thread (float)
void sum_global(REALTYPE *a, const int num, std::vector< REALTYPE > &array_reduction, const int each_buf_size, const int ith, const int nth)
static int reduce_sum(int count, dcomplex *recv_buf, dcomplex *send_buf, int pattern=0)
make a global sum of an array of dcomplex over the communicator. pattern specifies the dimensions to be reduced.
static void reduce_max_global(double *value, const int num, const int i_thread, const int Nthread)
global reduction with max for an array: double values are assumed thread local.
static void sync_barrier_all()
barrier among all the threads and nodes.
static void init(int Nthread)
setup: called in main only once.
static Bridge::VerboseLevel m_vl
verbose level.
static void reduce_sum_global(dcomplex &value, const int i_thread, const int Nthread)
global reduction with summation: dcomplex values are assumed thread local.
static const std::string class_name
static std::vector< dcomplex > m_darray_reductionDC
static void wait()
barrier among threads inside a node.
static std::vector< float > m_darray_reductionF
static Bridge::VerboseLevel Vlevel()
static void finalize()
finalization.
void crucial(const char *format,...)
static int get_thread_id()
returns thread id.
void general(const char *format,...)
static void assert_single_thread(const std::string &class_name)
assert currently running on single thread.
static const int each_buf_sizeDC
reduction buffer size for each thread (dcomplex)