// Forward declaration of the file-local helper that settles how many
// processes are assigned to each dimension.
//   ndim  : number of dimensions (length of dims and npe)
//   dims  : per-dimension extents; read-only
//   nproc : total number of processes to distribute
//   npe   : per-dimension process counts; read and written by the definition
// The caller in this file compares the result against EXIT_SUCCESS.
18 static int pe_logical_layout(
const int ndim,
const int *dims,
int nproc,
int *npe);
// Forward declaration of the file-local prime helper.
//   n : a count; the definition rejects n < 1 with EXIT_FAILURE
//   p : int array that the definition reads as a list of divisors
//       (presumably also where found primes are stored — confirm against
//       the full definition)
19 static int find_primes(
const int n,
int *p);
// Returns the stored grid extent for direction idir. No bounds check on idir
// is visible in this fragment.
52 return m_grid_dims[idir];
// Copies the m_ndim entries of m_grid_dims into dims (presumably a
// caller-supplied array holding at least m_ndim ints — confirm at the
// signature, which is not visible here).
59 for (
int i = 0; i < m_ndim; ++i) {
60 dims[i] = m_grid_dims[i];
// Packs (rank, idir, sign of ipm) into one integer: each rank owns a range of
// 2 * m_ndim values; ipm > 0 maps to offset idir and any other ipm maps to
// offset idir + m_ndim. The value is unique per triple as long as
// 0 <= idir < m_ndim, which is not checked on this line.
70 return 2 * m_ndim * rank + idir + ((ipm > 0) ? 0 : m_ndim);
// Fragment of a setup routine (intermediate lines are elided in this listing).
// Allocates the lattice-extent and grid-extent arrays, m_ndim ints each; how
// they are populated is not visible here.
100 m_dims =
new int [m_ndim];
108 m_grid_dims =
new int [m_ndim];
// Scratch array of per-dimension local extents; released by the delete[]
// further down.
116 int *local_dims =
new int [m_ndim];
// Per-dimension consistency pass. A zero entry appears to mean "not
// specified". When both local and grid extents are given, their product must
// equal the lattice extent; otherwise (presumably the else-branch, whose
// braces are elided) the local extent must divide the lattice extent and the
// grid extent is derived from the quotient.
123 for (
int i = 0; i < m_ndim; ++i) {
124 if (local_dims[i] != 0) {
125 if (m_grid_dims[i] != 0) {
126 if (m_grid_dims[i] * local_dims[i] != m_dims[i]) {
127 fprintf(stderr,
"layout mismatch.\n");
133 if (m_dims[i] % local_dims[i] != 0) {
134 fprintf(stderr,
"layout mismatch. lattice undivisable by local volume.\n");
137 m_grid_dims[i] = m_dims[i] / local_dims[i];
143 delete [] local_dims;
// NOTE(review): both vectors copy exactly 4 elements although the source
// arrays were allocated with m_ndim ints — this is only correct when
// m_ndim == 4; confirm, or use m_ndim as the length.
146 std::vector<int> lattice_size(m_dims, m_dims + 4);
147 std::vector<int> grid_size(m_grid_dims, m_grid_dims + 4);
// Delegates the rest of the setup to the vector-based overload.
150 return layout_setup(lattice_size, grid_size);
// Fragment of the vector-based layout_setup (signature head and many lines
// are elided). lattice_size is read-only; grid_size is taken by non-const
// reference and is overwritten below with the grid that was finally chosen.
155 const std::vector<int>& lattice_size,
156 std::vector<int>& grid_size)
// The number of dimensions is taken from the lattice vector's length.
158 m_ndim = lattice_size.size();
// (Re)allocates and fills the lattice extents.
161 m_dims =
new int [m_ndim];
163 for (
int i = 0; i < m_ndim; ++i) {
164 m_dims[i] = lattice_size[i];
// (Re)allocates the grid extents; the body of the first loop below is elided
// (presumably it initialises every entry — confirm).
168 m_grid_dims =
new int [m_ndim];
170 for (
int i = 0; i < m_ndim; ++i) {
// NOTE(review): i is a signed int compared with the unsigned
// grid_size.size(), and m_grid_dims holds only m_ndim ints — a grid_size
// longer than lattice_size would write past the end of the array.
174 for (
int i = 0; i < grid_size.size(); ++i) {
175 m_grid_dims[i] = grid_size[i];
// Lets the helper complete m_grid_dims for m_grid_size processes; any result
// other than EXIT_SUCCESS is reported as a layout failure.
180 int retv = pe_logical_layout(m_ndim, m_dims,
m_grid_size, m_grid_dims);
184 if (retv != EXIT_SUCCESS) {
185 fprintf(stderr,
"layout failed.\n");
// Reports the resulting grid back to the caller through grid_size.
191 grid_size.resize(m_ndim);
192 for (
int i = 0; i < m_ndim; ++i) {
193 grid_size[i] = m_grid_dims[i];
198 physical_map_setup();
// Storage for this process's grid coordinate; the lines that fill it are
// elided.
201 m_grid_coord =
new int [m_ndim];
// Storage for the neighbour ranks in the up and down direction of each
// dimension.
205 m_ipe_up =
new int [m_ndim];
206 m_ipe_dn =
new int [m_ndim];
// Scratch coordinate used to look up neighbours.
// NOTE(review): no matching delete[] for coord is visible in this listing —
// confirm it exists on an elided line.
208 int *coord =
new int [m_ndim];
// For each dimension i: start from this process's own coordinate, step one
// site along i with periodic wrap-around, and ask grid_rank for the result
// (grid_rank is defined elsewhere; presumably it maps a coordinate to a rank).
209 for (
int i = 0; i < m_ndim; ++i) {
210 for (
int j = 0; j < m_ndim; ++j) {
211 coord[j] = m_grid_coord[j];
215 coord[i] = (m_grid_coord[i] + 1) % m_grid_dims[i];
216 grid_rank(&m_ipe_up[i], coord);
// Adding m_grid_dims[i] before the modulo keeps the operand non-negative when
// the coordinate is 0.
219 coord[i] = (m_grid_coord[i] - 1 + m_grid_dims[i]) % m_grid_dims[i];
220 grid_rank(&m_ipe_dn[i], coord);
// Diagnostic print of the neighbour table.
// NOTE(review): the format and the indices 0..3 are hard-wired to four
// dimensions; with m_ndim < 4 this reads past the end of m_ipe_up / m_ipe_dn.
226 printf(
"rank %d: up=(%d,%d,%d,%d), dn=(%d,%d,%d,%d)\n",
228 m_ipe_up[0], m_ipe_up[1], m_ipe_up[2], m_ipe_up[3],
229 m_ipe_dn[0], m_ipe_dn[1], m_ipe_dn[2], m_ipe_dn[3]);
// Teardown fragment: calls physical_map_delete() and then frees two of the
// arrays that the setup code in this file allocates with new[].
// NOTE(review): frees for m_dims, m_ipe_up and m_ipe_dn are not visible in
// this listing — confirm they happen on the elided lines.
244 physical_map_delete();
247 delete [] m_grid_dims;
248 delete [] m_grid_coord;
// Fragment that builds one sub-communicator per mask value (the definition of
// Nmask is elided).
265 m_sub_comm =
new MPI_Comm [Nmask];
267 for (
int imask = 0; imask < Nmask; ++imask) {
// Scratch copy of this process's grid coordinate.
// NOTE(review): allocated on every iteration; no matching delete[] is visible
// in this listing — confirm it exists on an elided line.
269 int *coord =
new int [m_ndim];
270 for (
int i = 0; i < m_ndim; ++i) {
271 coord[i] = m_grid_coord[i];
// Bit i of imask decides whether dimension i keeps its coordinate; where the
// bit is clear the coordinate is reset to 0.
274 for (
int i = 0; i < m_ndim; ++i) {
275 bool mask = ((imask >> i) & 1) == 1 ? true :
false;
276 if (!mask) coord[i] = 0;
// The rank obtained for the masked coordinate is used as the split colour, so
// processes that obtain the same value land in the same sub-communicator.
// The key is 0 for everyone, which leaves the ordering inside each new
// communicator to the ranks in m_comm.
280 grid_rank(&rank, coord);
290 MPI_Comm_split(
m_comm, rank, 0 , &m_sub_comm[imask]);
// Frees the array of sub-communicator handles.
// NOTE(review): no MPI_Comm_free for the individual communicators is visible
// in this listing — confirm they are released on an elided line.
301 delete [] m_sub_comm;
// Table of small primes used to factorise the process count. The rows visible
// here list the consecutive primes from 2 to 311 (the braces of the
// initialiser are on elided lines).
310 static const int prime_table[] =
312 2, 3, 5, 7, 11, 13, 17, 19,
313 23, 29, 31, 37, 41, 43, 47, 53,
314 59, 61, 67, 71, 73, 79, 83, 89,
315 97, 101, 103, 107, 109, 113, 127, 131,
316 137, 139, 149, 151, 157, 163, 167, 173,
317 179, 181, 191, 193, 197, 199, 211, 223,
318 227, 229, 233, 239, 241, 251, 257, 263,
319 269, 271, 277, 281, 283, 293, 307, 311,
// Number of entries in prime_table, derived from the array size so that it
// follows the table automatically.
322 static const int nprime =
sizeof(prime_table) /
sizeof(
int);
// Fragment of the helper that decides how many processes go to each dimension
// (many lines, including most conditions and braces, are elided).
//   ndim  : number of dimensions
//   dims  : per-dimension extents
//   nproc : total number of processes
//   npe   : in/out process count per dimension; an entry equal to 0 is treated
//           below as "not yet assigned"
// Starts from EXIT_SUCCESS; the error branches print to stderr (how they
// change the return value is not visible here).
// NOTE(review): subdims and count are allocated with new[] and no matching
// delete[] is visible in this listing — confirm on the elided lines.
327 static int pe_logical_layout(
const int ndim,
const int *dims,
int nproc,
int *npe)
329 int retv = EXIT_SUCCESS;
// Processes still to be distributed (presumably reduced as pre-set npe[i]
// values are consumed; the update is elided).
332 int nfreeproc = nproc;
// Validation of the entries supplied by the caller: each one must divide the
// total process count, the remaining process count, and the extent of its
// dimension. The condition guarding the first message is elided.
334 for (
int i = 0; i < ndim; ++i) {
339 fprintf(stderr,
"illegal value: npe[%d]=%d.\n", i, npe[i]);
341 }
else if (nproc % npe[i] != 0) {
342 fprintf(stderr,
"illegal value: npe[%d]=%d does not divide NPE=%d.\n", i, npe[i], nproc);
344 }
else if (nfreeproc % npe[i] != 0) {
345 fprintf(stderr,
"illegal value: NPE=%d is not divisable by %d.\n", nproc, nproc / nfreeproc * npe[i]);
347 }
else if (dims[i] % npe[i] != 0) {
348 fprintf(stderr,
"illegal value: npe[%d]=%d does not divide L=%d.\n", i, npe[i], dims[i]);
// The condition for this message is elided. When exactly one process is left
// to place, every unassigned dimension simply gets 1.
357 fprintf(stderr,
"impossible layout.\n");
359 }
else if (nfreeproc == 1) {
360 for (
int i = 0; i < ndim; ++i) {
361 if (npe[i] == 0) npe[i] = 1;
366 fprintf(stderr,
"impossible layout. no room to divide.\n");
// Collects the extents of the unassigned dimensions into subdims; nf counts
// them (its declaration is elided).
374 int *subdims =
new int [ndim];
376 for (
int i = 0; i < ndim; ++i) {
377 if (npe[i] == 0) subdims[nf++] = dims[i]; }
// One counter per table prime; the loops below divide np by each table prime
// while it still divides (np is declared on an elided line, presumably
// initialised from nfreeproc; the loop bodies are elided too).
379 int *count =
new int [nprime];
380 for (
int i = 0; i < nprime; ++i) {
384 for (
int i = 0; i < nprime; ++i) {
385 int p = prime_table[i];
386 while (np > 1 && np % p == 0)
// Presumably reached when a factor remains that the table cannot express.
401 fprintf(stderr,
"insufficient prime table.\n");
// Hands out the prime factors from the largest table prime downwards. For
// each occurrence of p, the free dimensions are scanned for the largest
// sub-extent divisible by p; because the comparison is >=, a later dimension
// wins a tie. The step that applies p to the chosen dimension is elided.
412 for (
int i = nprime - 1; i >= 0; --i) {
413 if (count[i] == 0)
continue;
415 int p = prime_table[i];
417 for (
int j = 0; j < count[i]; ++j) {
420 for (
int k = 0; k < nf; ++k) {
421 if ((subdims[k] >= maxsubdim) && (subdims[k] % p == 0)) {
422 maxsubdim = subdims[k];
429 fprintf(stderr,
"not divisable. %d\n", p);
// Converts the remaining sub-extents back into process counts (the guard that
// selects which dimensions are updated, and the advance of k, are elided).
446 for (
int i = 0, k = 0; i < ndim; ++i) {
448 npe[i] = dims[i] / subdims[k];
// Fragment of the prime helper (most of the body is elided).
//   n : a count; values below 1 are rejected with EXIT_FAILURE
//   p : int array whose first i entries are used as trial divisors
// The visible inner loop tests candidate k against p[0..i-1] and stops at the
// first entry that divides it.
// NOTE(review): the final visible statement returns EXIT_FAILURE when i >= n;
// its surrounding context is elided — confirm that this is the intended
// status for that condition.
462 static int find_primes(
const int n,
int *p)
464 if (n < 1)
return EXIT_FAILURE;
474 for (j = 0; j < i; ++j) {
475 if (k % p[j] == 0)
break;
480 if (i >= n)
return EXIT_FAILURE;