Bridge++  Ver. 2.0.2
prefetch.h
/*
  Copyright
    Bridge++ project and RIKEN (2022)

  Licence: GPL
  see README.txt and LICENSE for more details
*/
#pragma once

#if !defined(DISABLE_PREFETCH)

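// Software-prefetch helper macros.  Each one wraps GCC's
// __builtin_prefetch(addr, rw, locality): rw = 0 marks an expected read,
// rw = 1 an expected write; locality 3 asks for the line to be kept in all
// cache levels, locality 2 for moderate temporal locality.  Accordingly,
// the *_l1 variants below use locality 3 and the *_l2 variants locality 2.
// The luinv macros prefetch the data of the next site (site + 1), one loop
// iteration ahead; Nin5, VLEN and site must be in scope where they expand.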
#define __prefetch_load_luinv(a, offset) \
  { \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 0)], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 4)], 0, 2); \
  }
#define __prefetch_write_luinv(a, offset) \
  { \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 0)], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 4)], 1, 2); \
  }

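// Same access pattern as above, with locality hint 3 so that the lines are
// also kept close to the core (L1).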
#define __prefetch_load_luinv_l1(a, offset) \
  { \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 0)], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 4)], 0, 3); \
  }
#define __prefetch_write_luinv_l1(a, offset) \
  { \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 0)], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (site + 1) + VLEN * (offset + 4)], 1, 3); \
  }

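// Prefetch hints for the hopping-term kernels.  __prefetch_load_hop_u_*
// covers five 64-element chunks of the field `a` for direction `dir` at
// block index `idx` (judging from the name, the gauge links, stored with
// stride NDF * Nst2 per direction).  __prefetch_*_hop_vec_* covers six
// chunks of the slice `is` of a vector laid out with stride Nin5 per `idx`
// and Nin4 per slice.  The 64-element step presumably corresponds to one
// cache line of the target architecture for the element type in use.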
#define __prefetch_load_hop_u_l2(a, dir, idx) \
  { \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 0 * 64], 0, 2); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 1 * 64], 0, 2); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 2 * 64], 0, 2); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 3 * 64], 0, 2); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 4 * 64], 0, 2); \
  }
#define __prefetch_load_hop_vec_l2(a, idx, is) \
  { \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 0 * 64], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 1 * 64], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 2 * 64], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 3 * 64], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 4 * 64], 0, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 5 * 64], 0, 2); \
  }

#define __prefetch_write_hop_vec_l2(a, idx, is) \
  { \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 0 * 64], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 1 * 64], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 2 * 64], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 3 * 64], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 4 * 64], 1, 2); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 5 * 64], 1, 2); \
  }

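// L1 (locality 3) variants of the hopping-term prefetches above.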
#define __prefetch_load_hop_u_l1(a, dir, idx) \
  { \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 0 * 64], 0, 3); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 1 * 64], 0, 3); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 2 * 64], 0, 3); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 3 * 64], 0, 3); \
    __builtin_prefetch(&a[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 4 * 64], 0, 3); \
  }
#define __prefetch_load_hop_vec_l1(a, idx, is) \
  { \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 0 * 64], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 1 * 64], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 2 * 64], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 3 * 64], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 4 * 64], 0, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 5 * 64], 0, 3); \
  }

#define __prefetch_write_hop_vec_l1(a, idx, is) \
  { \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 0 * 64], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 1 * 64], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 2 * 64], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 3 * 64], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 4 * 64], 1, 3); \
    __builtin_prefetch(&a[Nin5 * (idx) + Nin4 * is + 5 * 64], 1, 3); \
  }

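// Prefetch hints for the boundary buffers read in the second hopping step
// (hop2).  The x and y buffers are touched with a single prefetch, the z/t
// buffers with three, presumably because their packed blocks span more
// cache lines; `skip` is the element stride between successive `is` slices
// of the buffer.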
#define __prefetch_load_hop2_buf_x_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 2); \
  }

#define __prefetch_load_hop2_buf_y_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 2); \
  }

#define __prefetch_load_hop2_buf_zt_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 2); \
    __builtin_prefetch(&a[idx + skip * is + 1 * 64], 0, 2); \
    __builtin_prefetch(&a[idx + skip * is + 2 * 64], 0, 2); \
  }

#define __prefetch_load_hop2_buf_x_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 3); \
  }

#define __prefetch_load_hop2_buf_y_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 3); \
  }

#define __prefetch_load_hop2_buf_zt_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 0, 3); \
    __builtin_prefetch(&a[idx + skip * is + 1 * 64], 0, 3); \
    __builtin_prefetch(&a[idx + skip * is + 2 * 64], 0, 3); \
  }

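// Write-prefetch hints for the boundary buffers filled in the first hopping
// step (hop1), with the same x/y versus z/t distinction as above, presumably
// issued before the buffers are packed for communication.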
#define __prefetch_write_hop1_buf_x_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 2); \
  }

#define __prefetch_write_hop1_buf_y_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 2); \
  }

#define __prefetch_write_hop1_buf_zt_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 2); \
    __builtin_prefetch(&a[idx + skip * is + 1 * 64], 1, 2); \
    __builtin_prefetch(&a[idx + skip * is + 2 * 64], 1, 2); \
  }

#define __prefetch_write_hop1_buf_x_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 3); \
  }

#define __prefetch_write_hop1_buf_y_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 3); \
  }

#define __prefetch_write_hop1_buf_zt_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&a[idx + skip * is + 0 * 64], 1, 3); \
    __builtin_prefetch(&a[idx + skip * is + 1 * 64], 1, 3); \
    __builtin_prefetch(&a[idx + skip * is + 2 * 64], 1, 3); \
  }

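// Fallback: when DISABLE_PREFETCH is defined, every macro expands to
// nothing, so the kernels build without any software prefetching.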
#else

#define __prefetch_load_luinv(a, offset)
#define __prefetch_write_luinv(a, offset)
#define __prefetch_load_luinv_l1(a, offset)
#define __prefetch_write_luinv_l1(a, offset)
#define __prefetch_load_hop_u_l2(a, dir, idx)
#define __prefetch_load_hop_vec_l2(a, idx, is)
#define __prefetch_write_hop_vec_l2(a, idx, is)
#define __prefetch_load_hop_u_l1(a, dir, idx)
#define __prefetch_load_hop_vec_l1(a, idx, is)
#define __prefetch_write_hop_vec_l1(a, idx, is)
#define __prefetch_load_hop2_buf_x_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_y_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_zt_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_x_l1(a, idx, is, skip)
#define __prefetch_load_hop2_buf_y_l1(a, idx, is, skip)
#define __prefetch_load_hop2_buf_zt_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_x_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_y_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_zt_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_x_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_y_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_zt_l1(a, idx, is, skip)
#endif  // !defined(DISABLE_PREFETCH)
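// Usage sketch (illustrative only, not taken from Bridge++ itself): the
// luinv macros prefetch the data of site + 1, so they are meant to be
// issued inside a loop over `site`, one iteration ahead of the access.
// The array names v1/v2 and the bound Nst are placeholders; Nin5 and VLEN
// must be defined by the surrounding kernel, since the macros use them
// directly.  Prefetching past the end of the array on the last iteration
// is harmless: __builtin_prefetch does not fault on invalid addresses.
//
//   for (int site = 0; site < Nst; ++site) {
//     __prefetch_load_luinv(v1, 0);    // pull next site's input toward L2
//     __prefetch_write_luinv(v2, 0);   // prepare next site's output lines
//     // ... compute on v1[Nin5 * site + ...], write v2[Nin5 * site + ...]
//   }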