Go to the documentation of this file.
19 #if !defined(DISABLE_PREFETCH)
/**
 * Issue two read prefetches (rw = 0, locality = 2) on `a`, at element indices
 *   Nin5 * (site + 1) + VLEN * (offset + 0)   and
 *   Nin5 * (site + 1) + VLEN * (offset + 4).
 *
 * @param a       array or pointer expression to prefetch from
 * @param offset  index offset, multiplied by VLEN
 *
 * `site`, `Nin5` and `VLEN` are not parameters: they are picked up from the
 * scope in which the macro is expanded, so this cannot be a plain function.
 * Every argument is parenthesized so that expression arguments
 * (e.g. `p + 1`, `x | y`) keep their own grouping.
 * Expands to a brace-enclosed block.
 */
#define __prefetch_load_luinv(a, offset) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 0)], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 4)], 0, 2); \
  }
/**
 * Issue two write prefetches (rw = 1, locality = 2) on `a`, at element indices
 *   Nin5 * (site + 1) + VLEN * (offset + 0)   and
 *   Nin5 * (site + 1) + VLEN * (offset + 4).
 *
 * @param a       array or pointer expression that is about to be written
 * @param offset  index offset, multiplied by VLEN
 *
 * `site`, `Nin5` and `VLEN` are taken from the scope in which the macro is
 * expanded.  Every argument is parenthesized so that expression arguments
 * keep their own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_luinv(a, offset) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 0)], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 4)], 1, 2); \
  }
/**
 * Issue two read prefetches (rw = 0) with the highest locality hint
 * (locality = 3) on `a`, at element indices
 *   Nin5 * (site + 1) + VLEN * (offset + 0)   and
 *   Nin5 * (site + 1) + VLEN * (offset + 4).
 *
 * @param a       array or pointer expression to prefetch from
 * @param offset  index offset, multiplied by VLEN
 *
 * `site`, `Nin5` and `VLEN` are taken from the scope in which the macro is
 * expanded.  Every argument is parenthesized so that expression arguments
 * keep their own grouping.  Expands to a brace-enclosed block.
 * NOTE(review): the `_l1` suffix presumably means "bring into L1"; the cache
 * level actually used for locality 3 is target dependent - confirm.
 */
#define __prefetch_load_luinv_l1(a, offset) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 0)], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 4)], 0, 3); \
  }
/**
 * Issue two write prefetches (rw = 1) with the highest locality hint
 * (locality = 3) on `a`, at element indices
 *   Nin5 * (site + 1) + VLEN * (offset + 0)   and
 *   Nin5 * (site + 1) + VLEN * (offset + 4).
 *
 * @param a       array or pointer expression that is about to be written
 * @param offset  index offset, multiplied by VLEN
 *
 * `site`, `Nin5` and `VLEN` are taken from the scope in which the macro is
 * expanded.  Every argument is parenthesized so that expression arguments
 * keep their own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_luinv_l1(a, offset) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 0)], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (site + 1) + VLEN * ((offset) + 4)], 1, 3); \
  }
/**
 * Issue five read prefetches (rw = 0, locality = 2) on `a`, starting at
 * element index
 *   NDF * Nst2 * dir + VLEN * NDF * idx
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..4).
 *
 * @param a    array or pointer expression to prefetch from
 * @param dir  multiplied by NDF * Nst2 to form the base index
 * @param idx  multiplied by VLEN * NDF and added to the base index
 *
 * `NDF`, `Nst2` and `VLEN` are taken from the scope in which the macro is
 * expanded.  Every argument is parenthesized so that expression arguments
 * (e.g. `u + 1` for `a`) keep their own grouping.
 * Expands to a brace-enclosed block.
 * NOTE(review): the 64-element stride presumably corresponds to one cache
 * line of the element type - confirm against the target.
 */
#define __prefetch_load_hop_u_l2(a, dir, idx) \
  { \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 0 * 64], 0, 2); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 1 * 64], 0, 2); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 2 * 64], 0, 2); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 3 * 64], 0, 2); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 4 * 64], 0, 2); \
  }
/**
 * Issue six read prefetches (rw = 0, locality = 2) on `a`, starting at
 * element index
 *   Nin5 * idx + Nin4 * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..5).
 *
 * @param a    array or pointer expression to prefetch from
 * @param idx  multiplied by Nin5 to form the base index
 * @param is   multiplied by Nin4 and added to the base index
 *
 * `Nin5` and `Nin4` are taken from the scope in which the macro is expanded.
 * Every argument is parenthesized: without that, an argument such as
 * `is + 1` would be multiplied only in its first term.
 * Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop_vec_l2(a, idx, is) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 0 * 64], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 1 * 64], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 2 * 64], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 3 * 64], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 4 * 64], 0, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 5 * 64], 0, 2); \
  }
/**
 * Issue six write prefetches (rw = 1, locality = 2) on `a`, starting at
 * element index
 *   Nin5 * idx + Nin4 * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..5).
 *
 * @param a    array or pointer expression that is about to be written
 * @param idx  multiplied by Nin5 to form the base index
 * @param is   multiplied by Nin4 and added to the base index
 *
 * `Nin5` and `Nin4` are taken from the scope in which the macro is expanded.
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop_vec_l2(a, idx, is) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 0 * 64], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 1 * 64], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 2 * 64], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 3 * 64], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 4 * 64], 1, 2); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 5 * 64], 1, 2); \
  }
/**
 * Issue five read prefetches (rw = 0) with the highest locality hint
 * (locality = 3) on `a`, starting at element index
 *   NDF * Nst2 * dir + VLEN * NDF * idx
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..4).
 *
 * @param a    array or pointer expression to prefetch from
 * @param dir  multiplied by NDF * Nst2 to form the base index
 * @param idx  multiplied by VLEN * NDF and added to the base index
 *
 * `NDF`, `Nst2` and `VLEN` are taken from the scope in which the macro is
 * expanded.  Every argument is parenthesized so that expression arguments
 * keep their own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop_u_l1(a, dir, idx) \
  { \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 0 * 64], 0, 3); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 1 * 64], 0, 3); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 2 * 64], 0, 3); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 3 * 64], 0, 3); \
    __builtin_prefetch(&(a)[NDF * Nst2 * (dir) + VLEN * NDF * (idx) + 4 * 64], 0, 3); \
  }
/**
 * Issue six read prefetches (rw = 0) with the highest locality hint
 * (locality = 3) on `a`, starting at element index
 *   Nin5 * idx + Nin4 * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..5).
 *
 * @param a    array or pointer expression to prefetch from
 * @param idx  multiplied by Nin5 to form the base index
 * @param is   multiplied by Nin4 and added to the base index
 *
 * `Nin5` and `Nin4` are taken from the scope in which the macro is expanded.
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop_vec_l1(a, idx, is) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 0 * 64], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 1 * 64], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 2 * 64], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 3 * 64], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 4 * 64], 0, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 5 * 64], 0, 3); \
  }
/**
 * Issue six write prefetches (rw = 1) with the highest locality hint
 * (locality = 3) on `a`, starting at element index
 *   Nin5 * idx + Nin4 * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..5).
 *
 * @param a    array or pointer expression that is about to be written
 * @param idx  multiplied by Nin5 to form the base index
 * @param is   multiplied by Nin4 and added to the base index
 *
 * `Nin5` and `Nin4` are taken from the scope in which the macro is expanded.
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop_vec_l1(a, idx, is) \
  { \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 0 * 64], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 1 * 64], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 2 * 64], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 3 * 64], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 4 * 64], 1, 3); \
    __builtin_prefetch(&(a)[Nin5 * (idx) + Nin4 * (is) + 5 * 64], 1, 3); \
  }
/**
 * Issue one read prefetch (rw = 0, locality = 2) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized: without that, `skip * is` is regrouped
 * when either argument is a sum such as `n + 1`.
 * Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_x_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 2); \
  }
/**
 * Issue one read prefetch (rw = 0, locality = 2) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_y_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 2); \
  }
/**
 * Issue three read prefetches (rw = 0, locality = 2) on `a`, starting at
 * element index
 *   idx + skip * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..2).
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_zt_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 2); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 1 * 64], 0, 2); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 2 * 64], 0, 2); \
  }
/**
 * Issue one read prefetch (rw = 0) with the highest locality hint
 * (locality = 3) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_x_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 3); \
  }
/**
 * Issue one read prefetch (rw = 0) with the highest locality hint
 * (locality = 3) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_y_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 3); \
  }
/**
 * Issue three read prefetches (rw = 0) with the highest locality hint
 * (locality = 3) on `a`, starting at element index
 *   idx + skip * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..2).
 *
 * @param a     array or pointer expression to prefetch from
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_load_hop2_buf_zt_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 0, 3); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 1 * 64], 0, 3); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 2 * 64], 0, 3); \
  }
/**
 * Issue one write prefetch (rw = 1, locality = 2) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_x_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 2); \
  }
/**
 * Issue one write prefetch (rw = 1, locality = 2) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_y_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 2); \
  }
/**
 * Issue three write prefetches (rw = 1, locality = 2) on `a`, starting at
 * element index
 *   idx + skip * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..2).
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_zt_l2(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 2); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 1 * 64], 1, 2); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 2 * 64], 1, 2); \
  }
/**
 * Issue one write prefetch (rw = 1) with the highest locality hint
 * (locality = 3) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_x_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 3); \
  }
/**
 * Issue one write prefetch (rw = 1) with the highest locality hint
 * (locality = 3) on `a` at element index
 *   idx + skip * is.
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_y_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 3); \
  }
/**
 * Issue three write prefetches (rw = 1) with the highest locality hint
 * (locality = 3) on `a`, starting at element index
 *   idx + skip * is
 * and advancing by 64 elements per prefetch (k * 64 for k = 0..2).
 *
 * @param a     array or pointer expression that is about to be written
 * @param idx   base element index
 * @param is    multiplied by `skip` and added to `idx`
 * @param skip  stride applied to `is`
 *
 * Every argument is parenthesized so that expression arguments keep their
 * own grouping.  Expands to a brace-enclosed block.
 */
#define __prefetch_write_hop1_buf_zt_l1(a, idx, is, skip) \
  { \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 0 * 64], 1, 3); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 1 * 64], 1, 3); \
    __builtin_prefetch(&(a)[(idx) + (skip) * (is) + 2 * 64], 1, 3); \
  }
/*
 * No-op variants of the prefetch macros: each one accepts the same argument
 * list as its prefetching counterpart and expands to nothing, so a call
 * followed by `;` leaves only an empty statement and the arguments are never
 * evaluated.
 *
 * The parameter list of __prefetch_load_hop2_buf_x_l2 is (a, idx, is, skip),
 * matching every other *_buf_* macro.
 *
 * NOTE(review): these names duplicate the prefetching definitions, so they
 * presumably belong to the branch taken when DISABLE_PREFETCH is defined.
 * The #else / #endif directives are not visible in this listing - confirm
 * they are present in the real header.
 */
#define __prefetch_load_luinv(a, offset)
#define __prefetch_write_luinv(a, offset)
#define __prefetch_load_luinv_l1(a, offset)
#define __prefetch_write_luinv_l1(a, offset)
#define __prefetch_load_hop_u_l2(a, dir, idx)
#define __prefetch_load_hop_vec_l2(a, idx, is)
#define __prefetch_write_hop_vec_l2(a, idx, is)
#define __prefetch_load_hop_u_l1(a, dir, idx)
#define __prefetch_load_hop_vec_l1(a, idx, is)
#define __prefetch_write_hop_vec_l1(a, idx, is)
#define __prefetch_load_hop2_buf_x_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_y_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_zt_l2(a, idx, is, skip)
#define __prefetch_load_hop2_buf_x_l1(a, idx, is, skip)
#define __prefetch_load_hop2_buf_y_l1(a, idx, is, skip)
#define __prefetch_load_hop2_buf_zt_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_x_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_y_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_zt_l2(a, idx, is, skip)
#define __prefetch_write_hop1_buf_x_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_y_l1(a, idx, is, skip)
#define __prefetch_write_hop1_buf_zt_l1(a, idx, is, skip)