Bridge++  Ver. 2.0.2
mult_Wilson_parts_qxs2-inc.h
Go to the documentation of this file.
1 
10 #ifndef MULT_WILSON_PARTS_QXS_2_H
11 #define MULT_WILSON_PARTS_QXS_2_H
12 
13 namespace {
14 //====================================================================
// Setup check hook. Currently performs no work: the VLEN2 size guard
// shown below is disabled in this implementation and retained only for
// reference.
inline void check_setup()
{
  // Disabled guard:
  //
  //   if (VLEN2 < 2) {
  //     vout.crucial("VLEN2 = %d is too small for this implementation\n",
  //                  VLEN2);
  //     exit(EXIT_FAILURE);
  //   }
}
25 
26 
27 //====================================================================
28  template<typename REALTYPE>
29  inline void mult_wilson_xp1(REALTYPE *buf, REALTYPE *v1)
30  {
31  REALTYPE vt[NVCD];
32 
33  load_vec1(vt, v1, 0, NVCD);
34  set_sp2_xp1(buf, vt);
35  }
36 
37 
38 //====================================================================
39  template<typename REALTYPE>
40  inline void mult_wilson_xp2(Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
41  {
42  Vsimd_t vt1[NVC], vt2[NVC];
43  shift_vec1_bw(vt1, &buf[0], NVC);
44  shift_vec1_bw(vt2, &buf[NVC], NVC);
45 
46  Vsimd_t ut[NDF];
47  load_vec(ut, u, NDF);
48 
49  Vsimd_t wt1[2], wt2[2];
50  for (int ic = 0; ic < NC; ++ic) {
51  int ic2 = ND * 2 * ic;
52  mult_uv(wt1, &ut[2 * ic], vt1, NC);
53  mult_uv(wt2, &ut[2 * ic], vt2, NC);
54  set_sp4_xp(&v2[ic2], wt1, wt2);
55  }
56  }
57 
58 
59 //====================================================================
60  template<typename REALTYPE>
61  inline void mult_wilson_xpb(Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
62  {
63  svbool_t pg = set_predicate();
64 
65  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
66  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
67 
68  set_sp2_xp(pg, vt10, vt11, vt20, vt21, v1, 0);
69  set_sp2_xp(pg, vt12, vt13, vt22, vt23, v1, 1);
70  set_sp2_xp(pg, vt14, vt15, vt24, vt25, v1, 2);
71 
72  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
73  svreal_t wt1r, wt1i, wt2r, wt2i;
74 
75  for (int ic = 0; ic < NC; ++ic) {
76  load_u(pg, ut10, ut11, ut12, ut13, ut14, ut15,
77  &u[VLEN * (2 * ic)]);
78  mult_uv(pg, wt1r, wt1i,
79  ut10, ut11, ut12, ut13, ut14, ut15,
80  vt10, vt11, vt12, vt13, vt14, vt15);
81  mult_uv(pg, wt2r, wt2i,
82  ut10, ut11, ut12, ut13, ut14, ut15,
83  vt20, vt21, vt22, vt23, vt24, vt25);
84  set_sp4_xp(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
85  }
86  }
87 
88 
89 //====================================================================
90  template<typename REALTYPE>
91  inline void mult_wilson_xm1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
92  {
93  Vsimd_t vt1[VLEN * NVC], vt2[VLEN * NVC];
94  set_sp2_xm(vt1, vt2, v1);
95 
96  Vsimd_t ut[NDF];
97  load_vec(ut, u, NDF);
98 
99  Vsimd_t wt1[NVC], wt2[NVC];
100  for (int ic = 0; ic < NC; ++ic) {
101  int ic2 = NVC * ic;
102  mult_udagv(&wt1[2 * ic], &ut[ic2], vt1, NC);
103  mult_udagv(&wt2[2 * ic], &ut[ic2], vt2, NC);
104  }
105 
106  for (int ic = 0; ic < NC; ++ic) {
107  save_vec1(&buf[0], wt1, VLEN - 1, NVC);
108  save_vec1(&buf[NVC], wt2, VLEN - 1, NVC);
109  }
110  }
111 
112 
113 //====================================================================
114  template<typename REALTYPE>
115  inline void mult_wilson_xm2(Vsimd_t *v2, REALTYPE *buf)
116  {
117  Vsimd_t wt1[2], wt2[2];
118  for (int ic = 0; ic < NC; ++ic) {
119  int ic2 = ND * 2 * ic;
120  shift_vec1_fw(wt1, &buf[2 * ic], 2);
121  shift_vec1_fw(wt2, &buf[2 * ic + NVC], 2);
122  set_sp4_xm(&v2[ic2], wt1, wt2);
123  }
124  }
125 
126 
127 //====================================================================
128  template<typename REALTYPE>
129  inline void mult_wilson_xmb(Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
130  {
131  svbool_t pg = set_predicate();
132 
133  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
134  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
135 
136  set_sp2_xm(pg, vt10, vt11, vt20, vt21, v1, 0);
137  set_sp2_xm(pg, vt12, vt13, vt22, vt23, v1, 1);
138  set_sp2_xm(pg, vt14, vt15, vt24, vt25, v1, 2);
139 
140  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
141  svreal_t wt1r, wt1i, wt2r, wt2i;
142 
143  for (int ic = 0; ic < NC; ++ic) {
144  load_udag(pg, ut10, ut11, ut12, ut13, ut14, ut15,
145  &u[VLEN * NVC * ic]);
146  mult_udv(pg, wt1r, wt1i,
147  ut10, ut11, ut12, ut13, ut14, ut15,
148  vt10, vt11, vt12, vt13, vt14, vt15);
149  mult_udv(pg, wt2r, wt2i,
150  ut10, ut11, ut12, ut13, ut14, ut15,
151  vt20, vt21, vt22, vt23, vt24, vt25);
152  set_sp4_xm(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
153  }
154  }
155 
156 
157 //====================================================================
158  template<typename REALTYPE>
159  inline void mult_wilson_yp1(REALTYPE *buf, REALTYPE *v1)
160  {
161  svbool_t pg = set_predicate();
162 
163  for (int ic = 0; ic < NC; ++ic) {
164  svreal_t vt1r, vt1i, vt2r, vt2i;
165  set_sp2_yp(pg, vt1r, vt1i, vt2r, vt2i, v1, ic);
166  save_vec(pg, &buf[VLEN * (2 * ic)], vt1r);
167  save_vec(pg, &buf[VLEN * (2 * ic + 1)], vt1i);
168  save_vec(pg, &buf[VLEN * (2 * ic + NVC)], vt2r);
169  save_vec(pg, &buf[VLEN * (2 * ic + 1 + NVC)], vt2i);
170  }
171  }
172 
173 
174 //====================================================================
175  template<typename REALTYPE>
176  inline void mult_wilson_yp2(Vsimd_t *v2, REALTYPE *u, REALTYPE *buf)
177  {
178  svbool_t pg = set_predicate();
179 
180  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
181  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
182 
183  load_vec(pg, vt10, &buf[VLEN * 0]);
184  load_vec(pg, vt11, &buf[VLEN * 1]);
185  load_vec(pg, vt12, &buf[VLEN * 2]);
186  load_vec(pg, vt13, &buf[VLEN * 3]);
187  load_vec(pg, vt14, &buf[VLEN * 4]);
188  load_vec(pg, vt15, &buf[VLEN * 5]);
189 
190  load_vec(pg, vt20, &buf[VLEN * (0 + NVC)]);
191  load_vec(pg, vt21, &buf[VLEN * (1 + NVC)]);
192  load_vec(pg, vt22, &buf[VLEN * (2 + NVC)]);
193  load_vec(pg, vt23, &buf[VLEN * (3 + NVC)]);
194  load_vec(pg, vt24, &buf[VLEN * (4 + NVC)]);
195  load_vec(pg, vt25, &buf[VLEN * (5 + NVC)]);
196 
197  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
198  svreal_t wt1r, wt1i, wt2r, wt2i;
199 
200  for (int ic = 0; ic < NC; ++ic) {
201  load_u(pg, ut10, ut11, ut12, ut13, ut14, ut15,
202  &u[VLEN * (2 * ic)]);
203  mult_uv(pg, wt1r, wt1i,
204  ut10, ut11, ut12, ut13, ut14, ut15,
205  vt10, vt11, vt12, vt13, vt14, vt15);
206  mult_uv(pg, wt2r, wt2i,
207  ut10, ut11, ut12, ut13, ut14, ut15,
208  vt20, vt21, vt22, vt23, vt24, vt25);
209  set_sp4_yp(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
210  }
211  }
212 
213 
214 //====================================================================
215  template<typename REALTYPE>
216  inline void mult_wilson_ypb(Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
217  {
218  svbool_t pg = set_predicate();
219 
220  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
221  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
222 
223  set_sp2_yp(pg, vt10, vt11, vt20, vt21, v1, 0);
224  set_sp2_yp(pg, vt12, vt13, vt22, vt23, v1, 1);
225  set_sp2_yp(pg, vt14, vt15, vt24, vt25, v1, 2);
226 
227  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
228  svreal_t wt1r, wt1i, wt2r, wt2i;
229 
230  for (int ic = 0; ic < NC; ++ic) {
231  load_u(pg, ut10, ut11, ut12, ut13, ut14, ut15,
232  &u[VLEN * (2 * ic)]);
233  mult_uv(pg, wt1r, wt1i,
234  ut10, ut11, ut12, ut13, ut14, ut15,
235  vt10, vt11, vt12, vt13, vt14, vt15);
236  mult_uv(pg, wt2r, wt2i,
237  ut10, ut11, ut12, ut13, ut14, ut15,
238  vt20, vt21, vt22, vt23, vt24, vt25);
239  set_sp4_yp(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
240  }
241  }
242 
243 
244 //====================================================================
245  template<typename REALTYPE>
246  inline void mult_wilson_ypb(REALTYPE *v2, REALTYPE *u, REALTYPE *v1)
247  {
248  svbool_t pg = set_predicate();
249 
250  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
251  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
252 
253  set_sp2_yp(pg, vt10, vt11, vt20, vt21, v1, 0);
254  set_sp2_yp(pg, vt12, vt13, vt22, vt23, v1, 1);
255  set_sp2_yp(pg, vt14, vt15, vt24, vt25, v1, 2);
256 
257  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
258  svreal_t wt1r, wt1i, wt2r, wt2i;
259 
260  for (int ic = 0; ic < NC; ++ic) {
261  load_u(pg, ut10, ut11, ut12, ut13, ut14, ut15,
262  &u[VLEN * (2 * ic)]);
263  mult_uv(pg, wt1r, wt1i,
264  ut10, ut11, ut12, ut13, ut14, ut15,
265  vt10, vt11, vt12, vt13, vt14, vt15);
266  mult_uv(pg, wt2r, wt2i,
267  ut10, ut11, ut12, ut13, ut14, ut15,
268  vt20, vt21, vt22, vt23, vt24, vt25);
269  set_sp4_yp(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
270  }
271  }
272 
273 
274 //====================================================================
275  template<typename REALTYPE>
276  inline void mult_wilson_ym1(REALTYPE *buf, REALTYPE *u, REALTYPE *v1)
277  {
278  svbool_t pg = set_predicate();
279 
280  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
281  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
282 
283  set_sp2_ym(pg, vt10, vt11, vt20, vt21, v1, 0);
284  set_sp2_ym(pg, vt12, vt13, vt22, vt23, v1, 1);
285  set_sp2_ym(pg, vt14, vt15, vt24, vt25, v1, 2);
286 
287  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
288  svreal_t wt1r, wt1i, wt2r, wt2i;
289 
290  for (int ic = 0; ic < NC; ++ic) {
291  load_udag(pg, ut10, ut11, ut12, ut13, ut14, ut15,
292  &u[VLEN * NVC * ic]);
293  mult_udv(pg, wt1r, wt1i,
294  ut10, ut11, ut12, ut13, ut14, ut15,
295  vt10, vt11, vt12, vt13, vt14, vt15);
296 
297  mult_udv(pg, wt2r, wt2i,
298  ut10, ut11, ut12, ut13, ut14, ut15,
299  vt20, vt21, vt22, vt23, vt24, vt25);
300 
301  save_vec(pg, &buf[VLEN * (2 * ic)], wt1r);
302  save_vec(pg, &buf[VLEN * (2 * ic + 1)], wt1i);
303 
304  save_vec(pg, &buf[VLEN * (2 * ic + NVC)], wt2r);
305  save_vec(pg, &buf[VLEN * (2 * ic + 1 + NVC)], wt2i);
306  }
307  }
308 
309 
310 //====================================================================
311  template<typename REALTYPE>
312  inline void mult_wilson_ym2(Vsimd_t *v2, REALTYPE *buf)
313  {
314  svbool_t pg = set_predicate();
315 
316  for (int ic = 0; ic < NC; ++ic) {
317  svreal_t wt1r, wt1i, wt2r, wt2i;
318  load_vec(pg, wt1r, &buf[VLEN * (2 * ic)]);
319  load_vec(pg, wt1i, &buf[VLEN * (2 * ic + 1)]);
320  load_vec(pg, wt2r, &buf[VLEN * (2 * ic + NVC)]);
321  load_vec(pg, wt2i, &buf[VLEN * (2 * ic + 1 + NVC)]);
322  set_sp4_ym(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
323  }
324  }
325 
326 
327 //====================================================================
328  template<typename REALTYPE>
329  inline void mult_wilson_ymb(Vsimd_t *v2, REALTYPE *u, REALTYPE *v1)
330  {
331  svbool_t pg = set_predicate();
332 
333  svreal_t vt10, vt11, vt12, vt13, vt14, vt15;
334  svreal_t vt20, vt21, vt22, vt23, vt24, vt25;
335 
336  set_sp2_ym(pg, vt10, vt11, vt20, vt21, v1, 0);
337  set_sp2_ym(pg, vt12, vt13, vt22, vt23, v1, 1);
338  set_sp2_ym(pg, vt14, vt15, vt24, vt25, v1, 2);
339 
340  svreal_t ut10, ut11, ut12, ut13, ut14, ut15;
341  svreal_t wt1r, wt1i, wt2r, wt2i;
342 
343  for (int ic = 0; ic < NC; ++ic) {
344  load_udag(pg, ut10, ut11, ut12, ut13, ut14, ut15,
345  &u[VLEN * NVC * ic]);
346  mult_udv(pg, wt1r, wt1i,
347  ut10, ut11, ut12, ut13, ut14, ut15,
348  vt10, vt11, vt12, vt13, vt14, vt15);
349  mult_udv(pg, wt2r, wt2i,
350  ut10, ut11, ut12, ut13, ut14, ut15,
351  vt20, vt21, vt22, vt23, vt24, vt25);
352  set_sp4_ym(pg, v2, wt1r, wt1i, wt2r, wt2i, ic);
353  }
354  }
355 
356 
357 //====================================================================
358 } // nameless namespace end
359 
360 #endif
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
NDF
#define NDF
Definition: field_F_imp_SU2-inc.h:4
Vsimd_t
Definition: vsimd_double-inc.h:13
NC
#define NC
Definition: field_F_imp_SU2-inc.h:2
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
svbool_t
Definition: vsimd_double-inc.h:30