Bridge++  Ver. 2.0.2
vsimd_Domainwall_SU3_double-inc.h
Go to the documentation of this file.
1 
9 #ifndef QXS_VSIMD_DOMAINWALL_SU3_DOUBLE_INC_INCLUDED
10 #define QXS_VSIMD_DOMAINWALL_SU3_DOUBLE_INC_INCLUDED
11 
28 namespace {
29  template<typename REALTYPE>
30  inline void set_aPp5_dirac_vec(Vsimd_t *v,
31  REALTYPE a, Vsimd_t *w, int Nc)
32  {
33  for (int ivc = 0; ivc < NVC; ++ivc) {
34  int ivc2 = (ivc % 2) + 2 * ND * (ivc / 2);
35  for (int k = 0; k < VLEN; ++k) {
36  v[ID1 + ivc2].v[k] = a * (w[ID1 + ivc2].v[k] - w[ID3 + ivc2].v[k]);
37  v[ID2 + ivc2].v[k] = a * (w[ID2 + ivc2].v[k] - w[ID4 + ivc2].v[k]);
38  v[ID3 + ivc2].v[k] = a * (w[ID3 + ivc2].v[k] - w[ID1 + ivc2].v[k]);
39  v[ID4 + ivc2].v[k] = a * (w[ID4 + ivc2].v[k] - w[ID2 + ivc2].v[k]);
40  }
41  }
42  }
43 
44 
45  template<typename REALTYPE>
46  inline void set_aPm5_dirac_vec(Vsimd_t *v,
47  REALTYPE a, Vsimd_t *w, int Nc)
48  {
49  for (int ivc = 0; ivc < NVC; ++ivc) {
50  int ivc2 = (ivc % 2) + 2 * ND * (ivc / 2);
51  for (int k = 0; k < VLEN; ++k) {
52  v[ID1 + ivc2].v[k] = a * (w[ID1 + ivc2].v[k] + w[ID3 + ivc2].v[k]);
53  v[ID2 + ivc2].v[k] = a * (w[ID2 + ivc2].v[k] + w[ID4 + ivc2].v[k]);
54  v[ID3 + ivc2].v[k] = a * (w[ID3 + ivc2].v[k] + w[ID1 + ivc2].v[k]);
55  v[ID4 + ivc2].v[k] = a * (w[ID4 + ivc2].v[k] + w[ID2 + ivc2].v[k]);
56  }
57  }
58  }
59 
60 
61  template<typename REALTYPE>
62  inline void add_aPp5_dirac_vec(Vsimd_t *v,
63  REALTYPE a, Vsimd_t *w, int Nc)
64  {
65  for (int ivc = 0; ivc < NVC; ++ivc) {
66  int ivc2 = (ivc % 2) + 2 * ND * (ivc / 2);
67  for (int k = 0; k < VLEN; ++k) {
68  v[ID1 + ivc2].v[k] += a * (w[ID1 + ivc2].v[k] - w[ID3 + ivc2].v[k]);
69  v[ID2 + ivc2].v[k] += a * (w[ID2 + ivc2].v[k] - w[ID4 + ivc2].v[k]);
70  v[ID3 + ivc2].v[k] += a * (w[ID3 + ivc2].v[k] - w[ID1 + ivc2].v[k]);
71  v[ID4 + ivc2].v[k] += a * (w[ID4 + ivc2].v[k] - w[ID2 + ivc2].v[k]);
72  }
73  }
74  }
75 
76 
77  template<typename REALTYPE>
78  inline void add_aPm5_dirac_vec(Vsimd_t *v,
79  REALTYPE a, Vsimd_t *w, int Nc)
80  {
81  for (int ivc = 0; ivc < NVC; ++ivc) {
82  int ivc2 = (ivc % 2) + 2 * ND * (ivc / 2);
83  for (int k = 0; k < VLEN; ++k) {
84  v[ID1 + ivc2].v[k] += a * (w[ID1 + ivc2].v[k] + w[ID3 + ivc2].v[k]);
85  v[ID2 + ivc2].v[k] += a * (w[ID2 + ivc2].v[k] + w[ID4 + ivc2].v[k]);
86  v[ID3 + ivc2].v[k] += a * (w[ID3 + ivc2].v[k] + w[ID1 + ivc2].v[k]);
87  v[ID4 + ivc2].v[k] += a * (w[ID4 + ivc2].v[k] + w[ID2 + ivc2].v[k]);
88  }
89  }
90  }
91 
92 
93  template<typename REALTYPE>
94  inline void add_aPp5_dirac_vec(svreal_t& vt1r, svreal_t& vt1i,
95  svreal_t& vt2r, svreal_t& vt2i,
96  svreal_t& vt3r, svreal_t& vt3i,
97  svreal_t& vt4r, svreal_t& vt4i,
98  REALTYPE a, REALTYPE *w, int is, int ic)
99  {
100  svbool_t pg = set_predicate();
101  int off_up = 2 * ND * ic + NVCD * is;
102  svreal_t w1r, w1i, w2r, w2i, w3r, w3i, w4r, w4i;
103 
104  load_vec(pg, w1r, &w[VLEN * (ID1 + off_up)]);
105  load_vec(pg, w1i, &w[VLEN * (ID1 + 1 + off_up)]);
106  load_vec(pg, w2r, &w[VLEN * (ID2 + off_up)]);
107  load_vec(pg, w2i, &w[VLEN * (ID2 + 1 + off_up)]);
108  load_vec(pg, w3r, &w[VLEN * (ID3 + off_up)]);
109  load_vec(pg, w3i, &w[VLEN * (ID3 + 1 + off_up)]);
110  load_vec(pg, w4r, &w[VLEN * (ID4 + off_up)]);
111  load_vec(pg, w4i, &w[VLEN * (ID4 + 1 + off_up)]);
112 
113  sub_vec(pg, w1r, w3r);
114  sub_vec(pg, w1i, w3i);
115  sub_vec(pg, w2r, w4r);
116  sub_vec(pg, w2i, w4i);
117 
118  axpy_vec(pg, vt1r, a, w1r);
119  axpy_vec(pg, vt1i, a, w1i);
120  axpy_vec(pg, vt3r, -a, w1r);
121  axpy_vec(pg, vt3i, -a, w1i);
122 
123  axpy_vec(pg, vt2r, a, w2r);
124  axpy_vec(pg, vt2i, a, w2i);
125  axpy_vec(pg, vt4r, -a, w2r);
126  axpy_vec(pg, vt4i, -a, w2i);
127  }
128 
129 
130  template<typename REALTYPE>
131  inline void add_aPm5_dirac_vec(svreal_t& vt1r, svreal_t& vt1i,
132  svreal_t& vt2r, svreal_t& vt2i,
133  svreal_t& vt3r, svreal_t& vt3i,
134  svreal_t& vt4r, svreal_t& vt4i,
135  REALTYPE a, REALTYPE *w, int is, int ic)
136  {
137  svbool_t pg = set_predicate();
138  int off_up = 2 * ND * ic + NVCD * is;
139  svreal_t w1r, w1i, w2r, w2i, w3r, w3i, w4r, w4i;
140 
141  load_vec(pg, w1r, &w[VLEN * (ID1 + off_up)]);
142  load_vec(pg, w1i, &w[VLEN * (ID1 + 1 + off_up)]);
143  load_vec(pg, w2r, &w[VLEN * (ID2 + off_up)]);
144  load_vec(pg, w2i, &w[VLEN * (ID2 + 1 + off_up)]);
145  load_vec(pg, w3r, &w[VLEN * (ID3 + off_up)]);
146  load_vec(pg, w3i, &w[VLEN * (ID3 + 1 + off_up)]);
147  load_vec(pg, w4r, &w[VLEN * (ID4 + off_up)]);
148  load_vec(pg, w4i, &w[VLEN * (ID4 + 1 + off_up)]);
149 
150  add_vec(pg, w1r, w3r);
151  add_vec(pg, w1i, w3i);
152  add_vec(pg, w2r, w4r);
153  add_vec(pg, w2i, w4i);
154 
155  axpy_vec(pg, vt1r, a, w1r);
156  axpy_vec(pg, vt1i, a, w1i);
157  axpy_vec(pg, vt3r, a, w1r);
158  axpy_vec(pg, vt3i, a, w1i);
159 
160  axpy_vec(pg, vt2r, a, w2r);
161  axpy_vec(pg, vt2i, a, w2i);
162  axpy_vec(pg, vt4r, a, w2r);
163  axpy_vec(pg, vt4i, a, w2i);
164  }
165 
166 
167  template<typename REALTYPE>
168  inline void set_aPm5_dirac_vec(svreal_t& vt1r, svreal_t& vt1i,
169  svreal_t& vt2r, svreal_t& vt2i,
170  svreal_t& vt3r, svreal_t& vt3i,
171  svreal_t& vt4r, svreal_t& vt4i,
172  REALTYPE a, REALTYPE *w, int is, int ic)
173  {
174  svbool_t pg = set_predicate();
175  int off_up = 2 * ND * ic + NVCD * is;
176  svreal_t w3r, w3i, w4r, w4i;
177 
178  load_vec(pg, vt1r, &w[VLEN * (ID1 + off_up)]);
179  load_vec(pg, vt1i, &w[VLEN * (ID1 + 1 + off_up)]);
180  load_vec(pg, vt2r, &w[VLEN * (ID2 + off_up)]);
181  load_vec(pg, vt2i, &w[VLEN * (ID2 + 1 + off_up)]);
182 
183  load_vec(pg, w3r, &w[VLEN * (ID3 + off_up)]);
184  load_vec(pg, w3i, &w[VLEN * (ID3 + 1 + off_up)]);
185  load_vec(pg, w4r, &w[VLEN * (ID4 + off_up)]);
186  load_vec(pg, w4i, &w[VLEN * (ID4 + 1 + off_up)]);
187 
188  add_vec(pg, vt1r, w3r);
189  add_vec(pg, vt1i, w3i);
190  scal_vec(pg, vt1r, a);
191  scal_vec(pg, vt1i, a);
192 
193  vt3r = vt1r;
194  vt3i = vt1i;
195 
196  add_vec(pg, vt2r, w4r);
197  add_vec(pg, vt2i, w4i);
198  scal_vec(pg, vt2r, a);
199  scal_vec(pg, vt2i, a);
200 
201  vt4r = vt2r;
202  vt4i = vt2i;
203  }
204 
205 
206  template<typename REALTYPE>
207  inline void dw_5dir_axpy(svbool_t pg, REALTYPE *v,
208  REALTYPE *y, REALTYPE *w,
209  REALTYPE a1, REALTYPE a2,
210  REALTYPE b1, REALTYPE b2,
211  svreal_t zt, int index)
212  {
213  //v[i]=a1*w[i]+a2*zt
214  //y[i]=-0.5*(b1*w[i]+b2*zt)
215  svreal_t vt, wt, yt;
216  load_vec(pg, wt, &w[VLEN * index]);
217  set_vec(pg, vt, a1, wt);
218  axpy_vec(pg, vt, a2, zt);
219  save_vec(pg, &v[VLEN * index], vt);
220 
221  set_vec(pg, yt, -0.5 * b1, wt);
222  axpy_vec(pg, yt, -0.5 * b2, zt);
223  save_vec(pg, &y[VLEN * index], yt);
224  }
225 
226 
227  template<typename REALTYPE>
228  inline void dw_5dir_dag(svbool_t pg,
229  svreal_t& vt1r, svreal_t& vt1i,
230  svreal_t& vt2r, svreal_t& vt2i,
231  svreal_t& vt3r, svreal_t& vt3i,
232  svreal_t& vt4r, svreal_t& vt4i,
233  REALTYPE *w, REALTYPE *y,
234  REALTYPE a1, REALTYPE a2, int index)
235  {
236  load_vec(pg, vt1r, &w[VLEN * (ID3 + index)]);
237  load_vec(pg, vt1i, &w[VLEN * (ID3 + 1 + index)]);
238  load_vec(pg, vt2r, &w[VLEN * (ID4 + index)]);
239  load_vec(pg, vt2i, &w[VLEN * (ID4 + 1 + index)]);
240  load_vec(pg, vt3r, &w[VLEN * (ID1 + index)]);
241  load_vec(pg, vt3i, &w[VLEN * (ID1 + 1 + index)]);
242  load_vec(pg, vt4r, &w[VLEN * (ID2 + index)]);
243  load_vec(pg, vt4i, &w[VLEN * (ID2 + 1 + index)]);
244 
245  svreal_t yt1r, yt1i;
246  load_vec(pg, yt1r, &y[VLEN * (ID1 + index)]);
247  load_vec(pg, yt1i, &y[VLEN * (ID1 + 1 + index)]);
248  scal_vec(pg, vt1r, -a1);
249  scal_vec(pg, vt1i, -a1);
250  axpy_vec(pg, vt1r, a2, yt1r);
251  axpy_vec(pg, vt1i, a2, yt1i);
252 
253  svreal_t yt2r, yt2i;
254  load_vec(pg, yt2r, &y[VLEN * (ID2 + index)]);
255  load_vec(pg, yt2i, &y[VLEN * (ID2 + 1 + index)]);
256  scal_vec(pg, vt2r, -a1);
257  scal_vec(pg, vt2i, -a1);
258  axpy_vec(pg, vt2r, a2, yt2r);
259  axpy_vec(pg, vt2i, a2, yt2i);
260 
261  svreal_t yt3r, yt3i;
262  load_vec(pg, yt3r, &y[VLEN * (ID3 + index)]);
263  load_vec(pg, yt3i, &y[VLEN * (ID3 + 1 + index)]);
264  scal_vec(pg, vt3r, -a1);
265  scal_vec(pg, vt3i, -a1);
266  axpy_vec(pg, vt3r, a2, yt3r);
267  axpy_vec(pg, vt3i, a2, yt3i);
268 
269  svreal_t yt4r, yt4i;
270  load_vec(pg, yt4r, &y[VLEN * (ID4 + index)]);
271  load_vec(pg, yt4i, &y[VLEN * (ID4 + 1 + index)]);
272  scal_vec(pg, vt4r, -a1);
273  scal_vec(pg, vt4i, -a1);
274  axpy_vec(pg, vt4r, a2, yt4r);
275  axpy_vec(pg, vt4i, a2, yt4i);
276  }
277 
278 
279  template<typename REALTYPE>
280  inline void add_aPp5_dirac_vec(svbool_t pg,
281  svreal_t& vt1r, svreal_t& vt1i,
282  svreal_t& vt2r, svreal_t& vt2i,
283  svreal_t& vt3r, svreal_t& vt3i,
284  svreal_t& vt4r, svreal_t& vt4i,
285  REALTYPE a,
286  svreal_t& xt1r, svreal_t& xt1i,
287  svreal_t& xt2r, svreal_t& xt2i,
288  svreal_t& xt3r, svreal_t& xt3i,
289  svreal_t& xt4r, svreal_t& xt4i)
290  {
291  svreal_t yt1r, yt1i, yt2r, yt2i;
292  yt1r = svsub_m(pg, xt1r, xt3r);
293  yt1i = svsub_m(pg, xt1i, xt3i);
294  yt2r = svsub_m(pg, xt2r, xt4r);
295  yt2i = svsub_m(pg, xt2i, xt4i);
296  axpy_vec(pg, vt1r, a, yt1r);
297  axpy_vec(pg, vt1i, a, yt1i);
298  axpy_vec(pg, vt2r, a, yt2r);
299  axpy_vec(pg, vt2i, a, yt2i);
300  axpy_vec(pg, vt3r, -a, yt1r);
301  axpy_vec(pg, vt3i, -a, yt1i);
302  axpy_vec(pg, vt4r, -a, yt2r);
303  axpy_vec(pg, vt4i, -a, yt2i);
304  }
305 
306 
307  template<typename REALTYPE>
308  inline void set_aPp5_dirac_vec(svbool_t pg,
309  svreal_t& vt1r, svreal_t& vt1i,
310  svreal_t& vt2r, svreal_t& vt2i,
311  svreal_t& vt3r, svreal_t& vt3i,
312  svreal_t& vt4r, svreal_t& vt4i,
313  REALTYPE a,
314  svreal_t& xt1r, svreal_t& xt1i,
315  svreal_t& xt2r, svreal_t& xt2i,
316  svreal_t& xt3r, svreal_t& xt3i,
317  svreal_t& xt4r, svreal_t& xt4i)
318  {
319  vt1r = svsub_m(pg, xt1r, xt3r);
320  vt1i = svsub_m(pg, xt1i, xt3i);
321  vt2r = svsub_m(pg, xt2r, xt4r);
322  vt2i = svsub_m(pg, xt2i, xt4i);
323  vt3r = vt1r;
324  vt3i = vt1i;
325  vt4r = vt2r;
326  vt4i = vt2i;
327 
328  scal_vec(pg, vt1r, a);
329  scal_vec(pg, vt1i, a);
330  scal_vec(pg, vt2r, a);
331  scal_vec(pg, vt2i, a);
332  scal_vec(pg, vt3r, -a);
333  scal_vec(pg, vt3i, -a);
334  scal_vec(pg, vt4r, -a);
335  scal_vec(pg, vt4i, -a);
336  }
337 
338 
339  template<typename REALTYPE>
340  inline void add_aPm5_dirac_vec(svbool_t pg,
341  svreal_t& vt1r, svreal_t& vt1i,
342  svreal_t& vt2r, svreal_t& vt2i,
343  svreal_t& vt3r, svreal_t& vt3i,
344  svreal_t& vt4r, svreal_t& vt4i,
345  REALTYPE a,
346  svreal_t& xt1r, svreal_t& xt1i,
347  svreal_t& xt2r, svreal_t& xt2i,
348  svreal_t& xt3r, svreal_t& xt3i,
349  svreal_t& xt4r, svreal_t& xt4i)
350  {
351  svreal_t yt1r, yt1i, yt2r, yt2i;
352  yt1r = svadd_m(pg, xt1r, xt3r);
353  yt1i = svadd_m(pg, xt1i, xt3i);
354  yt2r = svadd_m(pg, xt2r, xt4r);
355  yt2i = svadd_m(pg, xt2i, xt4i);
356  axpy_vec(pg, vt1r, a, yt1r);
357  axpy_vec(pg, vt1i, a, yt1i);
358  axpy_vec(pg, vt2r, a, yt2r);
359  axpy_vec(pg, vt2i, a, yt2i);
360  axpy_vec(pg, vt3r, a, yt1r);
361  axpy_vec(pg, vt3i, a, yt1i);
362  axpy_vec(pg, vt4r, a, yt2r);
363  axpy_vec(pg, vt4i, a, yt2i);
364  }
365 
366 
367  template<typename REALTYPE>
368  inline void set_aPm5_dirac_vec(svbool_t pg,
369  svreal_t& vt1r, svreal_t& vt1i,
370  svreal_t& vt2r, svreal_t& vt2i,
371  svreal_t& vt3r, svreal_t& vt3i,
372  svreal_t& vt4r, svreal_t& vt4i,
373  REALTYPE a,
374  svreal_t& xt1r, svreal_t& xt1i,
375  svreal_t& xt2r, svreal_t& xt2i,
376  svreal_t& xt3r, svreal_t& xt3i,
377  svreal_t& xt4r, svreal_t& xt4i)
378  {
379  vt1r = svadd_m(pg, xt1r, xt3r);
380  vt1i = svadd_m(pg, xt1i, xt3i);
381  vt2r = svadd_m(pg, xt2r, xt4r);
382  vt2i = svadd_m(pg, xt2i, xt4i);
383  scal_vec(pg, vt1r, a);
384  scal_vec(pg, vt1i, a);
385  scal_vec(pg, vt2r, a);
386  scal_vec(pg, vt2i, a);
387  vt3r = vt1r;
388  vt3i = vt1i;
389  vt4r = vt2r;
390  vt4i = vt2i;
391  }
392 } // nameless namespace end
393 
394 #endif
395 //============================================================END=====
ID1
#define ID1
Definition: fopr_Wilson_impl_SU2-inc.h:18
NVCD
#define NVCD
Definition: define_params_SU3.h:20
VLEN
#define VLEN
Definition: bridgeQXS_Clover_coarse_double.cpp:12
Vsimd_t
Definition: vsimd_double-inc.h:13
ID2
#define ID2
Definition: fopr_Wilson_impl_SU2-inc.h:19
ID4
#define ID4
Definition: fopr_Wilson_impl_SU2-inc.h:21
Vsimd_t::v
double v[VLEND]
Definition: vsimd_double-inc.h:15
ND
#define ND
Definition: field_F_imp_SU2-inc.h:5
ID3
#define ID3
Definition: fopr_Wilson_impl_SU2-inc.h:20
NVC
#define NVC
Definition: fopr_Wilson_impl_SU2-inc.h:15
svbool_t
Definition: vsimd_double-inc.h:30