10 #ifndef QXS_VSIMD_INCLUDED
11 #define QXS_VSIMD_INCLUDED
18 #ifdef __ARM_FEATURE_SVE
// Signed integer type used for index arithmetic throughout this header.
typedef int64_t int_t;
// Returns an all-false 64-bit-lane predicate.  NOTE(review): only the
// signature is visible in this chunk; the body is on lines omitted here.
38 inline svbool_t set_predicate_false()
// Build a predicate whose first 'range' 64-bit lanes are active:
// lanes [0, range) true, all remaining lanes false (svwhilelt_b64).
inline svbool_t set_predicate_whilelt(int range)
{
  return svwhilelt_b64(0, range);
}
// NOTE(review): interior fragment of a predicate-setup routine; the
// enclosing signature and several original lines are not visible in this
// chunk, so the code is kept byte-identical.  Each iteration XORs
// whilelt(iy*VLENXD) with whilelt(iy*VLENXD - 1), which toggles exactly
// lane iy*VLENXD-1 of pg2 -- presumably the last x-lane of every y-row.
// pg1 is finally set to the complement of pg2.  pg0 is presumably an
// all-true governing predicate -- TODO confirm against the full source.
54 for (
int iy =
VLENYD; iy > 0; --iy) {
55 pg1 = svwhilelt_b64(0, iy *
VLENXD);
56 pg2 = sveor_z(pg0, pg2, pg1);
57 pg1 = svwhilelt_b64(0, iy *
VLENXD - 1);
58 pg2 = sveor_z(pg0, pg2, pg1);
60 pg1 = svnot_z(pg0, pg2);
// NOTE(review): interior fragment, enclosing signature not visible; code
// kept byte-identical.  XORing whilelt(iy*VLENXD) with
// whilelt((iy-1)*VLENXD + 1) toggles lanes [(iy-1)*VLENXD+1, iy*VLENXD)
// in pg2 -- presumably every lane of a y-row except its first x-lane.
// pg1 ends as the complement of pg2.  TODO confirm against full source.
68 for (
int iy =
VLENYD - 1; iy > 0; --iy) {
69 pg1 = svwhilelt_b64(0, iy *
VLENXD);
70 pg2 = sveor_z(pg0, pg2, pg1);
71 pg1 = svwhilelt_b64(0, (iy - 1) *
VLENXD + 1);
72 pg2 = sveor_z(pg0, pg2, pg1);
74 pg1 = svnot_z(pg0, pg2);
// NOTE(review): interior fragment of an even/odd predicate routine.
// Gaps in the original numbering (85, 90, 95-96) indicate hidden lines,
// most likely even/odd conditionals guarding the two XOR groups, so the
// net effect of pg2/pg3 cannot be stated from this view alone.  Visible
// pattern: pg2 accumulates single boundary lanes (whilelt(n) XOR
// whilelt(n-1) toggles lane n-1), pg3 accumulates whole y-rows
// (whilelt(iy*VLENXD) XOR whilelt((iy-1)*VLENXD)); pg1 is finally the
// complement of pg2 XOR pg3.  Code kept byte-identical.
84 for (
int iy =
VLENYD; iy > 0; --iy) {
86 pg1 = svwhilelt_b64(0, iy *
VLENXD);
87 pg2 = sveor_z(pg0, pg2, pg1);
88 pg1 = svwhilelt_b64(0, iy *
VLENXD - 1);
89 pg2 = sveor_z(pg0, pg2, pg1);
91 pg1 = svwhilelt_b64(0, iy *
VLENXD);
92 pg3 = sveor_z(pg0, pg3, pg1);
93 pg1 = svwhilelt_b64(0, (iy - 1) *
VLENXD);
94 pg3 = sveor_z(pg0, pg3, pg1);
97 pg1 = sveor_z(pg0, pg2, pg3);
98 pg1 = svnot_z(pg0, pg1);
// NOTE(review): interior fragment, mirror image of the previous routine.
// Hidden original lines (109, 114, 119-120) likely hold even/odd
// conditionals, so behavior is only partially inferable.  Visible
// pattern: pg3 accumulates whole y-rows, pg1 accumulates rows minus
// their first x-lane (whilelt(iy*VLENXD) XOR whilelt((iy-1)*VLENXD+1));
// pg2 is finally the complement of pg1 XOR pg3.  Code kept
// byte-identical.
108 for (
int iy =
VLENYD; iy > 0; --iy) {
110 pg2 = svwhilelt_b64(0, iy *
VLENXD);
111 pg3 = sveor_z(pg0, pg3, pg2);
112 pg2 = svwhilelt_b64(0, (iy - 1) *
VLENXD);
113 pg3 = sveor_z(pg0, pg3, pg2);
115 pg2 = svwhilelt_b64(0, iy *
VLENXD);
116 pg1 = sveor_z(pg0, pg1, pg2);
117 pg2 = svwhilelt_b64(0, (iy - 1) *
VLENXD + 1);
118 pg1 = sveor_z(pg0, pg1, pg2);
121 pg2 = sveor_z(pg0, pg1, pg3);
122 pg2 = svnot_z(pg0, pg2);
// NOTE(review): tail fragments of two different routines (original
// numbering jumps 130 -> 137); kept byte-identical.
// Line 130: pg2 becomes the complement of pg1.
// Lines 137-138: pg2 selects the first VLENXD lanes (presumably the
// first y-row) and pg1 its complement -- TODO confirm.
130 pg2 = svnot_z(pg0, pg1);
137 pg2 = svwhilelt_b64(0,
VLENXD);
138 pg1 = svnot_z(pg0, pg2);
// Flip (toggle) exactly lane i of predicate pg.
// NOTE(review): pg1 is declared on a line omitted from this chunk,
// presumably pg1 = svwhilelt_b64(0, i); XORing pg with whilelt(i) and
// whilelt(i+1) then toggles only lane i.  pg0 is presumably an all-true
// governing predicate -- TODO confirm against the full source.
143 inline void set1_at(
const int i,
svbool_t& pg)
147 svbool_t pg2 = svwhilelt_b64(0, i + 1);
148 pg = sveor_z(pg0, pg, pg1);
149 pg = sveor_z(pg0, pg, pg2);
// Rotate the first 'len' elements of u right by one position
// (u[len-1] is saved in tmp before the shift loop).  NOTE(review): the
// loop body and the final store of tmp are on lines omitted from this
// chunk; presumably u[i] = u[i-1] then u[0] = tmp -- TODO confirm.
153 inline void rot1_R(uint64_t *u,
const int len =
VLENXD)
155 uint64_t tmp = u[len - 1];
156 for (
int i = len - 1; i >= 1; i--) {
// Rotate the first 'len' elements of u left by one position.
// NOTE(review): the save of u[0], the loop body, and the final store are
// on lines omitted from this chunk; presumably u[i] = u[i+1] then
// u[len-1] = saved u[0] -- TODO confirm against the full source.
163 inline void rot1_L(uint64_t *u,
const int len =
VLENXD)
166 for (
int i = 0; i < len - 1; ++i) {
// Set the predicate and gather-index vector for the +x neighbor in an
// even/odd site layout.  NOTE(review): most of this body (parameter
// list, the u[] index table construction inside the loop) is on lines
// omitted from this chunk; only the loop header and the final load of
// the index table into 'idx' are visible.  Code kept byte-identical.
174 inline void set_idx_predicate_xp_eo(
svbool_t& pg,
179 for (
int i = 0; i <
VLEN; ++i) {
198 idx = svld1_u64(svptrue_b64(), u);
// Set the predicate and gather-index vector for the -x neighbor in an
// even/odd site layout.  NOTE(review): the parameter list, the index
// table construction, and the conditionals around the two set1_at calls
// are on lines omitted from this chunk; set1_at toggles the last x-lane
// of each row (i + VLENXD - 1) in pg.  Code kept byte-identical.
202 inline void set_idx_predicate_xm_eo(
svbool_t& pg,
207 for (
int i = 0; i <
VLEN; i++) {
215 set1_at(i +
VLENXD - 1, pg);
222 set1_at(i +
VLENXD - 1, pg);
226 idx = svld1_u64(svptrue_b64(), u);
// Set the predicate (first VLENXD lanes, presumably the first y-row)
// and gather-index vector for the +y neighbor.  NOTE(review): the u[]
// index table declaration and the loop body are on lines omitted from
// this chunk; code kept byte-identical.
230 inline void set_idx_predicate_yp(
svbool_t& pg1, svuint64_t&
idx)
232 pg1 = svwhilelt_b64(0,
VLENXD);
237 for (
int i = 0; i <
VLENXD; ++i) {
240 idx = svld1_u64(svptrue_b64(), u);
// Set the predicate (complement of pg2, presumably selecting the last
// y-row) and gather-index vector for the -y neighbor.  NOTE(review):
// pg2's definition, the u[] table declaration, and the loop body are on
// lines omitted from this chunk; code kept byte-identical.
244 inline void set_idx_predicate_ym(
svbool_t& pg1, svuint64_t&
idx)
247 pg1 = svnot_z(svptrue_b64(), pg2);
249 for (
int i = 0; i <
VLENXD; ++i) {
255 idx = svld1_u64(svptrue_b64(), u);
// Predicated contiguous load: lanes of v where pg is active are loaded
// from vp; inactive lanes are zeroed (svld1 zeroing behavior).
inline void load_vec(svbool_t pg, svfloat64_t& v, const float64_t *vp)
{
  v = svld1_f64(pg, vp);
}
// Predicated load-and-merge: lanes where pg is active are replaced by
// values loaded from vp; inactive lanes keep their previous v contents
// (svsel picks v2 for active lanes, v for inactive ones).
inline void load_add(svbool_t pg, svfloat64_t& v, float64_t *vp)
{
  // Local temporary restored here; its declaration was on a line
  // omitted from this view of the file.
  svfloat64_t v2 = svld1_f64(pg, vp);
  v = svsel_f64(pg, v2, v);
}
// Predicated broadcast: active lanes of v are set to scalar a; inactive
// lanes keep their previous values (merging svdup, inactive source v).
inline void set_vec(svbool_t pg, svfloat64_t& v, float64_t a)
{
  v = svdup_f64_m(v, pg, a);
}
// Predicated clear: active lanes of v are set to 0.0; inactive lanes
// keep their previous values (merging svdup).
inline void clear_vec(svbool_t pg, svfloat64_t& v)
{
  v = svdup_f64_m(v, pg, 0.0);
}
// Predicated contiguous load of signed 64-bit integers into v;
// inactive lanes are zeroed.
inline void load_svint(svbool_t pg, svint64_t& v, int64_t *vp)
{
  v = svld1_s64(pg, vp);
}
// Predicated contiguous load of unsigned 64-bit integers into v;
// inactive lanes are zeroed.
inline void load_svuint(svbool_t pg, svuint64_t& v, uint64_t *vp)
{
  v = svld1_u64(pg, vp);
}
// Predicated gather load: active lanes of v are loaded from
// vp[index[lane]] (64-bit index, element-scaled); inactive lanes are
// zeroed.  NOTE(review): the trailing parameter line was missing from
// this view; 'svint64_t index' is the type required by
// svld1_gather_s64index_f64 -- confirm against the full source.
inline void load_vec_gather(svbool_t pg, svfloat64_t& v, float64_t *vp,
                            svint64_t index)
{
  v = svld1_gather_s64index_f64(pg, vp, index);
}
// Predicated gather-and-merge: active lanes of v are replaced by values
// gathered from vp[index[lane]]; inactive lanes keep their previous v
// contents.  NOTE(review): the trailing parameter line and the local
// declaration of v2 were missing from this view; 'svint64_t index' is
// the type required by svld1_gather_s64index_f64 -- confirm.
inline void load_add_gather(svbool_t pg, svfloat64_t& v, float64_t *vp,
                            svint64_t index)
{
  svfloat64_t v2 = svld1_gather_s64index_f64(pg, vp, index);
  v = svsel_f64(pg, v2, v);
}
// In-place predicated sign flip: active lanes of v are negated;
// inactive lanes keep their previous values (merging svneg, inactive
// source v).
inline void flip_sign(svbool_t pg, svfloat64_t& v)
{
  v = svneg_f64_m(v, pg, v);
}
// Two-operand predicated sign flip: v1 receives -v2 on active lanes and
// v2 unchanged on inactive lanes (merging svneg with inactive source
// v2).  v2 itself is not modified.
inline void flip_sign(svbool_t pg, svfloat64_t& v1, svfloat64_t& v2)
{
  v1 = svneg_f64_m(v2, pg, v2);
}
329 #endif // __ARM_FEATURE_SVE