target/loongarch: Implement vilvl vilvh vextrins vshuf

This patch includes:
- VILV{L/H}.{B/H/W/D};
- VSHUF.{B/H/W/D};
- VSHUF4I.{B/H/W/D};
- VPERMI.W;
- VEXTRINS.{B/H/W/D}.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230504122810.4094787-41-gaosong@loongson.cn>
This commit is contained in:
Song Gao 2023-05-04 20:28:06 +08:00
parent d5e5563cb3
commit e93dd43147
No known key found for this signature in database
GPG key ID: 40A2FFF239263EDF
5 changed files with 248 additions and 0 deletions

View file

@ -2854,3 +2854,151 @@ VPICKOD(vpickod_b, 16, B)
VPICKOD(vpickod_h, 32, H)
VPICKOD(vpickod_w, 64, W)
VPICKOD(vpickod_d, 128, D)
/*
 * VILVL.{B/H/W/D}: interleave elements taken from the start of Vj and Vk.
 *
 * For every source index s in [0, LSX_LEN / BIT):
 *     Vd.E(2 * s)     = Vk.E(s)
 *     Vd.E(2 * s + 1) = Vj.E(s)
 *
 * The instantiations below pass BIT = 16/32/64/128 for the B/H/W/D
 * accessors, so each loop iteration fills two destination elements.
 * The result is assembled in a local VReg and copied out at the end,
 * which keeps the sources intact while they are still being read
 * (vd may name the same register as vj or vk).
 */
#define VILVL(NAME, BIT, E)                                     \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg result;                                                \
    int src, dst;                                               \
                                                                \
    for (src = 0, dst = 0; src < LSX_LEN / BIT;                 \
         src++, dst += 2) {                                     \
        result.E(dst) = Vk->E(src);                             \
        result.E(dst + 1) = Vj->E(src);                         \
    }                                                           \
    *Vd = result;                                               \
}

VILVL(vilvl_b, 16, B)
VILVL(vilvl_h, 32, H)
VILVL(vilvl_w, 64, W)
VILVL(vilvl_d, 128, D)
/*
 * VILVH.{B/H/W/D}: interleave elements taken from the upper part of Vj
 * and Vk.
 *
 * With half = LSX_LEN / BIT, for every n in [0, half):
 *     Vd.E(2 * n)     = Vk.E(n + half)
 *     Vd.E(2 * n + 1) = Vj.E(n + half)
 *
 * The instantiations below pass BIT = 16/32/64/128 for the B/H/W/D
 * accessors.  The result is built in a local VReg first so that the
 * sources are not overwritten while still being read.
 */
#define VILVH(NAME, BIT, E)                                     \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    const int half = LSX_LEN / BIT;                             \
    VReg result;                                                \
    int n;                                                      \
                                                                \
    for (n = 0; n < half; n++) {                                \
        result.E(2 * n) = Vk->E(n + half);                      \
        result.E(2 * n + 1) = Vj->E(n + half);                  \
    }                                                           \
    *Vd = result;                                               \
}

VILVH(vilvh_b, 16, B)
VILVH(vilvh_h, 32, H)
VILVH(vilvh_w, 64, W)
VILVH(vilvh_d, 128, D)
/*
 * VSHUF.B: byte shuffle of Vk and Vj controlled by the bytes of Va.
 *
 * Each control byte of Va is taken as an unsigned value and reduced
 * modulo 2 * nelem (nelem = LSX_LEN / 8).  A selector below nelem picks
 * byte 'sel' of Vk; any other selector picks byte 'sel - nelem' of Vj.
 * The result is assembled in a local VReg before being stored to Vd, so
 * the source registers are read in full before Vd changes.
 */
void HELPER(vshuf_b)(CPULoongArchState *env,
                     uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
{
    VReg *Vd = &(env->fpr[vd].vreg);
    VReg *Vj = &(env->fpr[vj].vreg);
    VReg *Vk = &(env->fpr[vk].vreg);
    VReg *Va = &(env->fpr[va].vreg);
    VReg result;
    int pos, nelem;

    nelem = LSX_LEN / 8;
    for (pos = 0; pos < nelem; pos++) {
        uint64_t sel = (uint8_t)Va->B(pos) % (2 * nelem);

        if (sel < nelem) {
            result.B(pos) = Vk->B(sel);
        } else {
            result.B(pos) = Vj->B(sel - nelem);
        }
    }
    *Vd = result;
}
/*
 * VSHUF.{H/W/D}: element shuffle of Vk and Vj, with the selectors taken
 * from the current contents of Vd.
 *
 * For each element position, the old Vd element is truncated to 8 bits
 * and reduced modulo 2 * nelem (nelem = LSX_LEN / BIT).  A selector
 * below nelem picks element 'sel' of Vk; otherwise element 'sel - nelem'
 * of Vj is used.  Vd is only overwritten after every selector has been
 * read, because the result is collected in a local VReg.
 */
#define VSHUF(NAME, BIT, E)                                     \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t vk)        \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg *Vk = &(env->fpr[vk].vreg);                            \
    VReg result;                                                \
    int pos, nelem;                                             \
                                                                \
    nelem = LSX_LEN / BIT;                                      \
    for (pos = 0; pos < nelem; pos++) {                         \
        uint64_t sel = ((uint8_t) Vd->E(pos)) % (2 * nelem);    \
                                                                \
        if (sel < nelem) {                                      \
            result.E(pos) = Vk->E(sel);                         \
        } else {                                                \
            result.E(pos) = Vj->E(sel - nelem);                 \
        }                                                       \
    }                                                           \
    *Vd = result;                                               \
}

VSHUF(vshuf_h, 16, H)
VSHUF(vshuf_w, 32, W)
VSHUF(vshuf_d, 64, D)
/*
 * VSHUF4I.{B/H/W}: shuffle Vj within groups of four elements, driven by
 * an immediate.
 *
 * For destination element i, 'i & 0xfc' is the first index of its group
 * of four and 'i & 0x03' is its position inside that group.  The 2-bit
 * field of imm at bit offset 2 * (i & 0x03) selects which element of the
 * same group of Vj is copied.  The result goes through a local VReg, so
 * Vj is read in full before Vd is written.
 */
#define VSHUF4I(NAME, BIT, E)                                   \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t imm)       \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    VReg result;                                                \
    int i;                                                      \
                                                                \
    for (i = 0; i < LSX_LEN / BIT; i++) {                       \
        uint32_t pick = (imm >> (2 * (i & 0x03))) & 0x03;       \
                                                                \
        result.E(i) = Vj->E((i & 0xfc) + pick);                 \
    }                                                           \
    *Vd = result;                                               \
}

VSHUF4I(vshuf4i_b, 8, B)
VSHUF4I(vshuf4i_h, 16, H)
VSHUF4I(vshuf4i_w, 32, W)
/*
 * VSHUF4I.D: build Vd from doublewords of Vj and the old Vd, selected
 * by imm.
 *
 *   D(0): imm bit 1 chooses the source register (set: Vj, clear: Vd),
 *         imm bit 0 chooses which of its doublewords is taken.
 *   D(1): imm bit 3 chooses the source register (set: Vj, clear: Vd),
 *         imm bit 2 chooses which of its doublewords is taken.
 *
 * Both doublewords are read before Vd is overwritten.
 */
void HELPER(vshuf4i_d)(CPULoongArchState *env,
                       uint32_t vd, uint32_t vj, uint32_t imm)
{
    VReg *Vd = &(env->fpr[vd].vreg);
    VReg *Vj = &(env->fpr[vj].vreg);
    VReg *lo_src = (imm & 2) ? Vj : Vd;
    VReg *hi_src = (imm & 8) ? Vj : Vd;
    VReg result;

    result.D(0) = lo_src->D(imm & 1);
    result.D(1) = hi_src->D((imm >> 2) & 1);
    *Vd = result;
}
/*
 * VPERMI.W: pick four words according to imm.
 *
 * Result words 0 and 1 come from Vj, result words 2 and 3 come from the
 * old value of Vd.  For result word n, the 2-bit field of imm at bit
 * offset 2 * n is the index of the word taken from the source register.
 * The result is collected in a local VReg so that Vd is fully read
 * before it is overwritten.
 */
void HELPER(vpermi_w)(CPULoongArchState *env,
                      uint32_t vd, uint32_t vj, uint32_t imm)
{
    VReg *Vd = &(env->fpr[vd].vreg);
    VReg *Vj = &(env->fpr[vj].vreg);
    VReg result;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        VReg *src = lane < 2 ? Vj : Vd;

        result.W(lane) = src->W((imm >> (2 * lane)) & 0x3);
    }
    *Vd = result;
}
/*
 * VEXTRINS.{B/H/W/D}: copy a single element of Vj into a single element
 * of Vd, leaving the other elements of Vd untouched.
 *
 * 'imm & MASK' is the index of the element read from Vj and
 * '(imm >> 4) & MASK' is the index of the element written in Vd.
 * BIT is accepted for symmetry with the other helper macros but is not
 * used in the expansion.
 */
#define VEXTRINS(NAME, BIT, E, MASK)                            \
void HELPER(NAME)(CPULoongArchState *env,                       \
                  uint32_t vd, uint32_t vj, uint32_t imm)       \
{                                                               \
    VReg *Vd = &(env->fpr[vd].vreg);                            \
    VReg *Vj = &(env->fpr[vj].vreg);                            \
    int from = imm & MASK;                                      \
    int to = (imm >> 4) & MASK;                                 \
                                                                \
    Vd->E(to) = Vj->E(from);                                    \
}

VEXTRINS(vextrins_b, 8, B, 0xf)
VEXTRINS(vextrins_h, 16, H, 0x7)
VEXTRINS(vextrins_w, 32, W, 0x3)
VEXTRINS(vextrins_d, 64, D, 0x1)