target/i386: implement F16C instructions

F16C only consists of two instructions, which are a bit peculiar
nevertheless.

First, they access only the low half of an YMM or XMM register for the
packed-half operand; the exact size still depends on the VEX.L flag.
This is similar to the existing avx_movx flag, but not exactly because
avx_movx is hardcoded to affect operand 2.  To this end I added a "ph"
format name; it's possible to reuse this approach for the VPMOVSX and
VPMOVZX instructions, though that would also require adding two more
formats for the low-quarter and low-eighth of an operand.

Second, VCVTPS2PH is somewhat weird because it *stores* the result of
the instruction into memory rather than loading it.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2022-10-19 13:22:06 +02:00
parent 314d3eff66
commit cf5ec6641e
9 changed files with 89 additions and 6 deletions

View file

@ -586,6 +586,35 @@ void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
}
}
#if SHIFT >= 1
void glue(helper_cvtph2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
int i;
for (i = 2 << SHIFT; --i >= 0; ) {
d->ZMM_S(i) = float16_to_float32(s->ZMM_H(i), true, &env->sse_status);
}
}
void glue(helper_cvtps2ph, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, int mode)
{
int i;
FloatRoundMode prev_rounding_mode = env->sse_status.float_rounding_mode;
if (!(mode & (1 << 2))) {
set_x86_rounding_mode(mode & 3, &env->sse_status);
}
for (i = 0; i < 2 << SHIFT; i++) {
d->ZMM_H(i) = float32_to_float16(s->ZMM_S(i), true, &env->sse_status);
}
for (i >>= 2; i < 1 << SHIFT; i++) {
d->Q(i) = 0;
}
env->sse_status.float_rounding_mode = prev_rounding_mode;
}
#endif
#if SHIFT == 1
void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *v, Reg *s)
{