tcg: Add guest load/store primitives for TCGv_i128

These are not yet considering atomicity of the 16-byte value;
this is a direct replacement for the current target code which
uses a pair of 8-byte operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-11-07 19:48:14 +11:00
parent 4771e71c28
commit cb48f3654e
5 changed files with 324 additions and 0 deletions

View file

@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
}
}
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
MemOp mop_1 = orig, mop_2;
tcg_debug_assert((orig & MO_SIZE) == MO_128);
tcg_debug_assert((orig & MO_SIGN) == 0);
/* Use a memory ordering implemented by the host. */
if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
mop_1 &= ~MO_BSWAP;
}
/* Reduce the size to 64-bit. */
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
/* Retain the alignment constraints of the original. */
switch (orig & MO_AMASK) {
case MO_UNALN:
case MO_ALIGN_2:
case MO_ALIGN_4:
mop_2 = mop_1;
break;
case MO_ALIGN_8:
/* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
mop_2 = mop_1;
break;
case MO_ALIGN:
/* Second has 8-byte alignment; first has 16-byte alignment. */
mop_2 = mop_1;
mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
break;
case MO_ALIGN_16:
case MO_ALIGN_32:
case MO_ALIGN_64:
/* Second has 8-byte alignment; first retains original. */
mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
break;
default:
g_assert_not_reached();
}
ret[0] = mop_1;
ret[1] = mop_2;
}
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
/*
* Since there are no global TCGv_i128, there is no visible state
* changed if the second load faults. Load directly into the two
* subwords.
*/
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(x, x);
}
addr_p8 = tcg_temp_new();
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
tcg_temp_free(addr_p8);
if ((mop[0] ^ memop) & MO_BSWAP) {
tcg_gen_bswap64_i64(y, y);
}
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_R);
}
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
MemOp mop[2];
TCGv addr_p8;
TCGv_i64 x, y;
canonicalize_memop_i128_as_i64(mop, memop);
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
addr = plugin_prep_mem_callbacks(addr);
/* TODO: respect atomicity of the operation. */
/* TODO: allow the tcg backend to see the whole operation. */
if ((memop & MO_BSWAP) == MO_LE) {
x = TCGV128_LOW(val);
y = TCGV128_HIGH(val);
} else {
x = TCGV128_HIGH(val);
y = TCGV128_LOW(val);
}
addr_p8 = tcg_temp_new();
if ((mop[0] ^ memop) & MO_BSWAP) {
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_bswap64_i64(t, x);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
tcg_gen_bswap64_i64(t, y);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
tcg_temp_free_i64(t);
} else {
gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
tcg_gen_addi_tl(addr_p8, addr, 8);
gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
}
tcg_temp_free(addr_p8);
plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
QEMU_PLUGIN_MEM_W);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {