target/i386: implement SHA instructions

The implementation was validated with OpenSSL and with the test vectors in
https://github.com/rust-lang/stdarch/blob/master/crates/core_arch/src/x86/sha.rs.

The instructions provide a ~25% improvement on hashing a 64 MiB file:
runtime goes down from 1.8 seconds to 1.4 seconds; instruction count on
the host goes down from 5.8 billion to 4.8 billion with slightly better
IPC too.  Good job Intel. ;)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2023-10-10 10:31:17 +02:00
parent 45b5933f7a
commit e582b629f0
6 changed files with 209 additions and 1 deletions

View file

@ -460,6 +460,13 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
[0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
[0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
[0xc8] = X86_OP_ENTRY2(SHA1NEXTE, V,dq, W,dq, cpuid(SHA_NI)),
[0xc9] = X86_OP_ENTRY2(SHA1MSG1, V,dq, W,dq, cpuid(SHA_NI)),
[0xca] = X86_OP_ENTRY2(SHA1MSG2, V,dq, W,dq, cpuid(SHA_NI)),
[0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
[0xcc] = X86_OP_ENTRY2(SHA256MSG1, V,dq, W,dq, cpuid(SHA_NI)),
[0xcd] = X86_OP_ENTRY2(SHA256MSG2, V,dq, W,dq, cpuid(SHA_NI)),
[0xdb] = X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
[0xdc] = X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
[0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
@ -609,6 +616,8 @@ static const X86OpEntry opcodes_0F3A[256] = {
[0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x, H,x, W,x, vex6 cpuid(AVX) p_66),
[0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x, H,x, W,x, vex6 cpuid(AVX) p_66 avx2_256),
[0xcc] = X86_OP_ENTRY3(SHA1RNDS4, V,dq, W,dq, I,b, cpuid(SHA_NI)),
[0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b, vex4 cpuid(AES) p_66),
[0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
@ -1456,6 +1465,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
case X86_FEAT_AVX2:
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
case X86_FEAT_SHA_NI:
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
}
g_assert_not_reached();
}