* target/i386: improved EPYC models

* more removal of mb_read/mb_set
 * bump _WIN32_WINNT to the Windows 8 API
 * fix for modular builds with --disable-system
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmRZK7wUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroObngf8D6A5l1QQAnImRrZAny6HZV/9xseD
 9QhkUW3fxXlUhb8tXomv2BlT8h9GzLIN6aWvcCotT+xK3kAX7mRcYKgPMr9CYL7y
 vev/hh+B6RY1CJ/xPT09/BMVjkj50AL0O/OuWMhcQ5nCO7F2sdMjMrsYqqeZcjYf
 zx9RTX7gVGt+wWFHxgCgdfL0kfgzexK55YuZU0vLzcA+pYsZWoEfW+fKBIf4rzDV
 r9M6mDBUkHBQ0rIVC3QFloAXnYb1JrpeqqL2i2qwhAkLz8LyGqk3lZF20hE/04im
 XZcZjWO5pxAxIEPeTken+2x1n8tn2BLkMtvwJdV5TpvICCFRtPZlbH79qw==
 =rXLN
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* target/i386: improved EPYC models
* more removal of mb_read/mb_set
* bump _WIN32_WINNT to the Windows 8 API
* fix for modular builds with --disable-system

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmRZK7wUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroObngf8D6A5l1QQAnImRrZAny6HZV/9xseD
# 9QhkUW3fxXlUhb8tXomv2BlT8h9GzLIN6aWvcCotT+xK3kAX7mRcYKgPMr9CYL7y
# vev/hh+B6RY1CJ/xPT09/BMVjkj50AL0O/OuWMhcQ5nCO7F2sdMjMrsYqqeZcjYf
# zx9RTX7gVGt+wWFHxgCgdfL0kfgzexK55YuZU0vLzcA+pYsZWoEfW+fKBIf4rzDV
# r9M6mDBUkHBQ0rIVC3QFloAXnYb1JrpeqqL2i2qwhAkLz8LyGqk3lZF20hE/04im
# XZcZjWO5pxAxIEPeTken+2x1n8tn2BLkMtvwJdV5TpvICCFRtPZlbH79qw==
# =rXLN
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 08 May 2023 06:05:00 PM BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  meson: leave unnecessary modules out of the build
  docs: clarify --without-default-devices
  target/i386: Add EPYC-Genoa model to support Zen 4 processor series
  target/i386: Add VNMI and automatic IBRS feature bits
  target/i386: Add missing feature bits in EPYC-Milan model
  target/i386: Add feature bits for CPUID_Fn80000021_EAX
  target/i386: Add a couple of feature bits in 8000_0008_EBX
  target/i386: Add new EPYC CPU versions with updated cache_info
  target/i386: allow versioned CPUs to specify new cache_info
  include/qemu/osdep.h: Bump _WIN32_WINNT to the Windows 8 API
  MAINTAINERS: add stanza for Kconfig files
  tb-maint: do not use mb_read/mb_set
  call_rcu: stop using mb_set/mb_read
  test-aio-multithread: simplify test_multi_co_schedule
  test-aio-multithread: do not use mb_read/mb_set for simple flags
  rcu: remove qatomic_mb_set, expand comments

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-05-09 17:21:39 +01:00
commit 577e648bdb
10 changed files with 479 additions and 51 deletions

View file

@ -3910,6 +3910,16 @@ F: configure
F: scripts/mtest2make.py F: scripts/mtest2make.py
F: tests/Makefile.include F: tests/Makefile.include
Kconfig
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: scripts/minikconf.py
F: docs/devel/kconfig.rst
F: Kconfig*
F: */Kconfig*
F: hw/*/Kconfig*
F: target/*/Kconfig*
GIT submodules GIT submodules
M: Daniel P. Berrange <berrange@redhat.com> M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes S: Odd Fixes

View file

@ -746,7 +746,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
tcg_region_reset_all(); tcg_region_reset_all();
/* XXX: flush processor icache at this point if cache flush is expensive */ /* XXX: flush processor icache at this point if cache flush is expensive */
qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1); qatomic_inc(&tb_ctx.tb_flush_count);
done: done:
mmap_unlock(); mmap_unlock();
@ -758,7 +758,7 @@ done:
void tb_flush(CPUState *cpu) void tb_flush(CPUState *cpu)
{ {
if (tcg_enabled()) { if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count); unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
if (cpu_in_exclusive_context(cpu)) { if (cpu_in_exclusive_context(cpu)) {
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count)); do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));

View file

@ -282,9 +282,19 @@ want to change some lines in the first group, for example like this::
CONFIG_PCI_DEVICES=y CONFIG_PCI_DEVICES=y
#CONFIG_TEST_DEVICES=n #CONFIG_TEST_DEVICES=n
and/or pick a subset of the devices in those device groups. Right now and/or pick a subset of the devices in those device groups. Without
there is no single place that lists all the optional devices for further modifications to ``configs/devices/``, a system emulator built
``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future, without default devices might not do much more than start an empty
machine, and even then only if ``--nodefaults`` is specified on the
command line. Starting a VM *without* ``--nodefaults`` is allowed to
fail, but should never abort. Failures in ``make check`` with
``--without-default-devices`` are considered bugs in the test code:
the tests should use ``--nodefaults``, and should be skipped
if a necessary device is not present in the build. Such failures
should not be worked around with ``select`` directives.
Right now there is no single place that lists all the optional devices
for ``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future,
we expect that ``.mak`` files will be automatically generated, so that we expect that ``.mak`` files will be automatically generated, so that
they will include all these symbols and some help text on what they do. they will include all these symbols and some help text on what they do.

View file

@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int);
#ifdef _WIN32 #ifdef _WIN32
/* as defined in sdkddkver.h */ /* as defined in sdkddkver.h */
#ifndef _WIN32_WINNT #ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */ #define _WIN32_WINNT 0x0602 /* Windows 8 API (should be >= the one from glib) */
#endif #endif
/* reduces the number of implicitly included headers */ /* reduces the number of implicitly included headers */
#ifndef WIN32_LEAN_AND_MEAN #ifndef WIN32_LEAN_AND_MEAN

View file

@ -87,7 +87,10 @@ static inline void rcu_read_lock(void)
ctr = qatomic_read(&rcu_gp_ctr); ctr = qatomic_read(&rcu_gp_ctr);
qatomic_set(&p_rcu_reader->ctr, ctr); qatomic_set(&p_rcu_reader->ctr, ctr);
/* Write p_rcu_reader->ctr before reading RCU-protected pointers. */ /*
* Read rcu_gp_ctr and write p_rcu_reader->ctr before reading
* RCU-protected pointers.
*/
smp_mb_placeholder(); smp_mb_placeholder();
} }

View file

@ -3217,6 +3217,10 @@ modinfo_files = []
block_mods = [] block_mods = []
softmmu_mods = [] softmmu_mods = []
foreach d, list : modules foreach d, list : modules
if not (d == 'block' ? have_block : have_system)
continue
endif
foreach m, module_ss : list foreach m, module_ss : list
if enable_modules and targetos != 'windows' if enable_modules and targetos != 'windows'
module_ss = module_ss.apply(config_all, strict: false) module_ss = module_ss.apply(config_all, strict: false)

View file

@ -809,7 +809,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
"pfthreshold", "avic", NULL, "v-vmsave-vmload", "pfthreshold", "avic", NULL, "v-vmsave-vmload",
"vgif", NULL, NULL, NULL, "vgif", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, "vnmi", NULL, NULL,
"svme-addr-chk", NULL, NULL, NULL, "svme-addr-chk", NULL, NULL, NULL,
}, },
.cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, .cpuid = { .eax = 0x8000000A, .reg = R_EDX, },
@ -933,15 +933,31 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, "wbnoinvd", NULL, NULL, NULL, "wbnoinvd", NULL, NULL,
"ibpb", NULL, "ibrs", "amd-stibp", "ibpb", NULL, "ibrs", "amd-stibp",
NULL, NULL, NULL, NULL, NULL, "stibp-always-on", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
"amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL,
NULL, NULL, NULL, NULL, "amd-psfd", NULL, NULL, NULL,
}, },
.cpuid = { .eax = 0x80000008, .reg = R_EBX, }, .cpuid = { .eax = 0x80000008, .reg = R_EBX, },
.tcg_features = 0, .tcg_features = 0,
.unmigratable_flags = 0, .unmigratable_flags = 0,
}, },
[FEAT_8000_0021_EAX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
"no-nested-data-bp", NULL, "lfence-always-serializing", NULL,
NULL, NULL, "null-sel-clr-base", NULL,
"auto-ibrs", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
},
.cpuid = { .eax = 0x80000021, .reg = R_EAX, },
.tcg_features = 0,
.unmigratable_flags = 0,
},
[FEAT_XSAVE] = { [FEAT_XSAVE] = {
.type = CPUID_FEATURE_WORD, .type = CPUID_FEATURE_WORD,
.feat_names = { .feat_names = {
@ -1620,6 +1636,7 @@ typedef struct X86CPUVersionDefinition {
const char *alias; const char *alias;
const char *note; const char *note;
PropValue *props; PropValue *props;
const CPUCaches *const cache_info;
} X86CPUVersionDefinition; } X86CPUVersionDefinition;
/* Base definition for a CPU model */ /* Base definition for a CPU model */
@ -1728,6 +1745,56 @@ static const CPUCaches epyc_cache_info = {
}, },
}; };
/*
 * Cache description used by the version 4 entry whose model-id is
 * "AMD EPYC-v4 Processor".
 *
 * The table is read-only data: it is declared const like the other
 * CPUCaches tables in this file, and it is consumed through the
 * pointer-to-const X86CPUVersionDefinition.cache_info field.
 *
 * For every level, size == line_size * associativity * partitions * sets.
 */
static const CPUCaches epyc_v4_cache_info = {
    /* L1 data: 32 KiB, 8-way, 64 sets */
    .l1d_cache = &(CPUCacheInfo) {
        .type = DATA_CACHE,
        .level = 1,
        .size = 32 * KiB,
        .line_size = 64,
        .associativity = 8,
        .partitions = 1,
        .sets = 64,
        .lines_per_tag = 1,
        .self_init = true,
        .no_invd_sharing = true,
    },
    /* L1 instruction: 64 KiB, 4-way, 256 sets */
    .l1i_cache = &(CPUCacheInfo) {
        .type = INSTRUCTION_CACHE,
        .level = 1,
        .size = 64 * KiB,
        .line_size = 64,
        .associativity = 4,
        .partitions = 1,
        .sets = 256,
        .lines_per_tag = 1,
        .self_init = true,
        .no_invd_sharing = true,
    },
    /* L2 unified: 512 KiB, 8-way, 1024 sets */
    .l2_cache = &(CPUCacheInfo) {
        .type = UNIFIED_CACHE,
        .level = 2,
        .size = 512 * KiB,
        .line_size = 64,
        .associativity = 8,
        .partitions = 1,
        .sets = 1024,
        .lines_per_tag = 1,
    },
    /* L3 unified: 8 MiB, 16-way, 8192 sets */
    .l3_cache = &(CPUCacheInfo) {
        .type = UNIFIED_CACHE,
        .level = 3,
        .size = 8 * MiB,
        .line_size = 64,
        .associativity = 16,
        .partitions = 1,
        .sets = 8192,
        .lines_per_tag = 1,
        .self_init = true,
        .inclusive = true,
        .complex_indexing = false,
    },
};
static const CPUCaches epyc_rome_cache_info = { static const CPUCaches epyc_rome_cache_info = {
.l1d_cache = &(CPUCacheInfo) { .l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE, .type = DATA_CACHE,
@ -1778,6 +1845,56 @@ static const CPUCaches epyc_rome_cache_info = {
}, },
}; };
/*
 * Cache description used by the version 3 entry whose model-id is
 * "AMD EPYC-Rome-v3 Processor".
 *
 * For every level, size == line_size * associativity * partitions * sets.
 */
static const CPUCaches epyc_rome_v3_cache_info = {
    /* L1 data: 32 KiB */
    .l1d_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = DATA_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L1 instruction: 32 KiB */
    .l1i_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = INSTRUCTION_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L2 unified: 512 KiB */
    .l2_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 2,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 1024,
        .associativity = 8,
        .partitions = 1,
        .size = 512 * KiB,
    },
    /* L3 unified: 16 MiB */
    .l3_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 3,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 16384,
        .associativity = 16,
        .partitions = 1,
        .size = 16 * MiB,
        /* attributes */
        .complex_indexing = false,
        .inclusive = true,
        .self_init = true,
    },
};
static const CPUCaches epyc_milan_cache_info = { static const CPUCaches epyc_milan_cache_info = {
.l1d_cache = &(CPUCacheInfo) { .l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE, .type = DATA_CACHE,
@ -1828,6 +1945,106 @@ static const CPUCaches epyc_milan_cache_info = {
}, },
}; };
/*
 * Cache description used by the version 2 entry whose model-id is
 * "AMD EPYC-Milan-v2 Processor".
 *
 * For every level, size == line_size * associativity * partitions * sets.
 */
static const CPUCaches epyc_milan_v2_cache_info = {
    /* L1 data: 32 KiB */
    .l1d_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = DATA_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L1 instruction: 32 KiB */
    .l1i_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = INSTRUCTION_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L2 unified: 512 KiB */
    .l2_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 2,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 1024,
        .associativity = 8,
        .partitions = 1,
        .size = 512 * KiB,
    },
    /* L3 unified: 32 MiB */
    .l3_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 3,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 32768,
        .associativity = 16,
        .partitions = 1,
        .size = 32 * MiB,
        /* attributes */
        .complex_indexing = false,
        .inclusive = true,
        .self_init = true,
    },
};
/*
 * Cache description used by the "EPYC-Genoa" CPU definition.
 *
 * For every level, size == line_size * associativity * partitions * sets.
 */
static const CPUCaches epyc_genoa_cache_info = {
    /* L1 data: 32 KiB */
    .l1d_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = DATA_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L1 instruction: 32 KiB */
    .l1i_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 1,
        .type = INSTRUCTION_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 64,
        .associativity = 8,
        .partitions = 1,
        .size = 32 * KiB,
        /* attributes */
        .no_invd_sharing = true,
        .self_init = true,
    },
    /* L2 unified: 1 MiB */
    .l2_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 2,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 2048,
        .associativity = 8,
        .partitions = 1,
        .size = 1 * MiB,
    },
    /* L3 unified: 32 MiB */
    .l3_cache = &(CPUCacheInfo) {
        /* identity */
        .level = 3,
        .type = UNIFIED_CACHE,
        /* geometry */
        .line_size = 64,
        .lines_per_tag = 1,
        .sets = 32768,
        .associativity = 16,
        .partitions = 1,
        .size = 32 * MiB,
        /* attributes */
        .complex_indexing = false,
        .inclusive = true,
        .self_init = true,
    },
};
/* The following VMX features are not supported by KVM and are left out in the /* The following VMX features are not supported by KVM and are left out in the
* CPU definitions: * CPU definitions:
* *
@ -4112,6 +4329,15 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ } { /* end of list */ }
} }
}, },
{
.version = 4,
.props = (PropValue[]) {
{ "model-id",
"AMD EPYC-v4 Processor" },
{ /* end of list */ }
},
.cache_info = &epyc_v4_cache_info
},
{ /* end of list */ } { /* end of list */ }
} }
}, },
@ -4231,6 +4457,15 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ } { /* end of list */ }
} }
}, },
{
.version = 3,
.props = (PropValue[]) {
{ "model-id",
"AMD EPYC-Rome-v3 Processor" },
{ /* end of list */ }
},
.cache_info = &epyc_rome_v3_cache_info
},
{ /* end of list */ } { /* end of list */ }
} }
}, },
@ -4288,6 +4523,98 @@ static const X86CPUDefinition builtin_x86_defs[] = {
.xlevel = 0x8000001E, .xlevel = 0x8000001E,
.model_id = "AMD EPYC-Milan Processor", .model_id = "AMD EPYC-Milan Processor",
.cache_info = &epyc_milan_cache_info, .cache_info = &epyc_milan_cache_info,
.versions = (X86CPUVersionDefinition[]) {
{ .version = 1 },
{
.version = 2,
.props = (PropValue[]) {
{ "model-id",
"AMD EPYC-Milan-v2 Processor" },
{ "vaes", "on" },
{ "vpclmulqdq", "on" },
{ "stibp-always-on", "on" },
{ "amd-psfd", "on" },
{ "no-nested-data-bp", "on" },
{ "lfence-always-serializing", "on" },
{ "null-sel-clr-base", "on" },
{ /* end of list */ }
},
.cache_info = &epyc_milan_v2_cache_info
},
{ /* end of list */ }
}
},
{
.name = "EPYC-Genoa",
.level = 0xd,
.vendor = CPUID_VENDOR_AMD,
.family = 25,
.model = 17,
.stepping = 0,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH |
CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE |
CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE |
CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE |
CPUID_VME | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT |
CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA |
CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ |
CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
CPUID_EXT2_SYSCALL,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE,
.features[FEAT_8000_0008_EBX] =
CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR |
CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB |
CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP |
CPUID_8000_0008_EBX_STIBP_ALWAYS_ON |
CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD,
.features[FEAT_8000_0021_EAX] =
CPUID_8000_0021_EAX_No_NESTED_DATA_BP |
CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING |
CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE |
CPUID_8000_0021_EAX_AUTO_IBRS,
.features[FEAT_7_0_EBX] =
CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS |
CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F |
CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA |
CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB |
CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
.features[FEAT_7_0_ECX] =
CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
CPUID_7_0_ECX_RDPID,
.features[FEAT_7_0_EDX] =
CPUID_7_0_EDX_FSRM,
.features[FEAT_7_1_EAX] =
CPUID_7_1_EAX_AVX512_BF16,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
.features[FEAT_6_EAX] =
CPUID_6_EAX_ARAT,
.features[FEAT_SVM] =
CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI |
CPUID_SVM_SVME_ADDR_CHK,
.xlevel = 0x80000022,
.model_id = "AMD EPYC-Genoa Processor",
.cache_info = &epyc_genoa_cache_info,
}, },
}; };
@ -5225,6 +5552,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model)
assert(vdef->version == version); assert(vdef->version == version);
} }
/*
 * Select the cache description that applies to the resolved version of
 * @model.
 *
 * The search starts from the base definition's cache_info.  The version
 * table is then walked in order, and every entry up to and including the
 * resolved version may replace the current choice, so the most recent
 * override at or before that version wins.
 *
 * @cpu:   not used by this function.
 * @model: CPU model to inspect.  May be NULL: x86_cpu_realizefn() passes
 *         xcc->model here before testing it for NULL, so a NULL model must
 *         be answered with NULL rather than dereferenced.
 *
 * Returns the selected cache_info, or NULL when @model is NULL or neither
 * the base definition nor any applicable version provides one.
 */
static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu,
                                                       X86CPUModel *model)
{
    const X86CPUVersionDefinition *vdef;
    const CPUCaches *cache_info;
    X86CPUVersion version;

    if (!model) {
        return NULL;
    }

    version = x86_cpu_model_resolve_version(model);
    cache_info = model->cpudef->cache_info;

    /* Legacy version: only the base definition's cache_info applies. */
    if (version == CPU_VERSION_LEGACY) {
        return cache_info;
    }

    for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) {
        /* A later version's table supersedes the earlier ones. */
        if (vdef->cache_info) {
            cache_info = vdef->cache_info;
        }

        if (vdef->version == version) {
            break;
        }
    }

    /* The resolved version must exist in the version table. */
    assert(vdef->version == version);
    return cache_info;
}
/* /*
* Load data from X86CPUDefinition into a X86CPU object. * Load data from X86CPUDefinition into a X86CPU object.
* Only for builtin_x86_defs models initialized with x86_register_cpudef_types. * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
@ -5257,7 +5609,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model)
} }
/* legacy-cache defaults to 'off' if CPU model provides cache info */ /* legacy-cache defaults to 'off' if CPU model provides cache info */
cpu->legacy_cache = !def->cache_info; cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model);
env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR;
@ -6024,6 +6376,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
} }
break; break;
case 0x80000021:
*eax = env->features[FEAT_8000_0021_EAX];
*ebx = *ecx = *edx = 0;
break;
default: default:
/* reserved values: zero */ /* reserved values: zero */
*eax = 0; *eax = 0;
@ -6453,6 +6809,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F);
} }
if (env->features[FEAT_8000_0021_EAX]) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021);
}
/* SGX requires CPUID[0x12] for EPC enumeration */ /* SGX requires CPUID[0x12] for EPC enumeration */
if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12);
@ -6736,14 +7096,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
/* Cache information initialization */ /* Cache information initialization */
if (!cpu->legacy_cache) { if (!cpu->legacy_cache) {
if (!xcc->model || !xcc->model->cpudef->cache_info) { const CPUCaches *cache_info =
x86_cpu_get_versioned_cache_info(cpu, xcc->model);
if (!xcc->model || !cache_info) {
g_autofree char *name = x86_cpu_class_get_model_name(xcc); g_autofree char *name = x86_cpu_class_get_model_name(xcc);
error_setg(errp, error_setg(errp,
"CPU model '%s' doesn't support legacy-cache=off", name); "CPU model '%s' doesn't support legacy-cache=off", name);
return; return;
} }
env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd =
*xcc->model->cpudef->cache_info; *cache_info;
} else { } else {
/* Build legacy cache information */ /* Build legacy cache information */
env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache;

View file

@ -600,6 +600,7 @@ typedef enum FeatureWord {
FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */
FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */
FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */
FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */
@ -773,6 +774,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_SVM_AVIC (1U << 13) #define CPUID_SVM_AVIC (1U << 13)
#define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15)
#define CPUID_SVM_VGIF (1U << 16) #define CPUID_SVM_VGIF (1U << 16)
#define CPUID_SVM_VNMI (1U << 25)
#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) #define CPUID_SVM_SVME_ADDR_CHK (1U << 28)
/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
@ -946,8 +948,21 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_8000_0008_EBX_IBRS (1U << 14) #define CPUID_8000_0008_EBX_IBRS (1U << 14)
/* Single Thread Indirect Branch Predictors */ /* Single Thread Indirect Branch Predictors */
#define CPUID_8000_0008_EBX_STIBP (1U << 15) #define CPUID_8000_0008_EBX_STIBP (1U << 15)
/* STIBP mode has enhanced performance and may be left always on */
#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17)
/* Speculative Store Bypass Disable */ /* Speculative Store Bypass Disable */
#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24)
/* Predictive Store Forwarding Disable */
#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28)
/* Processor ignores nested data breakpoints */
#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0)
/* LFENCE is always serializing */
#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2)
/* Null Selector Clears Base */
#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6)
/* Automatic IBRS */
#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8)
#define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEOPT (1U << 0)
#define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XSAVEC (1U << 1)

View file

@ -107,8 +107,7 @@ static void test_lifecycle(void)
/* aio_co_schedule test. */ /* aio_co_schedule test. */
static Coroutine *to_schedule[NUM_CONTEXTS]; static Coroutine *to_schedule[NUM_CONTEXTS];
static bool stop[NUM_CONTEXTS];
static bool now_stopping;
static int count_retry; static int count_retry;
static int count_here; static int count_here;
@ -136,6 +135,7 @@ static bool schedule_next(int n)
static void finish_cb(void *opaque) static void finish_cb(void *opaque)
{ {
stop[id] = true;
schedule_next(id); schedule_next(id);
} }
@ -143,13 +143,19 @@ static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
{ {
g_assert(to_schedule[id] == NULL); g_assert(to_schedule[id] == NULL);
while (!qatomic_mb_read(&now_stopping)) { /*
* The next iteration will set to_schedule[id] again, but once finish_cb
* is scheduled there is no guarantee that it will actually be woken up,
* so at that point it must not go to sleep.
*/
while (!stop[id]) {
int n; int n;
n = g_test_rand_int_range(0, NUM_CONTEXTS); n = g_test_rand_int_range(0, NUM_CONTEXTS);
schedule_next(n); schedule_next(n);
qatomic_mb_set(&to_schedule[id], qemu_coroutine_self()); qatomic_mb_set(&to_schedule[id], qemu_coroutine_self());
/* finish_cb can run here. */
qemu_coroutine_yield(); qemu_coroutine_yield();
g_assert(to_schedule[id] == NULL); g_assert(to_schedule[id] == NULL);
} }
@ -161,7 +167,6 @@ static void test_multi_co_schedule(int seconds)
int i; int i;
count_here = count_other = count_retry = 0; count_here = count_other = count_retry = 0;
now_stopping = false;
create_aio_contexts(); create_aio_contexts();
for (i = 0; i < NUM_CONTEXTS; i++) { for (i = 0; i < NUM_CONTEXTS; i++) {
@ -171,10 +176,10 @@ static void test_multi_co_schedule(int seconds)
g_usleep(seconds * 1000000); g_usleep(seconds * 1000000);
qatomic_mb_set(&now_stopping, true); /* Guarantee that each AioContext is woken up from its last wait. */
for (i = 0; i < NUM_CONTEXTS; i++) { for (i = 0; i < NUM_CONTEXTS; i++) {
ctx_run(i, finish_cb, NULL); ctx_run(i, finish_cb, NULL);
to_schedule[i] = NULL; g_assert(to_schedule[i] == NULL);
} }
join_aio_contexts(); join_aio_contexts();
@ -199,10 +204,11 @@ static uint32_t atomic_counter;
static uint32_t running; static uint32_t running;
static uint32_t counter; static uint32_t counter;
static CoMutex comutex; static CoMutex comutex;
static bool now_stopping;
static void coroutine_fn test_multi_co_mutex_entry(void *opaque) static void coroutine_fn test_multi_co_mutex_entry(void *opaque)
{ {
while (!qatomic_mb_read(&now_stopping)) { while (!qatomic_read(&now_stopping)) {
qemu_co_mutex_lock(&comutex); qemu_co_mutex_lock(&comutex);
counter++; counter++;
qemu_co_mutex_unlock(&comutex); qemu_co_mutex_unlock(&comutex);
@ -236,7 +242,7 @@ static void test_multi_co_mutex(int threads, int seconds)
g_usleep(seconds * 1000000); g_usleep(seconds * 1000000);
qatomic_mb_set(&now_stopping, true); qatomic_set(&now_stopping, true);
while (running > 0) { while (running > 0) {
g_usleep(100000); g_usleep(100000);
} }
@ -327,7 +333,7 @@ static void mcs_mutex_unlock(void)
static void test_multi_fair_mutex_entry(void *opaque) static void test_multi_fair_mutex_entry(void *opaque)
{ {
while (!qatomic_mb_read(&now_stopping)) { while (!qatomic_read(&now_stopping)) {
mcs_mutex_lock(); mcs_mutex_lock();
counter++; counter++;
mcs_mutex_unlock(); mcs_mutex_unlock();
@ -355,7 +361,7 @@ static void test_multi_fair_mutex(int threads, int seconds)
g_usleep(seconds * 1000000); g_usleep(seconds * 1000000);
qatomic_mb_set(&now_stopping, true); qatomic_set(&now_stopping, true);
while (running > 0) { while (running > 0) {
g_usleep(100000); g_usleep(100000);
} }
@ -383,7 +389,7 @@ static QemuMutex mutex;
static void test_multi_mutex_entry(void *opaque) static void test_multi_mutex_entry(void *opaque)
{ {
while (!qatomic_mb_read(&now_stopping)) { while (!qatomic_read(&now_stopping)) {
qemu_mutex_lock(&mutex); qemu_mutex_lock(&mutex);
counter++; counter++;
qemu_mutex_unlock(&mutex); qemu_mutex_unlock(&mutex);
@ -411,7 +417,7 @@ static void test_multi_mutex(int threads, int seconds)
g_usleep(seconds * 1000000); g_usleep(seconds * 1000000);
qatomic_mb_set(&now_stopping, true); qatomic_set(&now_stopping, true);
while (running > 0) { while (running > 0) {
g_usleep(100000); g_usleep(100000);
} }

View file

@ -83,12 +83,6 @@ static void wait_for_readers(void)
*/ */
qemu_event_reset(&rcu_gp_event); qemu_event_reset(&rcu_gp_event);
/* Instead of using qatomic_mb_set for index->waiting, and
* qatomic_mb_read for index->ctr, memory barriers are placed
* manually since writes to different threads are independent.
* qemu_event_reset has acquire semantics, so no memory barrier
* is needed here.
*/
QLIST_FOREACH(index, &registry, node) { QLIST_FOREACH(index, &registry, node) {
qatomic_set(&index->waiting, true); qatomic_set(&index->waiting, true);
} }
@ -96,6 +90,10 @@ static void wait_for_readers(void)
/* Here, order the stores to index->waiting before the loads of /* Here, order the stores to index->waiting before the loads of
* index->ctr. Pairs with smp_mb_placeholder() in rcu_read_unlock(), * index->ctr. Pairs with smp_mb_placeholder() in rcu_read_unlock(),
* ensuring that the loads of index->ctr are sequentially consistent. * ensuring that the loads of index->ctr are sequentially consistent.
*
* If this is the last iteration, this barrier also prevents
* frees from seeping upwards, and orders the two wait phases
* on architectures with 32-bit longs; see synchronize_rcu().
*/ */
smp_mb_global(); smp_mb_global();
@ -104,7 +102,7 @@ static void wait_for_readers(void)
QLIST_REMOVE(index, node); QLIST_REMOVE(index, node);
QLIST_INSERT_HEAD(&qsreaders, index, node); QLIST_INSERT_HEAD(&qsreaders, index, node);
/* No need for mb_set here, worst of all we /* No need for memory barriers here, worst of all we
* get some extra futex wakeups. * get some extra futex wakeups.
*/ */
qatomic_set(&index->waiting, false); qatomic_set(&index->waiting, false);
@ -149,26 +147,26 @@ void synchronize_rcu(void)
/* Write RCU-protected pointers before reading p_rcu_reader->ctr. /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
* Pairs with smp_mb_placeholder() in rcu_read_lock(). * Pairs with smp_mb_placeholder() in rcu_read_lock().
*
* Also orders write to RCU-protected pointers before
* write to rcu_gp_ctr.
*/ */
smp_mb_global(); smp_mb_global();
QEMU_LOCK_GUARD(&rcu_registry_lock); QEMU_LOCK_GUARD(&rcu_registry_lock);
if (!QLIST_EMPTY(&registry)) { if (!QLIST_EMPTY(&registry)) {
/* In either case, the qatomic_mb_set below blocks stores that free
* old RCU-protected pointers.
*/
if (sizeof(rcu_gp_ctr) < 8) { if (sizeof(rcu_gp_ctr) < 8) {
/* For architectures with 32-bit longs, a two-subphases algorithm /* For architectures with 32-bit longs, a two-subphases algorithm
* ensures we do not encounter overflow bugs. * ensures we do not encounter overflow bugs.
* *
* Switch parity: 0 -> 1, 1 -> 0. * Switch parity: 0 -> 1, 1 -> 0.
*/ */
qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
wait_for_readers(); wait_for_readers();
qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
} else { } else {
/* Increment current grace period. */ /* Increment current grace period. */
qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
} }
wait_for_readers(); wait_for_readers();
@ -191,8 +189,22 @@ static void enqueue(struct rcu_head *node)
struct rcu_head **old_tail; struct rcu_head **old_tail;
node->next = NULL; node->next = NULL;
/*
* Make this node the tail of the list. The node will be
* used by further enqueue operations, but it will not
* be dequeued yet...
*/
old_tail = qatomic_xchg(&tail, &node->next); old_tail = qatomic_xchg(&tail, &node->next);
qatomic_mb_set(old_tail, node);
/*
* ... until it is pointed to from another item in the list.
* In the meantime, try_dequeue() will find a NULL next pointer
* and loop.
*
* Synchronizes with qatomic_load_acquire() in try_dequeue().
*/
qatomic_store_release(old_tail, node);
} }
static struct rcu_head *try_dequeue(void) static struct rcu_head *try_dequeue(void)
@ -200,26 +212,31 @@ static struct rcu_head *try_dequeue(void)
struct rcu_head *node, *next; struct rcu_head *node, *next;
retry: retry:
/* Test for an empty list, which we do not expect. Note that for /* Head is only written by this thread, so no need for barriers. */
node = head;
/*
* If the head node has NULL in its next pointer, the value is
* wrong and we need to wait until its enqueuer finishes the update.
*/
next = qatomic_load_acquire(&node->next);
if (!next) {
return NULL;
}
/*
* Test for an empty list, which we do not expect. Note that for
* the consumer head and tail are always consistent. The head * the consumer head and tail are always consistent. The head
* is consistent because only the consumer reads/writes it. * is consistent because only the consumer reads/writes it.
* The tail, because it is the first step in the enqueuing. * The tail, because it is the first step in the enqueuing.
* It is only the next pointers that might be inconsistent. * It is only the next pointers that might be inconsistent.
*/ */
if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) { if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
abort(); abort();
} }
/* If the head node has NULL in its next pointer, the value is /*
* wrong and we need to wait until its enqueuer finishes the update. * Since we are the sole consumer, and we excluded the empty case
*/
node = head;
next = qatomic_mb_read(&head->next);
if (!next) {
return NULL;
}
/* Since we are the sole consumer, and we excluded the empty case
* above, the queue will always have at least two nodes: the * above, the queue will always have at least two nodes: the
* dummy node, and the one being removed. So we do not need to update * dummy node, and the one being removed. So we do not need to update
* the tail pointer. * the tail pointer.