mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 00:03:54 -06:00
virtio, pci, pc: fixes, features
Bugfixes all over the place. HMAT support. New flags for vhost-user-blk utility. Auto-tuning of seg max for virtio storage. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl4TaMEPHG1zdEByZWRo YXQuY29tAAoJECgfDbjSjVRpvzgH/2LyDAzCa9h93ikSJjmyUk5FUaqve38daEb3 S3JYjwKxQx7u1ydooKhvBQnBCZ2i3S+k62gfYyKB+nBv8xvjs0Eg5D1YJ5E8hciy lf5OFGWWtX2iPDjZwQwT13kiJe0o3JRGxJJ6XqTEG+1EYOp7cky/FEv4PD030b9m I2wROZ/Am+onB9YJX8c0Vv1CG+AryuJNXnvwQzTXEjj4U7bEYUyJwVZaCRyAdWQ3 uYXIZN9VwjVX6BFvy9ZAJbEsUVJvOM1/aQaDqcrLz+VlzRT7bRkKHi2G3vakrm1I r5OpgyLo84132awCncbSykKDH5o8WaxLaJBjGmuBfasMz9wPzAg= =uL1o -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging virtio, pci, pc: fixes, features Bugfixes all over the place. HMAT support. New flags for vhost-user-blk utility. Auto-tuning of seg max for virtio storage. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Mon 06 Jan 2020 17:05:05 GMT # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. 
Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: (32 commits) intel_iommu: add present bit check for pasid table entries intel_iommu: a fix to vtd_find_as_from_bus_num() virtio-net: delete also control queue when TX/RX deleted virtio: reset region cache when on queue deletion virtio-mmio: update queue size on guest write tests: add virtio-scsi and virtio-blk seg_max_adjust test virtio: make seg_max virtqueue size dependent hw: fix using 4.2 compat in 5.0 machine types for i440fx/q35 vhost-user-scsi: reset the device if supported vhost-user: add VHOST_USER_RESET_DEVICE to reset devices hw/pci/pci_host: Let pci_data_[read/write] use unsigned 'size' argument hw/pci/pci_host: Remove redundant PCI_DPRINTF() virtio-mmio: Clear v2 transport state on soft reset ACPI: add expected files for HMAT tests (acpihmat) tests/bios-tables-test: add test cases for ACPI HMAT tests/numa: Add case for QMP build HMAT hmat acpi: Build Memory Side Cache Information Structure(s) hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s) hmat acpi: Build Memory Proximity Domain Attributes Structure(s) numa: Extend CLI to provide memory side cache information ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
973d306dd6
45 changed files with 1796 additions and 133 deletions
297
hw/core/numa.c
297
hw/core/numa.c
|
@ -23,6 +23,7 @@
|
|||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/units.h"
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "sysemu/numa.h"
|
||||
#include "sysemu/sysemu.h"
|
||||
|
@ -129,6 +130,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
|
|||
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
|
||||
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the initiator is not set, set it to MAX_NODES. And if
|
||||
* HMAT is enabled and this node has no cpus, QEMU will raise error.
|
||||
*/
|
||||
numa_info[nodenr].initiator = MAX_NODES;
|
||||
if (node->has_initiator) {
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
if (node->initiator >= MAX_NODES) {
|
||||
error_report("The initiator id %" PRIu16 " expects an integer "
|
||||
"between 0 and %d", node->initiator,
|
||||
MAX_NODES - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
numa_info[nodenr].initiator = node->initiator;
|
||||
}
|
||||
numa_info[nodenr].present = true;
|
||||
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
|
||||
ms->numa_state->num_nodes++;
|
||||
|
@ -171,6 +195,253 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
|
|||
ms->numa_state->have_numa_distance = true;
|
||||
}
|
||||
|
||||
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
|
||||
Error **errp)
|
||||
{
|
||||
int i, first_bit, last_bit;
|
||||
uint64_t max_entry, temp_base, bitmap_copy;
|
||||
NodeInfo *numa_info = numa_state->nodes;
|
||||
HMAT_LB_Info *hmat_lb =
|
||||
numa_state->hmat_lb[node->hierarchy][node->data_type];
|
||||
HMAT_LB_Data lb_data = {};
|
||||
HMAT_LB_Data *lb_temp;
|
||||
|
||||
/* Error checking */
|
||||
if (node->initiator > numa_state->num_nodes) {
|
||||
error_setg(errp, "Invalid initiator=%d, it should be less than %d",
|
||||
node->initiator, numa_state->num_nodes);
|
||||
return;
|
||||
}
|
||||
if (node->target > numa_state->num_nodes) {
|
||||
error_setg(errp, "Invalid target=%d, it should be less than %d",
|
||||
node->target, numa_state->num_nodes);
|
||||
return;
|
||||
}
|
||||
if (!numa_info[node->initiator].has_cpu) {
|
||||
error_setg(errp, "Invalid initiator=%d, it isn't an "
|
||||
"initiator proximity domain", node->initiator);
|
||||
return;
|
||||
}
|
||||
if (!numa_info[node->target].present) {
|
||||
error_setg(errp, "The target=%d should point to an existing node",
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!hmat_lb) {
|
||||
hmat_lb = g_malloc0(sizeof(*hmat_lb));
|
||||
numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
|
||||
hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
|
||||
}
|
||||
hmat_lb->hierarchy = node->hierarchy;
|
||||
hmat_lb->data_type = node->data_type;
|
||||
lb_data.initiator = node->initiator;
|
||||
lb_data.target = node->target;
|
||||
|
||||
if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
|
||||
/* Input latency data */
|
||||
|
||||
if (!node->has_latency) {
|
||||
error_setg(errp, "Missing 'latency' option");
|
||||
return;
|
||||
}
|
||||
if (node->has_bandwidth) {
|
||||
error_setg(errp, "Invalid option 'bandwidth' since "
|
||||
"the data type is latency");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Detect duplicate configuration */
|
||||
for (i = 0; i < hmat_lb->list->len; i++) {
|
||||
lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
|
||||
|
||||
if (node->initiator == lb_temp->initiator &&
|
||||
node->target == lb_temp->target) {
|
||||
error_setg(errp, "Duplicate configuration of the latency for "
|
||||
"initiator=%d and target=%d", node->initiator,
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
|
||||
|
||||
if (node->latency) {
|
||||
/* Calculate the temporary base and compressed latency */
|
||||
max_entry = node->latency;
|
||||
temp_base = 1;
|
||||
while (QEMU_IS_ALIGNED(max_entry, 10)) {
|
||||
max_entry /= 10;
|
||||
temp_base *= 10;
|
||||
}
|
||||
|
||||
/* Calculate the max compressed latency */
|
||||
temp_base = MIN(hmat_lb->base, temp_base);
|
||||
max_entry = node->latency / hmat_lb->base;
|
||||
max_entry = MAX(hmat_lb->range_bitmap, max_entry);
|
||||
|
||||
/*
|
||||
* For latency hmat_lb->range_bitmap record the max compressed
|
||||
* latency which should be less than 0xFFFF (UINT16_MAX)
|
||||
*/
|
||||
if (max_entry >= UINT16_MAX) {
|
||||
error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
|
||||
"target=%d should not differ from previously entered "
|
||||
"min or max values on more than %d", node->latency,
|
||||
node->initiator, node->target, UINT16_MAX - 1);
|
||||
return;
|
||||
} else {
|
||||
hmat_lb->base = temp_base;
|
||||
hmat_lb->range_bitmap = max_entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set lb_info_provided bit 0 as 1,
|
||||
* latency information is provided
|
||||
*/
|
||||
numa_info[node->target].lb_info_provided |= BIT(0);
|
||||
}
|
||||
lb_data.data = node->latency;
|
||||
} else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
|
||||
/* Input bandwidth data */
|
||||
if (!node->has_bandwidth) {
|
||||
error_setg(errp, "Missing 'bandwidth' option");
|
||||
return;
|
||||
}
|
||||
if (node->has_latency) {
|
||||
error_setg(errp, "Invalid option 'latency' since "
|
||||
"the data type is bandwidth");
|
||||
return;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
|
||||
error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
|
||||
"target=%d should be 1MB aligned", node->bandwidth,
|
||||
node->initiator, node->target);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Detect duplicate configuration */
|
||||
for (i = 0; i < hmat_lb->list->len; i++) {
|
||||
lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
|
||||
|
||||
if (node->initiator == lb_temp->initiator &&
|
||||
node->target == lb_temp->target) {
|
||||
error_setg(errp, "Duplicate configuration of the bandwidth for "
|
||||
"initiator=%d and target=%d", node->initiator,
|
||||
node->target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
|
||||
|
||||
if (node->bandwidth) {
|
||||
/* Keep bitmap unchanged when bandwidth out of range */
|
||||
bitmap_copy = hmat_lb->range_bitmap;
|
||||
bitmap_copy |= node->bandwidth;
|
||||
first_bit = ctz64(bitmap_copy);
|
||||
temp_base = UINT64_C(1) << first_bit;
|
||||
max_entry = node->bandwidth / temp_base;
|
||||
last_bit = 64 - clz64(bitmap_copy);
|
||||
|
||||
/*
|
||||
* For bandwidth, first_bit record the base unit of bandwidth bits,
|
||||
* last_bit record the last bit of the max bandwidth. The max
|
||||
* compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
|
||||
*/
|
||||
if ((last_bit - first_bit) > UINT16_BITS ||
|
||||
max_entry >= UINT16_MAX) {
|
||||
error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
|
||||
"and target=%d should not differ from previously "
|
||||
"entered values on more than %d", node->bandwidth,
|
||||
node->initiator, node->target, UINT16_MAX - 1);
|
||||
return;
|
||||
} else {
|
||||
hmat_lb->base = temp_base;
|
||||
hmat_lb->range_bitmap = bitmap_copy;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set lb_info_provided bit 1 as 1,
|
||||
* bandwidth information is provided
|
||||
*/
|
||||
numa_info[node->target].lb_info_provided |= BIT(1);
|
||||
}
|
||||
lb_data.data = node->bandwidth;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
g_array_append_val(hmat_lb->list, lb_data);
|
||||
}
|
||||
|
||||
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
|
||||
Error **errp)
|
||||
{
|
||||
int nb_numa_nodes = ms->numa_state->num_nodes;
|
||||
NodeInfo *numa_info = ms->numa_state->nodes;
|
||||
NumaHmatCacheOptions *hmat_cache = NULL;
|
||||
|
||||
if (node->node_id >= nb_numa_nodes) {
|
||||
error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
|
||||
"than %d", node->node_id, nb_numa_nodes);
|
||||
return;
|
||||
}
|
||||
|
||||
if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
|
||||
error_setg(errp, "The latency and bandwidth information of "
|
||||
"node-id=%" PRIu32 " should be provided before memory side "
|
||||
"cache attributes", node->node_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
|
||||
error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
|
||||
"and less than or equal to %d", node->level,
|
||||
HMAT_LB_LEVELS - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
|
||||
assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
|
||||
if (ms->numa_state->hmat_cache[node->node_id][node->level]) {
|
||||
error_setg(errp, "Duplicate configuration of the side cache for "
|
||||
"node-id=%" PRIu32 " and level=%" PRIu8,
|
||||
node->node_id, node->level);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((node->level > 1) &&
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level - 1] &&
|
||||
(node->size >=
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) {
|
||||
error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
|
||||
" should be less than the size(%" PRIu64 ") of "
|
||||
"level=%u", node->size, node->level,
|
||||
ms->numa_state->hmat_cache[node->node_id]
|
||||
[node->level - 1]->size,
|
||||
node->level - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((node->level < HMAT_LB_LEVELS - 1) &&
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level + 1] &&
|
||||
(node->size <=
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) {
|
||||
error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
|
||||
" should be larger than the size(%" PRIu64 ") of "
|
||||
"level=%u", node->size, node->level,
|
||||
ms->numa_state->hmat_cache[node->node_id]
|
||||
[node->level + 1]->size,
|
||||
node->level + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
hmat_cache = g_malloc0(sizeof(*hmat_cache));
|
||||
memcpy(hmat_cache, node, sizeof(*hmat_cache));
|
||||
ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache;
|
||||
}
|
||||
|
||||
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
|
||||
{
|
||||
Error *err = NULL;
|
||||
|
@ -208,6 +479,32 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
|
|||
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
|
||||
&err);
|
||||
break;
|
||||
case NUMA_OPTIONS_TYPE_HMAT_LB:
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
|
||||
if (err) {
|
||||
goto end;
|
||||
}
|
||||
break;
|
||||
case NUMA_OPTIONS_TYPE_HMAT_CACHE:
|
||||
if (!ms->numa_state->hmat_enabled) {
|
||||
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
|
||||
"(HMAT) is disabled, enable it with -machine hmat=on "
|
||||
"before using any of hmat specific options");
|
||||
return;
|
||||
}
|
||||
|
||||
parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
|
||||
if (err) {
|
||||
goto end;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue