From 5ee8534731645551a3883210b9a8d5741bb79df2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Apr 2017 16:03:49 +0100 Subject: [PATCH 01/13] hw/acpi-defs: replace leading X with x_ in FADT field names At the request of Michael, replace the leading capital X in the FADT field name Xfacs and Xdsdt with lower case x + underscore. Cc: Michael S. Tsirkin Signed-off-by: Ard Biesheuvel Reviewed-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 4 ++-- include/hw/acpi/acpi-defs.h | 4 ++-- tests/bios-tables-test.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1d8c645ed3..c75f73ebb1 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -341,7 +341,7 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt)); unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data; unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; - unsigned xdsdt_entry_offset = (char *)&fadt->Xdsdt - table_data->data; + unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; /* FACS address to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, @@ -354,7 +354,7 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->Xdsdt), + ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); build_header(linker, table_data, diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 293ee4524b..93e1ebad62 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -144,8 +144,8 @@ typedef struct AcpiTableHeader 
AcpiTableHeader; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ \ uint16_t arm_boot_flags; \ uint8_t minor_revision; /* FADT Minor Revision (ACPI 5.1) */ \ - uint64_t Xfacs; /* 64-bit physical address of FACS */ \ - uint64_t Xdsdt; /* 64-bit physical address of DSDT */ \ + uint64_t x_facs; /* 64-bit physical address of FACS */ \ + uint64_t x_dsdt; /* 64-bit physical address of DSDT */ \ /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ \ struct AcpiGenericAddress xpm1a_event_block; \ /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ \ diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 9c96a67053..bdef3b9cee 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -175,8 +175,8 @@ static void test_acpi_fadt_table(test_data *data) ACPI_READ_FIELD(fadt_table->reset_value, addr); ACPI_READ_FIELD(fadt_table->arm_boot_flags, addr); ACPI_READ_FIELD(fadt_table->minor_revision, addr); - ACPI_READ_FIELD(fadt_table->Xfacs, addr); - ACPI_READ_FIELD(fadt_table->Xdsdt, addr); + ACPI_READ_FIELD(fadt_table->x_facs, addr); + ACPI_READ_FIELD(fadt_table->x_dsdt, addr); ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_event_block, addr); ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1b_event_block, addr); ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_control_block, addr); From cb51ac2ffe3649eb8f5c65dccc2012f0ba2c6b12 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Apr 2017 16:03:50 +0100 Subject: [PATCH 02/13] hw/arm/virt: generate 64-bit addressable ACPI objects Our current ACPI table generation code limits the placement of ACPI tables to 32-bit addressable memory, in order to be able to emit the root pointer (RSDP) and root table (RSDT) using table types from the ACPI 1.0 days. Since ARM was not supported by ACPI before version 5.0, it makes sense to lift this restriction. 
This is not crucial for mach-virt, which is guaranteed to have some memory available below the 4 GB mark, but it is a nice to have for QEMU machines that do not have any 32-bit addressable memory, which is not uncommon for real world 64-bit ARM systems. Since we already emit a version of the RSDP root pointer that has a secondary 64-bit wide address field for the 64-bit root table (XSDT), all we need to do is replace the RSDT generation with the generation of an XSDT table, and use a different slot in the FADT table to refer to the DSDT. Signed-off-by: Ard Biesheuvel Reviewed-by: Andrew Jones Acked-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Peter Maydell --- hw/acpi/aml-build.c | 27 +++++++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 26 +++++++++++++------------- include/hw/acpi/acpi-defs.h | 11 +++++++++++ include/hw/acpi/aml-build.h | 3 +++ 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index c6f2032dec..4ddfb68b24 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1599,6 +1599,33 @@ build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, (void *)rsdt, "RSDT", rsdt_len, 1, oem_id, oem_table_id); } +/* Build xsdt table */ +void +build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, + const char *oem_id, const char *oem_table_id) +{ + int i; + unsigned xsdt_entries_offset; + AcpiXsdtDescriptorRev2 *xsdt; + const unsigned table_data_len = (sizeof(uint64_t) * table_offsets->len); + const unsigned xsdt_entry_size = sizeof(xsdt->table_offset_entry[0]); + const size_t xsdt_len = sizeof(*xsdt) + table_data_len; + + xsdt = acpi_data_push(table_data, xsdt_len); + xsdt_entries_offset = (char *)xsdt->table_offset_entry - table_data->data; + for (i = 0; i < table_offsets->len; ++i) { + uint64_t ref_tbl_offset = g_array_index(table_offsets, uint32_t, i); + uint64_t xsdt_entry_offset = xsdt_entries_offset + 
xsdt_entry_size * i; + + /* xsdt->table_offset_entry to be filled by Guest linker */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, xsdt_entry_offset, xsdt_entry_size, + ACPI_BUILD_TABLE_FILE, ref_tbl_offset); + } + build_header(linker, table_data, + (void *)xsdt, "XSDT", xsdt_len, 1, oem_id, oem_table_id); +} + void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags) { diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 0835e59bb2..6e5f3399f2 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -364,12 +364,12 @@ static void acpi_dsdt_add_power_button(Aml *scope) /* RSDP */ static GArray * -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) +build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) { AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address); - unsigned rsdt_pa_offset = - (char *)&rsdp->rsdt_physical_address - rsdp_table->data; + unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address); + unsigned xsdt_pa_offset = + (char *)&rsdp->xsdt_physical_address - rsdp_table->data; bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16, true /* fseg memory */); @@ -381,8 +381,8 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) /* Address to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_RSDP_FILE, rsdt_pa_offset, rsdt_pa_size, - ACPI_BUILD_TABLE_FILE, rsdt_tbl_offset); + ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size, + ACPI_BUILD_TABLE_FILE, xsdt_tbl_offset); /* Checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, @@ -659,7 +659,7 @@ static void build_fadt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms, unsigned dsdt_tbl_offset) { AcpiFadtDescriptorRev5_1 *fadt = 
acpi_data_push(table_data, sizeof(*fadt)); - unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; + unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; uint16_t bootflags; switch (vms->psci_conduit) { @@ -685,7 +685,7 @@ static void build_fadt(GArray *table_data, BIOSLinker *linker, /* DSDT address to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), + ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); build_header(linker, table_data, @@ -748,7 +748,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) { VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); GArray *table_offsets; - unsigned dsdt, rsdt; + unsigned dsdt, xsdt; GArray *tables_blob = tables->table_data; table_offsets = g_array_new(false, true /* clear */, @@ -788,12 +788,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_iort(tables_blob, tables->linker); } - /* RSDT is pointed to by RSDP */ - rsdt = tables_blob->len; - build_rsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, rsdt); + build_rsdp(tables->rsdp, tables->linker, xsdt); /* Cleanup memory that's no longer used. 
*/ g_array_free(table_offsets, true); diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 93e1ebad62..91bae7fee0 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -232,6 +232,17 @@ struct AcpiRsdtDescriptorRev1 } QEMU_PACKED; typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; +/* + * ACPI 2.0 eXtended System Description Table (XSDT) + */ +struct AcpiXsdtDescriptorRev2 +{ + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint64_t table_offset_entry[0]; /* Array of pointers to other */ + /* ACPI tables */ +} QEMU_PACKED; +typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; + /* * ACPI 1.0 Firmware ACPI Control Structure (FACS) */ diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 00c21f160c..eb07c2d43c 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -381,6 +381,9 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre); void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id); +void +build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, + const char *oem_id, const char *oem_table_id); int build_append_named_dword(GArray *array, const char *name_format, ...) From 60cd11024f41cc73175e651a2dfe09a3cade56bb Mon Sep 17 00:00:00 2001 From: Zhiyong Yang Date: Fri, 5 May 2017 00:25:36 +0800 Subject: [PATCH 03/13] hw/virtio: fix vhost user fails to startup when MQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qemu2.7~2.9 and vhost user for dpdk 17.02 release work together to cause failures of new connection when negotiating to set MQ. (one queue pair works well). Because there exist some bugs in qemu code when introducing VHOST_USER_PROTOCOL_F_REPLY_ACK to qemu. 
When vhost_user_set_mem_table is invoked to deal with the vhost message VHOST_USER_SET_MEM_TABLE for the second time, qemu indeed doesn't send the message (The message needs to be sent only once) but still will be waiting for dpdk's reply ack, then, qemu is always freezing, while DPDK is always waiting for next vhost message from qemu. The patch aims to fix the bug, MQ can work well. The same bug is found in function vhost_user_net_set_mtu, it is fixed at the same time. DPDK related patch is as follows: http://www.dpdk.org/dev/patchwork/patch/23955/ Signed-off-by: Zhiyong Yang Cc: qemu-stable@nongnu.org Fixes: ca525ce5618b ("vhost-user: Introduce a new protocol feature REPLY_ACK.") Reviewed-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Jens Freimann Reviewed-by: Marc-André Lureau --- hw/virtio/vhost-user.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 9334a8ae22..32a95a8c69 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -163,22 +163,26 @@ fail: } static int process_message_reply(struct vhost_dev *dev, - VhostUserRequest request) + VhostUserMsg msg) { - VhostUserMsg msg; + VhostUserMsg msg_reply; - if (vhost_user_read(dev, &msg) < 0) { + if ((msg.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + return 0; + } + + if (vhost_user_read(dev, &msg_reply) < 0) { return -1; } - if (msg.request != request) { + if (msg_reply.request != msg.request) { error_report("Received unexpected msg type." "Expected %d received %d", - request, msg.request); + msg.request, msg_reply.request); return -1; } - return msg.payload.u64 ? -1 : 0; + return msg_reply.payload.u64 ? -1 : 0; } static bool vhost_user_one_time_request(VhostUserRequest request) @@ -208,6 +212,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, * request, we just ignore it. 
*/ if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { + msg->flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } @@ -320,7 +325,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, } if (reply_supported) { - return process_message_reply(dev, msg.request); + return process_message_reply(dev, msg); } return 0; @@ -712,7 +717,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) /* If reply_ack supported, slave has to ack specified MTU is valid */ if (reply_supported) { - return process_message_reply(dev, msg.request); + return process_message_reply(dev, msg); } return 0; From 640601c7cb1b6b41d3e1a435b986266c2b71e9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 3 May 2017 20:54:12 +0400 Subject: [PATCH 04/13] libvhost-user: fix crash when rings aren't ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling libvhost-user functions like vu_queue_get_avail_bytes() when the queue doesn't yet have addresses will result in crashes like the following: Program received signal SIGSEGV, Segmentation fault. 0x000055c414112ce4 in vring_avail_idx (vq=0x55c41582fd68, vq=0x55c41582fd68) at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:940 940 vq->shadow_avail_idx = vq->vring.avail->idx; (gdb) p vq $1 = (VuVirtq *) 0x55c41582fd68 (gdb) p vq->vring $2 = {num = 0, desc = 0x0, avail = 0x0, used = 0x0, log_guest_addr = 0, flags = 0} at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:940 No locals. at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:960 num_heads = out_bytes=out_bytes@entry=0x7fffd035d7c4, max_in_bytes=max_in_bytes@entry=0, max_out_bytes=max_out_bytes@entry=0) at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:1034 Add pre-condition checks on vring.avail before accessing it. Fix documentation and return type of vu_queue_empty() while at it. 
Signed-off-by: Marc-André Lureau Tested-by: Dr. David Alan Gilbert Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/libvhost-user/libvhost-user.c | 26 ++++++++++++++++++++------ contrib/libvhost-user/libvhost-user.h | 6 +++--- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 61e1657e41..9efb9dac0e 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -1031,6 +1031,11 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { + goto done; + } + while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) { unsigned int max, num_bufs, indirect = 0; struct vring_desc *desc; @@ -1121,11 +1126,16 @@ vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. 
*/ -int +bool vu_queue_empty(VuDev *dev, VuVirtq *vq) { + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { + return true; + } + if (vq->shadow_avail_idx != vq->last_avail_idx) { - return 0; + return false; } return vring_avail_idx(vq) == vq->last_avail_idx; @@ -1174,7 +1184,8 @@ vring_notify(VuDev *dev, VuVirtq *vq) void vu_queue_notify(VuDev *dev, VuVirtq *vq) { - if (unlikely(dev->broken)) { + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { return; } @@ -1291,7 +1302,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) struct vring_desc *desc; int rc; - if (unlikely(dev->broken)) { + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { return NULL; } @@ -1445,7 +1457,8 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq, { struct vring_used_elem uelem; - if (unlikely(dev->broken)) { + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { return; } @@ -1474,7 +1487,8 @@ vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count) { uint16_t old, new; - if (unlikely(dev->broken)) { + if (unlikely(dev->broken) || + unlikely(!vq->vring.avail)) { return; } diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 156b50e989..af02a31ebe 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -327,13 +327,13 @@ void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable); bool vu_queue_enabled(VuDev *dev, VuVirtq *vq); /** - * vu_queue_enabled: + * vu_queue_empty: * @dev: a VuDev context * @vq: a VuVirtq queue * - * Returns: whether the queue is empty. + * Returns: true if the queue is empty or not ready. 
*/ -int vu_queue_empty(VuDev *dev, VuVirtq *vq); +bool vu_queue_empty(VuDev *dev, VuVirtq *vq); /** * vu_queue_notify: From 98e753a6e51b255d474c4db5e7af8b01633b6a4c Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 25 Apr 2017 17:37:50 +0200 Subject: [PATCH 05/13] pc/fwcfg: unbreak migration from qemu-2.5 and qemu-2.6 during firmware boot Since 2.7 commit (b2a575a Add optionrom compatible with fw_cfg DMA version) regressed migration during firmware execution time by abusing fwcfg.dma_enabled property to decide loading dma version of option rom AND by mistake disabling DMA for 2.6 and earlier globally instead of only for option rom. So 2.6 machine type guest is broken when it already runs firmware in DMA mode but migrated to qemu-2.7(pc-2.6) at that time; a) qemu-2.6:pc2.6 (fwcfg.dma=on,firmware=dma,oprom=ioport) b) qemu-2.7:pc2.6 (fwcfg.dma=off,firmware=ioport,oprom=ioport) to: a b from a OK FAIL b OK OK So we currently have broken forward migration from qemu-2.6 to qemu-2.[789] that however could be fixed for 2.10 by re-enabling DMA for 2.[56] machine types and allowing dma capable option rom only since 2.7. As a result qemu should end up with: c) qemu-2.10:pc2.6 (fwcfg.dma=on,firmware=dma,oprom=ioport) to: a b c from a OK FAIL OK b OK OK OK c OK FAIL OK where forward migration from qemu-2.6 to qemu-2.10 should work again leaving only qemu-2.[789]:pc-2.6 broken. Reported-by: Eduardo Habkost Analyzed-by: Laszlo Ersek Signed-off-by: Igor Mammedov Reviewed-by: Laszlo Ersek Reviewed-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/pc.c | 9 ++++----- hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + include/hw/i386/pc.h | 7 +++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f3b372a18f..8063241140 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1047,12 +1047,10 @@ static void load_linux(PCMachineState *pcms, fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); - if (fw_cfg_dma_enabled(fw_cfg)) { + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "linuxboot.bin"; + if (pcmc->linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { option_rom[nb_option_roms].name = "linuxboot_dma.bin"; - option_rom[nb_option_roms].bootindex = 0; - } else { - option_rom[nb_option_roms].name = "linuxboot.bin"; - option_rom[nb_option_roms].bootindex = 0; } nb_option_roms++; } @@ -2321,6 +2319,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) * to be used at the moment, 32K should be enough for a while. 
*/ pcmc->acpi_data_size = 0x20000 + 0x8000; pcmc->save_tsc_khz = true; + pcmc->linuxboot_dma_enabled = true; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 9f102aa388..a11190be46 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -474,6 +474,7 @@ static void pc_i440fx_2_6_machine_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; + pcmc->linuxboot_dma_enabled = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index dd792a8547..0a61a2070c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -335,6 +335,7 @@ static void pc_q35_2_6_machine_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; + pcmc->linuxboot_dma_enabled = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 416aaa56ea..d0183c4890 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -151,6 +151,9 @@ struct PCMachineClass { bool save_tsc_khz; /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; + + /* use DMA capable linuxboot option rom */ + bool linuxboot_dma_enabled; }; #define TYPE_PC_MACHINE "generic-pc-machine" @@ -438,10 +441,6 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_6 \ HW_COMPAT_2_6 \ {\ - .driver = "fw_cfg_io",\ - .property = "dma_enabled",\ - .value = "off",\ - },{\ .driver = TYPE_X86_CPU,\ .property = "cpuid-0xb",\ .value = "off",\ From 465238d9f873a6251223db1669aa4766822a8783 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 25 Apr 2017 17:49:13 +0800 Subject: [PATCH 06/13] pc: add 2.10 machine type CC: "Michael S. 
Tsirkin" CC: Paolo Bonzini CC: Richard Henderson CC: Eduardo Habkost Signed-off-by: Peter Xu Reviewed-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 15 ++++++++++++--- hw/i386/pc_q35.c | 13 +++++++++++-- include/hw/i386/pc.h | 3 +++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a11190be46..8f3d85ca58 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -437,21 +437,30 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_9_machine_options(MachineClass *m) +static void pc_i440fx_2_10_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10", NULL, + pc_i440fx_2_10_machine_options); + +static void pc_i440fx_2_9_machine_options(MachineClass *m) +{ + pc_i440fx_2_10_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_9); +} + DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL, pc_i440fx_2_9_machine_options); static void pc_i440fx_2_8_machine_options(MachineClass *m) { pc_i440fx_2_9_machine_options(m); - m->is_default = 0; - m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_8); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 0a61a2070c..cf9a788ec7 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -301,19 +301,28 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_2_9_machine_options(MachineClass *m) +static void pc_q35_2_10_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL, + pc_q35_2_10_machine_options); + +static void pc_q35_2_9_machine_options(MachineClass *m) +{ + pc_q35_2_10_machine_options(m); + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_9); +} + DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL, 
pc_q35_2_9_machine_options); static void pc_q35_2_8_machine_options(MachineClass *m) { pc_q35_2_9_machine_options(m); - m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_8); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d0183c4890..e447f5d8f4 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -382,6 +382,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_9 \ + HW_COMPAT_2_9 \ + #define PC_COMPAT_2_8 \ HW_COMPAT_2_8 \ {\ From ef0e8fc768a561dd13a86420b3268f6f3d5d0621 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 8 May 2017 17:08:12 -0300 Subject: [PATCH 07/13] iommu: Don't crash if machine is not PC_MACHINE Currently it's possible to crash QEMU using "-device *-iommu" and "-machine none": $ qemu-system-x86_64 -machine none -device amd-iommu qemu/hw/i386/amd_iommu.c:1140:amdvi_realize: Object 0x55627dafbc90 is not an instance of type generic-pc-machine Aborted (core dumped) $ qemu-system-x86_64 -machine none -device intel-iommu qemu/hw/i386/intel_iommu.c:2972:vtd_realize: Object 0x56292ec0bc90 is not an instance of type generic-pc-machine Aborted (core dumped) Fix amd-iommu and intel-iommu to ensure the current machine is really a TYPE_PC_MACHINE instance at their realize methods. Resulting error messages: $ qemu-system-x86_64 -machine none -device amd-iommu qemu-system-x86_64: -device amd-iommu: Machine-type 'none' not supported by amd-iommu $ qemu-system-x86_64 -machine none -device intel-iommu qemu-system-x86_64: -device intel-iommu: Machine-type 'none' not supported by intel-iommu Signed-off-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/amd_iommu.c | 15 ++++++++++++++- hw/i386/intel_iommu.c | 14 ++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index f86a40aa30..516ebae952 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -21,6 +21,7 @@ */ #include "qemu/osdep.h" #include "hw/i386/amd_iommu.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -1137,7 +1138,19 @@ static void amdvi_realize(DeviceState *dev, Error **err) int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); + PCIBus *bus; + + if (!pcms) { + error_setg(err, "Machine-type '%s' not supported by amd-iommu", + mc->name); + return; + } + + bus = pcms->bus; s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, amdvi_uint64_equal, g_free, g_free); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 02f047c8e3..a12b1761f5 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2969,11 +2969,21 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) static void vtd_realize(DeviceState *dev, Error **errp) { - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); - PCIBus *bus = pcms->bus; + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); + PCIBus *bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); + if (!pcms) { + error_setg(errp, "Machine-type '%s' not supported by intel-iommu", + mc->name); + return; + } + + bus = pcms->bus; VTD_DPRINTF(GENERAL, ""); x86_iommu->type = TYPE_INTEL; From 
153eba4726dfa1bdfc31d1fe973b2a61b9035492 Mon Sep 17 00:00:00 2001 From: Bruce Rogers Date: Thu, 27 Apr 2017 13:59:08 -0600 Subject: [PATCH 08/13] ACPI: don't call acpi_pcihp_device_plug_cb on xen Commit f0c9d64a exposed the issue that with a xenfv machine using pci passthrough, acpi pci hotplug code was being executed by mistake. Guard calls to acpi_pcihp_device_plug_cb (and corresponding acpi_pcihp_device_unplug_cb) with a check for xen_enabled(). Without this check I am seeing an error that the bus doesn't have the acpi-pcihp-bsel property set. Signed-off-by: Bruce Rogers Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/piix4.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index a553a7e110..c409374ab8 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -385,7 +385,10 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, dev, errp); } } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp); + if (!xen_enabled()) { + acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, + errp); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { if (s->cpu_hotplug_legacy) { legacy_acpi_cpu_plug_cb(hotplug_dev, &s->gpe_cpu, dev, errp); @@ -408,8 +411,10 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, - errp); + if (!xen_enabled()) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, + errp); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !s->cpu_hotplug_legacy) { acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); From 8b12e48950a3d59188489b2ff6c5ad9cc09e9866 Mon 
Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 May 2017 21:10:05 +0300 Subject: [PATCH 09/13] acpi-defs: clean up open brace usage patchew has been saying: ERROR: open brace '{' following struct go on the same line Fix up acpi-defs.h to follow this rule. Signed-off-by: Michael S. Tsirkin --- include/hw/acpi/acpi-defs.h | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 91bae7fee0..72be675dd6 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -81,8 +81,8 @@ typedef struct AcpiRsdpDescriptor AcpiRsdpDescriptor; uint32_t asl_compiler_revision; /* ASL compiler revision number */ -struct AcpiTableHeader /* ACPI common table header */ -{ +/* ACPI common table header */ +struct AcpiTableHeader { ACPI_TABLE_HEADER_DEF } QEMU_PACKED; typedef struct AcpiTableHeader AcpiTableHeader; @@ -224,8 +224,7 @@ typedef struct AcpiSerialPortConsoleRedirection /* * ACPI 1.0 Root System Description Table (RSDT) */ -struct AcpiRsdtDescriptorRev1 -{ +struct AcpiRsdtDescriptorRev1 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ uint32_t table_offset_entry[0]; /* Array of pointers to other */ /* ACPI tables */ @@ -235,8 +234,7 @@ typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; /* * ACPI 2.0 eXtended System Description Table (XSDT) */ -struct AcpiXsdtDescriptorRev2 -{ +struct AcpiXsdtDescriptorRev2 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ uint64_t table_offset_entry[0]; /* Array of pointers to other */ /* ACPI tables */ @@ -246,8 +244,7 @@ typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; /* * ACPI 1.0 Firmware ACPI Control Structure (FACS) */ -struct AcpiFacsDescriptorRev1 -{ +struct AcpiFacsDescriptorRev1 { uint32_t signature; /* ACPI Signature */ uint32_t length; /* Length of structure, in bytes */ uint32_t hardware_signature; /* Hardware configuration signature */ @@ -273,8 +270,7 @@ 
typedef struct AcpiFacsDescriptorRev1 AcpiFacsDescriptorRev1; /* Master MADT */ -struct AcpiMultipleApicTable -{ +struct AcpiMultipleApicTable { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ uint32_t local_apic_address; /* Physical address of local APIC */ uint32_t flags; @@ -310,8 +306,7 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable; /* Sub-structures for MADT */ -struct AcpiMadtProcessorApic -{ +struct AcpiMadtProcessorApic { ACPI_SUB_HEADER_DEF uint8_t processor_id; /* ACPI processor id */ uint8_t local_apic_id; /* Processor's local APIC id */ @@ -319,8 +314,7 @@ struct AcpiMadtProcessorApic } QEMU_PACKED; typedef struct AcpiMadtProcessorApic AcpiMadtProcessorApic; -struct AcpiMadtIoApic -{ +struct AcpiMadtIoApic { ACPI_SUB_HEADER_DEF uint8_t io_apic_id; /* I/O APIC ID */ uint8_t reserved; /* Reserved - must be zero */ @@ -473,8 +467,7 @@ typedef struct Acpi20Hpet Acpi20Hpet; * SRAT (NUMA topology description) table */ -struct AcpiSystemResourceAffinityTable -{ +struct AcpiSystemResourceAffinityTable { ACPI_TABLE_HEADER_DEF uint32_t reserved1; uint32_t reserved2[2]; @@ -486,8 +479,7 @@ typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable; #define ACPI_SRAT_PROCESSOR_x2APIC 2 #define ACPI_SRAT_PROCESSOR_GICC 3 -struct AcpiSratProcessorAffinity -{ +struct AcpiSratProcessorAffinity { ACPI_SUB_HEADER_DEF uint8_t proximity_lo; uint8_t local_apic_id; @@ -509,8 +501,7 @@ struct AcpiSratProcessorX2ApicAffinity { } QEMU_PACKED; typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity; -struct AcpiSratMemoryAffinity -{ +struct AcpiSratMemoryAffinity { ACPI_SUB_HEADER_DEF uint32_t proximity; uint16_t reserved1; @@ -522,8 +513,7 @@ struct AcpiSratMemoryAffinity } QEMU_PACKED; typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity; -struct AcpiSratProcessorGiccAffinity -{ +struct AcpiSratProcessorGiccAffinity { ACPI_SUB_HEADER_DEF uint32_t proximity; uint32_t acpi_processor_uid; From 
2fa356629ed2ce9c714f11c89c1a074b8bad3fcb Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 11 May 2017 13:25:29 +0300 Subject: [PATCH 10/13] Revert "hw/pci: disable pci-bridge's shpc by default" This reverts commit dc0ae767700c156894e36fab89a745a2dc4173de. Disabling the shpc controller has an undesired side effect. The PCI bridge remains with no attached devices at boot time, and the guest operating systems do not allocate any resources for it, leaving the bridge unusable. Note that the behaviour is dictated by the pci bridge specification. Revert the commit and leave the shpc controller even if it is not actually used by any architecture. Slot 0 remains unusable at boot time. Keep shpc off for QEMU 2.9 machines. Signed-off-by: Marcel Apfelbaum Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_bridge_dev.c | 2 +- include/hw/compat.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 647ad80155..5dbd933cc1 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -163,7 +163,7 @@ static Property pci_bridge_dev_properties[] = { DEFINE_PROP_ON_OFF_AUTO(PCI_BRIDGE_DEV_PROP_MSI, PCIBridgeDev, msi, ON_OFF_AUTO_AUTO), DEFINE_PROP_BIT(PCI_BRIDGE_DEV_PROP_SHPC, PCIBridgeDev, flags, - PCI_BRIDGE_DEV_F_SHPC_REQ, false), + PCI_BRIDGE_DEV_F_SHPC_REQ, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/compat.h b/include/hw/compat.h index 846b90eb67..55b176507a 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,11 @@ #define HW_COMPAT_H #define HW_COMPAT_2_9 \ - /* empty */ + {\ + .driver = "pci-bridge",\ + .property = "shpc",\ + .value = "off",\ + }, #define HW_COMPAT_2_8 \ {\ From 66453cff9e5e75344c601cd7674c8ef5fefee8a6 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 17 May 2017 10:17:51 +0200 Subject: [PATCH 11/13] virtio: allow broken device to notify guest 
According to section 2.1.2 of the virtio-1 specification: "The device SHOULD set DEVICE_NEEDS_RESET when it enters an error state that a reset is needed. If DRIVER_OK is set, after it sets DEVICE_NEEDS_RESET, the device MUST send a device configuration change notification to the driver." Commit "f5ed36635d8f virtio: stop virtqueue processing if device is broken" introduced a virtio_error() call that just does that: - internally mark the device as broken - set the DEVICE_NEEDS_RESET bit in the status - send a configuration change notification Unfortunately, virtio_notify_vector(), called by virtio_notify_config(), returns right away when the device is marked as broken and the notification isn't sent in this case. The spec doesn't say whether a broken device can send notifications in other situations or not. But since the driver isn't supposed to do anything but to reset the device, it makes sense to keep the check in virtio_notify_config(). Marking the device as broken AFTER the configuration change notification was sent is enough to fix the issue. Signed-off-by: Greg Kurz Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/virtio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 03592c542a..890b4d7eb7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2451,12 +2451,12 @@ void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...) 
error_vreport(fmt, ap); va_end(ap); - vdev->broken = true; - if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET); virtio_notify_config(vdev); } + + vdev->broken = true; } static void virtio_memory_listener_commit(MemoryListener *listener) From 3936161f1fd72d1dfa577aaba910819c5e873260 Mon Sep 17 00:00:00 2001 From: "Herongguang (Stephen)" Date: Tue, 25 Apr 2017 10:29:54 +0800 Subject: [PATCH 12/13] pci: deassert intx when pci device unrealize If a pci device is unplugged by the VM without first having been reset by it (by writing into config space), qemu may assert when the VM later reboots: pcibus_reset: Assertion `bus->irq_count[i] == 0' failed Cc: qemu-stable@nongnu.org Signed-off-by: herongguang Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 259483b1c0..98ccc27533 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1083,6 +1083,7 @@ static void pci_qdev_unrealize(DeviceState *dev, Error **errp) pc->exit(pci_dev); } + pci_device_deassert_intx(pci_dev); do_pci_unregister_device(pci_dev); } From a764040cc831cfe5b8bf1c80e8341b9bf2de3ce8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 May 2017 16:57:42 +0800 Subject: [PATCH 13/13] exec: abstract address_space_do_translate() This function is an abstraction helper for address_space_translate() and address_space_get_iotlb_entry(). It does the lookup of address into memory region section, then does proper IOMMU translation if necessary. Refactor the two existing functions to use it. This fixes vhost when IOMMU is disabled by guest. Tested-by: Maxime Coquelin Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- exec.c | 115 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 40 deletions(-) diff --git a/exec.c b/exec.c index eac6085760..f942eb2bd1 100644 --- a/exec.c +++ b/exec.c @@ -463,42 +463,12 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x } /* Called from RCU critical section */ -IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, - bool is_write) -{ - IOMMUTLBEntry iotlb = {0}; - MemoryRegionSection *section; - MemoryRegion *mr; - - for (;;) { - AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); - section = address_space_lookup_region(d, addr, false); - addr = addr - section->offset_within_address_space - + section->offset_within_region; - mr = section->mr; - - if (!mr->iommu_ops) { - break; - } - - iotlb = mr->iommu_ops->translate(mr, addr, is_write); - if (!(iotlb.perm & (1 << is_write))) { - iotlb.target_as = NULL; - break; - } - - addr = ((iotlb.translated_addr & ~iotlb.addr_mask) - | (addr & iotlb.addr_mask)); - as = iotlb.target_as; - } - - return iotlb; -} - -/* Called from RCU critical section */ -MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, - hwaddr *xlat, hwaddr *plen, - bool is_write) +static MemoryRegionSection address_space_do_translate(AddressSpace *as, + hwaddr addr, + hwaddr *xlat, + hwaddr *plen, + bool is_write, + bool is_mmio) { IOMMUTLBEntry iotlb; MemoryRegionSection *section; @@ -506,7 +476,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, for (;;) { AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch); - section = address_space_translate_internal(d, addr, &addr, plen, true); + section = address_space_translate_internal(d, addr, &addr, plen, is_mmio); mr = section->mr; if (!mr->iommu_ops) { @@ -518,19 +488,84 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, | (addr & iotlb.addr_mask)); *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1); if 
(!(iotlb.perm & (1 << is_write))) { - mr = &io_mem_unassigned; - break; + goto translate_fail; } as = iotlb.target_as; } + *xlat = addr; + + return *section; + +translate_fail: + return (MemoryRegionSection) { .mr = &io_mem_unassigned }; +} + +/* Called from RCU critical section */ +IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, + bool is_write) +{ + MemoryRegionSection section; + hwaddr xlat, plen; + + /* Try to get maximum page mask during translation. */ + plen = (hwaddr)-1; + + /* This can never be MMIO. */ + section = address_space_do_translate(as, addr, &xlat, &plen, + is_write, false); + + /* Illegal translation */ + if (section.mr == &io_mem_unassigned) { + goto iotlb_fail; + } + + /* Convert memory region offset into address space offset */ + xlat += section.offset_within_address_space - + section.offset_within_region; + + if (plen == (hwaddr)-1) { + /* + * We use default page size here. Logically it only happens + * for identity mappings. + */ + plen = TARGET_PAGE_SIZE; + } + + /* Convert to address mask */ + plen -= 1; + + return (IOMMUTLBEntry) { + .target_as = section.address_space, + .iova = addr & ~plen, + .translated_addr = xlat & ~plen, + .addr_mask = plen, + /* IOTLBs are for DMAs, and DMA only allows on RAMs. */ + .perm = IOMMU_RW, + }; + +iotlb_fail: + return (IOMMUTLBEntry) {0}; +} + +/* Called from RCU critical section */ +MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, + hwaddr *xlat, hwaddr *plen, + bool is_write) +{ + MemoryRegion *mr; + MemoryRegionSection section; + + /* This can be MMIO, so setup MMIO bit. */ + section = address_space_do_translate(as, addr, xlat, plen, is_write, true); + mr = section.mr; + if (xen_enabled() && memory_access_is_direct(mr, is_write)) { hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr; *plen = MIN(page, *plen); } - *xlat = addr; return mr; }