vhost, pc, virtio features, fixes, cleanups

New features:
     VT-d support for devices behind a bridge
     vhost-user migration support
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJWKMrnAAoJECgfDbjSjVRpVL0H/iRc31o00QE4nWBRpxUpf8WJ
 V5RWE8qKkDgBha5bS5Nt4vs8K4jkkHGXCbmygMidWph96hUPK8/yHy1A/wmpBibB
 5hVSPDK8onavNGJwpaWDrkhd9OhKAaKOuu49T6+VWJGZY/uX5ayqmcN934y0NPUa
 4EhH5tyxPpYOYeW9i/VOMQ374gCJcpzYBMug4NJZRyFpfz/b2mzAQtoqw3EsPtB0
 vpVJ+fKiCyG39HFKQJW7cL12yBeXOoyhjfDxpumLqwLWMfmde+vJwTFx6wbechgV
 aU3jIdvUX8wHCNYaB937NsMaDALoGNqUjbpKnf+xD1w7xr9pwTzdyrGH3rpGLEE=
 =+G1+
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

vhost, pc, virtio features, fixes, cleanups

New features:
    VT-d support for devices behind a bridge
    vhost-user migration support

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 22 Oct 2015 12:39:19 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream: (37 commits)
  hw/isa/lpc_ich9: inject the SMI on the VCPU that is writing to APM_CNT
  i386: keep cpu_model field in MachineState uptodate
  vhost: set the correct queue index in case of migration with multiqueue
  piix: fix resource leak reported by Coverity
  seccomp: add memfd_create to whitelist
  vhost-user-test: check ownership during migration
  vhost-user-test: add live-migration test
  vhost-user-test: learn to tweak various qemu arguments
  vhost-user-test: wrap server in TestServer struct
  vhost-user-test: remove useless static check
  vhost-user-test: move wait_for_fds() out
  vhost: add migration block if memfd failed
  vhost-user: use an enum helper for features mask
  vhost user: add rarp sending after live migration for legacy guest
  vhost user: add support of live migration
  net: add trace_vhost_user_event
  vhost-user: document migration log
  vhost: use a function for each call
  vhost-user: add a migration blocker
  vhost-user: send log shm fd along with log_base
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-10-22 12:41:44 +01:00
commit ca3e40e233
45 changed files with 5097 additions and 483 deletions

View file

@ -12,6 +12,7 @@
#include "libqtest.h"
#include "qemu/option.h"
#include "qemu/range.h"
#include "sysemu/char.h"
#include "sysemu/sysemu.h"
@ -30,10 +31,10 @@
#endif
#define QEMU_CMD_ACCEL " -machine accel=tcg"
#define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\
#define QEMU_CMD_MEM " -m %d -object memory-backend-file,id=mem,size=%dM,"\
"mem-path=%s,share=on -numa node,memdev=mem"
#define QEMU_CMD_CHR " -chardev socket,id=chr0,path=%s"
#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce"
#define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s"
#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=%s,vhostforce"
#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0 "
#define QEMU_CMD_ROM " -option-rom ../pc-bios/pxe-virtio.rom"
@ -47,6 +48,9 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_LOG_PAGE 0x1000
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@ -108,10 +112,17 @@ static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_VERSION (0x1)
/*****************************************************************************/
int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS];
static VhostUserMemory memory;
static CompatGMutex data_mutex;
static CompatGCond data_cond;
/*
 * State of one fake vhost-user backend instance used by the tests.
 * Several instances can coexist (e.g. migration source and destination).
 */
typedef struct TestServer {
/* path of the UNIX socket the chardev is created on */
gchar *socket_path;
/* chardev id, also substituted into the QEMU command line */
gchar *chr_name;
/* chardev the vhost-user messages are read from / replied on */
CharDriverState *chr;
/* number of valid entries in fds[]; set on SET_MEM_TABLE, 0 after reset */
int fds_num;
/* file descriptors received together with the memory table */
int fds[VHOST_MEMORY_MAX_NREGIONS];
/* copy of the last memory table received from QEMU */
VhostUserMemory memory;
/* protects the fields above against the chardev read handler */
GMutex data_mutex;
/* signalled when a memory table or a log fd has been received */
GCond data_cond;
/* fd received with SET_LOG_BASE, or -1 when none is held */
int log_fd;
} TestServer;
#if !GLIB_CHECK_VERSION(2, 32, 0)
static gboolean g_cond_wait_until(CompatGCond cond, CompatGMutex mutex,
@ -126,58 +137,71 @@ static gboolean g_cond_wait_until(CompatGCond cond, CompatGMutex mutex,
}
#endif
static void read_guest_mem(void)
{
uint32_t *guest_mem;
gint64 end_time;
int i, j;
size_t size;
static const char *tmpfs;
static const char *root;
g_mutex_lock(&data_mutex);
static void wait_for_fds(TestServer *s)
{
gint64 end_time;
g_mutex_lock(&s->data_mutex);
end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
while (!fds_num) {
if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) {
while (!s->fds_num) {
if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) {
/* timeout has passed */
g_assert(fds_num);
g_assert(s->fds_num);
break;
}
}
/* check for sanity */
g_assert_cmpint(fds_num, >, 0);
g_assert_cmpint(fds_num, ==, memory.nregions);
g_assert_cmpint(s->fds_num, >, 0);
g_assert_cmpint(s->fds_num, ==, s->memory.nregions);
g_mutex_unlock(&s->data_mutex);
}
static void read_guest_mem(TestServer *s)
{
uint32_t *guest_mem;
int i, j;
size_t size;
wait_for_fds(s);
g_mutex_lock(&s->data_mutex);
/* iterate all regions */
for (i = 0; i < fds_num; i++) {
for (i = 0; i < s->fds_num; i++) {
/* We'll check only the region starting at 0x0 */
if (memory.regions[i].guest_phys_addr != 0x0) {
if (s->memory.regions[i].guest_phys_addr != 0x0) {
continue;
}
g_assert_cmpint(memory.regions[i].memory_size, >, 1024);
g_assert_cmpint(s->memory.regions[i].memory_size, >, 1024);
size = memory.regions[i].memory_size + memory.regions[i].mmap_offset;
size = s->memory.regions[i].memory_size +
s->memory.regions[i].mmap_offset;
guest_mem = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_SHARED, fds[i], 0);
MAP_SHARED, s->fds[i], 0);
g_assert(guest_mem != MAP_FAILED);
guest_mem += (memory.regions[i].mmap_offset / sizeof(*guest_mem));
guest_mem += (s->memory.regions[i].mmap_offset / sizeof(*guest_mem));
for (j = 0; j < 256; j++) {
uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4);
uint32_t a = readl(s->memory.regions[i].guest_phys_addr + j*4);
uint32_t b = guest_mem[j];
g_assert_cmpint(a, ==, b);
}
munmap(guest_mem, memory.regions[i].memory_size);
munmap(guest_mem, s->memory.regions[i].memory_size);
}
g_assert_cmpint(1, ==, 1);
g_mutex_unlock(&data_mutex);
g_mutex_unlock(&s->data_mutex);
}
static void *thread_function(void *data)
@ -195,7 +219,8 @@ static int chr_can_read(void *opaque)
static void chr_read(void *opaque, const uint8_t *buf, int size)
{
CharDriverState *chr = opaque;
TestServer *s = opaque;
CharDriverState *chr = s->chr;
VhostUserMsg msg;
uint8_t *p = (uint8_t *) &msg;
int fd;
@ -205,12 +230,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
return;
}
g_mutex_lock(&data_mutex);
g_mutex_lock(&s->data_mutex);
memcpy(p, buf, VHOST_USER_HDR_SIZE);
if (msg.size) {
p += VHOST_USER_HDR_SIZE;
qemu_chr_fe_read_all(chr, p, msg.size);
g_assert_cmpint(qemu_chr_fe_read_all(chr, p, msg.size), ==, msg.size);
}
switch (msg.request) {
@ -218,7 +243,8 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
/* send back features to qemu */
msg.flags |= VHOST_USER_REPLY_MASK;
msg.size = sizeof(m.u64);
msg.u64 = 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
msg.u64 = 0x1ULL << VHOST_F_LOG_ALL |
0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
@ -232,7 +258,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
/* send back features to qemu */
msg.flags |= VHOST_USER_REPLY_MASK;
msg.size = sizeof(m.u64);
msg.u64 = 0;
msg.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
break;
@ -248,11 +274,11 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
case VHOST_USER_SET_MEM_TABLE:
/* received the mem table */
memcpy(&memory, &msg.memory, sizeof(msg.memory));
fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int));
memcpy(&s->memory, &msg.memory, sizeof(msg.memory));
s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds));
/* signal the test that it can continue */
g_cond_signal(&data_cond);
g_cond_signal(&s->data_cond);
break;
case VHOST_USER_SET_VRING_KICK:
@ -266,10 +292,30 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
*/
qemu_set_nonblock(fd);
break;
case VHOST_USER_SET_LOG_BASE:
if (s->log_fd != -1) {
close(s->log_fd);
s->log_fd = -1;
}
qemu_chr_fe_get_msgfds(chr, &s->log_fd, 1);
msg.flags |= VHOST_USER_REPLY_MASK;
msg.size = 0;
p = (uint8_t *) &msg;
qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE);
g_cond_signal(&s->data_cond);
break;
case VHOST_USER_RESET_DEVICE:
s->fds_num = 0;
break;
default:
break;
}
g_mutex_unlock(&data_mutex);
g_mutex_unlock(&s->data_mutex);
}
static const char *init_hugepagefs(const char *path)
@ -299,26 +345,246 @@ static const char *init_hugepagefs(const char *path)
return path;
}
/*
 * Create a vhost-user test backend called @name.
 *
 * A server chardev is opened on the UNIX socket "<tmpfs>/<name>.sock" and
 * given the id "chr-<name>"; chr_read() is installed as its read handler
 * with the new TestServer as opaque pointer.
 *
 * Returns a newly allocated TestServer; release it with test_server_free().
 */
static TestServer *test_server_new(const gchar *name)
{
    TestServer *server = g_new0(TestServer, 1);
    gchar *chr_path;

    /*
     * Bring every field chr_read() looks at into its final state before
     * the handler can possibly be invoked: the main loop thread may
     * already be running when this is called.
     */
    server->log_fd = -1;
    g_mutex_init(&server->data_mutex);
    g_cond_init(&server->data_cond);

    server->socket_path = g_strdup_printf("%s/%s.sock", tmpfs, name);
    server->chr_name = g_strdup_printf("chr-%s", name);

    chr_path = g_strdup_printf("unix:%s,server,nowait", server->socket_path);
    server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
    g_free(chr_path);

    /* fail loudly here rather than crash later inside the chardev layer */
    g_assert(server->chr != NULL);

    qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);

    return server;
}
/*
 * Build the QEMU command line for server @s with 512 MB of guest memory.
 * The arguments are, in order: memory size (twice), the memory backing
 * path (file-level "root"), chardev id, socket path, and chardev id again
 * -- presumably matching the placeholders of QEMU_CMD_MEM, QEMU_CMD_CHR
 * and QEMU_CMD_NETDEV in that order; keep both in sync.
 * The result is allocated by g_strdup_printf(); the caller must g_free() it.
 */
#define GET_QEMU_CMD(s) \
g_strdup_printf(QEMU_CMD, 512, 512, (root), (s)->chr_name, \
(s)->socket_path, (s)->chr_name)
/*
 * Same as GET_QEMU_CMD() but with a caller-chosen memory size @mem and an
 * @extra string literal appended to the format (it is pasted next to
 * QEMU_CMD, so it must be a literal); the variadic arguments fill any
 * placeholders contained in @extra.
 */
#define GET_QEMU_CMDE(s, mem, extra, ...) \
g_strdup_printf(QEMU_CMD extra, (mem), (mem), (root), (s)->chr_name, \
(s)->socket_path, (s)->chr_name, ##__VA_ARGS__)
/*
 * Tear down a TestServer: delete its chardev, close every received
 * region fd and the log fd (if any), remove the socket file and free
 * all memory owned by @server, including @server itself.
 */
static void test_server_free(TestServer *server)
{
    int idx = 0;

    qemu_chr_delete(server->chr);

    /* region fds handed over with the memory table */
    while (idx < server->fds_num) {
        close(server->fds[idx]);
        idx++;
    }

    /* -1 means no log fd is currently held */
    if (server->log_fd != -1) {
        close(server->log_fd);
    }

    unlink(server->socket_path);

    g_free(server->socket_path);
    g_free(server->chr_name);
    g_free(server);
}
/*
 * Block until @s holds a log fd (log_fd != -1), waiting on data_cond for
 * at most five seconds; asserts if the deadline passes without one.
 */
static void wait_for_log_fd(TestServer *s)
{
    gint64 deadline;

    g_mutex_lock(&s->data_mutex);

    deadline = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
    for (;;) {
        if (s->log_fd != -1) {
            break;
        }
        if (g_cond_wait_until(&s->data_cond, &s->data_mutex, deadline)) {
            /* woken up before the deadline: re-check the predicate */
            continue;
        }
        /* timeout has passed */
        g_assert(s->log_fd != -1);
        break;
    }

    g_mutex_unlock(&s->data_mutex);
}
/*
 * Fill the first 256 32-bit words of guest memory with @seed, @seed + 1, ...
 *
 * Waits for the memory table of @s, then maps the region whose guest
 * physical address is 0 through the fd received for it and writes the
 * pattern directly into the shared mapping. Only that one region is
 * touched.
 */
static void write_guest_mem(TestServer *s, uint32_t seed)
{
    int i, j;

    wait_for_fds(s);

    /* same locking as read_guest_mem(): chr_read() may update the table */
    g_mutex_lock(&s->data_mutex);

    /* iterate all regions */
    for (i = 0; i < s->fds_num; i++) {
        VhostUserMemoryRegion *reg = &s->memory.regions[i];
        uint32_t *map_base;
        uint32_t *guest_mem;
        size_t size;

        /* We'll write only the region starting at 0x0 */
        if (reg->guest_phys_addr != 0x0) {
            continue;
        }

        g_assert_cmpint(reg->memory_size, >, 1024);

        /* the fd is mapped from offset 0, so the data starts mmap_offset in */
        size = reg->memory_size + reg->mmap_offset;

        map_base = mmap(0, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, s->fds[i], 0);
        g_assert(map_base != MAP_FAILED);

        guest_mem = map_base + (reg->mmap_offset / sizeof(*guest_mem));

        for (j = 0; j < 256; j++) {
            guest_mem[j] = seed + j;
        }

        /*
         * Unmap exactly what was mapped: munmap() needs the address mmap()
         * returned and the full length, not the offset-adjusted pointer.
         */
        munmap(map_base, size);
        break;
    }

    g_mutex_unlock(&s->data_mutex);
}
/*
 * Compute the log size needed to cover every region in the memory table
 * of @s: for each region, the last guest physical address divided by
 * (8 * VHOST_LOG_PAGE), plus one; the largest such value is returned.
 * Returns 0 when the table has no regions.
 */
static guint64 get_log_size(TestServer *s)
{
    guint64 biggest = 0;
    int idx;

    for (idx = 0; idx < s->memory.nregions; ++idx) {
        VhostUserMemoryRegion *region = &s->memory.regions[idx];
        guint64 last_addr = range_get_last(region->guest_phys_addr,
                                           region->memory_size);
        guint64 needed = last_addr / (8 * VHOST_LOG_PAGE) + 1;

        if (needed > biggest) {
            biggest = needed;
        }
    }

    return biggest;
}
/*
 * Custom GSource carrying the two servers taking part in a migration.
 * The GSource member must stay first: the object is allocated with
 * g_source_new() and accessed by casting the GSource pointer.
 */
typedef struct TestMigrateSource {
GSource source;
/* backend attached to the migration source QEMU */
TestServer *src;
/* backend attached to the migration destination QEMU */
TestServer *dest;
} TestMigrateSource;
/*
 * GSource check callback: asserts that the source and destination
 * backends never both hold memory-region fds at the same time.
 * Always returns FALSE, so the source is never reported as ready.
 */
static gboolean
test_migrate_source_check(GSource *source)
{
    TestMigrateSource *ms = (TestMigrateSource *)source;
    gboolean overlap = FALSE;

    if (ms->src->fds_num > 0) {
        overlap = ms->dest->fds_num > 0;
    }

    g_assert(!overlap);

    return FALSE;
}
GSourceFuncs test_migrate_source_funcs = {
NULL,
test_migrate_source_check,
NULL,
NULL
};
/*
 * Live-migration test: start a source and a destination QEMU, each with
 * its own vhost-user backend, migrate from one to the other while writing
 * to guest memory through the backend, then check guest memory as seen by
 * the destination backend. global_qtest is saved on entry and restored on
 * exit.
 */
static void test_migrate(void)
{
TestServer *s = test_server_new("src");
TestServer *dest = test_server_new("dest");
/* NOTE(review): fixed TCP port; may clash with other users of 1234 */
const char *uri = "tcp:127.0.0.1:1234";
QTestState *global = global_qtest, *from, *to;
GSource *source;
gchar *cmd;
QDict *rsp;
guint8 *log;
guint64 size;
/* source QEMU with 2 MB of guest memory */
cmd = GET_QEMU_CMDE(s, 2, "");
from = qtest_start(cmd);
g_free(cmd);
wait_for_fds(s);
size = get_log_size(s);
/* expected log size for 2 MB of guest memory */
g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8));
/* destination QEMU, same memory size, waiting for the incoming stream */
cmd = GET_QEMU_CMDE(dest, 2, " -incoming %s", uri);
to = qtest_init(cmd);
g_free(cmd);
/* install the check that both backends never hold region fds at once */
source = g_source_new(&test_migrate_source_funcs,
sizeof(TestMigrateSource));
((TestMigrateSource *)source)->src = s;
((TestMigrateSource *)source)->dest = dest;
g_source_attach(source, NULL);
/* slow down migration to have time to fiddle with log */
/* TODO: qtest could learn to break on some places */
rsp = qmp("{ 'execute': 'migrate_set_speed',"
"'arguments': { 'value': 10 } }");
g_assert(qdict_haskey(rsp, "return"));
QDECREF(rsp);
cmd = g_strdup_printf("{ 'execute': 'migrate',"
"'arguments': { 'uri': '%s' } }",
uri);
rsp = qmp(cmd);
g_free(cmd);
g_assert(qdict_haskey(rsp, "return"));
QDECREF(rsp);
/* the log fd arrives with SET_LOG_BASE once migration has started */
wait_for_log_fd(s);
log = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, s->log_fd, 0);
g_assert(log != MAP_FAILED);
/* modify first page */
write_guest_mem(s, 0x42);
/* presumably flags the first page as dirty in the shared log */
log[0] = 1;
munmap(log, size);
/* speed things up */
rsp = qmp("{ 'execute': 'migrate_set_speed',"
"'arguments': { 'value': 0 } }");
g_assert(qdict_haskey(rsp, "return"));
QDECREF(rsp);
qmp_eventwait("STOP");
/* presumably makes the following qmp calls talk to the destination */
global_qtest = to;
qmp_eventwait("RESUME");
read_guest_mem(dest);
g_source_destroy(source);
g_source_unref(source);
qtest_quit(to);
test_server_free(dest);
qtest_quit(from);
test_server_free(s);
global_qtest = global;
}
int main(int argc, char **argv)
{
QTestState *s = NULL;
CharDriverState *chr = NULL;
TestServer *server = NULL;
const char *hugefs;
char *socket_path = 0;
char *qemu_cmd = 0;
char *chr_path = 0;
char *qemu_cmd = NULL;
int ret;
char template[] = "/tmp/vhost-test-XXXXXX";
const char *tmpfs;
const char *root;
g_test_init(&argc, &argv, NULL);
module_call_init(MODULE_INIT_QOM);
qemu_add_opts(&qemu_chardev_opts);
tmpfs = mkdtemp(template);
if (!tmpfs) {
g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
}
g_assert(tmpfs);
@ -330,25 +596,18 @@ int main(int argc, char **argv)
root = tmpfs;
}
socket_path = g_strdup_printf("%s/vhost.sock", tmpfs);
/* create char dev and add read handlers */
qemu_add_opts(&qemu_chardev_opts);
chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path);
chr = qemu_chr_new("chr0", chr_path, NULL);
g_free(chr_path);
qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr);
server = test_server_new("test");
/* run the main loop thread so the chardev may operate */
g_mutex_init(&data_mutex);
g_cond_init(&data_cond);
g_thread_new(NULL, thread_function, NULL);
qemu_cmd = g_strdup_printf(QEMU_CMD, root, socket_path);
qemu_cmd = GET_QEMU_CMD(server);
s = qtest_start(qemu_cmd);
g_free(qemu_cmd);
qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem);
qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
qtest_add_func("/vhost-user/migrate", test_migrate);
ret = g_test_run();
@ -357,8 +616,7 @@ int main(int argc, char **argv)
}
/* cleanup */
unlink(socket_path);
g_free(socket_path);
test_server_free(server);
ret = rmdir(tmpfs);
if (ret != 0) {