Block layer patches
- virtio-scsi: add iothread-vq-mapping parameter
- Improve writethrough performance
- Fix missing zero init in bdrv_snapshot_goto()
- Added scripts/qcow2-to-stdout.py
- Code cleanup and iotests fixes

Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmfTDysRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9Yz6A//asOl37zjbtf9pYjY/gliH859TQOppPGD
# LB9IIr+nTDME0wfUkCOlag+CeEYZwkeo2PF+XeopsyzlJeBOk4tL7AkY57XYe3lZ
# M5hlnNrn6l3gb6iioMg60pEKSMrpKprB16vT3nAtyN6aEXsm9TvtPkWPFTCFGVeK
# W74VCr7wuXbfdEJcOGd8WhB9ZHIgwoWYnoL41tvCoefW2yNaMA6X0TLn98toXzOi
# il50ZnnchTQngns5R+n+1R1Ma995t393D+CArQcYVRzxKGOs5p0y4otz4gCkMhdp
# GVL09R7Ge4TteSJ2myxlN/EjYOxmdoMrVDajr4xPdHBw12MKzgk8i82h4/Es/Q5o
# 3Npgx74+jDyqlICb/czTVM5KJINpyO80vO3N3WpYUOQGyTCcYgv7pIpy8pB2o6Te
# RPlv0W9bHVSSgThFFLQ0Ud8WRGJe1K/ar8bdmiWN08Wez1avENWaYmsv5zGnFL24
# vD6cNXMR4mF7mzyeWda/5hGKv75djVgX+ZfzvWNT3qgizD56JBOA3RdCRwBZJOJb
# TvJkfi5RGyaji9BfKVCYBL3/iDELJEVDW8jxvIIUrS0aPcTHpAQ5gTO7VAokreqZ
# 5Smll11eeoEgPPvNLw8ikmOGTWOMkJGrmExP2K1ApANq3kSbBSU4jroEr0BG9PZT
# 6Y0hUdtFSdU=
# =w2Ri
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 14 Mar 2025 01:00:27 HKT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (23 commits)
  scripts/qcow2-to-stdout.py: Add script to write qcow2 images to stdout
  virtio-scsi: only expose cmd vqs via iothread-vq-mapping
  virtio-scsi: handle ctrl virtqueue in main loop
  virtio-scsi: add iothread-vq-mapping parameter
  virtio: extract iothread-vq-mapping.h API
  virtio-blk: tidy up iothread_vq_mapping functions
  virtio-blk: extract cleanup_iothread_vq_mapping() function
  virtio-scsi: perform TMFs in appropriate AioContexts
  virtio-scsi: protect events_dropped field
  virtio-scsi: introduce event and ctrl virtqueue locks
  scsi: introduce requests_lock
  scsi: track per-SCSIRequest AioContext
  dma: use current AioContext for dma_blk_io()
  scsi-disk: drop unused SCSIDiskState->bh field
  iotests: Limit qsd-migrate to working formats
  aio-posix: Adjust polling time also for new handlers
  aio-posix: Separate AioPolledEvent per AioHandler
  aio-posix: Factor out adjust_polling_time()
  aio: Create AioPolledEvent
  block/io: Ignore FUA with cache.no-flush=on
  ...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Commit 0462a32b4f (30 changed files with 1306 additions and 531 deletions)
@@ -2357,18 +2357,6 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
-
 /**
  * Return BB's current AioContext. Note that this context may change
@@ -194,6 +194,7 @@ static int fd_open(BlockDriverState *bs)
 }
 
 static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -804,6 +805,13 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
     s->needs_alignment = raw_needs_alignment(bs);
 
+    bs->supported_write_flags = BDRV_REQ_FUA;
+    if (s->use_linux_aio && !laio_has_fua()) {
+        bs->supported_write_flags &= ~BDRV_REQ_FUA;
+    } else if (s->use_linux_io_uring && !luring_has_fua()) {
+        bs->supported_write_flags &= ~BDRV_REQ_FUA;
+    }
+
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
         /* When extending regular files, we get zeros from the OS */
@@ -2477,7 +2485,8 @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
 #endif
 
 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
-                                   uint64_t bytes, QEMUIOVector *qiov, int type)
+                                   uint64_t bytes, QEMUIOVector *qiov, int type,
+                                   int flags)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -2508,13 +2517,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 #ifdef CONFIG_LINUX_IO_URING
     } else if (raw_check_linux_io_uring(s)) {
         assert(qiov->size == bytes);
-        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (raw_check_linux_aio(s)) {
         assert(qiov->size == bytes);
-        ret = laio_co_submit(s->fd, offset, qiov, type,
+        ret = laio_co_submit(s->fd, offset, qiov, type, flags,
                              s->aio_max_batch);
         goto out;
 #endif
@@ -2534,6 +2543,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 
     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+        /* TODO Use pwritev2() instead if it's available */
+        ret = raw_co_flush_to_disk(bs);
+    }
     goto out; /* Avoid the compiler err of unused label */
 
 out:
@@ -2571,14 +2584,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -2600,12 +2613,12 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
 
 #ifdef CONFIG_LINUX_IO_URING
     if (raw_check_linux_io_uring(s)) {
-        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
     }
 #endif
 #ifdef CONFIG_LINUX_AIO
     if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
-        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -3540,7 +3553,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
 }
 #endif
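The raw_co_prw() change above advertises BDRV_REQ_FUA and, when the chosen AIO engine cannot attach a durability flag to the write itself, falls back to an explicit flush after the write completes. A rough standalone sketch of that fallback, using plain POSIX pwrite()/fdatasync() as stand-ins for the QEMU block layer (illustration only, not QEMU code):

/* Sketch: emulate a FUA write with an explicit flush when the backend has no
 * native per-request durability flag. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static ssize_t write_fua(int fd, const void *buf, size_t len, off_t off,
                         int backend_has_fua)
{
    ssize_t ret = pwrite(fd, buf, len, off);   /* submit the write itself */
    if (ret >= 0 && !backend_has_fua) {
        if (fdatasync(fd) != 0) {              /* fall back: flush afterwards */
            return -1;
        }
    }
    return ret;
}

int main(void)
{
    int fd = open("/tmp/fua-demo.img", O_CREAT | O_RDWR, 0600);
    const char buf[] = "hello";

    if (fd < 0 || write_fua(fd, buf, strlen(buf), 0, 0) < 0) {
        perror("write_fua");
        return 1;
    }
    close(fd);
    return 0;
}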
@@ -1058,6 +1058,10 @@ bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         return -ENOMEDIUM;
     }
 
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        flags &= ~BDRV_REQ_FUA;
+    }
+
     if ((flags & BDRV_REQ_FUA) &&
         (~bs->supported_write_flags & BDRV_REQ_FUA)) {
         flags &= ~BDRV_REQ_FUA;
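The bdrv_driver_pwritev() hunk above drops the FUA flag in two cases: the node was opened with cache.no-flush=on, or the driver does not advertise FUA support (the generic layer then emulates it with a flush). A minimal sketch of that flag filtering, with invented REQ_FUA/OPEN_NO_FLUSH constants standing in for the QEMU flags:

#include <stdio.h>

enum { REQ_FUA = 1 << 0 };
enum { OPEN_NO_FLUSH = 1 << 0 };

static unsigned filter_write_flags(unsigned flags, unsigned open_flags,
                                   unsigned supported_write_flags)
{
    if (open_flags & OPEN_NO_FLUSH) {
        flags &= ~REQ_FUA;          /* cache.no-flush=on ignores durability */
    }
    if ((flags & REQ_FUA) && !(supported_write_flags & REQ_FUA)) {
        flags &= ~REQ_FUA;          /* caller must emulate with a flush */
    }
    return flags;
}

int main(void)
{
    printf("%u\n", filter_write_flags(REQ_FUA, OPEN_NO_FLUSH, 0));  /* prints 0 */
    return 0;
}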
@@ -335,15 +335,24 @@ static void luring_deferred_fn(void *opaque)
  *
  */
 static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
-                            uint64_t offset, int type)
+                            uint64_t offset, int type, BdrvRequestFlags flags)
 {
     int ret;
     struct io_uring_sqe *sqes = &luringcb->sqeq;
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+    {
+        int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+                              luringcb->qiov->niov, offset, luring_flags);
+    }
+#else
+        assert(flags == 0);
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
                              luringcb->qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -380,7 +389,8 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 }
 
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type)
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -393,7 +403,7 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
     };
     trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                            type);
-    ret = luring_do_submit(fd, &luringcb, s, offset, type);
+    ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
 
     if (ret < 0) {
         return ret;
@@ -448,3 +458,12 @@ void luring_cleanup(LuringState *s)
     trace_luring_cleanup_state(s);
     g_free(s);
 }
+
+bool luring_has_fua(void)
+{
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
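luring_do_submit() above maps BDRV_REQ_FUA to RWF_DSYNC on io_uring_prep_writev2(), so a FUA write becomes a single submission instead of write-plus-flush. The same per-write durability flag exists on the plain pwritev2() syscall; a standalone sketch follows (assumes a Linux kernel/glibc with pwritev2(); RWF_DSYNC is defined from the kernel UAPI value in case the headers lack it):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_DSYNC
#define RWF_DSYNC 0x00000002   /* value from the Linux UAPI headers */
#endif

int main(void)
{
    int fd = open("/tmp/dsync-demo.img", O_CREAT | O_WRONLY, 0600);
    char buf[] = "payload";
    struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) - 1 };

    /* One durable write: data (and required metadata) reach stable storage
     * before the call returns, much like a FUA write. */
    if (fd < 0 || pwritev2(fd, &iov, 1, 0, RWF_DSYNC) < 0) {
        perror("pwritev2");
        return 1;
    }
    close(fd);
    return 0;
}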
@@ -368,7 +368,8 @@ static void laio_deferred_fn(void *opaque)
 }
 
 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type, BdrvRequestFlags flags,
+                          uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
@@ -376,7 +377,15 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+    {
+        int laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+    }
+#else
+        assert(flags == 0);
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -409,7 +418,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }
 
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch)
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -422,7 +432,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
         .qiov = qiov,
     };
 
-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -505,3 +515,12 @@ bool laio_has_fdsync(int fd)
     io_destroy(ctx);
     return (ret == -EINVAL) ? false : true;
 }
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
@@ -296,6 +296,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         bdrv_graph_wrunlock();
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
+        memset(bs->opaque, 0, drv->instance_size);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
         qobject_unref(options);
         if (open_ret < 0) {
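The one-line bdrv_snapshot_goto() fix above zeroes the driver's private state before re-invoking bdrv_open(), because open routines assume the zero-filled state a fresh BlockDriverState would have. A toy illustration of that bug class (hypothetical struct and helper, not QEMU code):

#include <stdlib.h>
#include <string.h>

struct drv_state {
    int cache_initialized;   /* open() assumes this starts at 0 */
};

static void reopen(struct drv_state *s)
{
    /* mirrors: memset(bs->opaque, 0, drv->instance_size); */
    memset(s, 0, sizeof(*s));
    /* the driver's open routine can now rely on zeroed state, as on first open */
}

int main(void)
{
    struct drv_state *s = malloc(sizeof(*s));

    s->cache_initialized = 1;   /* stale value left over from the previous open */
    reopen(s);
    free(s);
    return 0;
}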
@@ -33,6 +33,7 @@
 #endif
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-blk-common.h"
 #include "qemu/coroutine.h"
@@ -1423,128 +1424,6 @@ static const BlockDevOps virtio_block_ops = {
     .drained_end = virtio_blk_drained_end,
 };
 
-static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
-                                  uint16_t num_queues, Error **errp)
-{
-    g_autofree unsigned long *vqs = bitmap_new(num_queues);
-    g_autoptr(GHashTable) iothreads =
-        g_hash_table_new(g_str_hash, g_str_equal);
-
-    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
-        const char *name = node->value->iothread;
-        uint16List *vq;
-
-        if (!iothread_by_id(name)) {
-            error_setg(errp, "IOThread \"%s\" object does not exist", name);
-            return false;
-        }
-
-        if (!g_hash_table_add(iothreads, (gpointer)name)) {
-            error_setg(errp,
-                       "duplicate IOThread name \"%s\" in iothread-vq-mapping",
-                       name);
-            return false;
-        }
-
-        if (node != list) {
-            if (!!node->value->vqs != !!list->value->vqs) {
-                error_setg(errp, "either all items in iothread-vq-mapping "
-                                 "must have vqs or none of them must have it");
-                return false;
-            }
-        }
-
-        for (vq = node->value->vqs; vq; vq = vq->next) {
-            if (vq->value >= num_queues) {
-                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
-                           "less than num_queues %u in iothread-vq-mapping",
-                           vq->value, name, num_queues);
-                return false;
-            }
-
-            if (test_and_set_bit(vq->value, vqs)) {
-                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
-                           "because it is already assigned", vq->value, name);
-                return false;
-            }
-        }
-    }
-
-    if (list->value->vqs) {
-        for (uint16_t i = 0; i < num_queues; i++) {
-            if (!test_bit(i, vqs)) {
-                error_setg(errp,
-                        "missing vq %u IOThread assignment in iothread-vq-mapping",
-                        i);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-/**
- * apply_iothread_vq_mapping:
- * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
- * @vq_aio_context: The array of AioContext pointers to fill in.
- * @num_queues: The length of @vq_aio_context.
- * @errp: If an error occurs, a pointer to the area to store the error.
- *
- * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
- *
- * Returns: %true on success, %false on failure.
- **/
-static bool apply_iothread_vq_mapping(
-        IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
-        AioContext **vq_aio_context,
-        uint16_t num_queues,
-        Error **errp)
-{
-    IOThreadVirtQueueMappingList *node;
-    size_t num_iothreads = 0;
-    size_t cur_iothread = 0;
-
-    if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
-                                           num_queues, errp)) {
-        return false;
-    }
-
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
-        num_iothreads++;
-    }
-
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        AioContext *ctx = iothread_get_aio_context(iothread);
-
-        /* Released in virtio_blk_vq_aio_context_cleanup() */
-        object_ref(OBJECT(iothread));
-
-        if (node->value->vqs) {
-            uint16List *vq;
-
-            /* Explicit vq:IOThread assignment */
-            for (vq = node->value->vqs; vq; vq = vq->next) {
-                assert(vq->value < num_queues);
-                vq_aio_context[vq->value] = ctx;
-            }
-        } else {
-            /* Round-robin vq:IOThread assignment */
-            for (unsigned i = cur_iothread; i < num_queues;
-                 i += num_iothreads) {
-                vq_aio_context[i] = ctx;
-            }
-        }
-
-        cur_iothread++;
-    }
-
-    return true;
-}
-
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
@@ -1577,7 +1456,7 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);
 
     if (conf->iothread_vq_mapping_list) {
-        if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+        if (!iothread_vq_mapping_apply(conf->iothread_vq_mapping_list,
                                        s->vq_aio_context,
                                        conf->num_queues,
                                        errp)) {
@@ -1611,12 +1490,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);
 
     if (conf->iothread_vq_mapping_list) {
-        IOThreadVirtQueueMappingList *node;
-
-        for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
-            IOThread *iothread = iothread_by_id(node->value->iothread);
-            object_unref(OBJECT(iothread));
-        }
+        iothread_vq_mapping_cleanup(conf->iothread_vq_mapping_list);
     }
 
     if (conf->iothread) {
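The removed apply_iothread_vq_mapping() (now provided by the shared iothread-vq-mapping helper that both virtio-blk and virtio-scsi include) assigns virtqueues to IOThreads round-robin when no explicit vqs list is given: with N IOThreads, IOThread k takes queues k, k+N, k+2N, and so on. A tiny standalone sketch of that distribution:

#include <stdio.h>

int main(void)
{
    const int num_queues = 8, num_iothreads = 3;

    for (int t = 0; t < num_iothreads; t++) {
        printf("iothread %d:", t);
        for (int q = t; q < num_queues; q += num_iothreads) {
            printf(" vq%d", q);    /* same stride as the round-robin loop */
        }
        printf("\n");
    }
    return 0;
}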
@@ -968,8 +968,7 @@ static void ide_dma_cb(void *opaque, int ret)
                                            BDRV_SECTOR_SIZE, ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
-                                        &s->sg, offset, BDRV_SECTOR_SIZE,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, BDRV_SECTOR_SIZE,
                                         ide_issue_trim, s, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
@@ -187,8 +187,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
                                              pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg,
-                                        offset, 0x1, ide_issue_trim, s,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, 0x1, ide_issue_trim, s,
                                         pmac_ide_transfer_cb, io,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
@@ -100,9 +100,16 @@ static void scsi_device_for_each_req_sync(SCSIDevice *s,
     assert(!runstate_is_running());
     assert(qemu_in_main_thread());
 
+    /*
+     * Locking is not necessary because the guest is stopped and no other
+     * threads can be accessing the requests list, but take the lock for
+     * consistency.
+     */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
     QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
         fn(req, opaque);
     }
+    }
 }
 
 typedef struct {
@@ -115,21 +122,29 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
 {
     g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
     SCSIDevice *s = data->s;
-    AioContext *ctx;
+    g_autoptr(GList) reqs = NULL;
 
+    /*
+     * Build a list of requests in this AioContext so fn() can be invoked later
+     * outside requests_lock.
+     */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        AioContext *ctx = qemu_get_current_aio_context();
     SCSIRequest *req;
     SCSIRequest *next;
 
-    /*
-     * The BB cannot have changed contexts between this BH being scheduled and
-     * now: BBs' AioContexts, when they have a node attached, can only be
-     * changed via bdrv_try_change_aio_context(), in a drained section. While
-     * we have the in-flight counter incremented, that drain must block.
-     */
-    ctx = blk_get_aio_context(s->conf.blk);
-    assert(ctx == qemu_get_current_aio_context());
-
     QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        data->fn(req, data->fn_opaque);
+        if (req->ctx == ctx) {
+            scsi_req_ref(req); /* dropped after calling fn() */
+            reqs = g_list_prepend(reqs, req);
+        }
+    }
+    }
+
+    /* Call fn() on each request */
+    for (GList *elem = g_list_first(reqs); elem; elem = g_list_next(elem)) {
+        data->fn(elem->data, data->fn_opaque);
+        scsi_req_unref(elem->data);
     }
 
     /* Drop the reference taken by scsi_device_for_each_req_async() */
@@ -139,9 +154,35 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
     blk_dec_in_flight(s->conf.blk);
 }
 
+static void scsi_device_for_each_req_async_do_ctx(gpointer key, gpointer value,
+                                                  gpointer user_data)
+{
+    AioContext *ctx = key;
+    SCSIDeviceForEachReqAsyncData *params = user_data;
+    SCSIDeviceForEachReqAsyncData *data;
+
+    data = g_new(SCSIDeviceForEachReqAsyncData, 1);
+    data->s = params->s;
+    data->fn = params->fn;
+    data->fn_opaque = params->fn_opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(data->s));
+
+    /* Paired with scsi_device_for_each_req_async_bh() */
+    blk_inc_in_flight(data->s->conf.blk);
+
+    aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+}
+
 /*
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
- * runs in the AioContext that is executing the request.
+ * must be thread-safe because it runs concurrently in each AioContext that is
+ * executing a request.
+ *
  * Keeps the BlockBackend's in-flight counter incremented until everything is
  * done, so draining it will settle all scheduled @fn() calls.
  */
@@ -151,24 +192,26 @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
 {
     assert(qemu_in_main_thread());
 
-    SCSIDeviceForEachReqAsyncData *data =
-        g_new(SCSIDeviceForEachReqAsyncData, 1);
-
-    data->s = s;
-    data->fn = fn;
-    data->fn_opaque = opaque;
-
-    /*
-     * Hold a reference to the SCSIDevice until
-     * scsi_device_for_each_req_async_bh() finishes.
-     */
-    object_ref(OBJECT(s));
-
-    /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
-    blk_inc_in_flight(s->conf.blk);
-    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
-                            scsi_device_for_each_req_async_bh,
-                            data);
+    /* The set of AioContexts where the requests are being processed */
+    g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        SCSIRequest *req;
+        QTAILQ_FOREACH(req, &s->requests, next) {
+            g_hash_table_add(aio_contexts, req->ctx);
+        }
+    }
+
+    /* Schedule a BH for each AioContext */
+    SCSIDeviceForEachReqAsyncData params = {
+        .s = s,
+        .fn = fn,
+        .fn_opaque = opaque,
+    };
+    g_hash_table_foreach(
+        aio_contexts,
+        scsi_device_for_each_req_async_do_ctx,
+        &params
+    );
 }
 
 static void scsi_device_realize(SCSIDevice *s, Error **errp)
@@ -349,6 +392,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
         dev->lun = lun;
     }
 
+    qemu_mutex_init(&dev->requests_lock);
     QTAILQ_INIT(&dev->requests);
     scsi_device_realize(dev, &local_err);
     if (local_err) {
@@ -369,6 +413,8 @@ static void scsi_qdev_unrealize(DeviceState *qdev)
 
     scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
 
+    qemu_mutex_destroy(&dev->requests_lock);
+
     scsi_device_unrealize(dev);
 
     blockdev_mark_auto_del(dev->conf.blk);
@@ -868,6 +914,7 @@ invalid_opcode:
         }
     }
 
+    req->ctx = qemu_get_current_aio_context();
     req->cmd = cmd;
     req->residual = req->cmd.xfer;
 
@@ -964,7 +1011,10 @@ static void scsi_req_enqueue_internal(SCSIRequest *req)
         req->sg = NULL;
     }
     req->enqueued = true;
+
+    WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
     QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+    }
 }
 
 int32_t scsi_req_enqueue(SCSIRequest *req)
@@ -984,7 +1034,9 @@ static void scsi_req_dequeue(SCSIRequest *req)
     trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
     req->retry = false;
     if (req->enqueued) {
+        WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
         QTAILQ_REMOVE(&req->dev->requests, req, next);
+        }
         req->enqueued = false;
         scsi_req_unref(req);
     }
@@ -1961,8 +2013,7 @@ static void scsi_device_class_init(ObjectClass *klass, void *data)
 
 static void scsi_dev_instance_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(obj);
-    SCSIDevice *s = SCSI_DEVICE(dev);
+    SCSIDevice *s = SCSI_DEVICE(obj);
 
     device_add_bootindex_property(obj, &s->conf.bootindex,
                                   "bootindex", NULL,
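scsi_device_for_each_req_async() above now collects the distinct AioContexts of all queued requests under requests_lock and schedules one bottom half per context, so @fn() runs where each request runs. A simplified sketch of the dedup-and-dispatch idea, with plain pointers and an array standing in for AioContext and GHashTable:

#include <stdio.h>

#define MAX_CTX 16

static int add_unique(void *set[], int n, void *ctx)
{
    for (int i = 0; i < n; i++) {
        if (set[i] == ctx) {
            return n;            /* context already present */
        }
    }
    set[n] = ctx;
    return n + 1;
}

int main(void)
{
    int ctx_a, ctx_b;                        /* stand-ins for two AioContexts */
    void *req_ctx[] = { &ctx_a, &ctx_b, &ctx_a, &ctx_b };
    void *set[MAX_CTX];
    int n = 0;

    for (unsigned i = 0; i < sizeof(req_ctx) / sizeof(req_ctx[0]); i++) {
        n = add_unique(set, n, req_ctx[i]);
    }
    printf("schedule %d bottom halves, one per AioContext\n", n);   /* prints 2 */
    return 0;
}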
@@ -106,7 +106,6 @@ struct SCSIDiskState {
     uint64_t max_unmap_size;
     uint64_t max_io_size;
     uint32_t quirks;
-    QEMUBH *bh;
     char *version;
     char *serial;
     char *vendor;
@@ -329,9 +328,8 @@ static void scsi_aio_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
@@ -431,12 +429,10 @@ static void scsi_dma_complete(void *opaque, int ret)
 
 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert(r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -488,8 +484,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_readv, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_FROM_DEVICE);
@@ -564,12 +559,10 @@ static void scsi_read_data(SCSIRequest *req)
 
 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;
 
-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());
 
     assert (r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -651,8 +644,7 @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_writev, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_TO_DEVICE);
@@ -18,6 +18,7 @@
 #include "system/block-backend.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 
 /* Context: BQL held */
@@ -28,7 +29,14 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
-    if (vs->conf.iothread) {
+    if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return;
+    }
+
+    if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
                        "device is incompatible with iothread "
@@ -39,13 +47,62 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return;
         }
-        s->ctx = iothread_get_aio_context(vs->conf.iothread);
-    } else {
-        if (!virtio_device_ioeventfd_enabled(vdev)) {
+    }
+
+    s->vq_aio_context = g_new(AioContext *, vs->conf.num_queues +
+                                            VIRTIO_SCSI_VQ_NUM_FIXED);
+
+    /*
+     * Handle the ctrl virtqueue in the main loop thread where device resets
+     * can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
+
+    /*
+     * Handle the event virtqueue in the main loop thread where its no_poll
+     * behavior won't stop IOThread polling.
+     */
+    s->vq_aio_context[1] = qemu_get_aio_context();
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
+                                       &s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED],
+                                       vs->conf.num_queues, errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
             return;
         }
-        s->ctx = qemu_get_aio_context();
+    } else if (vs->conf.iothread) {
+        AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
+        for (uint16_t i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
+        }
+
+        /* Released in virtio_scsi_dataplane_cleanup() */
+        object_ref(OBJECT(vs->conf.iothread));
+    } else {
+        AioContext *ctx = qemu_get_aio_context();
+        for (unsigned i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
+        }
+    }
+}
+
+/* Context: BQL held */
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list);
+    }
+
+    if (vs->conf.iothread) {
+        object_unref(OBJECT(vs->conf.iothread));
+    }
+
+    g_free(s->vq_aio_context);
+    s->vq_aio_context = NULL;
 }
 
 static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
@@ -66,31 +123,20 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 }
 
 /* Context: BH in IOThread */
-static void virtio_scsi_dataplane_stop_bh(void *opaque)
+static void virtio_scsi_dataplane_stop_vq_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtQueue *vq = opaque;
     EventNotifier *host_notifier;
-    int i;
 
-    virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+    virtio_queue_aio_detach_host_notifier(vq, ctx);
+    host_notifier = virtio_queue_get_host_notifier(vq);
 
     /*
      * Test and clear notifier after disabling event, in case poll callback
      * didn't have time to run.
      */
     virtio_queue_host_notifier_read(host_notifier);
-
-    virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
-    virtio_queue_host_notifier_read(host_notifier);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
-        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
-        virtio_queue_host_notifier_read(host_notifier);
-    }
 }
 
 /* Context: BQL held */
@@ -154,11 +200,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     smp_wmb(); /* paired with aio_notify_accept() */
 
     if (s->bus.drain_count == 0) {
-        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq,
+                                              s->vq_aio_context[0]);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq,
+                                                      s->vq_aio_context[1]);
 
         for (i = 0; i < vs->conf.num_queues; i++) {
-            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+            AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx);
         }
     }
     return 0;
@@ -207,7 +256,11 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     if (s->bus.drain_count == 0) {
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+        for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) {
+            VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i);
+            AioContext *ctx = s->vq_aio_context[i];
+            aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq);
+        }
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
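virtio_scsi_dataplane_setup() above builds a per-virtqueue AioContext table: the two fixed queues (ctrl and event) stay in the main loop, and command queues start at index VIRTIO_SCSI_VQ_NUM_FIXED and may each be handled by an IOThread. A small sketch of that index layout (VQ_NUM_FIXED, NUM_QUEUES and the context names here are illustrative placeholders):

#include <stdio.h>

#define VQ_NUM_FIXED 2
#define NUM_QUEUES   4

int main(void)
{
    const char *ctx[VQ_NUM_FIXED + NUM_QUEUES];

    ctx[0] = "main-loop";                   /* ctrl vq: device resets run here */
    ctx[1] = "main-loop";                   /* event vq: no_poll, keep polling on */
    for (int i = 0; i < NUM_QUEUES; i++) {
        ctx[VQ_NUM_FIXED + i] = "iothread"; /* cmd vqs may use IOThreads */
    }

    for (int i = 0; i < VQ_NUM_FIXED + NUM_QUEUES; i++) {
        printf("vq %d -> %s\n", i, ctx[i]);
    }
    return 0;
}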
@@ -27,6 +27,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "trace.h"
@@ -47,7 +48,7 @@ typedef struct VirtIOSCSIReq {
     /* Used for two-stage request submission and TMFs deferred to BH */
     QTAILQ_ENTRY(VirtIOSCSIReq) next;
 
-    /* Used for cancellation of request during TMFs */
+    /* Used for cancellation of request during TMFs. Atomic. */
     int remaining;
 
     SCSIRequest *sreq;
@@ -102,13 +103,18 @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req)
     g_free(req);
 }
 
-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
     VirtIOSCSI *s = req->dev;
     VirtQueue *vq = req->vq;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
     qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
 
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
     if (s->dataplane_started && !s->dataplane_fenced) {
         virtio_notify_irqfd(vdev, vq);
@@ -116,6 +122,10 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
         virtio_notify(vdev, vq);
     }
 
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (req->sreq) {
         req->sreq->hba_private = NULL;
         scsi_req_unref(req->sreq);
@@ -123,34 +133,20 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
     virtio_scsi_free_req(req);
 }
 
-static void virtio_scsi_complete_req_bh(void *opaque)
-{
-    VirtIOSCSIReq *req = opaque;
-
-    virtio_scsi_complete_req(req);
-}
-
-/*
- * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
- * thread cannot touch the virtqueue since that could race with an IOThread.
- */
-static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
-
-    if (!s->ctx || s->ctx == qemu_get_aio_context()) {
-        /* No need to schedule a BH when there is no IOThread */
-        virtio_scsi_complete_req(req);
-    } else {
-        /* Run request completion in the IOThread */
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
-    }
-}
-
-static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
+static void virtio_scsi_bad_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
     virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
 
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     virtqueue_detach_element(req->vq, &req->elem, 0);
 
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     virtio_scsi_free_req(req);
 }
 
@@ -235,12 +231,21 @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req,
     return 0;
 }
 
-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq, QemuMutex *vq_lock)
 {
     VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s;
     VirtIOSCSIReq *req;
 
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     req = virtqueue_pop(vq, sizeof(VirtIOSCSIReq) + vs->cdb_size);
 
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (!req) {
         return NULL;
     }
@@ -294,136 +299,157 @@ typedef struct {
     VirtIOSCSIReq *tmf_req;
 } VirtIOSCSICancelNotifier;
 
+static void virtio_scsi_tmf_dec_remaining(VirtIOSCSIReq *tmf)
+{
+    if (qatomic_fetch_dec(&tmf->remaining) == 1) {
+        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(tmf->req.tmf.lun),
+                                   tmf->req.tmf.tag, tmf->resp.tmf.response);
+
+        virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
+    }
+}
+
 static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
 {
     VirtIOSCSICancelNotifier *n = container_of(notifier,
                                                VirtIOSCSICancelNotifier,
                                                notifier);
 
-    if (--n->tmf_req->remaining == 0) {
-        VirtIOSCSIReq *req = n->tmf_req;
-
-        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
-                                   req->req.tmf.tag, req->resp.tmf.response);
-        virtio_scsi_complete_req(req);
-    }
+    virtio_scsi_tmf_dec_remaining(n->tmf_req);
     g_free(n);
 }
 
-static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
+static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
 {
-    if (s->dataplane_started && d && blk_is_available(d->conf.blk)) {
-        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
-    }
+    VirtIOSCSICancelNotifier *notifier;
+
+    assert(r->ctx == qemu_get_current_aio_context());
+
+    /* Decremented in virtio_scsi_cancel_notify() */
+    qatomic_inc(&tmf->remaining);
+
+    notifier = g_new(VirtIOSCSICancelNotifier, 1);
+    notifier->notifier.notify = virtio_scsi_cancel_notify;
+    notifier->tmf_req = tmf;
+    scsi_req_cancel_async(r, &notifier->notifier);
 }
 
-static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
+/* Execute a TMF on the requests in the current AioContext */
+static void virtio_scsi_do_tmf_aio_context(void *opaque)
 {
-    VirtIOSCSI *s = req->dev;
-    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
-    BusChild *kid;
-    int target;
-
-    switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
-            goto out;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
-            goto out;
-        }
-        qatomic_inc(&s->resetting);
-        device_cold_reset(&d->qdev);
-        qatomic_dec(&s->resetting);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtIOSCSIReq *tmf = opaque;
+    VirtIOSCSI *s = tmf->dev;
+    SCSIDevice *d = virtio_scsi_device_get(s, tmf->req.tmf.lun);
+    SCSIRequest *r;
+    bool match_tag;
+
+    if (!d) {
+        tmf->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+        virtio_scsi_tmf_dec_remaining(tmf);
+        return;
+    }
+
+    /*
+     * This function could handle other subtypes that need to be processed in
+     * the request's AioContext in the future, but for now only request
+     * cancelation subtypes are performed here.
+     */
+    switch (tmf->req.tmf.subtype) {
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+        match_tag = true;
         break;
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        qatomic_inc(&s->resetting);
-
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
-            }
-        }
-        rcu_read_unlock();
-
-        qatomic_dec(&s->resetting);
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+        match_tag = false;
         break;
     default:
         g_assert_not_reached();
     }
 
-out:
-    object_unref(OBJECT(d));
-    virtio_scsi_complete_req_from_main_loop(req);
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        QTAILQ_FOREACH(r, &d->requests, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
+            assert(cmd_req); /* request has hba_private while enqueued */
+
+            if (r->ctx != ctx) {
+                continue;
+            }
+            if (match_tag && cmd_req->req.cmd.tag != tmf->req.tmf.tag) {
+                continue;
+            }
+            virtio_scsi_tmf_cancel_req(tmf, r);
+        }
+    }
+
+    /* Incremented by virtio_scsi_do_tmf() */
+    virtio_scsi_tmf_dec_remaining(tmf);
+
+    object_unref(d);
 }
 
-/* Some TMFs must be processed from the main loop thread */
-static void virtio_scsi_do_tmf_bh(void *opaque)
+static void dummy_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
+    /* Do nothing */
+}
 
+/*
+ * Wait for pending virtio_scsi_defer_tmf_to_aio_context() BHs.
+ */
+static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
+{
     GLOBAL_STATE_CODE();
 
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-            QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-            QTAILQ_INSERT_TAIL(&reqs, req, next);
-        }
-
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
+    assert(!s->dataplane_started);
 
-    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
-        QTAILQ_REMOVE(&reqs, req, next);
-        virtio_scsi_do_one_tmf_bh(req);
+    for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+        AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
+        /* Our BH only runs after previously scheduled BHs */
+        aio_wait_bh_oneshot(ctx, dummy_bh, NULL);
     }
 }
 
-static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
+/*
+ * Run the TMF in a specific AioContext, handling only requests in that
+ * AioContext. This is necessary because requests can run in different
+ * AioContext and it is only possible to cancel them from the AioContext where
+ * they are running.
+ */
+static void virtio_scsi_defer_tmf_to_aio_context(VirtIOSCSIReq *tmf,
+                                                 AioContext *ctx)
 {
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
-
-    GLOBAL_STATE_CODE();
-
-    /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
-    if (s->tmf_bh) {
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-
-        /* SAM-6 6.3.2 Hard reset */
-        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req);
-    }
+    /* Decremented in virtio_scsi_do_tmf_aio_context() */
+    qatomic_inc(&tmf->remaining);
+
+    /* See virtio_scsi_flush_defer_tmf_to_aio_context() cleanup during reset */
+    aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);
 }
 
-static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+/*
+ * Returns the AioContext for a given TMF's tag field or NULL. Note that the
+ * request identified by the tag may have completed by the time you can execute
+ * a BH in the AioContext, so don't assume the request still exists in your BH.
+ */
+static AioContext *find_aio_context_for_tmf_tag(SCSIDevice *d,
+                                                VirtIOSCSIReq *tmf)
 {
-    VirtIOSCSI *s = req->dev;
-
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
-
-        if (!s->tmf_bh) {
-            s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
-            qemu_bh_schedule(s->tmf_bh);
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        SCSIRequest *r;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
+
+            /* hba_private is non-NULL while the request is enqueued */
+            assert(cmd_req);
+
+            if (cmd_req->req.cmd.tag == tmf->req.tmf.tag) {
+                return r->ctx;
+            }
         }
     }
+    return NULL;
 }
 
 /* Return 0 if the request is ready to be completed and return to guest;
@@ -433,9 +459,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 {
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
|
||||||
SCSIRequest *r, *next;
|
SCSIRequest *r, *next;
|
||||||
|
AioContext *ctx;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
virtio_scsi_ctx_check(s, d);
|
|
||||||
/* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
|
/* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
|
||||||
req->resp.tmf.response = VIRTIO_SCSI_S_OK;
|
req->resp.tmf.response = VIRTIO_SCSI_S_OK;
|
||||||
|
|
||||||
|
@ -450,7 +476,22 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
req->req.tmf.tag, req->req.tmf.subtype);
|
req->req.tmf.tag, req->req.tmf.subtype);
|
||||||
|
|
||||||
switch (req->req.tmf.subtype) {
|
switch (req->req.tmf.subtype) {
|
||||||
case VIRTIO_SCSI_T_TMF_ABORT_TASK:
|
case VIRTIO_SCSI_T_TMF_ABORT_TASK: {
|
||||||
|
if (!d) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
|
||||||
|
goto incorrect_lun;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx = find_aio_context_for_tmf_tag(d, req);
|
||||||
|
if (ctx) {
|
||||||
|
virtio_scsi_defer_tmf_to_aio_context(req, ctx);
|
||||||
|
ret = -EINPROGRESS;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VIRTIO_SCSI_T_TMF_QUERY_TASK:
|
case VIRTIO_SCSI_T_TMF_QUERY_TASK:
|
||||||
if (!d) {
|
if (!d) {
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -458,44 +499,82 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
|
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
|
||||||
goto incorrect_lun;
|
goto incorrect_lun;
|
||||||
}
|
}
|
||||||
QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
|
|
||||||
|
WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
|
||||||
|
QTAILQ_FOREACH(r, &d->requests, next) {
|
||||||
VirtIOSCSIReq *cmd_req = r->hba_private;
|
VirtIOSCSIReq *cmd_req = r->hba_private;
|
||||||
if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) {
|
assert(cmd_req); /* request has hba_private while enqueued */
|
||||||
break;
|
|
||||||
}
|
if (cmd_req->req.cmd.tag == req->req.tmf.tag) {
|
||||||
}
|
|
||||||
if (r) {
|
|
||||||
/*
|
/*
|
||||||
* Assert that the request has not been completed yet, we
|
* "If the specified command is present in the task set,
|
||||||
* check for it in the loop above.
|
* then return a service response set to FUNCTION
|
||||||
*/
|
* SUCCEEDED".
|
||||||
assert(r->hba_private);
|
|
||||||
if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
|
|
||||||
/* "If the specified command is present in the task set, then
|
|
||||||
* return a service response set to FUNCTION SUCCEEDED".
|
|
||||||
*/
|
*/
|
||||||
req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
|
req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
|
||||||
} else {
|
}
|
||||||
VirtIOSCSICancelNotifier *notifier;
|
|
||||||
|
|
||||||
req->remaining = 1;
|
|
||||||
notifier = g_new(VirtIOSCSICancelNotifier, 1);
|
|
||||||
notifier->tmf_req = req;
|
|
||||||
notifier->notifier.notify = virtio_scsi_cancel_notify;
|
|
||||||
scsi_req_cancel_async(r, ¬ifier->notifier);
|
|
||||||
ret = -EINPROGRESS;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
|
case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
|
||||||
case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
|
if (!d) {
|
||||||
virtio_scsi_defer_tmf_to_bh(req);
|
goto fail;
|
||||||
ret = -EINPROGRESS;
|
}
|
||||||
|
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
|
||||||
|
goto incorrect_lun;
|
||||||
|
}
|
||||||
|
qatomic_inc(&s->resetting);
|
||||||
|
device_cold_reset(&d->qdev);
|
||||||
|
qatomic_dec(&s->resetting);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: {
|
||||||
|
BusChild *kid;
|
||||||
|
int target = req->req.tmf.lun[1];
|
||||||
|
qatomic_inc(&s->resetting);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
|
||||||
|
SCSIDevice *d1 = SCSI_DEVICE(kid->child);
|
||||||
|
if (d1->channel == 0 && d1->id == target) {
|
||||||
|
device_cold_reset(&d1->qdev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
qatomic_dec(&s->resetting);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
|
case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
|
||||||
case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
|
case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
|
||||||
|
g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
|
||||||
|
|
||||||
|
if (!d) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
|
||||||
|
goto incorrect_lun;
|
||||||
|
}
|
||||||
|
|
||||||
|
qatomic_inc(&req->remaining);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
|
||||||
|
ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
|
||||||
|
|
||||||
|
if (!g_hash_table_add(aio_contexts, ctx)) {
|
||||||
|
continue; /* skip previously added AioContext */
|
||||||
|
}
|
||||||
|
|
||||||
|
virtio_scsi_defer_tmf_to_aio_context(req, ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtio_scsi_tmf_dec_remaining(req);
|
||||||
|
ret = -EINPROGRESS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
|
case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
|
||||||
if (!d) {
|
if (!d) {
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -504,34 +583,19 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
goto incorrect_lun;
|
goto incorrect_lun;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add 1 to "remaining" until virtio_scsi_do_tmf returns.
|
WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
|
||||||
* This way, if the bus starts calling back to the notifiers
|
|
||||||
* even before we finish the loop, virtio_scsi_cancel_notify
|
|
||||||
* will not complete the TMF too early.
|
|
||||||
*/
|
|
||||||
req->remaining = 1;
|
|
||||||
QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
|
QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
|
||||||
if (r->hba_private) {
|
/* Request has hba_private while enqueued */
|
||||||
if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
|
assert(r->hba_private);
|
||||||
/* "If there is any command present in the task set, then
|
|
||||||
|
/*
|
||||||
|
* "If there is any command present in the task set, then
|
||||||
* return a service response set to FUNCTION SUCCEEDED".
|
* return a service response set to FUNCTION SUCCEEDED".
|
||||||
*/
|
*/
|
||||||
req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
|
req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
|
||||||
break;
|
break;
|
||||||
} else {
|
|
||||||
VirtIOSCSICancelNotifier *notifier;
|
|
||||||
|
|
||||||
req->remaining++;
|
|
||||||
notifier = g_new(VirtIOSCSICancelNotifier, 1);
|
|
||||||
notifier->notifier.notify = virtio_scsi_cancel_notify;
|
|
||||||
notifier->tmf_req = req;
|
|
||||||
scsi_req_cancel_async(r, ¬ifier->notifier);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (--req->remaining > 0) {
|
|
||||||
ret = -EINPROGRESS;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
|
case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
|
||||||
|
@ -562,7 +626,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
|
|
||||||
if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
|
if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
|
||||||
&type, sizeof(type)) < sizeof(type)) {
|
&type, sizeof(type)) < sizeof(type)) {
|
||||||
virtio_scsi_bad_req(req);
|
virtio_scsi_bad_req(req, &s->ctrl_lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -570,7 +634,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
if (type == VIRTIO_SCSI_T_TMF) {
|
if (type == VIRTIO_SCSI_T_TMF) {
|
||||||
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
|
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
|
||||||
sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
|
sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
|
||||||
virtio_scsi_bad_req(req);
|
virtio_scsi_bad_req(req, &s->ctrl_lock);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
r = virtio_scsi_do_tmf(s, req);
|
r = virtio_scsi_do_tmf(s, req);
|
||||||
|
@ -580,7 +644,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
|
type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
|
||||||
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
|
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
|
||||||
sizeof(VirtIOSCSICtrlANResp)) < 0) {
|
sizeof(VirtIOSCSICtrlANResp)) < 0) {
|
||||||
virtio_scsi_bad_req(req);
|
virtio_scsi_bad_req(req, &s->ctrl_lock);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
req->req.an.event_requested =
|
req->req.an.event_requested =
|
||||||
|
@ -600,7 +664,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
|
type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
|
||||||
trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
|
trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
|
||||||
req->resp.an.response);
|
req->resp.an.response);
|
||||||
virtio_scsi_complete_req(req);
|
virtio_scsi_complete_req(req, &s->ctrl_lock);
|
||||||
} else {
|
} else {
|
||||||
assert(r == -EINPROGRESS);
|
assert(r == -EINPROGRESS);
|
||||||
}
|
}
|
||||||
|
@ -610,7 +674,7 @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||||
{
|
{
|
||||||
VirtIOSCSIReq *req;
|
VirtIOSCSIReq *req;
|
||||||
|
|
||||||
while ((req = virtio_scsi_pop_req(s, vq))) {
|
while ((req = virtio_scsi_pop_req(s, vq, &s->ctrl_lock))) {
|
||||||
virtio_scsi_handle_ctrl_req(s, req);
|
virtio_scsi_handle_ctrl_req(s, req);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -625,9 +689,12 @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||||
*/
|
*/
|
||||||
static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
|
static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
|
||||||
{
|
{
|
||||||
if (!s->ctx || s->dataplane_started) {
|
if (s->dataplane_started) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (s->vq_aio_context[0] == qemu_get_aio_context()) {
|
||||||
|
return false; /* not using IOThreads */
|
||||||
|
}
|
||||||
|
|
||||||
virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
|
virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
|
||||||
return !s->dataplane_fenced;
|
return !s->dataplane_fenced;
|
||||||
|
@ -654,7 +721,7 @@ static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
|
||||||
* in virtio_scsi_command_complete.
|
* in virtio_scsi_command_complete.
|
||||||
*/
|
*/
|
||||||
req->resp_size = sizeof(VirtIOSCSICmdResp);
|
req->resp_size = sizeof(VirtIOSCSICmdResp);
|
||||||
virtio_scsi_complete_req(req);
|
virtio_scsi_complete_req(req, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtio_scsi_command_failed(SCSIRequest *r)
|
static void virtio_scsi_command_failed(SCSIRequest *r)
|
||||||
|
@ -788,7 +855,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
virtio_scsi_fail_cmd_req(req);
|
virtio_scsi_fail_cmd_req(req);
|
||||||
return -ENOTSUP;
|
return -ENOTSUP;
|
||||||
} else {
|
} else {
|
||||||
virtio_scsi_bad_req(req);
|
virtio_scsi_bad_req(req, NULL);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -801,7 +868,6 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
|
||||||
virtio_scsi_complete_cmd_req(req);
|
virtio_scsi_complete_cmd_req(req);
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
virtio_scsi_ctx_check(s, d);
|
|
||||||
req->sreq = scsi_req_new(d, req->req.cmd.tag,
|
req->sreq = scsi_req_new(d, req->req.cmd.tag,
|
||||||
virtio_scsi_get_lun(req->req.cmd.lun),
|
virtio_scsi_get_lun(req->req.cmd.lun),
|
||||||
req->req.cmd.cdb, vs->cdb_size, req);
|
req->req.cmd.cdb, vs->cdb_size, req);
|
||||||
|
@ -843,7 +909,7 @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||||
virtio_queue_set_notification(vq, 0);
|
virtio_queue_set_notification(vq, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((req = virtio_scsi_pop_req(s, vq))) {
|
while ((req = virtio_scsi_pop_req(s, vq, NULL))) {
|
||||||
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
|
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
QTAILQ_INSERT_TAIL(&reqs, req, next);
|
QTAILQ_INSERT_TAIL(&reqs, req, next);
|
||||||
|
@ -936,7 +1002,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
|
||||||
|
|
||||||
assert(!s->dataplane_started);
|
assert(!s->dataplane_started);
|
||||||
|
|
||||||
virtio_scsi_reset_tmf_bh(s);
|
virtio_scsi_flush_defer_tmf_to_aio_context(s);
|
||||||
|
|
||||||
qatomic_inc(&s->resetting);
|
qatomic_inc(&s->resetting);
|
||||||
bus_cold_reset(BUS(&s->bus));
|
bus_cold_reset(BUS(&s->bus));
|
||||||
|
@ -944,7 +1010,10 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
|
||||||
|
|
||||||
vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
|
vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
|
||||||
vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
|
vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
|
||||||
|
|
||||||
|
WITH_QEMU_LOCK_GUARD(&s->event_lock) {
|
||||||
s->events_dropped = false;
|
s->events_dropped = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -973,7 +1042,8 @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
req = virtio_scsi_pop_req(s, vs->event_vq);
|
req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
|
||||||
|
WITH_QEMU_LOCK_GUARD(&s->event_lock) {
|
||||||
if (!req) {
|
if (!req) {
|
||||||
s->events_dropped = true;
|
s->events_dropped = true;
|
||||||
return;
|
return;
|
||||||
|
@ -983,9 +1053,10 @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
|
||||||
event |= VIRTIO_SCSI_T_EVENTS_MISSED;
|
event |= VIRTIO_SCSI_T_EVENTS_MISSED;
|
||||||
s->events_dropped = false;
|
s->events_dropped = false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
|
if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
|
||||||
virtio_scsi_bad_req(req);
|
virtio_scsi_bad_req(req, &s->event_lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1005,12 +1076,18 @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
|
||||||
}
|
}
|
||||||
trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
|
trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
|
||||||
|
|
||||||
virtio_scsi_complete_req(req);
|
virtio_scsi_complete_req(req, &s->event_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
|
static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
|
||||||
{
|
{
|
||||||
if (s->events_dropped) {
|
bool events_dropped;
|
||||||
|
|
||||||
|
WITH_QEMU_LOCK_GUARD(&s->event_lock) {
|
||||||
|
events_dropped = s->events_dropped;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (events_dropped) {
|
||||||
VirtIOSCSIEventInfo info = {
|
VirtIOSCSIEventInfo info = {
|
||||||
.event = VIRTIO_SCSI_T_NO_EVENT,
|
.event = VIRTIO_SCSI_T_NO_EVENT,
|
||||||
};
|
};
|
||||||
|
@ -1061,14 +1138,16 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
{
|
{
|
||||||
VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
|
VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
|
||||||
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
|
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
|
||||||
|
AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED];
|
||||||
SCSIDevice *sd = SCSI_DEVICE(dev);
|
SCSIDevice *sd = SCSI_DEVICE(dev);
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (s->ctx && !s->dataplane_fenced) {
|
if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) {
|
||||||
ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
|
/*
|
||||||
if (ret < 0) {
|
* Try to make the BlockBackend's AioContext match ours. Ignore failure
|
||||||
return;
|
* because I/O will still work although block jobs and other users
|
||||||
}
|
* might be slower when multiple AioContexts use a BlockBackend.
|
||||||
|
*/
|
||||||
|
blk_set_aio_context(sd->conf.blk, ctx, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
|
if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
|
||||||
|
@ -1103,7 +1182,7 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
|
|
||||||
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
|
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
|
||||||
|
|
||||||
if (s->ctx) {
|
if (s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED] != qemu_get_aio_context()) {
|
||||||
/* If other users keep the BlockBackend in the iothread, that's ok */
|
/* If other users keep the BlockBackend in the iothread, that's ok */
|
||||||
blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
|
blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
|
||||||
}
|
}
|
||||||
|
@ -1137,7 +1216,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
|
||||||
|
|
||||||
for (uint32_t i = 0; i < total_queues; i++) {
|
for (uint32_t i = 0; i < total_queues; i++) {
|
||||||
VirtQueue *vq = virtio_get_queue(vdev, i);
|
VirtQueue *vq = virtio_get_queue(vdev, i);
|
||||||
virtio_queue_aio_detach_host_notifier(vq, s->ctx);
|
virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1163,10 +1242,12 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
|
||||||
|
|
||||||
for (uint32_t i = 0; i < total_queues; i++) {
|
for (uint32_t i = 0; i < total_queues; i++) {
|
||||||
VirtQueue *vq = virtio_get_queue(vdev, i);
|
VirtQueue *vq = virtio_get_queue(vdev, i);
|
||||||
|
AioContext *ctx = s->vq_aio_context[i];
|
||||||
|
|
||||||
if (vq == vs->event_vq) {
|
if (vq == vs->event_vq) {
|
||||||
virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
|
virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx);
|
||||||
} else {
|
} else {
|
||||||
virtio_queue_aio_attach_host_notifier(vq, s->ctx);
|
virtio_queue_aio_attach_host_notifier(vq, ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1235,8 +1316,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
|
||||||
VirtIOSCSI *s = VIRTIO_SCSI(dev);
|
VirtIOSCSI *s = VIRTIO_SCSI(dev);
|
||||||
Error *err = NULL;
|
Error *err = NULL;
|
||||||
|
|
||||||
QTAILQ_INIT(&s->tmf_bh_list);
|
qemu_mutex_init(&s->ctrl_lock);
|
||||||
qemu_mutex_init(&s->tmf_bh_lock);
|
qemu_mutex_init(&s->event_lock);
|
||||||
|
|
||||||
virtio_scsi_common_realize(dev,
|
virtio_scsi_common_realize(dev,
|
||||||
virtio_scsi_handle_ctrl,
|
virtio_scsi_handle_ctrl,
|
||||||
|
@ -1271,15 +1352,16 @@ void virtio_scsi_common_unrealize(DeviceState *dev)
|
||||||
virtio_cleanup(vdev);
|
virtio_cleanup(vdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* main loop */
|
||||||
static void virtio_scsi_device_unrealize(DeviceState *dev)
|
static void virtio_scsi_device_unrealize(DeviceState *dev)
|
||||||
{
|
{
|
||||||
VirtIOSCSI *s = VIRTIO_SCSI(dev);
|
VirtIOSCSI *s = VIRTIO_SCSI(dev);
|
||||||
|
|
||||||
virtio_scsi_reset_tmf_bh(s);
|
virtio_scsi_dataplane_cleanup(s);
|
||||||
|
|
||||||
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
|
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
|
||||||
virtio_scsi_common_unrealize(dev);
|
virtio_scsi_common_unrealize(dev);
|
||||||
qemu_mutex_destroy(&s->tmf_bh_lock);
|
qemu_mutex_destroy(&s->event_lock);
|
||||||
|
qemu_mutex_destroy(&s->ctrl_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const Property virtio_scsi_properties[] = {
|
static const Property virtio_scsi_properties[] = {
|
||||||
|
@ -1299,6 +1381,8 @@ static const Property virtio_scsi_properties[] = {
|
||||||
VIRTIO_SCSI_F_CHANGE, true),
|
VIRTIO_SCSI_F_CHANGE, true),
|
||||||
DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
|
DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
|
||||||
TYPE_IOTHREAD, IOThread *),
|
TYPE_IOTHREAD, IOThread *),
|
||||||
|
DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI,
|
||||||
|
parent_obj.conf.iothread_vq_mapping_list),
|
||||||
};
|
};
|
||||||
|
|
||||||
static const VMStateDescription vmstate_virtio_scsi = {
|
static const VMStateDescription vmstate_virtio_scsi = {
|
||||||
|
|
hw/virtio/iothread-vq-mapping.c (new file, 131 lines):

/*
 * IOThread Virtqueue Mapping
 *
 * Copyright Red Hat, Inc
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include "qemu/osdep.h"
#include "system/iothread.h"
#include "hw/virtio/iothread-vq-mapping.h"

static bool
iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
                             num_queues, Error **errp)
{
    g_autofree unsigned long *vqs = bitmap_new(num_queues);
    g_autoptr(GHashTable) iothreads =
        g_hash_table_new(g_str_hash, g_str_equal);

    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
        const char *name = node->value->iothread;
        uint16List *vq;

        if (!iothread_by_id(name)) {
            error_setg(errp, "IOThread \"%s\" object does not exist", name);
            return false;
        }

        if (!g_hash_table_add(iothreads, (gpointer)name)) {
            error_setg(errp,
                    "duplicate IOThread name \"%s\" in iothread-vq-mapping",
                    name);
            return false;
        }

        if (node != list) {
            if (!!node->value->vqs != !!list->value->vqs) {
                error_setg(errp, "either all items in iothread-vq-mapping "
                                 "must have vqs or none of them must have it");
                return false;
            }
        }

        for (vq = node->value->vqs; vq; vq = vq->next) {
            if (vq->value >= num_queues) {
                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
                        "less than num_queues %u in iothread-vq-mapping",
                        vq->value, name, num_queues);
                return false;
            }

            if (test_and_set_bit(vq->value, vqs)) {
                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
                        "because it is already assigned", vq->value, name);
                return false;
            }
        }
    }

    if (list->value->vqs) {
        for (uint16_t i = 0; i < num_queues; i++) {
            if (!test_bit(i, vqs)) {
                error_setg(errp,
                        "missing vq %u IOThread assignment in iothread-vq-mapping",
                        i);
                return false;
            }
        }
    }

    return true;
}

bool iothread_vq_mapping_apply(
        IOThreadVirtQueueMappingList *list,
        AioContext **vq_aio_context,
        uint16_t num_queues,
        Error **errp)
{
    IOThreadVirtQueueMappingList *node;
    size_t num_iothreads = 0;
    size_t cur_iothread = 0;

    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
        return false;
    }

    for (node = list; node; node = node->next) {
        num_iothreads++;
    }

    for (node = list; node; node = node->next) {
        IOThread *iothread = iothread_by_id(node->value->iothread);
        AioContext *ctx = iothread_get_aio_context(iothread);

        /* Released in virtio_blk_vq_aio_context_cleanup() */
        object_ref(OBJECT(iothread));

        if (node->value->vqs) {
            uint16List *vq;

            /* Explicit vq:IOThread assignment */
            for (vq = node->value->vqs; vq; vq = vq->next) {
                assert(vq->value < num_queues);
                vq_aio_context[vq->value] = ctx;
            }
        } else {
            /* Round-robin vq:IOThread assignment */
            for (unsigned i = cur_iothread; i < num_queues;
                 i += num_iothreads) {
                vq_aio_context[i] = ctx;
            }
        }

        cur_iothread++;
    }

    return true;
}

void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
{
    IOThreadVirtQueueMappingList *node;

    for (node = list; node; node = node->next) {
        IOThread *iothread = iothread_by_id(node->value->iothread);
        object_unref(OBJECT(iothread));
    }
}
@@ -1,5 +1,6 @@
 system_virtio_ss = ss.source_set()
 system_virtio_ss.add(files('virtio-bus.c'))
+system_virtio_ss.add(files('iothread-vq-mapping.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
@@ -123,6 +123,10 @@ struct BHListSlice {
 
 typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
 
+typedef struct AioPolledEvent {
+    int64_t ns;        /* current polling time in nanoseconds */
+} AioPolledEvent;
+
 struct AioContext {
     GSource source;
 
@@ -229,7 +233,6 @@ struct AioContext {
     int poll_disable_cnt;
 
     /* Polling mode parameters */
-    int64_t poll_ns;        /* current polling time in nanoseconds */
    int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
@@ -17,6 +17,7 @@
 #define QEMU_RAW_AIO_H
 
 #include "block/aio.h"
+#include "block/block-common.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
@@ -58,11 +59,18 @@ void laio_cleanup(LinuxAioState *s);
 
 /* laio_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch);
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch);
 
 bool laio_has_fdsync(int);
+bool laio_has_fua(void);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
+#else
+static inline bool laio_has_fua(void)
+{
+    return false;
+}
 #endif
 /* io_uring.c - Linux io_uring implementation */
 #ifdef CONFIG_LINUX_IO_URING
@@ -71,9 +79,16 @@ void luring_cleanup(LuringState *s);
 
 /* luring_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type);
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
+bool luring_has_fua(void);
+#else
+static inline bool luring_has_fua(void)
+{
+    return false;
+}
 #endif
 
 #ifdef _WIN32
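
The new laio_has_fua() and luring_has_fua() probes let a caller decide whether a write carrying BDRV_REQ_FUA can be handed straight to the native Linux AIO backend or must fall back to an explicit flush. A minimal sketch under that assumption (supports_native_fua() is a hypothetical helper, not code from this series; the inline stubs above return false when a backend is not compiled in):

    #include "qemu/osdep.h"
    #include "block/raw-aio.h"

    /* Hypothetical helper: true if the selected Linux AIO engine can honour
     * BDRV_REQ_FUA natively, false if the caller has to emulate it with a
     * separate flush after the write completes. */
    static bool supports_native_fua(bool use_io_uring)
    {
        return use_io_uring ? luring_has_fua() : laio_has_fua();
    }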
@@ -24,6 +24,7 @@ struct SCSIRequest {
     SCSIBus *bus;
     SCSIDevice *dev;
     const SCSIReqOps *ops;
+    AioContext *ctx;
     uint32_t refcount;
     uint32_t tag;
     uint32_t lun;
@@ -48,6 +49,8 @@ struct SCSIRequest {
     bool dma_started;
     BlockAIOCB *aiocb;
     QEMUSGList *sg;
+
+    /* Protected by SCSIDevice->requests_lock */
     QTAILQ_ENTRY(SCSIRequest) next;
 };
 
@@ -76,10 +79,7 @@ struct SCSIDevice
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
 
-    /*
-     * The requests list is only accessed from the AioContext that executes
-     * requests or from the main loop when IOThread processing is stopped.
-     */
+    QemuMutex requests_lock; /* protects the requests list */
     QTAILQ_HEAD(, SCSIRequest) requests;
 
     uint32_t channel;
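
With the comment replaced by a real lock, any code that walks a device's request list must now hold requests_lock. A minimal sketch of the pattern virtio-scsi uses when cancelling requests from a TMF (not a function from this series; visit_request() is a hypothetical callback):

    #include "qemu/osdep.h"
    #include "qemu/lockable.h"
    #include "hw/scsi/scsi.h"

    static void for_each_enqueued_request(SCSIDevice *d,
                                          void (*visit_request)(SCSIRequest *r))
    {
        SCSIRequest *r;

        WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
            QTAILQ_FOREACH(r, &d->requests, next) {
                /* r->ctx identifies the AioContext the request runs in */
                visit_request(r);
            }
        }
    }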
include/hw/virtio/iothread-vq-mapping.h (new file, 45 lines):

/*
 * IOThread Virtqueue Mapping
 *
 * Copyright Red Hat, Inc
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#ifndef HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
#define HW_VIRTIO_IOTHREAD_VQ_MAPPING_H

#include "qapi/error.h"
#include "qapi/qapi-types-virtio.h"

/**
 * iothread_vq_mapping_apply:
 * @list: The mapping of virtqueues to IOThreads.
 * @vq_aio_context: The array of AioContext pointers to fill in.
 * @num_queues: The length of @vq_aio_context.
 * @errp: If an error occurs, a pointer to the area to store the error.
 *
 * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
 * the iothread-vq-mapping parameter in @list.
 *
 * iothread_vq_mapping_cleanup() must be called to free IOThread object
 * references after this function returns success.
 *
 * Returns: %true on success, %false on failure.
 **/
bool iothread_vq_mapping_apply(
        IOThreadVirtQueueMappingList *list,
        AioContext **vq_aio_context,
        uint16_t num_queues,
        Error **errp);

/**
 * iothread_vq_mapping_cleanup:
 * @list: The mapping of virtqueues to IOThreads.
 *
 * Release IOThread object references that were acquired by
 * iothread_vq_mapping_apply().
 */
void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list);

#endif /* HW_VIRTIO_IOTHREAD_VQ_MAPPING_H */
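
As a rough usage sketch of the API declared above (not code from this series), a device realize path could resolve the user-supplied iothread-vq-mapping list into one AioContext pointer per virtqueue; alloc_vq_aio_contexts() is a hypothetical helper name:

    #include "qemu/osdep.h"
    #include "hw/virtio/iothread-vq-mapping.h"

    /* Hypothetical helper: fill a per-virtqueue AioContext array from the
     * mapping list. On success the caller later undoes this with
     * iothread_vq_mapping_cleanup(list) and g_free(). */
    static AioContext **alloc_vq_aio_contexts(IOThreadVirtQueueMappingList *list,
                                              uint16_t num_queues, Error **errp)
    {
        AioContext **vq_aio_context = g_new0(AioContext *, num_queues);

        if (!iothread_vq_mapping_apply(list, vq_aio_context, num_queues, errp)) {
            g_free(vq_aio_context);
            return NULL;
        }
        return vq_aio_context;
    }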
@@ -22,6 +22,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/scsi/scsi.h"
 #include "chardev/char-fe.h"
+#include "qapi/qapi-types-virtio.h"
 #include "system/iothread.h"
 
 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -60,6 +61,7 @@ struct VirtIOSCSIConf {
     CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
+    IOThreadVirtQueueMappingList *iothread_vq_mapping_list;
 };
 
 struct VirtIOSCSI;
@@ -82,18 +84,14 @@ struct VirtIOSCSI {
 
     SCSIBus bus;
     int resetting; /* written from main loop thread, read from any thread */
+
+    QemuMutex event_lock; /* protects event_vq and events_dropped */
     bool events_dropped;
 
-    /*
-     * TMFs deferred to main loop BH. These fields are protected by
-     * tmf_bh_lock.
-     */
-    QemuMutex tmf_bh_lock;
-    QEMUBH *tmf_bh;
-    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+    QemuMutex ctrl_lock; /* protects ctrl_vq */
 
     /* Fields for dataplane below */
-    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+    AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */
 
     bool dataplane_started;
     bool dataplane_starting;
@@ -111,6 +109,7 @@ void virtio_scsi_common_realize(DeviceState *dev,
 void virtio_scsi_common_unrealize(DeviceState *dev);
 
 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);
 
|
@ -86,7 +86,6 @@ bool blk_supports_write_perm(BlockBackend *blk);
|
||||||
bool blk_is_sg(BlockBackend *blk);
|
bool blk_is_sg(BlockBackend *blk);
|
||||||
void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
|
void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
|
||||||
int blk_get_flags(BlockBackend *blk);
|
int blk_get_flags(BlockBackend *blk);
|
||||||
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
|
|
||||||
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
|
||||||
Error **errp);
|
Error **errp);
|
||||||
void blk_add_aio_context_notifier(BlockBackend *blk,
|
void blk_add_aio_context_notifier(BlockBackend *blk,
|
||||||
|
|
|
@@ -290,8 +290,7 @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
                               BlockCompletionFunc *cb, void *cb_opaque,
                               void *opaque);
 
-BlockAIOCB *dma_blk_io(AioContext *ctx,
-                       QEMUSGList *sg, uint64_t offset, uint32_t align,
+BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
                        DMAIOFunc *io_func, void *io_func_opaque,
                        BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
@@ -2727,6 +2727,14 @@ config_host_data.set('HAVE_OPTRESET',
                      cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
                      cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+  config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+                       cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif
+if linux_io_uring.found()
+  config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
+                       cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
+endif
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
449
scripts/qcow2-to-stdout.py
Executable file
449
scripts/qcow2-to-stdout.py
Executable file
|
@ -0,0 +1,449 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# This tool reads a disk image in any format and converts it to qcow2,
|
||||||
|
# writing the result directly to stdout.
|
||||||
|
#
|
||||||
|
# Copyright (C) 2024 Igalia, S.L.
|
||||||
|
#
|
||||||
|
# Authors: Alberto Garcia <berto@igalia.com>
|
||||||
|
# Madeeha Javed <javed@igalia.com>
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
#
|
||||||
|
# qcow2 files produced by this script are always arranged like this:
|
||||||
|
#
|
||||||
|
# - qcow2 header
|
||||||
|
# - refcount table
|
||||||
|
# - refcount blocks
|
||||||
|
# - L1 table
|
||||||
|
# - L2 tables
|
||||||
|
# - Data clusters
|
||||||
|
#
|
||||||
|
# A note about variable names: in qcow2 there is one refcount table
|
||||||
|
# and one (active) L1 table, although each can occupy several
|
||||||
|
# clusters. For the sake of simplicity the code sometimes talks about
|
||||||
|
# refcount tables and L1 tables when referring to those clusters.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import errno
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import struct
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
QCOW2_DEFAULT_CLUSTER_SIZE = 65536
|
||||||
|
QCOW2_DEFAULT_REFCOUNT_BITS = 16
|
||||||
|
QCOW2_FEATURE_NAME_TABLE = 0x6803F857
|
||||||
|
QCOW2_DATA_FILE_NAME_STRING = 0x44415441
|
||||||
|
QCOW2_V3_HEADER_LENGTH = 112 # Header length in QEMU 9.0. Must be a multiple of 8
|
||||||
|
QCOW2_INCOMPAT_DATA_FILE_BIT = 2
|
||||||
|
QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1
|
||||||
|
QCOW_OFLAG_COPIED = 1 << 63
|
||||||
|
QEMU_STORAGE_DAEMON = "qemu-storage-daemon"
|
||||||
|
|
||||||
|
|
||||||
|
def bitmap_set(bitmap, idx):
|
||||||
|
bitmap[idx // 8] |= 1 << (idx % 8)
|
||||||
|
|
||||||
|
|
||||||
|
def bitmap_is_set(bitmap, idx):
|
||||||
|
return (bitmap[idx // 8] & (1 << (idx % 8))) != 0
|
||||||
|
|
||||||
|
|
||||||
|
def bitmap_iterator(bitmap, length):
|
||||||
|
for idx in range(length):
|
||||||
|
if bitmap_is_set(bitmap, idx):
|
||||||
|
yield idx
|
||||||
|
|
||||||
|
|
||||||
|
def align_up(num, d):
|
||||||
|
return d * math.ceil(num / d)
|
||||||
|
|
||||||
|
|
||||||
|
# Holes in the input file contain only zeroes so we can skip them and
|
||||||
|
# save time. This function returns the indexes of the clusters that
|
||||||
|
# are known to contain data. Those are the ones that we need to read.
|
||||||
|
def clusters_with_data(fd, cluster_size):
|
||||||
|
data_to = 0
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
data_from = os.lseek(fd, data_to, os.SEEK_DATA)
|
||||||
|
data_to = align_up(os.lseek(fd, data_from, os.SEEK_HOLE), cluster_size)
|
||||||
|
for idx in range(data_from // cluster_size, data_to // cluster_size):
|
||||||
|
yield idx
|
||||||
|
except OSError as err:
|
||||||
|
if err.errno == errno.ENXIO: # End of file reached
|
||||||
|
break
|
||||||
|
raise err
|
||||||
|
|
||||||
|
|
||||||
|
# write_qcow2_content() expects a raw input file. If we have a different
|
||||||
|
# format we can use qemu-storage-daemon to make it appear as raw.
|
||||||
|
@contextmanager
|
||||||
|
def get_input_as_raw_file(input_file, input_format):
|
||||||
|
if input_format == "raw":
|
||||||
|
yield input_file
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
pid_file = os.path.join(temp_dir, "pid")
|
||||||
|
raw_file = os.path.join(temp_dir, "raw")
|
||||||
|
open(raw_file, "wb").close()
|
||||||
|
ret = subprocess.run(
|
||||||
|
[
|
||||||
|
QEMU_STORAGE_DAEMON,
|
||||||
|
"--daemonize",
|
||||||
|
"--pidfile", pid_file,
|
||||||
|
"--blockdev", f"driver=file,node-name=file0,driver=file,filename={input_file},read-only=on",
|
||||||
|
"--blockdev", f"driver={input_format},node-name=disk0,file=file0,read-only=on",
|
||||||
|
"--export", f"type=fuse,id=export0,node-name=disk0,mountpoint={raw_file},writable=off",
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
if ret.returncode != 0:
|
||||||
|
sys.exit("[Error] Could not start the qemu-storage-daemon:\n" +
|
||||||
|
ret.stderr.decode().rstrip('\n'))
|
||||||
|
yield raw_file
|
||||||
|
finally:
|
||||||
|
# Kill the storage daemon on exit
|
||||||
|
# and remove all temporary files
|
||||||
|
if os.path.exists(pid_file):
|
||||||
|
with open(pid_file, "r") as f:
|
||||||
|
pid = int(f.readline())
|
||||||
|
os.kill(pid, signal.SIGTERM)
|
||||||
|
while os.path.exists(pid_file):
|
||||||
|
time.sleep(0.1)
|
||||||
|
os.unlink(raw_file)
|
||||||
|
os.rmdir(temp_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def write_features(cluster, offset, data_file_name):
|
||||||
|
if data_file_name is not None:
|
||||||
|
encoded_name = data_file_name.encode("utf-8")
|
||||||
|
padded_name_len = align_up(len(encoded_name), 8)
|
||||||
|
struct.pack_into(f">II{padded_name_len}s", cluster, offset,
|
||||||
|
QCOW2_DATA_FILE_NAME_STRING,
|
||||||
|
len(encoded_name),
|
||||||
|
encoded_name)
|
||||||
|
offset += 8 + padded_name_len
|
||||||
|
|
||||||
|
qcow2_features = [
|
||||||
|
# Incompatible
|
||||||
|
(0, 0, "dirty bit"),
|
||||||
|
(0, 1, "corrupt bit"),
|
||||||
|
(0, 2, "external data file"),
|
||||||
|
(0, 3, "compression type"),
|
||||||
|
(0, 4, "extended L2 entries"),
|
||||||
|
# Compatible
|
||||||
|
(1, 0, "lazy refcounts"),
|
||||||
|
# Autoclear
|
||||||
|
(2, 0, "bitmaps"),
|
||||||
|
(2, 1, "raw external data"),
|
||||||
|
]
|
||||||
|
struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE)
|
||||||
|
struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48)
|
||||||
|
offset += 8
|
||||||
|
for feature_type, feature_bit, feature_name in qcow2_features:
|
||||||
|
struct.pack_into(">BB46s", cluster, offset,
|
||||||
|
feature_type, feature_bit, feature_name.encode("ascii"))
|
||||||
|
offset += 48
|
||||||
|
|
||||||
|
|
||||||
|
def write_qcow2_content(input_file, cluster_size, refcount_bits, data_file_name, data_file_raw):
|
||||||
|
# Some basic values
|
||||||
|
l1_entries_per_table = cluster_size // 8
|
||||||
|
l2_entries_per_table = cluster_size // 8
|
||||||
|
refcounts_per_table = cluster_size // 8
|
||||||
|
refcounts_per_block = cluster_size * 8 // refcount_bits
|
||||||
|
|
||||||
|
# Virtual disk size, number of data clusters and L1 entries
|
||||||
|
disk_size = align_up(os.path.getsize(input_file), 512)
|
||||||
|
total_data_clusters = math.ceil(disk_size / cluster_size)
|
||||||
|
l1_entries = math.ceil(total_data_clusters / l2_entries_per_table)
|
||||||
|
allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table)
|
||||||
|
|
||||||
|
# Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h)
|
||||||
|
if (l1_entries * 8) > (32 * 1024 * 1024):
|
||||||
|
sys.exit("[Error] The image size is too large. Try using a larger cluster size.")
|
||||||
|
|
||||||
|
# Two bitmaps indicating which L1 and L2 entries are set
|
||||||
|
l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8)
|
||||||
|
l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8)
|
||||||
|
allocated_l2_tables = 0
|
||||||
|
allocated_data_clusters = 0
|
||||||
|
|
||||||
|
if data_file_raw:
|
||||||
|
# If data_file_raw is set then all clusters are allocated and
|
||||||
|
# we don't need to read the input file at all.
|
||||||
|
allocated_l2_tables = l1_entries
|
||||||
|
for idx in range(l1_entries):
|
||||||
|
bitmap_set(l1_bitmap, idx)
|
||||||
|
for idx in range(total_data_clusters):
|
||||||
|
bitmap_set(l2_bitmap, idx)
|
||||||
|
else:
|
||||||
|
# Open the input file for reading
|
||||||
|
fd = os.open(input_file, os.O_RDONLY)
|
||||||
|
zero_cluster = bytes(cluster_size)
|
||||||
|
# Read all the clusters that contain data
|
||||||
|
for idx in clusters_with_data(fd, cluster_size):
|
||||||
|
cluster = os.pread(fd, cluster_size, cluster_size * idx)
|
||||||
|
# If the last cluster is smaller than cluster_size pad it with zeroes
|
||||||
|
if len(cluster) < cluster_size:
|
||||||
|
cluster += bytes(cluster_size - len(cluster))
|
||||||
|
# If a cluster has non-zero data then it must be allocated
|
||||||
|
# in the output file and its L2 entry must be set
|
||||||
|
if cluster != zero_cluster:
|
||||||
|
bitmap_set(l2_bitmap, idx)
|
||||||
|
allocated_data_clusters += 1
|
||||||
|
# Allocated data clusters also need their corresponding L1 entry and L2 table
|
||||||
|
l1_idx = math.floor(idx / l2_entries_per_table)
|
||||||
|
if not bitmap_is_set(l1_bitmap, l1_idx):
|
||||||
|
bitmap_set(l1_bitmap, l1_idx)
|
||||||
|
allocated_l2_tables += 1
|
||||||
|
|
||||||
|
# Total amount of allocated clusters excluding the refcount blocks and table
|
||||||
|
total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables
|
||||||
|
if data_file_name is None:
|
||||||
|
total_allocated_clusters += allocated_data_clusters
|
||||||
|
|
||||||
|
# Clusters allocated for the refcount blocks and table
|
||||||
|
allocated_refcount_blocks = math.ceil(total_allocated_clusters / refcounts_per_block)
|
||||||
|
allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
|
||||||
|
|
||||||
|
# Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables...
|
||||||
|
# (a) increase total_allocated_clusters, and
|
||||||
|
# (b) need to be recalculated when total_allocated_clusters is increased
|
||||||
|
# So we need to repeat the calculation as long as the numbers change
|
||||||
|
while True:
|
||||||
|
new_total_allocated_clusters = total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks
|
||||||
|
new_allocated_refcount_blocks = math.ceil(new_total_allocated_clusters / refcounts_per_block)
|
||||||
|
if new_allocated_refcount_blocks > allocated_refcount_blocks:
|
||||||
|
allocated_refcount_blocks = new_allocated_refcount_blocks
|
||||||
|
allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Now that we have the final numbers we can update total_allocated_clusters
|
||||||
|
total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks
|
||||||
|
|
||||||
|
# At this point we have the exact number of clusters that the output
|
||||||
|
# image is going to use so we can calculate all the offsets.
|
||||||
|
current_cluster_idx = 1
|
||||||
|
|
||||||
|
refcount_table_offset = current_cluster_idx * cluster_size
|
||||||
|
current_cluster_idx += allocated_refcount_tables
|
||||||
|
|
||||||
|
refcount_block_offset = current_cluster_idx * cluster_size
|
||||||
|
current_cluster_idx += allocated_refcount_blocks
|
||||||
|
|
||||||
|
l1_table_offset = current_cluster_idx * cluster_size
|
||||||
|
current_cluster_idx += allocated_l1_tables
|
||||||
|
|
||||||
|
l2_table_offset = current_cluster_idx * cluster_size
|
||||||
|
current_cluster_idx += allocated_l2_tables
|
||||||
|
|
||||||
|
data_clusters_offset = current_cluster_idx * cluster_size
|
||||||
|
|
||||||
|
# Calculate some values used in the qcow2 header
|
||||||
|
if allocated_l1_tables == 0:
|
||||||
|
l1_table_offset = 0
|
||||||
|
|
||||||
|
hdr_cluster_bits = int(math.log2(cluster_size))
|
||||||
|
hdr_refcount_bits = int(math.log2(refcount_bits))
|
||||||
|
hdr_length = QCOW2_V3_HEADER_LENGTH
|
||||||
|
hdr_incompat_features = 0
|
||||||
|
if data_file_name is not None:
|
||||||
|
hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT
|
||||||
|
hdr_autoclear_features = 0
|
||||||
|
if data_file_raw:
|
||||||
|
hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT
|
||||||
|
|
||||||
|
### Write qcow2 header
|
||||||
|
cluster = bytearray(cluster_size)
|
||||||
|
struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0,
|
||||||
|
b"QFI\xfb", # QCOW magic string
|
||||||
|
3, # version
|
||||||
|
0, # backing file offset
|
||||||
|
0, # backing file sizes
|
||||||
|
hdr_cluster_bits,
|
||||||
|
disk_size,
|
||||||
|
0, # encryption method
|
||||||
|
l1_entries,
|
||||||
|
l1_table_offset,
|
||||||
|
refcount_table_offset,
|
||||||
|
allocated_refcount_tables,
|
||||||
|
0, # number of snapshots
|
||||||
|
0, # snapshot table offset
|
||||||
|
hdr_incompat_features,
|
||||||
|
0, # compatible features
|
||||||
|
hdr_autoclear_features,
|
||||||
|
hdr_refcount_bits,
|
||||||
|
hdr_length,
|
||||||
|
)
|
||||||
|
|
||||||
|
write_features(cluster, hdr_length, data_file_name)
|
||||||
|
|
||||||
|
sys.stdout.buffer.write(cluster)
|
||||||
|
|
||||||
|
### Write refcount table
|
||||||
|
cur_offset = refcount_block_offset
|
||||||
|
remaining_refcount_table_entries = allocated_refcount_blocks # Each entry is a pointer to a refcount block
|
||||||
|
while remaining_refcount_table_entries > 0:
|
||||||
|
cluster = bytearray(cluster_size)
|
||||||
|
to_write = min(remaining_refcount_table_entries, refcounts_per_table)
|
||||||
|
remaining_refcount_table_entries -= to_write
|
||||||
|
for idx in range(to_write):
|
||||||
|
struct.pack_into(">Q", cluster, idx * 8, cur_offset)
|
||||||
|
cur_offset += cluster_size
|
||||||
|
sys.stdout.buffer.write(cluster)
|
||||||
|
|
||||||
|
### Write refcount blocks
|
||||||
|
remaining_refcount_block_entries = total_allocated_clusters # One entry for each allocated cluster
|
||||||
|
for tbl in range(allocated_refcount_blocks):
|
||||||
|
cluster = bytearray(cluster_size)
|
||||||
|
to_write = min(remaining_refcount_block_entries, refcounts_per_block)
|
||||||
|
remaining_refcount_block_entries -= to_write
|
||||||
|
# All refcount entries contain the number 1. The only difference
|
||||||
|
# is their bit width, defined when the image is created.
|
||||||
|
for idx in range(to_write):
|
||||||
|
if refcount_bits == 64:
|
||||||
|
struct.pack_into(">Q", cluster, idx * 8, 1)
|
||||||
|
elif refcount_bits == 32:
|
||||||
|
struct.pack_into(">L", cluster, idx * 4, 1)
|
||||||
|
elif refcount_bits == 16:
|
||||||
|
struct.pack_into(">H", cluster, idx * 2, 1)
|
||||||
|
elif refcount_bits == 8:
|
||||||
|
cluster[idx] = 1
|
||||||
|
elif refcount_bits == 4:
|
||||||
|
cluster[idx // 2] |= 1 << ((idx % 2) * 4)
|
||||||
|
elif refcount_bits == 2:
|
||||||
|
cluster[idx // 4] |= 1 << ((idx % 4) * 2)
|
||||||
|
elif refcount_bits == 1:
|
||||||
|
cluster[idx // 8] |= 1 << (idx % 8)
|
||||||
|
sys.stdout.buffer.write(cluster)
|
||||||
|
|
||||||
|
### Write L1 table
|
||||||
|
cur_offset = l2_table_offset
|
||||||
|
for tbl in range(allocated_l1_tables):
|
||||||
|
cluster = bytearray(cluster_size)
|
||||||
|
for idx in range(l1_entries_per_table):
|
||||||
|
l1_idx = tbl * l1_entries_per_table + idx
|
||||||
|
if bitmap_is_set(l1_bitmap, l1_idx):
|
||||||
|
struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
|
||||||
|
cur_offset += cluster_size
|
||||||
|
sys.stdout.buffer.write(cluster)
|
||||||
|
|
||||||
|
    ### Write L2 tables
    cur_offset = data_clusters_offset
    for tbl in range(l1_entries):
        # Skip the empty L2 tables. We can identify them because
        # there is no L1 entry pointing at them.
        if bitmap_is_set(l1_bitmap, tbl):
            cluster = bytearray(cluster_size)
            for idx in range(l2_entries_per_table):
                l2_idx = tbl * l2_entries_per_table + idx
                if bitmap_is_set(l2_bitmap, l2_idx):
                    if data_file_name is None:
                        struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED)
                        cur_offset += cluster_size
                    else:
                        struct.pack_into(">Q", cluster, idx * 8, (l2_idx * cluster_size) | QCOW_OFLAG_COPIED)
            sys.stdout.buffer.write(cluster)

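Each L1 entry written above points to one L2 table, an L2 table holds cluster_size // 8 eight-byte entries, and each L2 entry maps one guest cluster. A small sketch of the resulting offset arithmetic (the helper name is illustrative; the math is the standard qcow2 lookup):

def guest_offset_to_indices(offset, cluster_bits):
    cluster_size = 1 << cluster_bits
    l2_entries = cluster_size // 8                 # 8-byte entries per L2 table
    l1_index = offset // (l2_entries * cluster_size)
    l2_index = (offset // cluster_size) % l2_entries
    return l1_index, l2_index, offset % cluster_size

# With 64 KiB clusters one L2 table covers 512 MiB of guest data, so a read
# at offset 1 GiB lands in the third L2 table: (2, 0, 0).
print(guest_offset_to_indices(1 << 30, 16))
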
    ### Write data clusters
    if data_file_name is None:
        for idx in bitmap_iterator(l2_bitmap, total_data_clusters):
            cluster = os.pread(fd, cluster_size, cluster_size * idx)
            # If the last cluster is smaller than cluster_size pad it with zeroes
            if len(cluster) < cluster_size:
                cluster += bytes(cluster_size - len(cluster))
            sys.stdout.buffer.write(cluster)

    if not data_file_raw:
        os.close(fd)

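write_qcow2_content() leans on small bitmap helpers defined earlier in the script (bitmap_is_set() and bitmap_iterator() are used above). Their real implementation is not reproduced here; a hypothetical stand-in consistent with how they are called might look like this:

def bitmap_set(bitmap, idx):
    # One bit per cluster, eight entries packed into each byte (assumed layout).
    bitmap[idx // 8] |= 1 << (idx % 8)

def bitmap_is_set(bitmap, idx):
    return (bitmap[idx // 8] & (1 << (idx % 8))) != 0

def bitmap_iterator(bitmap, length):
    # Yield the index of every bit that is set among the first 'length' entries.
    for idx in range(length):
        if bitmap_is_set(bitmap, idx):
            yield idx
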
def main():
    # Command-line arguments
    parser = argparse.ArgumentParser(
        description="This program converts a QEMU disk image to qcow2 "
        "and writes it to the standard output"
    )
    parser.add_argument("input_file", help="name of the input file")
    parser.add_argument(
        "-f",
        dest="input_format",
        metavar="input_format",
        help="format of the input file (default: raw)",
        default="raw",
    )
    parser.add_argument(
        "-c",
        dest="cluster_size",
        metavar="cluster_size",
        help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})",
        default=QCOW2_DEFAULT_CLUSTER_SIZE,
        type=int,
        choices=[1 << x for x in range(9, 22)],
    )
    parser.add_argument(
        "-r",
        dest="refcount_bits",
        metavar="refcount_bits",
        help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})",
        default=QCOW2_DEFAULT_REFCOUNT_BITS,
        type=int,
        choices=[1 << x for x in range(7)],
    )
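The two choices lists above expand to the only values qcow2 accepts; spelled out (illustrative):

# -c: cluster sizes are powers of two from 512 bytes to 2 MiB
print([1 << x for x in range(9, 22)])   # [512, 1024, ..., 1048576, 2097152]
# -r: refcount widths are powers of two from 1 to 64 bits
print([1 << x for x in range(7)])       # [1, 2, 4, 8, 16, 32, 64]
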
    parser.add_argument(
        "-d",
        dest="data_file",
        help="create an image with input_file as an external data file",
        action="store_true",
    )
    parser.add_argument(
        "-R",
        dest="data_file_raw",
        help="enable data_file_raw on the generated image (implies -d)",
        action="store_true",
    )
    args = parser.parse_args()

    if args.data_file_raw:
        args.data_file = True

    if not os.path.isfile(args.input_file):
        sys.exit(f"[Error] {args.input_file} does not exist or is not a regular file.")

    if args.data_file and args.input_format != "raw":
        sys.exit("[Error] External data files can only be used with raw input images")

    # A 512 byte header is too small for the data file name extension
    if args.data_file and args.cluster_size == 512:
        sys.exit("[Error] External data files require a larger cluster size")

    if sys.stdout.isatty():
        sys.exit("[Error] Refusing to write to a tty. Try redirecting stdout.")

    if args.data_file:
        data_file_name = args.input_file
    else:
        data_file_name = None

    with get_input_as_raw_file(args.input_file, args.input_format) as raw_file:
        write_qcow2_content(
            raw_file,
            args.cluster_size,
            args.refcount_bits,
            data_file_name,
            args.data_file_raw,
        )


if __name__ == "__main__":
    main()

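Because the generated image goes to standard output (hence the isatty() check in main() above), the script is meant to be used with a redirect or a pipe, e.g. scripts/qcow2-to-stdout.py input.raw > output.qcow2, so the qcow2 data can be streamed to its destination without an intermediate file; the file names here are placeholders.
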
@@ -211,7 +211,7 @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async       = dma_aio_cancel,
 };
 
-BlockAIOCB *dma_blk_io(AioContext *ctx,
+BlockAIOCB *dma_blk_io(
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     DMAIOFunc *io_func, void *io_func_opaque,
     BlockCompletionFunc *cb,
@@ -223,7 +223,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx,
 
     dbs->acb = NULL;
     dbs->sg = sg;
-    dbs->ctx = ctx;
+    dbs->ctx = qemu_get_current_aio_context();
     dbs->offset = offset;
     dbs->align = align;
     dbs->sg_cur_index = 0;
@@ -251,7 +251,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
@@ -269,7 +269,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
@@ -181,7 +181,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: Cannot change iothread of active block backend
+(qemu) quit
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iothread=thread0,share-rw=on
 QEMU X.Y.Z monitor - type 'help' for more information
 
@@ -22,7 +22,7 @@ import iotests
 
 from iotests import filter_qemu_io, filter_qtest
 
-iotests.script_initialize(supported_fmts=['generic'],
+iotests.script_initialize(supported_fmts=['qcow2', 'qed', 'raw'],
                           supported_protocols=['file'],
                           supported_platforms=['linux'])
 
util/aio-posix.c
@@ -28,6 +28,9 @@
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns);
+
 bool aio_poll_disabled(AioContext *ctx)
 {
     return qatomic_read(&ctx->poll_disable_cnt);
@@ -392,7 +395,8 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
  * scanning all handlers with aio_dispatch_handlers().
  */
 static bool aio_dispatch_ready_handlers(AioContext *ctx,
-                                        AioHandlerList *ready_list)
+                                        AioHandlerList *ready_list,
+                                        int64_t block_ns)
 {
     bool progress = false;
     AioHandler *node;
@@ -400,6 +404,14 @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
     while ((node = QLIST_FIRST(ready_list))) {
         QLIST_REMOVE(node, node_ready);
         progress = aio_dispatch_handler(ctx, node) || progress;
+
+        /*
+         * Adjust polling time only after aio_dispatch_handler(), which can
+         * add the handler to ctx->poll_aio_handlers.
+         */
+        if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
+            adjust_polling_time(ctx, &node->poll, block_ns);
+        }
     }
 
     return progress;
@@ -579,13 +591,19 @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
 static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t *timeout)
 {
+    AioHandler *node;
     int64_t max_ns;
 
     if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
         return false;
     }
 
-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    max_ns = 0;
+    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+        max_ns = MAX(max_ns, node->poll.ns);
+    }
+    max_ns = qemu_soonest_timeout(*timeout, max_ns);
+
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -600,6 +618,46 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
     return false;
 }
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns)
+{
+    if (block_ns <= poll->ns) {
+        /* This is the sweet spot, no adjustment needed */
+    } else if (block_ns > ctx->poll_max_ns) {
+        /* We'd have to poll for too long, poll less */
+        int64_t old = poll->ns;
+
+        if (ctx->poll_shrink) {
+            poll->ns /= ctx->poll_shrink;
+        } else {
+            poll->ns = 0;
+        }
+
+        trace_poll_shrink(ctx, old, poll->ns);
+    } else if (poll->ns < ctx->poll_max_ns &&
+               block_ns < ctx->poll_max_ns) {
+        /* There is room to grow, poll longer */
+        int64_t old = poll->ns;
+        int64_t grow = ctx->poll_grow;
+
+        if (grow == 0) {
+            grow = 2;
+        }
+
+        if (poll->ns) {
+            poll->ns *= grow;
+        } else {
+            poll->ns = 4000; /* start polling at 4 microseconds */
+        }
+
+        if (poll->ns > ctx->poll_max_ns) {
+            poll->ns = ctx->poll_max_ns;
+        }
+
+        trace_poll_grow(ctx, old, poll->ns);
+    }
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
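Each AioPolledEvent now carries its own polling window: adjust_polling_time() leaves the window alone when the event arrived within it, shrinks it (or drops it to zero) when the blocking time exceeded poll_max_ns, and otherwise grows it geometrically from a 4 µs seed, capped at poll_max_ns. A rough per-handler model of that decision in Python (illustrative only, not QEMU code; parameter names and the sample trace are made up):

def adjust_polling_time(poll_ns, block_ns, poll_max_ns, grow=2, shrink=0):
    # Mirrors the grow/shrink branches of the C function above.
    if block_ns <= poll_ns:
        return poll_ns                                   # sweet spot, keep the window
    if block_ns > poll_max_ns:
        return poll_ns // shrink if shrink else 0        # polling would take too long
    if poll_ns < poll_max_ns and block_ns < poll_max_ns:
        new_ns = poll_ns * grow if poll_ns else 4000     # start polling at 4 microseconds
        return min(new_ns, poll_max_ns)
    return poll_ns

window = 0
for blocked in (20000, 10000, 3000, 900000):             # ns spent blocked per aio_poll()
    window = adjust_polling_time(window, blocked, poll_max_ns=32000)
    print(window)                                        # 4000, 8000, 8000, 0
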
@@ -607,6 +665,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
+    int64_t block_ns = 0;
 
     /*
      * There cannot be two concurrent aio_poll calls for the same AioContext (or
@@ -679,49 +738,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     aio_notify_accept(ctx);
 
-    /* Adjust polling time */
+    /* Calculate blocked time for adaptive polling */
     if (ctx->poll_max_ns) {
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll_ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll_ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
-            } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll_ns);
-        }
+        block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
     }
 
     progress |= aio_bh_poll(ctx);
-    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+    progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
 
     aio_free_deleted_handlers(ctx);
@@ -767,11 +790,18 @@ void aio_context_use_g_source(AioContext *ctx)
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink, Error **errp)
 {
+    AioHandler *node;
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        node->poll.ns = 0;
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
     ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;
 
@@ -38,6 +38,7 @@ struct AioHandler {
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
     bool poll_ready;           /* has polling detected an event? */
+    AioPolledEvent poll;
 };
 
 /* Add a handler to a ready list */
@@ -609,7 +609,6 @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
-    ctx->poll_ns = 0;
     ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
     ctx->poll_shrink = 0;