Block layer patches

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJanYJPAAoJEH8JsnLIjy/WxjUQAJA+DTOmGXvaNpMs65BrU79K
 /r/iGVrzHv/RMLmrWMnqj96W9SnpMuiAP9hVLNsekqClY9q4ME4DpGcXhWfhSvF5
 FC51ehvFJdfo8cPorsevcqNj60iWebjcx3lFfUq2606UOyYih3oijYxr6gSwWbRc
 GAgdGMqsvGYpzgqAQVEWHUhaX0La49/OzY42aR+E+LCBNfTYvlydvyoc+tUTdIpW
 1eM/ASGndGsN0Cf2vxlbKgJ0/P6v+cRZuuIDhKZqre+YG+yM+pq7yZb+o7nf/P36
 TPR93BsT7FSVAizRK7VFRuPIynHpiaxYygrJERCXF0sxsV4OlKjpmt/uUPamWFh+
 46Jx2NK1AuAx87BdErgmA119ObO3oAPxK0+2p981obb6SphTbbPxDj6SOlYCt4mJ
 mhff4JtIiwCmDSckAwd2mkBI1Tvl9qqcELrpyd2t2eU4ec2vf7fPd85EsK/Mq6Kr
 dbfqFvjNaaMxChoqFgkHAveYJ7zYqRFI2IY5o9c1QyZehCGPWjScxHXZZYdpDl59
 YF9DkYQDOyvEX2jmMECaO1r/0nnO+BqQHu5ItJuTte9rjP9Q0do3iBISiIefewtf
 yji6/QNn2hFrnr1HPAwLFFC3kPgc8Mq8mIUb53j8vG/01KhVRCcnJm2K6D4IUwLZ
 S6ZnQJB97eE4y7YR5dNt
 =2axz
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Mon 05 Mar 2018 17:45:51 GMT
# gpg:                using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (38 commits)
  block: Fix NULL dereference on empty drive error
  qcow2: Replace align_offset() with ROUND_UP()
  block/ssh: Add basic .bdrv_truncate()
  block/ssh: Make ssh_grow_file() blocking
  block/ssh: Pull ssh_grow_file() from ssh_create()
  qemu-img: Make resize error message more general
  qcow2: make qcow2_co_create2() a coroutine_fn
  block: rename .bdrv_create() to .bdrv_co_create_opts()
  Revert "IDE: Do not flush empty CDROM drives"
  block: test blk_aio_flush() with blk->root == NULL
  block: add BlockBackend->in_flight counter
  block: extract AIO_WAIT_WHILE() from BlockDriverState
  aio: rename aio_context_in_iothread() to in_aio_context_home_thread()
  docs: document how to use the l2-cache-entry-size parameter
  specs/qcow2: Fix documentation of the compressed cluster descriptor
  iotest 033: add misaligned write-zeroes test via truncate
  block: fix write with zero flag set and iovector provided
  block: Drop unused .bdrv_co_get_block_status()
  vvfat: Switch to .bdrv_co_block_status()
  vpc: Switch to .bdrv_co_block_status()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
#	include/block/block.h
This commit is contained in:
Peter Maydell 2018-03-06 11:20:44 +00:00
commit 58e2e17dba
48 changed files with 983 additions and 613 deletions

116
include/block/aio-wait.h Normal file
View file

@ -0,0 +1,116 @@
/*
* AioContext wait support
*
* Copyright (C) 2018 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef QEMU_AIO_WAIT_H
#define QEMU_AIO_WAIT_H
#include "block/aio.h"
/**
* AioWait:
*
* An object that facilitates synchronous waiting on a condition. The main
* loop can wait on an operation running in an IOThread as follows:
*
* AioWait *wait = ...;
* AioContext *ctx = ...;
* MyWork work = { .done = false };
* schedule_my_work_in_iothread(ctx, &work);
* AIO_WAIT_WHILE(wait, ctx, !work.done);
*
* The IOThread must call aio_wait_kick() to notify the main loop when
* work.done changes:
*
* static void do_work(...)
* {
* ...
* work.done = true;
* aio_wait_kick(wait);
* }
*/
typedef struct {
    /*
     * Flag raised by AIO_WAIT_WHILE() for the duration of a main-loop wait
     * and lowered again once the wait is over; it tells whether the main
     * loop currently wants a kick.  Only touch it with atomic operations.
     */
    bool need_kick;
} AioWait;
/**
* AIO_WAIT_WHILE:
* @wait: the aio wait object
* @ctx: the aio context
* @cond: wait while this conditional expression is true
*
* Wait while a condition is true. Use this to implement synchronous
* operations that require event loop activity.
*
* The caller must be sure that something calls aio_wait_kick() when the value
* of @cond might have changed.
*
* The caller's thread must be the IOThread that owns @ctx or the main loop
* thread (with @ctx acquired exactly once). This function cannot be used to
* wait on conditions between two IOThreads since that could lead to deadlock,
* go via the main loop instead.
*/
#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
bool waited_ = false; \
bool busy_ = true; \
AioWait *wait_ = (wait); \
AioContext *ctx_ = (ctx); \
if (in_aio_context_home_thread(ctx_)) { \
while ((cond) || busy_) { \
busy_ = aio_poll(ctx_, (cond)); \
waited_ |= !!(cond) | busy_; \
} \
} else { \
assert(qemu_get_current_aio_context() == \
qemu_get_aio_context()); \
assert(!wait_->need_kick); \
/* Set wait_->need_kick before evaluating cond. */ \
atomic_mb_set(&wait_->need_kick, true); \
while (busy_) { \
if ((cond)) { \
waited_ = busy_ = true; \
aio_context_release(ctx_); \
aio_poll(qemu_get_aio_context(), true); \
aio_context_acquire(ctx_); \
} else { \
busy_ = aio_poll(ctx_, false); \
waited_ |= busy_; \
} \
} \
atomic_set(&wait_->need_kick, false); \
} \
waited_; })
/**
* aio_wait_kick:
* @wait: the aio wait object that should re-evaluate its condition
*
* Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
* synchronous operations performed in an IOThread, the main thread lets the
* IOThread's event loop run, waiting for the operation to complete. An
* aio_wait_kick() call will wake up the main thread.
*/
void aio_wait_kick(AioWait *wait);
#endif /* QEMU_AIO_WAIT_H */

View file

@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
AioContext *qemu_get_current_aio_context(void);
/**
* in_aio_context_home_thread:
* @ctx: the aio context
*
* Return whether we are running in the I/O thread that manages @ctx.
* Return whether we are running in the thread that normally runs @ctx. Note
* that acquiring/releasing ctx does not affect the outcome: each AioContext
* still has only one home thread that is responsible for running it.
*/
static inline bool aio_context_in_iothread(AioContext *ctx)
static inline bool in_aio_context_home_thread(AioContext *ctx)
{
return ctx == qemu_get_current_aio_context();
}

View file

@ -3,6 +3,7 @@
#include "block/aio.h"
#include "qapi/qapi-types-block-core.h"
#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
@ -115,19 +116,19 @@ typedef struct HDGeometry {
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
* layer (short for DATA || ZERO), set by block layer
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
* layer rather than any backing, set by block layer
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
* layer, set by block layer
*
* Internal flag:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
*
* If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
* the return value (old interface) or the entire map parameter (new
* interface) represent the offset in the returned BDS that is allocated for
* the corresponding raw data. However, whether that offset actually
* contains data also depends on BDRV_BLOCK_DATA, as follows:
* If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
* host offset within the returned BDS that is allocated for the
* corresponding raw guest data. However, whether that offset
* actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
/* Returns NULL when bs == NULL */
AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
#define BDRV_POLL_WHILE(bs, cond) ({ \
bool waited_ = false; \
bool busy_ = true; \
BlockDriverState *bs_ = (bs); \
AioContext *ctx_ = bdrv_get_aio_context(bs_); \
if (aio_context_in_iothread(ctx_)) { \
while ((cond) || busy_) { \
busy_ = aio_poll(ctx_, (cond)); \
waited_ |= !!(cond) | busy_; \
} \
} else { \
assert(qemu_get_current_aio_context() == \
qemu_get_aio_context()); \
/* Ask bdrv_dec_in_flight to wake up the main \
* QEMU AioContext. Extra I/O threads never take \
* other I/O threads' AioContexts (see for example \
* block_job_defer_to_main_loop for how to do it). \
*/ \
assert(!bs_->wakeup); \
/* Set bs->wakeup before evaluating cond. */ \
atomic_mb_set(&bs_->wakeup, true); \
while (busy_) { \
if ((cond)) { \
waited_ = busy_ = true; \
aio_context_release(ctx_); \
aio_poll(qemu_get_aio_context(), true); \
aio_context_acquire(ctx_); \
} else { \
busy_ = aio_poll(ctx_, false); \
waited_ |= busy_; \
} \
} \
atomic_set(&bs_->wakeup, false); \
} \
waited_; })
AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
bdrv_get_aio_context(bs_), \
cond); })
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);

View file

@ -26,6 +26,7 @@
#include "block/accounting.h"
#include "block/block.h"
#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#include "qemu/stats64.h"
@ -128,7 +129,8 @@ struct BlockDriver {
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts,
Error **errp);
int (*bdrv_make_empty)(BlockDriverState *bs);
void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
@ -202,15 +204,22 @@ struct BlockDriver {
/*
* Building block for bdrv_block_status[_above] and
* bdrv_is_allocated[_above]. The driver should answer only
* according to the current layer, and should not set
* BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h
* for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block
* layer guarantees input aligned to request_alignment, as well as
* non-NULL pnum and file.
* according to the current layer, and should only need to set
* BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
* and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
* layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
* block.h for the overall meaning of the bits. As a hint, the
* flag want_zero is true if the caller cares more about precise
* mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
* overall allocation (favor larger *pnum, perhaps by reporting
* _DATA instead of _ZERO). The block layer guarantees input
* clamped to bdrv_getlength() and aligned to request_alignment,
* as well as non-NULL pnum, map, and file; in turn, the driver
* must return an error or set pnum to an aligned non-zero value.
*/
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file);
int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
/*
* Invalidate any cached meta-data.
@ -709,10 +718,8 @@ struct BlockDriverState {
unsigned int in_flight;
unsigned int serialising_in_flight;
/* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
* ops.
*/
bool wakeup;
/* Kicked to signal main loop when a request completes. */
AioWait wait;
/* counter for nested bdrv_io_plug.
* Accessed with atomic ops.
@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared);
/*
* Default implementation for drivers to pass bdrv_co_get_block_status() to
* Default implementation for drivers to pass bdrv_co_block_status() to
* their file.
*/
int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
int *pnum,
BlockDriverState **file);
int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
/*
* Default implementation for drivers to pass bdrv_co_get_block_status() to
* Default implementation for drivers to pass bdrv_co_block_status() to
* their backing file.
*/
int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
int *pnum,
BlockDriverState **file);
int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
const char *bdrv_get_parent_name(const BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);