summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-zoned.c308
1 files changed, 61 insertions, 247 deletions
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 781caca0381e..77f12fc2086f 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -41,7 +41,6 @@ static const char *const zone_cond_name[] = {
/*
* Per-zone write plug.
* @node: hlist_node structure for managing the plug using a hash table.
- * @link: To list the plug in the zone write plug error list of the disk.
* @ref: Zone write plug reference counter. A zone write plug reference is
* always at least 1 when the plug is hashed in the disk plug hash table.
* The reference is incremented whenever a new BIO needing plugging is
@@ -63,7 +62,6 @@ static const char *const zone_cond_name[] = {
*/
struct blk_zone_wplug {
struct hlist_node node;
- struct list_head link;
refcount_t ref;
spinlock_t lock;
unsigned int flags;
@@ -80,8 +78,8 @@ struct blk_zone_wplug {
* - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
* that is, that write BIOs are being throttled due to a write BIO already
* being executed or the zone write plug bio list is not empty.
- * - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be
- * recovered with a report zone to update the zone write pointer offset.
+ * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
+ * write pointer offset and need to update it.
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
* from the disk hash table and that the initial reference to the zone
* write plug set when the plug was first added to the hash table has been
@@ -91,11 +89,9 @@ struct blk_zone_wplug {
* freed once all remaining references from BIOs or functions are dropped.
*/
#define BLK_ZONE_WPLUG_PLUGGED (1U << 0)
-#define BLK_ZONE_WPLUG_ERROR (1U << 1)
+#define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1)
#define BLK_ZONE_WPLUG_UNHASHED (1U << 2)
-#define BLK_ZONE_WPLUG_BUSY (BLK_ZONE_WPLUG_PLUGGED | BLK_ZONE_WPLUG_ERROR)
-
/**
* blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
* @zone_cond: BLK_ZONE_COND_XXX.
@@ -163,6 +159,11 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
{
struct gendisk *disk = bdev->bd_disk;
sector_t capacity = get_capacity(disk);
+ struct disk_report_zones_cb_args args = {
+ .disk = disk,
+ .user_cb = cb,
+ .user_data = data,
+ };
if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
return -EOPNOTSUPP;
@@ -170,7 +171,8 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
if (!nr_zones || sector >= capacity)
return 0;
- return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
+ return disk->fops->report_zones(disk, sector, nr_zones,
+ disk_report_zones_cb, &args);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
@@ -451,7 +453,7 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
{
if (refcount_dec_and_test(&zwplug->ref)) {
WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
- WARN_ON_ONCE(!list_empty(&zwplug->link));
+ WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
@@ -465,8 +467,8 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
return false;
- /* If the zone write plug is still busy, it cannot be removed. */
- if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
+ /* If the zone write plug is still plugged, it cannot be removed. */
+ if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
return false;
/*
@@ -549,7 +551,6 @@ again:
return NULL;
INIT_HLIST_NODE(&zwplug->node);
- INIT_LIST_HEAD(&zwplug->link);
refcount_set(&zwplug->ref, 2);
spin_lock_init(&zwplug->lock);
zwplug->flags = 0;
@@ -598,124 +599,29 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
}
/*
- * Abort (fail) all plugged BIOs of a zone write plug that are not aligned
- * with the assumed write pointer location of the zone when the BIO will
- * be unplugged.
- */
-static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
- struct blk_zone_wplug *zwplug)
-{
- unsigned int wp_offset = zwplug->wp_offset;
- struct bio_list bl = BIO_EMPTY_LIST;
- struct bio *bio;
-
- while ((bio = bio_list_pop(&zwplug->bio_list))) {
- if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
- (bio_op(bio) != REQ_OP_ZONE_APPEND &&
- bio_offset_from_zone_start(bio) != wp_offset)) {
- blk_zone_wplug_bio_io_error(zwplug, bio);
- continue;
- }
-
- wp_offset += bio_sectors(bio);
- bio_list_add(&bl, bio);
- }
-
- bio_list_merge(&zwplug->bio_list, &bl);
-}
-
-static inline void disk_zone_wplug_set_error(struct gendisk *disk,
- struct blk_zone_wplug *zwplug)
-{
- unsigned long flags;
-
- if (zwplug->flags & BLK_ZONE_WPLUG_ERROR)
- return;
-
- /*
- * At this point, we already have a reference on the zone write plug.
- * However, since we are going to add the plug to the disk zone write
- * plugs work list, increase its reference count. This reference will
- * be dropped in disk_zone_wplugs_work() once the error state is
- * handled, or in disk_zone_wplug_clear_error() if the zone is reset or
- * finished.
- */
- zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
- refcount_inc(&zwplug->ref);
-
- spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
- list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
- spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
-}
-
-static inline void disk_zone_wplug_clear_error(struct gendisk *disk,
- struct blk_zone_wplug *zwplug)
-{
- unsigned long flags;
-
- if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
- return;
-
- /*
- * We are racing with the error handling work which drops the reference
- * on the zone write plug after handling the error state. So remove the
- * plug from the error list and drop its reference count only if the
- * error handling has not yet started, that is, if the zone write plug
- * is still listed.
- */
- spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
- if (!list_empty(&zwplug->link)) {
- list_del_init(&zwplug->link);
- zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
- disk_put_zone_wplug(zwplug);
- }
- spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
-}
-
-/*
- * Set a zone write plug write pointer offset to either 0 (zone reset case)
- * or to the zone size (zone finish case). This aborts all plugged BIOs, which
- * is fine to do as doing a zone reset or zone finish while writes are in-flight
- * is a mistake from the user which will most likely cause all plugged BIOs to
- * fail anyway.
+ * Set a zone write plug write pointer offset to the specified value.
+ * This aborts all plugged BIOs, which is fine as this function is called for
+ * a zone reset operation, a zone finish operation or if the zone needs a wp
+ * update from a report zone after a write error.
*/
static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
struct blk_zone_wplug *zwplug,
unsigned int wp_offset)
{
- unsigned long flags;
-
- spin_lock_irqsave(&zwplug->lock, flags);
-
- /*
- * Make sure that a BIO completion or another zone reset or finish
- * operation has not already removed the plug from the hash table.
- */
- if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
- spin_unlock_irqrestore(&zwplug->lock, flags);
- return;
- }
+ lockdep_assert_held(&zwplug->lock);
/* Update the zone write pointer and abort all plugged BIOs. */
+ zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE;
zwplug->wp_offset = wp_offset;
disk_zone_wplug_abort(zwplug);
/*
- * Updating the write pointer offset puts back the zone
- * in a good state. So clear the error flag and decrement the
- * error count if we were in error state.
- */
- disk_zone_wplug_clear_error(disk, zwplug);
-
- /*
* The zone write plug now has no BIO plugged: remove it from the
* hash table so that it cannot be seen. The plug will be freed
* when the last reference is dropped.
*/
if (disk_should_remove_zone_wplug(disk, zwplug))
disk_remove_zone_wplug(disk, zwplug);
-
- spin_unlock_irqrestore(&zwplug->lock, flags);
}
static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
@@ -752,7 +658,7 @@ static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
return;
spin_lock_irqsave(&zwplug->lock, flags);
- if (zwplug->flags & BLK_ZONE_WPLUG_ERROR)
+ if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
disk_zone_wplug_set_wp_offset(disk, zwplug,
blk_zone_wp_offset(zone));
spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -776,6 +682,7 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
struct gendisk *disk = bio->bi_bdev->bd_disk;
sector_t sector = bio->bi_iter.bi_sector;
struct blk_zone_wplug *zwplug;
+ unsigned long flags;
/* Conventional zones cannot be reset nor finished. */
if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
@@ -801,7 +708,9 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
*/
zwplug = disk_get_zone_wplug(disk, sector);
if (zwplug) {
+ spin_lock_irqsave(&zwplug->lock, flags);
disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
+ spin_unlock_irqrestore(&zwplug->lock, flags);
disk_put_zone_wplug(zwplug);
}
@@ -812,6 +721,7 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
struct blk_zone_wplug *zwplug;
+ unsigned long flags;
sector_t sector;
/*
@@ -823,7 +733,9 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
sector += disk->queue->limits.chunk_sectors) {
zwplug = disk_get_zone_wplug(disk, sector);
if (zwplug) {
+ spin_lock_irqsave(&zwplug->lock, flags);
disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
+ spin_unlock_irqrestore(&zwplug->lock, flags);
disk_put_zone_wplug(zwplug);
}
}
@@ -1006,12 +918,22 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
struct gendisk *disk = bio->bi_bdev->bd_disk;
/*
+ * If we lost track of the zone write pointer due to a write error,
+ * the user must either execute a report zones, reset the zone or finish
+ * the to recover a reliable write pointer position. Fail BIOs if the
+ * user did not do that as we cannot handle emulated zone append
+ * otherwise.
+ */
+ if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
+ return false;
+
+ /*
* Check that the user is not attempting to write to a full zone.
* We know such BIO will fail, and that would potentially overflow our
* write pointer offset beyond the end of the zone.
*/
if (disk_zone_wplug_is_full(disk, zwplug))
- goto err;
+ return false;
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
/*
@@ -1030,24 +952,18 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
} else {
/*
- * Check for non-sequential writes early because we avoid a
- * whole lot of error handling trouble if we don't send it off
- * to the driver.
+ * Check for non-sequential writes early as we know that BIOs
+ * with a start sector not unaligned to the zone write pointer
+ * will fail.
*/
if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
- goto err;
+ return false;
}
/* Advance the zone write pointer offset. */
zwplug->wp_offset += bio_sectors(bio);
return true;
-
-err:
- /* We detected an invalid write BIO: schedule error recovery. */
- disk_zone_wplug_set_error(disk, zwplug);
- kblockd_schedule_work(&disk->zone_wplugs_work);
- return false;
}
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
@@ -1097,20 +1013,20 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
/*
- * If the zone is already plugged or has a pending error, add the BIO
- * to the plug BIO list. Do the same for REQ_NOWAIT BIOs to ensure that
- * we will not see a BLK_STS_AGAIN failure if we let the BIO execute.
+ * If the zone is already plugged, add the BIO to the plug BIO list.
+ * Do the same for REQ_NOWAIT BIOs to ensure that we will not see a
+ * BLK_STS_AGAIN failure if we let the BIO execute.
* Otherwise, plug and let the BIO execute.
*/
- if (zwplug->flags & BLK_ZONE_WPLUG_BUSY || (bio->bi_opf & REQ_NOWAIT))
+ if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) ||
+ (bio->bi_opf & REQ_NOWAIT))
goto plug;
- /*
- * If an error is detected when preparing the BIO, add it to the BIO
- * list so that error recovery can deal with it.
- */
- if (!blk_zone_wplug_prepare_bio(zwplug, bio))
- goto plug;
+ if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
+ spin_unlock_irqrestore(&zwplug->lock, flags);
+ bio_io_error(bio);
+ return true;
+ }
zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
@@ -1210,16 +1126,6 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
spin_lock_irqsave(&zwplug->lock, flags);
- /*
- * If we had an error, schedule error recovery. The recovery work
- * will restart submission of plugged BIOs.
- */
- if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) {
- spin_unlock_irqrestore(&zwplug->lock, flags);
- kblockd_schedule_work(&disk->zone_wplugs_work);
- return;
- }
-
/* Schedule submission of the next plugged BIO if we have one. */
if (!bio_list_empty(&zwplug->bio_list)) {
disk_zone_wplug_schedule_bio_work(disk, zwplug);
@@ -1262,12 +1168,13 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
}
/*
- * If the BIO failed, mark the plug as having an error to trigger
- * recovery.
+ * If the BIO failed, abort all plugged BIOs and mark the plug as
+ * needing a write pointer update.
*/
if (bio->bi_status != BLK_STS_OK) {
spin_lock_irqsave(&zwplug->lock, flags);
- disk_zone_wplug_set_error(disk, zwplug);
+ disk_zone_wplug_abort(zwplug);
+ zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE;
spin_unlock_irqrestore(&zwplug->lock, flags);
}
@@ -1323,6 +1230,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
*/
spin_lock_irqsave(&zwplug->lock, flags);
+again:
bio = bio_list_pop(&zwplug->bio_list);
if (!bio) {
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
@@ -1331,10 +1239,8 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
}
if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
- /* Error recovery will decide what to do with the BIO. */
- bio_list_add_head(&zwplug->bio_list, bio);
- spin_unlock_irqrestore(&zwplug->lock, flags);
- goto put_zwplug;
+ blk_zone_wplug_bio_io_error(zwplug, bio);
+ goto again;
}
spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -1356,97 +1262,6 @@ put_zwplug:
disk_put_zone_wplug(zwplug);
}
-static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone,
- unsigned int idx, void *data)
-{
- struct blk_zone *zonep = data;
-
- *zonep = *zone;
- return 0;
-}
-
-static void disk_zone_wplug_handle_error(struct gendisk *disk,
- struct blk_zone_wplug *zwplug)
-{
- sector_t zone_start_sector =
- bdev_zone_sectors(disk->part0) * zwplug->zone_no;
- unsigned int noio_flag;
- struct blk_zone zone;
- unsigned long flags;
- int ret;
-
- /* Get the current zone information from the device. */
- noio_flag = memalloc_noio_save();
- ret = disk->fops->report_zones(disk, zone_start_sector, 1,
- blk_zone_wplug_report_zone_cb, &zone);
- memalloc_noio_restore(noio_flag);
-
- spin_lock_irqsave(&zwplug->lock, flags);
-
- /*
- * A zone reset or finish may have cleared the error already. In such
- * case, do nothing as the report zones may have seen the "old" write
- * pointer value before the reset/finish operation completed.
- */
- if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
- goto unlock;
-
- zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
-
- if (ret != 1) {
- /*
- * We failed to get the zone information, meaning that something
- * is likely really wrong with the device. Abort all remaining
- * plugged BIOs as otherwise we could endup waiting forever on
- * plugged BIOs to complete if there is a queue freeze on-going.
- */
- disk_zone_wplug_abort(zwplug);
- goto unplug;
- }
-
- /* Update the zone write pointer offset. */
- zwplug->wp_offset = blk_zone_wp_offset(&zone);
- disk_zone_wplug_abort_unaligned(disk, zwplug);
-
- /* Restart BIO submission if we still have any BIO left. */
- if (!bio_list_empty(&zwplug->bio_list)) {
- disk_zone_wplug_schedule_bio_work(disk, zwplug);
- goto unlock;
- }
-
-unplug:
- zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
- if (disk_should_remove_zone_wplug(disk, zwplug))
- disk_remove_zone_wplug(disk, zwplug);
-
-unlock:
- spin_unlock_irqrestore(&zwplug->lock, flags);
-}
-
-static void disk_zone_wplugs_work(struct work_struct *work)
-{
- struct gendisk *disk =
- container_of(work, struct gendisk, zone_wplugs_work);
- struct blk_zone_wplug *zwplug;
- unsigned long flags;
-
- spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
-
- while (!list_empty(&disk->zone_wplugs_err_list)) {
- zwplug = list_first_entry(&disk->zone_wplugs_err_list,
- struct blk_zone_wplug, link);
- list_del_init(&zwplug->link);
- spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
-
- disk_zone_wplug_handle_error(disk, zwplug);
- disk_put_zone_wplug(zwplug);
-
- spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
- }
-
- spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
-}
-
static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
{
return 1U << disk->zone_wplugs_hash_bits;
@@ -1455,8 +1270,6 @@ static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
void disk_init_zone_resources(struct gendisk *disk)
{
spin_lock_init(&disk->zone_wplugs_lock);
- INIT_LIST_HEAD(&disk->zone_wplugs_err_list);
- INIT_WORK(&disk->zone_wplugs_work, disk_zone_wplugs_work);
}
/*
@@ -1555,8 +1368,6 @@ void disk_free_zone_resources(struct gendisk *disk)
if (!disk->zone_wplugs_pool)
return;
- cancel_work_sync(&disk->zone_wplugs_work);
-
if (disk->zone_wplugs_wq) {
destroy_workqueue(disk->zone_wplugs_wq);
disk->zone_wplugs_wq = NULL;
@@ -1753,6 +1564,8 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
if (!disk->zone_wplugs_hash)
return 0;
+ disk_zone_wplug_sync_wp_offset(disk, zone);
+
wp_offset = blk_zone_wp_offset(zone);
if (!wp_offset || wp_offset >= zone->capacity)
return 0;
@@ -1883,6 +1696,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
memalloc_noio_restore(noio_flag);
return ret;
}
+
ret = disk->fops->report_zones(disk, 0, UINT_MAX,
blk_revalidate_zone_cb, &args);
if (!ret) {