diff options
Diffstat (limited to 'drivers/block')
43 files changed, 1192 insertions, 975 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ecceaaa1a66f..583b671b1d2d 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -16,13 +16,7 @@ menuconfig BLK_DEV if BLK_DEV -config BLK_DEV_NULL_BLK - tristate "Null test block driver" - select CONFIGFS_FS - -config BLK_DEV_NULL_BLK_FAULT_INJECTION - bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION +source "drivers/block/null_blk/Kconfig" config BLK_DEV_FD tristate "Normal floppy disk support" @@ -451,6 +445,7 @@ config BLK_DEV_RBD config BLK_DEV_RSXX tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI + select CRC32 help Device driver for IBM's high speed PCIe SSD storage device: Flash Adapter 900GB Full Height. diff --git a/drivers/block/Makefile b/drivers/block/Makefile index e1f63117ee94..a3170859e01d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,12 +41,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ -obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o -null_blk-objs := null_blk_main.o -ifeq ($(CONFIG_BLK_DEV_ZONED), y) -null_blk-$(CONFIG_TRACING) += null_blk_trace.o -endif -null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 71c2b1564558..9e2d0c6a3877 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -201,7 +201,7 @@ struct amiga_floppy_struct { int busy; /* true when drive is active */ int dirty; /* true when trackbuf is not on disk */ int status; /* current error code for unit */ - struct gendisk *gendisk; + struct gendisk *gendisk[2]; struct blk_mq_tag_set tag_set; }; @@ -1669,6 +1669,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return -EBUSY; } + if (unit[drive].type->code == FD_NODRIVE) { + mutex_unlock(&amiflop_mutex); + return -ENXIO; + } + if (mode & (FMODE_READ|FMODE_WRITE)) { bdev_check_media_change(bdev); if (mode & FMODE_WRITE) { @@ -1695,7 +1700,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) unit[drive].dtype=&data_types[system]; unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks* data_types[system].sects*unit[drive].type->sect_mult; - set_capacity(unit[drive].gendisk, unit[drive].blocks); + set_capacity(unit[drive].gendisk[system], unit[drive].blocks); printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, unit[drive].type->name, data_types[system].name); @@ -1772,36 +1777,68 @@ static const struct blk_mq_ops amiflop_mq_ops = { .queue_rq = amiflop_queue_rq, }; -static struct gendisk *fd_alloc_disk(int drive) +static int fd_alloc_disk(int drive, int system) { struct gendisk *disk; disk = alloc_disk(1); if (!disk) goto out; - - disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, - 2, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - disk->queue = NULL; + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) goto out_put_disk; - } + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + system; + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (system) + sprintf(disk->disk_name, "fd%d_msdos", drive); + else + sprintf(disk->disk_name, "fd%d", drive); + disk->private_data = &unit[drive]; + set_capacity(disk, 880 * 2); + + unit[drive].gendisk[system] = disk; + add_disk(disk); + return 0; + +out_put_disk: + disk->queue = NULL; + put_disk(disk); +out: + return -ENOMEM; +} + +static int fd_alloc_drive(int drive) +{ unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); if (!unit[drive].trackbuf) - goto out_cleanup_queue; + goto out; - return disk; + memset(&unit[drive].tag_set, 0, sizeof(unit[drive].tag_set)); + unit[drive].tag_set.ops = &amiflop_mq_ops; + unit[drive].tag_set.nr_hw_queues = 1; + unit[drive].tag_set.nr_maps = 1; + unit[drive].tag_set.queue_depth = 2; + unit[drive].tag_set.numa_node = NUMA_NO_NODE; + unit[drive].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (blk_mq_alloc_tag_set(&unit[drive].tag_set)) + goto out_cleanup_trackbuf; -out_cleanup_queue: - blk_cleanup_queue(disk->queue); - disk->queue = NULL; + pr_cont(" fd%d", drive); + + if (fd_alloc_disk(drive, 0) || fd_alloc_disk(drive, 1)) + goto out_cleanup_tagset; + return 0; + +out_cleanup_tagset: blk_mq_free_tag_set(&unit[drive].tag_set); -out_put_disk: - put_disk(disk); +out_cleanup_trackbuf: + kfree(unit[drive].trackbuf); out: unit[drive].type->code = FD_NODRIVE; - return NULL; + return -ENOMEM; } static int __init fd_probe_drives(void) @@ -1812,29 +1849,16 @@ static int __init fd_probe_drives(void) drives=0; nomem=0; for(drive=0;drive<FD_MAX_UNITS;drive++) { - struct gendisk *disk; fd_probe(drive); if (unit[drive].type->code == FD_NODRIVE) continue; - disk = fd_alloc_disk(drive); - if (!disk) { + if (fd_alloc_drive(drive) < 0) { pr_cont(" no mem for fd%d", drive); nomem = 1; continue; } - unit[drive].gendisk = disk; drives++; - - pr_cont(" fd%d",drive); - disk->major = FLOPPY_MAJOR; - disk->first_minor = drive; - disk->fops = &floppy_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disk->disk_name, "fd%d", drive); - disk->private_data = &unit[drive]; - set_capacity(disk, 880*2); - add_disk(disk); } if ((drives > 0) || (nomem == 0)) { if (drives == 0) @@ -1846,15 +1870,6 @@ static int __init fd_probe_drives(void) return -ENOMEM; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - int drive = *part & 3; - if (unit[drive].type->code == FD_NODRIVE) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].gendisk); -} - static int __init amiga_floppy_probe(struct platform_device *pdev) { int i, ret; @@ -1884,9 +1899,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev) if (fd_probe_drives() < 1) /* No usable drives */ goto out_probe; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - /* initialize variables */ timer_setup(&motor_on_timer, motor_on_callback, 0); motor_on_timer.expires = 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 313f0b946fe2..ac720bdcd983 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -890,19 +890,13 @@ void aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); - struct block_device *bd; - u64 ssize; if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); if (d->flags & DEVFL_NEWSIZE) { - ssize = get_capacity(d->gd); - bd = bdget_disk(d->gd, 0); - if (bd) { - bd_set_nr_sectors(bd, ssize); - bdput(bd); - } + set_capacity_and_notify(d->gd, d->ssize); + spin_lock_irq(&d->lock); d->flags |= DEVFL_UP; d->flags &= ~DEVFL_NEWSIZE; @@ -971,10 +965,9 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->geo.start = 0; if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; - if (d->gd != NULL) { - set_capacity(d->gd, ssize); + if (d->gd != NULL) d->flags |= DEVFL_NEWSIZE; - } else + else d->flags |= DEVFL_GDALLOC; schedule_work(&d->work); } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 3e881fdb06e0..104b713f4055 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -297,7 +297,7 @@ static struct atari_floppy_struct { unsigned int wpstat; /* current state of WP signal (for disk change detection) */ int flags; /* flags */ - struct gendisk *disk; + struct gendisk *disk[NUM_DISK_MINORS]; int ref; int type; struct blk_mq_tag_set tag_set; @@ -723,12 +723,16 @@ static void fd_error( void ) static int do_format(int drive, int type, struct atari_format_descr *desc) { - struct request_queue *q = unit[drive].disk->queue; + struct request_queue *q; unsigned char *p; int sect, nsect; unsigned long flags; int ret; + if (type) + type--; + + q = unit[drive].disk[type]->queue; blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); @@ -738,7 +742,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) local_irq_restore(flags); if (type) { - if (--type >= NUM_DISK_MINORS || + if (type >= NUM_DISK_MINORS || minor2disktype[type].drive_types > DriveType) { ret = -EINVAL; goto out; @@ -1154,7 +1158,7 @@ static void fd_rwsec_done1(int status) if (SUDT[-1].blocks > ReqBlock) { /* try another disk type */ SUDT--; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); } else Probing = 0; @@ -1169,7 +1173,7 @@ static void fd_rwsec_done1(int status) /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ if (SUD.autoprobe) { SUDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); Probing = 1; } @@ -1515,7 +1519,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, if (!UDT) { Probing = 1; UDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 1; } } @@ -1533,7 +1537,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 0; } @@ -1658,7 +1662,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, printk (KERN_INFO "floppy%d: setting %s %p!\n", drive, dtp->name, dtp); UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(disk, UDT->blocks); if (cmd == FDDEFPRM) { /* save settings as permanent default type */ @@ -1702,7 +1706,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, return -EINVAL; UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(disk, UDT->blocks); return 0; case FDMSGON: @@ -1725,7 +1729,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, UDT = NULL; /* MSch: invalidate default_params */ default_params[drive].blocks = 0; - set_capacity(floppy->disk, MAX_DISK_SIZE * 2); + set_capacity(disk, MAX_DISK_SIZE * 2); fallthrough; case FDFMTEND: case FDFLUSH: @@ -1962,14 +1966,50 @@ static const struct blk_mq_ops ataflop_mq_ops = { .commit_rqs = ataflop_commit_rqs, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { - int drive = *part & 3; - int type = *part >> 2; + struct gendisk *disk; + int ret; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) { + ret = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return ret; + } + + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + (type << 2); + sprintf(disk->disk_name, "fd%d", drive); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + disk->private_data = &unit[drive]; + set_capacity(disk, MAX_DISK_SIZE * 2); + + unit[drive].disk[type] = disk; + return 0; +} + +static DEFINE_MUTEX(ataflop_probe_lock); + +static void ataflop_probe(dev_t dev) +{ + int drive = MINOR(dev) & 3; + int type = MINOR(dev) >> 2; + if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].disk); + return; + mutex_lock(&ataflop_probe_lock); + if (!unit[drive].disk[type]) { + if (ataflop_alloc_disk(drive, type) == 0) + add_disk(unit[drive].disk[type]); + } + mutex_unlock(&ataflop_probe_lock); } static int __init atari_floppy_init (void) @@ -1981,23 +2021,26 @@ static int __init atari_floppy_init (void) /* Amiga, Mac, ... don't have Atari-compatible floppy :-) */ return -ENODEV; - if (register_blkdev(FLOPPY_MAJOR,"fd")) - return -EBUSY; + mutex_lock(&ataflop_probe_lock); + ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); + if (ret) + goto out_unlock; for (i = 0; i < FD_MAX_UNITS; i++) { - unit[i].disk = alloc_disk(1); - if (!unit[i].disk) { - ret = -ENOMEM; + memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); + unit[i].tag_set.ops = &ataflop_mq_ops; + unit[i].tag_set.nr_hw_queues = 1; + unit[i].tag_set.nr_maps = 1; + unit[i].tag_set.queue_depth = 2; + unit[i].tag_set.numa_node = NUMA_NO_NODE; + unit[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&unit[i].tag_set); + if (ret) goto err; - } - unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, - &ataflop_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(unit[i].disk->queue)) { - put_disk(unit[i].disk); - ret = PTR_ERR(unit[i].disk->queue); - unit[i].disk->queue = NULL; + ret = ataflop_alloc_disk(i, 0); + if (ret) { + blk_mq_free_tag_set(&unit[i].tag_set); goto err; } } @@ -2027,19 +2070,9 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].track = -1; unit[i].flags = 0; - unit[i].disk->major = FLOPPY_MAJOR; - unit[i].disk->first_minor = i; - sprintf(unit[i].disk->disk_name, "fd%d", i); - unit[i].disk->fops = &floppy_fops; - unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; - unit[i].disk->private_data = &unit[i]; - set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); - add_disk(unit[i].disk); + add_disk(unit[i].disk[0]); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E', UseTrackbuffer ? "" : "no "); @@ -2049,14 +2082,14 @@ static int __init atari_floppy_init (void) err: while (--i >= 0) { - struct gendisk *disk = unit[i].disk; - - blk_cleanup_queue(disk->queue); + blk_cleanup_queue(unit[i].disk[0]->queue); + put_disk(unit[i].disk[0]); blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); +out_unlock: + mutex_unlock(&ataflop_probe_lock); return ret; } @@ -2101,13 +2134,17 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { - int i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); + int i, type; + for (i = 0; i < FD_MAX_UNITS; i++) { - del_gendisk(unit[i].disk); - blk_cleanup_queue(unit[i].disk->queue); + for (type = 0; type < NUM_DISK_MINORS; type++) { + if (!unit[i].disk[type]) + continue; + del_gendisk(unit[i].disk[type]); + blk_cleanup_queue(unit[i].disk[type]->queue); + put_disk(unit[i].disk[type]); + } blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index cc49a921339f..c43a6ab4b1f3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -426,14 +426,15 @@ static void brd_free(struct brd_device *brd) kfree(brd); } -static struct brd_device *brd_init_one(int i, bool *new) +static void brd_probe(dev_t dev) { struct brd_device *brd; + int i = MINOR(dev) / max_part; - *new = false; + mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { if (brd->brd_number == i) - goto out; + goto out_unlock; } brd = brd_alloc(i); @@ -442,9 +443,9 @@ static struct brd_device *brd_init_one(int i, bool *new) add_disk(brd->brd_disk); list_add_tail(&brd->brd_list, &brd_devices); } - *new = true; -out: - return brd; + +out_unlock: + mutex_unlock(&brd_devices_mutex); } static void brd_del_one(struct brd_device *brd) @@ -454,23 +455,6 @@ static void brd_del_one(struct brd_device *brd) brd_free(brd); } -static struct kobject *brd_probe(dev_t dev, int *part, void *data) -{ - struct brd_device *brd; - struct kobject *kobj; - bool new; - - mutex_lock(&brd_devices_mutex); - brd = brd_init_one(MINOR(dev) / max_part, &new); - kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; - mutex_unlock(&brd_devices_mutex); - - if (new) - *part = 0; - - return kobj; -} - static inline void brd_check_and_reset_par(void) { if (unlikely(!max_part)) @@ -510,11 +494,12 @@ static int __init brd_init(void) * dynamically. */ - if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) + if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) return -EIO; brd_check_and_reset_par(); + mutex_lock(&brd_devices_mutex); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); if (!brd) @@ -532,9 +517,7 @@ static int __init brd_init(void) brd->brd_disk->queue = brd->brd_queue; add_disk(brd->brd_disk); } - - blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, - THIS_MODULE, brd_probe, NULL, NULL); + mutex_unlock(&brd_devices_mutex); pr_info("brd: module loaded\n"); return 0; @@ -544,6 +527,7 @@ out_free: list_del(&brd->brd_list); brd_free(brd); } + mutex_unlock(&brd_devices_mutex); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module NOT loaded !!!\n"); @@ -557,7 +541,6 @@ static void __exit brd_exit(void) list_for_each_entry_safe(brd, next, &brd_devices, brd_list) brd_del_one(brd); - blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module unloaded\n"); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 65b95aef8dbc..1c8c18b2a25f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2036,8 +2036,7 @@ void drbd_set_my_capacity(struct drbd_device *device, sector_t size) { char ppb[10]; - set_capacity(device->vdisk, size); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, size); drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); @@ -2068,8 +2067,7 @@ void drbd_device_cleanup(struct drbd_device *device) } D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); - set_capacity(device->vdisk, 0); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, 0); if (device->bitmap) { /* maybe never allocated. */ drbd_bm_resize(device, 0, 1); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dc333dbe5232..09c86ef3f0fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ba56f3f05312..02044ab7f767 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1678,7 +1678,8 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; - device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); + device->rs_last_events = + (int)part_stat_read_accum(disk->part0, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7df79ae6b0a1..dfe1dfc901cc 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -402,7 +402,6 @@ static struct floppy_drive_params drive_params[N_DRIVE]; static struct floppy_drive_struct drive_state[N_DRIVE]; static struct floppy_write_errors write_errors[N_DRIVE]; static struct timer_list motor_off_timer[N_DRIVE]; -static struct gendisk *disks[N_DRIVE]; static struct blk_mq_tag_set tag_sets[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); @@ -477,6 +476,8 @@ static struct floppy_struct floppy_type[32] = { { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ }; +static struct gendisk *disks[N_DRIVE][ARRAY_SIZE(floppy_type)]; + #define SECTSIZE (_FD_SECTSIZE(*floppy)) /* Auto-detection: Disk type used until the next media change occurs. */ @@ -4111,7 +4112,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) new_dev = MINOR(bdev->bd_dev); drive_state[drive].fd_device = new_dev; - set_capacity(disks[drive], floppy_sizes[new_dev]); + set_capacity(disks[drive][ITYPE(new_dev)], floppy_sizes[new_dev]); if (old_dev != -1 && old_dev != new_dev) { if (buffer_drive == drive) buffer_track = -1; @@ -4579,15 +4580,58 @@ static bool floppy_available(int drive) return true; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int floppy_alloc_disk(unsigned int drive, unsigned int type) { - int drive = (*part & 3) | ((*part & 0x80) >> 5); - if (drive >= N_DRIVE || !floppy_available(drive)) - return NULL; - if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) - return NULL; - *part = 0; - return get_disk_and_module(disks[drive]); + struct gendisk *disk; + int err; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&tag_sets[drive]); + if (IS_ERR(disk->queue)) { + err = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return err; + } + + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + blk_queue_max_hw_sectors(disk->queue, 64); + disk->major = FLOPPY_MAJOR; + disk->first_minor = TOMINOR(drive) | (type << 2); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (type) + sprintf(disk->disk_name, "fd%d_type%d", drive, type); + else + sprintf(disk->disk_name, "fd%d", drive); + /* to be cleaned up... */ + disk->private_data = (void *)(long)drive; + disk->flags |= GENHD_FL_REMOVABLE; + + disks[drive][type] = disk; + return 0; +} + +static DEFINE_MUTEX(floppy_probe_lock); + +static void floppy_probe(dev_t dev) +{ + unsigned int drive = (MINOR(dev) & 3) | ((MINOR(dev) & 0x80) >> 5); + unsigned int type = (MINOR(dev) >> 2) & 0x1f; + + if (drive >= N_DRIVE || !floppy_available(drive) || + type >= ARRAY_SIZE(floppy_type)) + return; + + mutex_lock(&floppy_probe_lock); + if (!disks[drive][type]) { + if (floppy_alloc_disk(drive, type) == 0) + add_disk(disks[drive][type]); + } + mutex_unlock(&floppy_probe_lock); } static int __init do_floppy_init(void) @@ -4609,33 +4653,25 @@ static int __init do_floppy_init(void) return -ENOMEM; for (drive = 0; drive < N_DRIVE; drive++) { - disks[drive] = alloc_disk(1); - if (!disks[drive]) { - err = -ENOMEM; + memset(&tag_sets[drive], 0, sizeof(tag_sets[drive])); + tag_sets[drive].ops = &floppy_mq_ops; + tag_sets[drive].nr_hw_queues = 1; + tag_sets[drive].nr_maps = 1; + tag_sets[drive].queue_depth = 2; + tag_sets[drive].numa_node = NUMA_NO_NODE; + tag_sets[drive].flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(&tag_sets[drive]); + if (err) goto out_put_disk; - } - disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], - &floppy_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disks[drive]->queue)) { - err = PTR_ERR(disks[drive]->queue); - disks[drive]->queue = NULL; + err = floppy_alloc_disk(drive, 0); + if (err) goto out_put_disk; - } - - blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH); - blk_queue_max_hw_sectors(disks[drive]->queue, 64); - disks[drive]->major = FLOPPY_MAJOR; - disks[drive]->first_minor = TOMINOR(drive); - disks[drive]->fops = &floppy_fops; - disks[drive]->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disks[drive]->disk_name, "fd%d", drive); timer_setup(&motor_off_timer[drive], motor_off_callback, 0); } - err = register_blkdev(FLOPPY_MAJOR, "fd"); + err = __register_blkdev(FLOPPY_MAJOR, "fd", floppy_probe); if (err) goto out_put_disk; @@ -4643,9 +4679,6 @@ static int __init do_floppy_init(void) if (err) goto out_unreg_blkdev; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - for (i = 0; i < 256; i++) if (ITYPE(i)) floppy_sizes[i] = floppy_type[ITYPE(i)].size; @@ -4673,7 +4706,7 @@ static int __init do_floppy_init(void) if (fdc_state[0].address == -1) { cancel_delayed_work(&fd_timeout); err = -ENODEV; - goto out_unreg_region; + goto out_unreg_driver; } #if N_FDC > 1 fdc_state[1].address = FDC2; @@ -4684,7 +4717,7 @@ static int __init do_floppy_init(void) if (err) { cancel_delayed_work(&fd_timeout); err = -EBUSY; - goto out_unreg_region; + goto out_unreg_driver; } /* initialise drive state */ @@ -4761,10 +4794,8 @@ static int __init do_floppy_init(void) if (err) goto out_remove_drives; - /* to be cleaned up... */ - disks[drive]->private_data = (void *)(long)drive; - disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); + device_add_disk(&floppy_device[drive].dev, disks[drive][0], + NULL); } return 0; @@ -4772,30 +4803,27 @@ static int __init do_floppy_init(void) out_remove_drives: while (drive--) { if (floppy_available(drive)) { - del_gendisk(disks[drive]); + del_gendisk(disks[drive][0]); platform_device_unregister(&floppy_device[drive]); } } out_release_dma: if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); -out_unreg_region: - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); +out_unreg_driver: platform_driver_unregister(&floppy_driver); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: destroy_workqueue(floppy_wq); for (drive = 0; drive < N_DRIVE; drive++) { - if (!disks[drive]) + if (!disks[drive][0]) break; - if (disks[drive]->queue) { - del_timer_sync(&motor_off_timer[drive]); - blk_cleanup_queue(disks[drive]->queue); - disks[drive]->queue = NULL; - blk_mq_free_tag_set(&tag_sets[drive]); - } - put_disk(disks[drive]); + del_timer_sync(&motor_off_timer[drive]); + blk_cleanup_queue(disks[drive][0]->queue); + disks[drive][0]->queue = NULL; + blk_mq_free_tag_set(&tag_sets[drive]); + put_disk(disks[drive][0]); } return err; } @@ -5006,9 +5034,8 @@ module_init(floppy_module_init); static void __exit floppy_module_exit(void) { - int drive; + int drive, i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); unregister_blkdev(FLOPPY_MAJOR, "fd"); platform_driver_unregister(&floppy_driver); @@ -5018,10 +5045,16 @@ static void __exit floppy_module_exit(void) del_timer_sync(&motor_off_timer[drive]); if (floppy_available(drive)) { - del_gendisk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + del_gendisk(disks[drive][i]); + } platform_device_unregister(&floppy_device[drive]); } - blk_cleanup_queue(disks[drive]->queue); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + blk_cleanup_queue(disks[drive][i]->queue); + } blk_mq_free_tag_set(&tag_sets[drive]); /* @@ -5029,10 +5062,17 @@ static void __exit floppy_module_exit(void) * queue reference in put_disk(). */ if (!(allowed_drive_mask & (1 << drive)) || - fdc_state[FDC(drive)].version == FDC_NONE) - disks[drive]->queue = NULL; + fdc_state[FDC(drive)].version == FDC_NONE) { + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + disks[drive][i]->queue = NULL; + } + } - put_disk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + put_disk(disks[drive][i]); + } } cancel_delayed_work_sync(&fd_timeout); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a58084c2ed7c..e5ff328f0917 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -251,12 +251,8 @@ loop_validate_block_size(unsigned short bsize) */ static void loop_set_size(struct loop_device *lo, loff_t size) { - struct block_device *bdev = lo->lo_device; - - bd_set_nr_sectors(bdev, size); - - if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + if (!set_capacity_and_notify(lo->lo_disk, size)) + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } static inline int @@ -679,10 +675,10 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; - if (f->f_mapping->host->i_bdev == bdev) + if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; - l = f->f_mapping->host->i_bdev->bd_disk->private_data; + l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; if (l->lo_state != Lo_bound) { return -EINVAL; } @@ -889,9 +885,7 @@ static void loop_config_discard(struct loop_device *lo) * file-backed loop devices: discarded regions read back as zero. */ if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { - struct request_queue *backingq; - - backingq = bdev_get_queue(inode->i_bdev); + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; granularity = backingq->limits.discard_granularity ?: @@ -1075,7 +1069,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; - struct block_device *claimed_bdev = NULL; int error; loff_t size; bool partscan; @@ -1094,8 +1087,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; - error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); + error = bd_prepare_to_claim(bdev, loop_configure); if (error) goto out_putf; } @@ -1138,7 +1130,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (error) goto out_unlock; - set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; @@ -1168,9 +1160,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); - set_blocksize(bdev, S_ISBLK(inode->i_mode) ? - block_size(inode->i_bdev) : PAGE_SIZE); - lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1185,15 +1174,15 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); out: @@ -1252,7 +1241,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } @@ -2235,24 +2223,18 @@ out: return ret; } -static struct kobject *loop_probe(dev_t dev, int *part, void *data) +static void loop_probe(dev_t dev) { + int idx = MINOR(dev) >> part_shift; struct loop_device *lo; - struct kobject *kobj; - int err; + + if (max_loop && idx >= max_loop) + return; mutex_lock(&loop_ctl_mutex); - err = loop_lookup(&lo, MINOR(dev) >> part_shift); - if (err < 0) - err = loop_add(&lo, MINOR(dev) >> part_shift); - if (err < 0) - kobj = NULL; - else - kobj = get_disk_and_module(lo->lo_disk); + if (loop_lookup(&lo, idx) < 0) + loop_add(&lo, idx); mutex_unlock(&loop_ctl_mutex); - - *part = 0; - return kobj; } static long loop_control_ioctl(struct file *file, unsigned int cmd, @@ -2322,7 +2304,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - unsigned long range; struct loop_device *lo; int err; @@ -2359,27 +2340,21 @@ static int __init loop_init(void) * /dev/loop-control interface, or be instantiated by accessing * a 'dead' device node. */ - if (max_loop) { + if (max_loop) nr = max_loop; - range = max_loop << part_shift; - } else { + else nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; - range = 1UL << MINORBITS; - } err = misc_register(&loop_misc); if (err < 0) goto err_out; - if (register_blkdev(LOOP_MAJOR, "loop")) { + if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) { err = -EIO; goto misc_out; } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, - THIS_MODULE, loop_probe, NULL, NULL); - /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) @@ -2405,16 +2380,11 @@ static int loop_exit_cb(int id, void *ptr, void *data) static void __exit loop_exit(void) { - unsigned long range; - - range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - mutex_lock(&loop_ctl_mutex); idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); idr_destroy(&loop_index_idr); - blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 153e2cdecb4d..53ac59d19ae5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3687,7 +3687,6 @@ skip_create_disk: /* Enable the block device and add it to /dev */ device_add_disk(&dd->pdev->dev, dd->disk, NULL); - dd->bdev = bdget_disk(dd->disk, 0); /* * Now that the disk is active, initialize any sysfs attributes * managed by the protocol layer. @@ -3721,9 +3720,6 @@ start_service_thread: return rv; kthread_run_error: - bdput(dd->bdev); - dd->bdev = NULL; - /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); @@ -3804,14 +3800,6 @@ static int mtip_block_remove(struct driver_data *dd) blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); blk_mq_unquiesce_queue(dd->queue); - /* - * Delete our gendisk structure. This also removes the device - * from /dev - */ - if (dd->bdev) { - bdput(dd->bdev); - dd->bdev = NULL; - } if (dd->disk) { if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); @@ -4206,9 +4194,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); - if (!dd->sr) - fsync_bdev(dd->bdev); - if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e22a7f0523bf..88f4206310e4 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -463,8 +463,6 @@ struct driver_data { int isr_binding; - struct block_device *bdev; - struct list_head online_list; /* linkage for online list */ struct list_head remove_list; /* linkage for removing list */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index aaae9220f3a0..6727358e147d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -296,40 +296,33 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd, bool start) +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) { - struct nbd_config *config = nbd->config; - struct block_device *bdev = bdget_disk(nbd->disk, 0); - sector_t nr_sectors = config->bytesize >> 9; + if (!blksize) + blksize = NBD_DEF_BLKSIZE; + if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize)) + return -EINVAL; + + nbd->config->bytesize = bytesize; + nbd->config->blksize = blksize; - if (config->flags & NBD_FLAG_SEND_TRIM) { - nbd->disk->queue->limits.discard_granularity = config->blksize; - nbd->disk->queue->limits.discard_alignment = config->blksize; + if (!nbd->task_recv) + return 0; + + if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { + nbd->disk->queue->limits.discard_granularity = blksize; + nbd->disk->queue->limits.discard_alignment = blksize; blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); } - blk_queue_logical_block_size(nbd->disk->queue, config->blksize); - blk_queue_physical_block_size(nbd->disk->queue, config->blksize); - set_capacity(nbd->disk, nr_sectors); - if (bdev) { - if (bdev->bd_disk) { - bd_set_nr_sectors(bdev, nr_sectors); - if (start) - set_blocksize(bdev, config->blksize); - } else - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); - bdput(bdev); - } - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); -} + blk_queue_logical_block_size(nbd->disk->queue, blksize); + blk_queue_physical_block_size(nbd->disk->queue, blksize); -static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, - loff_t nr_blocks) -{ - struct nbd_config *config = nbd->config; - config->blksize = blocksize; - config->bytesize = blocksize * nr_blocks; - if (nbd->task_recv != NULL) - nbd_size_update(nbd, false); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + return 0; } static void nbd_complete_rq(struct request *req) @@ -1140,7 +1133,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); } static void nbd_parse_flags(struct nbd_device *nbd) @@ -1309,8 +1302,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd, true); - return error; + return nbd_set_size(nbd, config->bytesize, config->blksize); } static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) @@ -1352,14 +1344,6 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, nbd_config_put(nbd); } -static bool nbd_is_valid_blksize(unsigned long blksize) -{ - if (!blksize || !is_power_of_2(blksize) || blksize < 512 || - blksize > PAGE_SIZE) - return false; - return true; -} - static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout) { nbd->tag_set.timeout = timeout * HZ; @@ -1384,20 +1368,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SOCK: return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: - if (!arg) - arg = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(arg)) - return -EINVAL; - nbd_size_set(nbd, arg, - div_s64(config->bytesize, arg)); - return 0; + return nbd_set_size(nbd, config->bytesize, arg); case NBD_SET_SIZE: - nbd_size_set(nbd, config->blksize, - div_s64(arg, config->blksize)); - return 0; + return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - nbd_size_set(nbd, config->blksize, arg); - return 0; + return nbd_set_size(nbd, arg * config->blksize, + config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; @@ -1501,9 +1477,11 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } else if (nbd_disconnected(nbd->config)) { - set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); + if (max_part) + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } out: mutex_unlock(&nbd_index_mutex); @@ -1513,12 +1491,10 @@ out: static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; - struct block_device *bdev = bdget_disk(disk, 0); if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && - bdev->bd_openers == 0) + disk->part0->bd_openers == 0) nbd_disconnect_and_put(nbd); - bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); @@ -1815,18 +1791,11 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd) if (info->attrs[NBD_ATTR_SIZE_BYTES]) bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); - if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); - if (!bsize) - bsize = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(bsize)) { - printk(KERN_ERR "Invalid block size %llu\n", bsize); - return -EINVAL; - } - } if (bytes != config->bytesize || bsize != config->blksize) - nbd_size_set(nbd, bsize, div64_u64(bytes, bsize)); + return nbd_set_size(nbd, bytes, bsize); return 0; } diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig new file mode 100644 index 000000000000..6bf1f8ca20a2 --- /dev/null +++ b/drivers/block/null_blk/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Null block device driver configuration +# + +config BLK_DEV_NULL_BLK + tristate "Null test block driver" + select CONFIGFS_FS + +config BLK_DEV_NULL_BLK_FAULT_INJECTION + bool "Support fault injection for Null test block driver" + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION diff --git a/drivers/block/null_blk/Makefile b/drivers/block/null_blk/Makefile new file mode 100644 index 000000000000..84c36e512ab8 --- /dev/null +++ b/drivers/block/null_blk/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +# needed for trace events +ccflags-y += -I$(src) + +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +null_blk-objs := main.o +ifeq ($(CONFIG_BLK_DEV_ZONED), y) +null_blk-$(CONFIG_TRACING) += trace.o +endif +null_blk-$(CONFIG_BLK_DEV_ZONED) += zoned.o diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk/main.c index 4685ea401d5b..5357c3a4a36f 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk/main.c @@ -152,6 +152,10 @@ static int g_bs = 512; module_param_named(bs, g_bs, int, 0444); MODULE_PARM_DESC(bs, "Block size (in bytes)"); +static int g_max_sectors; +module_param_named(max_sectors, g_max_sectors, int, 0444); +MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)"); + static unsigned int nr_devices = 1; module_param(nr_devices, uint, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); @@ -346,6 +350,7 @@ NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL); +NULLB_DEVICE_ATTR(max_sectors, uint, NULL); NULLB_DEVICE_ATTR(irqmode, uint, NULL); NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); NULLB_DEVICE_ATTR(index, uint, NULL); @@ -463,6 +468,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_home_node, &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, + &nullb_device_attr_max_sectors, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, @@ -533,7 +539,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -588,6 +594,7 @@ static struct nullb_device *null_alloc_dev(void) dev->home_node = g_home_node; dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; + dev->max_sectors = g_max_sectors; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; @@ -1076,13 +1083,16 @@ static void nullb_fill_pattern(struct nullb *nullb, struct page *page, kunmap_atomic(dst); } -static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +blk_status_t null_handle_discard(struct nullb_device *dev, + sector_t sector, sector_t nr_sectors) { + struct nullb *nullb = dev->nullb; + size_t n = nr_sectors << SECTOR_SHIFT; size_t temp; spin_lock_irq(&nullb->lock); while (n > 0) { - temp = min_t(size_t, n, nullb->dev->blocksize); + temp = min_t(size_t, n, dev->blocksize); null_free_sector(nullb, sector, false); if (null_cache_active(nullb)) null_free_sector(nullb, sector, true); @@ -1090,6 +1100,8 @@ static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) n -= temp; } spin_unlock_irq(&nullb->lock); + + return BLK_STS_OK; } static int null_handle_flush(struct nullb *nullb) @@ -1149,17 +1161,10 @@ static int null_handle_rq(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = blk_rq_pos(rq); struct req_iterator iter; struct bio_vec bvec; - sector = blk_rq_pos(rq); - - if (req_op(rq) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, blk_rq_bytes(rq)); - return 0; - } - spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; @@ -1183,18 +1188,10 @@ static int null_handle_bio(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = bio->bi_iter.bi_sector; struct bio_vec bvec; struct bvec_iter iter; - sector = bio->bi_iter.bi_sector; - - if (bio_op(bio) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, - bio_sectors(bio) << SECTOR_SHIFT); - return 0; - } - spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { len = bvec.bv_len; @@ -1263,11 +1260,16 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, } static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, - enum req_opf op) + enum req_opf op, + sector_t sector, + sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; int err; + if (op == REQ_OP_DISCARD) + return null_handle_discard(dev, sector, nr_sectors); + if (dev->queue_mode == NULL_Q_BIO) err = null_handle_bio(cmd); else @@ -1343,7 +1345,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, } if (dev->memory_backed) - return null_handle_memory_backed(cmd, op); + return null_handle_memory_backed(cmd, op, sector, nr_sectors); return BLK_STS_OK; } @@ -1589,6 +1591,12 @@ static void null_config_discard(struct nullb *nullb) if (nullb->dev->discard == false) return; + if (!nullb->dev->memory_backed) { + nullb->dev->discard = false; + pr_info("discard option is ignored without memory backing\n"); + return; + } + if (nullb->dev->zoned) { nullb->dev->discard = false; pr_info("discard option is ignored in zoned mode\n"); @@ -1866,6 +1874,11 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_logical_block_size(nullb->q, dev->blocksize); blk_queue_physical_block_size(nullb->q, dev->blocksize); + if (!dev->max_sectors) + dev->max_sectors = queue_max_hw_sectors(nullb->q); + dev->max_sectors = min_t(unsigned int, dev->max_sectors, + BLK_DEF_MAX_SECTORS); + blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); null_config_discard(nullb); @@ -1909,6 +1922,12 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } + if (g_max_sectors > BLK_DEF_MAX_SECTORS) { + pr_warn("invalid max sectors\n"); + pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS); + g_max_sectors = BLK_DEF_MAX_SECTORS; + } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { pr_err("invalid home_node value\n"); g_home_node = NUMA_NO_NODE; diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk/null_blk.h index c24d9b5ad81a..83504f3cc9d6 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -12,6 +12,8 @@ #include <linux/configfs.h> #include <linux/badblocks.h> #include <linux/fault-inject.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> struct nullb_cmd { struct request *rq; @@ -32,6 +34,26 @@ struct nullb_queue { struct nullb_cmd *cmds; }; +struct nullb_zone { + /* + * Zone lock to prevent concurrent modification of a zone write + * pointer position and condition: with memory backing, a write + * command execution may sleep on memory allocation. For this case, + * use mutex as the zone lock. Otherwise, use the spinlock for + * locking the zone. + */ + union { + spinlock_t spinlock; + struct mutex mutex; + }; + enum blk_zone_type type; + enum blk_zone_cond cond; + sector_t start; + sector_t wp; + unsigned int len; + unsigned int capacity; +}; + struct nullb_device { struct nullb *nullb; struct config_item item; @@ -45,10 +67,11 @@ struct nullb_device { unsigned int nr_zones_imp_open; unsigned int nr_zones_exp_open; unsigned int nr_zones_closed; - struct blk_zone *zones; + unsigned int imp_close_zone_no; + struct nullb_zone *zones; sector_t zone_size_sects; - spinlock_t zone_lock; - unsigned long *zone_locks; + bool need_zone_res_mgmt; + spinlock_t zone_res_lock; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ @@ -62,6 +85,7 @@ struct nullb_device { unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ + unsigned int max_sectors; /* Max sectors per command */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ @@ -93,6 +117,8 @@ struct nullb { char disk_name[DISK_NAME_LEN]; }; +blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector, + sector_t nr_sectors); blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, unsigned int nr_sectors); diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk/trace.c index f246e7bff698..3711cba16071 100644 --- a/drivers/block/null_blk_trace.c +++ b/drivers/block/null_blk/trace.c @@ -4,7 +4,7 @@ * * Copyright (C) 2020 Western Digital Corporation or its affiliates. */ -#include "null_blk_trace.h" +#include "trace.h" /* * Helper to use for all null_blk traces to extract disk name. diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk/trace.h index 4f83032eb544..ce3b430e88c5 100644 --- a/drivers/block/null_blk_trace.h +++ b/drivers/block/null_blk/trace.h @@ -73,7 +73,7 @@ TRACE_EVENT(nullb_report_zones, #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE null_blk_trace +#define TRACE_INCLUDE_FILE trace /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk/zoned.c index beb34b4f76b0..148b871f263b 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -4,19 +4,58 @@ #include "null_blk.h" #define CREATE_TRACE_POINTS -#include "null_blk_trace.h" +#include "trace.h" -/* zone_size in MBs to sectors. */ -#define ZONE_SIZE_SHIFT 11 +#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { return sect >> ilog2(dev->zone_size_sects); } +static inline void null_lock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_lock_irq(&dev->zone_res_lock); +} + +static inline void null_unlock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_unlock_irq(&dev->zone_res_lock); +} + +static inline void null_init_zone_lock(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_init(&zone->spinlock); + else + mutex_init(&zone->mutex); +} + +static inline void null_lock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_irq(&zone->spinlock); + else + mutex_lock(&zone->mutex); +} + +static inline void null_unlock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_unlock_irq(&zone->spinlock); + else + mutex_unlock(&zone->mutex); +} + int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) { - sector_t dev_size = (sector_t)dev->size * 1024 * 1024; + sector_t dev_capacity_sects, zone_capacity_sects; + struct nullb_zone *zone; sector_t sector = 0; unsigned int i; @@ -38,29 +77,19 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; - dev->nr_zones = dev_size >> - (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); - dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), - GFP_KERNEL | __GFP_ZERO); + zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); + dev_capacity_sects = MB_TO_SECTS(dev->size); + dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); + dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); + if (dev_capacity_sects & (dev->zone_size_sects - 1)) + dev->nr_zones++; + + dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone), + GFP_KERNEL | __GFP_ZERO); if (!dev->zones) return -ENOMEM; - /* - * With memory backing, the zone_lock spinlock needs to be temporarily - * released to avoid scheduling in atomic context. To guarantee zone - * information protection, use a bitmap to lock zones with - * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing - * implies that the queue is marked with BLK_MQ_F_BLOCKING. - */ - spin_lock_init(&dev->zone_lock); - if (dev->memory_backed) { - dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); - if (!dev->zone_locks) { - kvfree(dev->zones); - return -ENOMEM; - } - } + spin_lock_init(&dev->zone_res_lock); if (dev->zone_nr_conv >= dev->nr_zones) { dev->zone_nr_conv = dev->nr_zones - 1; @@ -83,10 +112,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) dev->zone_max_open = 0; pr_info("zone_max_open limit disabled, limit >= zone count\n"); } + dev->need_zone_res_mgmt = dev->zone_max_active || dev->zone_max_open; + dev->imp_close_zone_no = dev->zone_nr_conv; for (i = 0; i < dev->zone_nr_conv; i++) { - struct blk_zone *zone = &dev->zones[i]; + zone = &dev->zones[i]; + null_init_zone_lock(dev, zone); zone->start = sector; zone->len = dev->zone_size_sects; zone->capacity = zone->len; @@ -98,11 +130,16 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) } for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - struct blk_zone *zone = &dev->zones[i]; + zone = &dev->zones[i]; + null_init_zone_lock(dev, zone); zone->start = zone->wp = sector; - zone->len = dev->zone_size_sects; - zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT; + if (zone->start + dev->zone_size_sects > dev_capacity_sects) + zone->len = dev_capacity_sects - zone->start; + else + zone->len = dev->zone_size_sects; + zone->capacity = + min_t(sector_t, zone->len, zone_capacity_sects); zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone->cond = BLK_ZONE_COND_EMPTY; @@ -140,32 +177,17 @@ int null_register_zoned_dev(struct nullb *nullb) void null_free_zoned_dev(struct nullb_device *dev) { - bitmap_free(dev->zone_locks); kvfree(dev->zones); } -static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) -{ - if (dev->memory_backed) - wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); - spin_lock_irq(&dev->zone_lock); -} - -static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) -{ - spin_unlock_irq(&dev->zone_lock); - - if (dev->memory_backed) - clear_and_wake_up_bit(zno, dev->zone_locks); -} - int null_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct nullb *nullb = disk->private_data; struct nullb_device *dev = nullb->dev; - unsigned int first_zone, i, zno; - struct blk_zone zone; + unsigned int first_zone, i; + struct nullb_zone *zone; + struct blk_zone blkz; int error; first_zone = null_zone_no(dev, sector); @@ -175,19 +197,25 @@ int null_report_zones(struct gendisk *disk, sector_t sector, nr_zones = min(nr_zones, dev->nr_zones - first_zone); trace_nullb_report_zones(nullb, nr_zones); - zno = first_zone; - for (i = 0; i < nr_zones; i++, zno++) { + memset(&blkz, 0, sizeof(struct blk_zone)); + zone = &dev->zones[first_zone]; + for (i = 0; i < nr_zones; i++, zone++) { /* * Stacked DM target drivers will remap the zone information by * modifying the zone information passed to the report callback. * So use a local copy to avoid corruption of the device zone * array. */ - null_lock_zone(dev, zno); - memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); - null_unlock_zone(dev, zno); - - error = cb(&zone, i, data); + null_lock_zone(dev, zone); + blkz.start = zone->start; + blkz.len = zone->len; + blkz.wp = zone->wp; + blkz.type = zone->type; + blkz.cond = zone->cond; + blkz.capacity = zone->capacity; + null_unlock_zone(dev, zone); + + error = cb(&blkz, i, data); if (error) return error; } @@ -203,7 +231,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len) { struct nullb_device *dev = nullb->dev; - struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; + struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)]; unsigned int nr_sectors = len >> SECTOR_SHIFT; /* Read must be below the write pointer position */ @@ -217,11 +245,9 @@ size_t null_zone_valid_read_len(struct nullb *nullb, return (zone->wp - sector) << SECTOR_SHIFT; } -static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t __null_close_zone(struct nullb_device *dev, + struct nullb_zone *zone) { - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - switch (zone->cond) { case BLK_ZONE_COND_CLOSED: /* close operation on closed is not an error */ @@ -248,13 +274,24 @@ static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *z return BLK_STS_OK; } -static void null_close_first_imp_zone(struct nullb_device *dev) +static void null_close_imp_open_zone(struct nullb_device *dev) { - unsigned int i; + struct nullb_zone *zone; + unsigned int zno, i; + + zno = dev->imp_close_zone_no; + if (zno >= dev->nr_zones) + zno = dev->zone_nr_conv; for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { - null_close_zone(dev, &dev->zones[i]); + zone = &dev->zones[zno]; + zno++; + if (zno >= dev->nr_zones) + zno = dev->zone_nr_conv; + + if (zone->cond == BLK_ZONE_COND_IMP_OPEN) { + __null_close_zone(dev, zone); + dev->imp_close_zone_no = zno; return; } } @@ -282,7 +319,7 @@ static blk_status_t null_check_open(struct nullb_device *dev) if (dev->nr_zones_imp_open) { if (null_check_active(dev) == BLK_STS_OK) { - null_close_first_imp_zone(dev); + null_close_imp_open_zone(dev); return BLK_STS_OK; } } @@ -303,7 +340,8 @@ static blk_status_t null_check_open(struct nullb_device *dev) * it is not certain that closing an implicit open zone will allow a new zone * to be opened, since we might already be at the active limit capacity. */ -static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_check_zone_resources(struct nullb_device *dev, + struct nullb_zone *zone) { blk_status_t ret; @@ -327,34 +365,23 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, { struct nullb_device *dev = cmd->nq->dev; unsigned int zno = null_zone_no(dev, sector); - struct blk_zone *zone = &dev->zones[zno]; + struct nullb_zone *zone = &dev->zones[zno]; blk_status_t ret; trace_nullb_zone_op(cmd, zno, zone->cond); - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { + if (append) + return BLK_STS_IOERR; return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + } - null_lock_zone(dev, zno); + null_lock_zone(dev, zone); - switch (zone->cond) { - case BLK_ZONE_COND_FULL: + if (zone->cond == BLK_ZONE_COND_FULL) { /* Cannot write to a full zone */ ret = BLK_STS_IOERR; goto unlock; - case BLK_ZONE_COND_EMPTY: - case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; - case BLK_ZONE_COND_IMP_OPEN: - case BLK_ZONE_COND_EXP_OPEN: - break; - default: - /* Invalid zone condition */ - ret = BLK_STS_IOERR; - goto unlock; } /* @@ -379,60 +406,69 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, goto unlock; } - if (zone->cond == BLK_ZONE_COND_CLOSED) { - dev->nr_zones_closed--; - dev->nr_zones_imp_open++; - } else if (zone->cond == BLK_ZONE_COND_EMPTY) { - dev->nr_zones_imp_open++; + if (zone->cond == BLK_ZONE_COND_CLOSED || + zone->cond == BLK_ZONE_COND_EMPTY) { + null_lock_zone_res(dev); + + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + null_unlock_zone_res(dev); + goto unlock; + } + if (zone->cond == BLK_ZONE_COND_CLOSED) { + dev->nr_zones_closed--; + dev->nr_zones_imp_open++; + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { + dev->nr_zones_imp_open++; + } + + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + null_unlock_zone_res(dev); } - if (zone->cond != BLK_ZONE_COND_EXP_OPEN) - zone->cond = BLK_ZONE_COND_IMP_OPEN; - /* - * Memory backing allocation may sleep: release the zone_lock spinlock - * to avoid scheduling in atomic context. Zone operation atomicity is - * still guaranteed through the zone_locks bitmap. - */ - if (dev->memory_backed) - spin_unlock_irq(&dev->zone_lock); ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); - if (dev->memory_backed) - spin_lock_irq(&dev->zone_lock); - if (ret != BLK_STS_OK) goto unlock; zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { + null_lock_zone_res(dev); if (zone->cond == BLK_ZONE_COND_EXP_OPEN) dev->nr_zones_exp_open--; else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) dev->nr_zones_imp_open--; zone->cond = BLK_ZONE_COND_FULL; + null_unlock_zone_res(dev); } + ret = BLK_STS_OK; unlock: - null_unlock_zone(dev, zno); + null_unlock_zone(dev, zone); return ret; } -static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_open_zone(struct nullb_device *dev, + struct nullb_zone *zone) { - blk_status_t ret; + blk_status_t ret = BLK_STS_OK; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_EXP_OPEN: /* open operation on exp open is not an error */ - return BLK_STS_OK; + goto unlock; case BLK_ZONE_COND_EMPTY: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -440,35 +476,57 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo case BLK_ZONE_COND_CLOSED: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; dev->nr_zones_closed--; break; case BLK_ZONE_COND_FULL: default: - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } zone->cond = BLK_ZONE_COND_EXP_OPEN; dev->nr_zones_exp_open++; - return BLK_STS_OK; +unlock: + null_unlock_zone_res(dev); + + return ret; } -static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_close_zone(struct nullb_device *dev, + struct nullb_zone *zone) { blk_status_t ret; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + ret = __null_close_zone(dev, zone); + null_unlock_zone_res(dev); + + return ret; +} + +static blk_status_t null_finish_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + blk_status_t ret = BLK_STS_OK; + + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_FULL: /* finish operation on full is not an error */ - return BLK_STS_OK; + goto unlock; case BLK_ZONE_COND_EMPTY: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -479,27 +537,35 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone * case BLK_ZONE_COND_CLOSED: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; dev->nr_zones_closed--; break; default: - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } zone->cond = BLK_ZONE_COND_FULL; zone->wp = zone->start + zone->len; - return BLK_STS_OK; +unlock: + null_unlock_zone_res(dev); + + return ret; } -static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_reset_zone(struct nullb_device *dev, + struct nullb_zone *zone) { if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_EMPTY: /* reset operation on empty is not an error */ + null_unlock_zone_res(dev); return BLK_STS_OK; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -513,12 +579,18 @@ static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *z case BLK_ZONE_COND_FULL: break; default: + null_unlock_zone_res(dev); return BLK_STS_IOERR; } zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; + null_unlock_zone_res(dev); + + if (dev->memory_backed) + return null_handle_discard(dev, zone->start, zone->len); + return BLK_STS_OK; } @@ -527,19 +599,19 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, { struct nullb_device *dev = cmd->nq->dev; unsigned int zone_no; - struct blk_zone *zone; + struct nullb_zone *zone; blk_status_t ret; size_t i; if (op == REQ_OP_ZONE_RESET_ALL) { for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - null_lock_zone(dev, i); zone = &dev->zones[i]; + null_lock_zone(dev, zone); if (zone->cond != BLK_ZONE_COND_EMPTY) { null_reset_zone(dev, zone); trace_nullb_zone_op(cmd, i, zone->cond); } - null_unlock_zone(dev, i); + null_unlock_zone(dev, zone); } return BLK_STS_OK; } @@ -547,7 +619,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, zone_no = null_zone_no(dev, sector); zone = &dev->zones[zone_no]; - null_lock_zone(dev, zone_no); + null_lock_zone(dev, zone); switch (op) { case REQ_OP_ZONE_RESET: @@ -570,7 +642,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, if (ret == BLK_STS_OK) trace_nullb_zone_op(cmd, zone_no, zone->cond); - null_unlock_zone(dev, zone_no); + null_unlock_zone(dev, zone); return ret; } @@ -578,29 +650,28 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, sector_t nr_sectors) { - struct nullb_device *dev = cmd->nq->dev; - unsigned int zno = null_zone_no(dev, sector); + struct nullb_device *dev; + struct nullb_zone *zone; blk_status_t sts; switch (op) { case REQ_OP_WRITE: - sts = null_zone_write(cmd, sector, nr_sectors, false); - break; + return null_zone_write(cmd, sector, nr_sectors, false); case REQ_OP_ZONE_APPEND: - sts = null_zone_write(cmd, sector, nr_sectors, true); - break; + return null_zone_write(cmd, sector, nr_sectors, true); case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: - sts = null_zone_mgmt(cmd, op, sector); - break; + return null_zone_mgmt(cmd, op, sector); default: - null_lock_zone(dev, zno); + dev = cmd->nq->dev; + zone = &dev->zones[null_zone_no(dev, sector)]; + + null_lock_zone(dev, zone); sts = null_process_cmd(cmd, op, sector, nr_sectors); - null_unlock_zone(dev, zno); + null_unlock_zone(dev, zone); + return sts; } - - return sts; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 467dbd06b7cd..b8bb8ec7538d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2130,8 +2130,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) } set_capacity(pd->disk, lba << 2); - set_capacity(pd->bdev->bd_disk, lba << 2); - bd_set_nr_sectors(pd->bdev, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); q = bdev_get_queue(pd->bdev); if (write) { @@ -2584,9 +2583,11 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case CDROM_LAST_WRITTEN: case CDROM_SEND_PACKET: case SCSI_IOCTL_SEND_COMMAND: - ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); break; - default: pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); ret = -ENOTTY; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 7b55811c2a81..ba3ece56cbb3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -507,7 +507,7 @@ fail: return error; } -static int ps3disk_remove(struct ps3_system_bus_device *_dev) +static void ps3disk_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); @@ -526,7 +526,6 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) kfree(dev->bounce_buf); kfree(priv); ps3_system_bus_set_drvdata(_dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3disk = { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1088798c8dd0..b71d28372ef3 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -797,7 +797,7 @@ fail: return error; } -static int ps3vram_remove(struct ps3_system_bus_device *dev) +static void ps3vram_remove(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -817,7 +817,6 @@ static int ps3vram_remove(struct ps3_system_bus_device *dev) free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE)); kfree(priv); ps3_system_bus_set_drvdata(dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3vram = { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f84128abade3..59cfe71d0b3a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -692,12 +692,9 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } -static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) +static int rbd_set_read_only(struct block_device *bdev, bool ro) { - int ro; - - if (get_user(ro, (int __user *)arg)) - return -EFAULT; + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; /* * Both images mapped read-only and snapshots can't be marked @@ -710,43 +707,14 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) rbd_assert(!rbd_is_snap(rbd_dev)); } - /* Let blkdev_roset() handle it */ - return -ENOTTY; -} - -static int rbd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - int ret; - - switch (cmd) { - case BLKROSET: - ret = rbd_ioctl_set_ro(rbd_dev, arg); - break; - default: - ret = -ENOTTY; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return rbd_ioctl(bdev, mode, cmd, arg); + return 0; } -#endif /* CONFIG_COMPAT */ static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, - .ioctl = rbd_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = rbd_compat_ioctl, -#endif + .set_read_only = rbd_set_read_only, }; /* @@ -3957,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev, sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); for (i = 0; i < num_watchers; i++) { - if (!memcmp(&watchers[i].addr, &locker->info.addr, - sizeof(locker->info.addr)) && + /* + * Ignore addr->type while comparing. This mimics + * entity_addr_t::get_legacy_str() + strcmp(). + */ + if (ceph_addr_equal_no_type(&watchers[i].addr, + &locker->info.addr) && watchers[i].cookie == cookie) { struct rbd_client_id cid = { .gid = le64_to_cpu(watchers[i].name.num), @@ -4920,8 +4892,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; dout("setting size to %llu sectors", (unsigned long long)size); - set_capacity(rbd_dev->disk, size); - revalidate_disk_size(rbd_dev->disk, true); + set_capacity_and_notify(rbd_dev->disk, size); } } diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig index 4b6d3d816d1f..2ff05a0d2646 100644 --- a/drivers/block/rnbd/Kconfig +++ b/drivers/block/rnbd/Kconfig @@ -7,6 +7,7 @@ config BLK_DEV_RNBD_CLIENT tristate "RDMA Network Block Device driver client" depends on INFINIBAND_RTRS_CLIENT select BLK_DEV_RNBD + select SG_POOL help RNBD client is a network block device driver using rdma transport. diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README index 1773c0aa0bd4..080f58a5400a 100644 --- a/drivers/block/rnbd/README +++ b/drivers/block/rnbd/README @@ -90,3 +90,4 @@ Kleber Souza <kleber.souza@profitbricks.com> Lutz Pogrell <lutz.pogrell@cloud.ionos.com> Milind Dumbare <Milind.dumbare@gmail.com> Roman Penyaev <roman.penyaev@profitbricks.com> +Swapnil Ingle <ingleswapnil@gmail.com> diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 4f4474eecadb..d4aa6bfc9555 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -37,7 +37,6 @@ enum { }; static const unsigned int rnbd_opt_mandatory[] = { - RNBD_OPT_PATH, RNBD_OPT_DEV_PATH, RNBD_OPT_SESSNAME, }; @@ -433,8 +432,9 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because * of sysfs link already was removed already. */ - if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { + if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); + kfree(dev->blk_symlink_name); module_put(THIS_MODULE); } } @@ -451,9 +451,11 @@ static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) ret = kobject_init_and_add(&dev->kobj, &rnbd_dev_ktype, gd_kobj, "%s", "rnbd"); - if (ret) + if (ret) { rnbd_clt_err(dev, "Failed to create device sysfs dir, err: %d\n", ret); + kobject_put(&dev->kobj); + } return ret; } @@ -481,16 +483,27 @@ static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf, if (ret >= len) return -ENAMETOOLONG; + ret = snprintf(buf, len, "%s@%s", buf, dev->sess->sessname); + if (ret >= len) + return -ENAMETOOLONG; + return 0; } static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) { struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; - int ret; + int ret, len; + + len = strlen(dev->pathname) + strlen(dev->sess->sessname) + 2; + dev->blk_symlink_name = kzalloc(len, GFP_KERNEL); + if (!dev->blk_symlink_name) { + rnbd_clt_err(dev, "Failed to allocate memory for blk_symlink_name\n"); + return -ENOMEM; + } ret = rnbd_clt_get_path_name(dev, dev->blk_symlink_name, - sizeof(dev->blk_symlink_name)); + len); if (ret) { rnbd_clt_err(dev, "Failed to get /sys/block symlink path, err: %d\n", ret); @@ -508,7 +521,8 @@ static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) return 0; out_err: - dev->blk_symlink_name[0] = '\0'; + kfree(dev->blk_symlink_name); + dev->blk_symlink_name = NULL ; return ret; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8b2411ccbda9..45a470076652 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -59,6 +59,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) ida_simple_remove(&index_ida, dev->clt_device_id); mutex_unlock(&ida_lock); kfree(dev->hw_queues); + kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); mutex_destroy(&dev->lock); kfree(dev); @@ -87,6 +88,8 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); dev->secure_discard = le16_to_cpu(rsp->secure_discard); dev->rotational = rsp->rotational; + dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); + dev->fua = !!(rsp->cache_policy & RNBD_FUA); dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; dev->max_segments = BMAX_SEGMENTS; @@ -100,8 +103,7 @@ static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", dev->nsectors, new_nsectors); dev->nsectors = new_nsectors; - set_capacity(dev->gd, dev->nsectors); - revalidate_disk_size(dev->gd, true); + set_capacity_and_notify(dev->gd, dev->nsectors); return 0; } @@ -347,32 +349,48 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu; struct rtrs_permit *permit; + iu = kzalloc(sizeof(*iu), GFP_KERNEL); + if (!iu) { + return NULL; + } + permit = rnbd_get_permit(sess, con_type, wait ? RTRS_PERMIT_WAIT : RTRS_PERMIT_NOWAIT); - if (unlikely(!permit)) + if (unlikely(!permit)) { + kfree(iu); return NULL; - iu = rtrs_permit_to_pdu(permit); + } + iu->permit = permit; /* * 1st reference is dropped after finishing sending a "user" message, * 2nd reference is dropped after confirmation with the response is * returned. * 1st and 2nd can happen in any order, so the rnbd_iu should be - * released (rtrs_permit returned to ibbtrs) only leased after both + * released (rtrs_permit returned to rtrs) only after both * are finished. */ atomic_set(&iu->refcount, 2); init_waitqueue_head(&iu->comp.wait); iu->comp.errno = INT_MAX; + if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) { + rnbd_put_permit(sess, permit); + kfree(iu); + return NULL; + } + return iu; } static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { - if (atomic_dec_and_test(&iu->refcount)) + if (atomic_dec_and_test(&iu->refcount)) { + sg_free_table(&iu->sgt); rnbd_put_permit(sess, iu->permit); + kfree(iu); + } } static void rnbd_softirq_done_fn(struct request *rq) @@ -382,6 +400,7 @@ static void rnbd_softirq_done_fn(struct request *rq) struct rnbd_iu *iu; iu = blk_mq_rq_to_pdu(rq); + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(sess, iu->permit); blk_mq_end_request(rq, errno_to_blk_status(iu->errno)); } @@ -475,8 +494,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_mark_end(&iu->sglist[0]); - msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -562,7 +579,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); msg.access_mode = dev->access_mode; @@ -570,7 +587,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) WARN_ON(!rnbd_clt_get_dev(dev)); err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_open_conf, &errno, wait); if (err) { rnbd_clt_put_dev(dev); @@ -607,8 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - - sg_init_one(iu->sglist, rsp, sizeof(*rsp)); + sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); msg.ver = RNBD_PROTO_VER_MAJOR; @@ -624,7 +640,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) goto put_iu; } err = send_usr_msg(sess->rtrs, READ, iu, - &vec, sizeof(*rsp), iu->sglist, 1, + &vec, sizeof(*rsp), iu->sgt.sgl, 1, msg_sess_info_conf, &errno, wait); if (err) { rnbd_clt_put_sess(sess); @@ -634,7 +650,6 @@ put_iu: } else { err = errno; } - rnbd_put_iu(sess, iu); return err; } @@ -803,7 +818,7 @@ static struct rnbd_clt_session *alloc_sess(const char *sessname) rnbd_init_cpu_qlists(sess->cpu_queues); /* - * That is simple percpu variable which stores cpu indeces, which are + * That is simple percpu variable which stores cpu indices, which are * incremented on each access. We need that for the sake of fairness * to wake up queues in a round-robin manner. */ @@ -1014,11 +1029,10 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, * See queue limits. */ if (req_op(rq) != REQ_OP_DISCARD) - sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sglist); + sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl); if (sg_cnt == 0) - /* Do not forget to mark the end */ - sg_mark_end(&iu->sglist[0]); + sg_mark_end(&iu->sgt.sgl[0]); msg.hdr.type = cpu_to_le16(RNBD_MSG_IO); msg.device_id = cpu_to_le32(dev->device_id); @@ -1027,13 +1041,13 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev, .iov_base = &msg, .iov_len = sizeof(msg) }; - size = rnbd_clt_get_sg_size(iu->sglist, sg_cnt); + size = rnbd_clt_get_sg_size(iu->sgt.sgl, sg_cnt); req_ops = (struct rtrs_clt_req_ops) { .priv = iu, .conf_fn = msg_io_conf, }; err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit, - &vec, 1, size, iu->sglist, sg_cnt); + &vec, 1, size, iu->sgt.sgl, sg_cnt); if (unlikely(err)) { rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n", err); @@ -1120,6 +1134,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, struct rnbd_clt_dev *dev = rq->rq_disk->private_data; struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); int err; + blk_status_t ret = BLK_STS_IOERR; if (unlikely(dev->dev_state != DEV_STATE_MAPPED)) return BLK_STS_IOERR; @@ -1131,32 +1146,35 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_RESOURCE; } + iu->sgt.sgl = iu->first_sgl; + err = sg_alloc_table_chained(&iu->sgt, + /* Even-if the request has no segment, + * sglist must have one entry at least */ + blk_rq_nr_phys_segments(rq) ? : 1, + iu->sgt.sgl, + RNBD_INLINE_SG_CNT); + if (err) { + rnbd_clt_err_rl(dev, "sg_alloc_table_chained ret=%d\n", err); + rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); + rnbd_put_permit(dev->sess, iu->permit); + return BLK_STS_RESOURCE; + } + blk_mq_start_request(rq); err = rnbd_client_xfer_request(dev, rq, iu); if (likely(err == 0)) return BLK_STS_OK; if (unlikely(err == -EAGAIN || err == -ENOMEM)) { rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/); - rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_RESOURCE; + ret = BLK_STS_RESOURCE; } - + sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); rnbd_put_permit(dev->sess, iu->permit); - return BLK_STS_IOERR; -} - -static int rnbd_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq); - - sg_init_table(iu->sglist, BMAX_SEGMENTS); - return 0; + return ret; } static struct blk_mq_ops rnbd_mq_ops = { .queue_rq = rnbd_queue_rq, - .init_request = rnbd_init_request, .complete = rnbd_softirq_done_fn, }; @@ -1170,7 +1188,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_QUEUE_SHARED; - tag_set->cmd_size = sizeof(struct rnbd_iu); + tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; tag_set->nr_hw_queues = num_online_cpus(); return blk_mq_alloc_tag_set(tag_set); @@ -1193,6 +1211,12 @@ find_and_get_or_create_sess(const char *sessname, else if (!first) return sess; + if (!path_cnt) { + pr_err("Session %s not found, and path parameter not given", sessname); + err = -ENXIO; + goto put_sess; + } + rtrs_ops = (struct rtrs_clt_ops) { .priv = sess, .link_ev = rnbd_clt_link_ev, @@ -1202,7 +1226,7 @@ find_and_get_or_create_sess(const char *sessname, */ sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname, paths, path_cnt, port_nr, - sizeof(struct rnbd_iu), + 0, /* Do not use pdu of rtrs */ RECONNECT_DELAY, BMAX_SEGMENTS, BLK_MAX_SEGMENT_SIZE, MAX_RECONNECTS); @@ -1299,7 +1323,7 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_max_segments(dev->queue, dev->max_segments); blk_queue_io_opt(dev->queue, dev->sess->max_io_size); blk_queue_virt_boundary(dev->queue, SZ_4K - 1); - blk_queue_write_cache(dev->queue, true, true); + blk_queue_write_cache(dev->queue, dev->wc, dev->fua); dev->queue->queuedata = dev; } @@ -1381,10 +1405,16 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, pathname, sess->sessname, ret); goto out_queues; } + + dev->pathname = kstrdup(pathname, GFP_KERNEL); + if (!dev->pathname) { + ret = -ENOMEM; + goto out_queues; + } + dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; - strlcpy(dev->pathname, pathname, sizeof(dev->pathname)); mutex_init(&dev->lock); refcount_set(&dev->refcount, 1); dev->dev_state = DEV_STATE_INIT; @@ -1404,17 +1434,20 @@ out_alloc: return ERR_PTR(ret); } -static bool __exists_dev(const char *pathname) +static bool __exists_dev(const char *pathname, const char *sessname) { struct rnbd_clt_session *sess; struct rnbd_clt_dev *dev; bool found = false; list_for_each_entry(sess, &sess_list, list) { + if (sessname && strncmp(sess->sessname, sessname, + sizeof(sess->sessname))) + continue; mutex_lock(&sess->lock); list_for_each_entry(dev, &sess->devs_list, list) { - if (!strncmp(dev->pathname, pathname, - sizeof(dev->pathname))) { + if (strlen(dev->pathname) == strlen(pathname) && + !strcmp(dev->pathname, pathname)) { found = true; break; } @@ -1427,12 +1460,12 @@ static bool __exists_dev(const char *pathname) return found; } -static bool exists_devpath(const char *pathname) +static bool exists_devpath(const char *pathname, const char *sessname) { bool found; mutex_lock(&sess_lock); - found = __exists_dev(pathname); + found = __exists_dev(pathname, sessname); mutex_unlock(&sess_lock); return found; @@ -1445,7 +1478,7 @@ static bool insert_dev_if_not_exists_devpath(const char *pathname, bool found; mutex_lock(&sess_lock); - found = __exists_dev(pathname); + found = __exists_dev(pathname, sess->sessname); if (!found) { mutex_lock(&sess->lock); list_add_tail(&dev->list, &sess->devs_list); @@ -1475,7 +1508,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, struct rnbd_clt_dev *dev; int ret; - if (exists_devpath(pathname)) + if (unlikely(exists_devpath(pathname, sessname))) return ERR_PTR(-EEXIST); sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr); @@ -1513,13 +1546,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, } rnbd_clt_info(dev, - "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d)\n", + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n", dev->gd->disk_name, dev->nsectors, dev->logical_block_size, dev->physical_block_size, dev->max_write_same_sectors, dev->max_discard_sectors, dev->discard_granularity, dev->discard_alignment, dev->secure_discard, dev->max_segments, - dev->max_hw_sectors, dev->rotational); + dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua); mutex_unlock(&dev->lock); @@ -1651,7 +1684,7 @@ static void rnbd_destroy_sessions(void) /* * Here at this point there is no any concurrent access to sessions * list and devices list: - * 1. New session or device can'be be created - session sysfs files + * 1. New session or device can't be created - session sysfs files * are removed. * 2. Device or session can't be removed - module reference is taken * into account in unmap device sysfs callback. @@ -1664,7 +1697,8 @@ static void rnbd_destroy_sessions(void) */ list_for_each_entry_safe(sess, sn, &sess_list, list) { - WARN_ON(!rnbd_clt_get_sess(sess)); + if (!rnbd_clt_get_sess(sess)) + continue; close_rtrs(sess); list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index ed33654aa486..537d499dad3b 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -44,6 +44,13 @@ struct rnbd_iu_comp { int errno; }; +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define RNBD_INLINE_SG_CNT 0 +#else +#define RNBD_INLINE_SG_CNT 2 +#endif +#define RNBD_RDMA_SGL_SIZE (sizeof(struct scatterlist) * RNBD_INLINE_SG_CNT) + struct rnbd_iu { union { struct request *rq; /* for block io */ @@ -56,11 +63,12 @@ struct rnbd_iu { /* use to send msg associated with a sess */ struct rnbd_clt_session *sess; }; - struct scatterlist sglist[BMAX_SEGMENTS]; + struct sg_table sgt; struct work_struct work; int errno; struct rnbd_iu_comp comp; atomic_t refcount; + struct scatterlist first_sgl[]; /* must be the last one */ }; struct rnbd_cpu_qlist { @@ -108,10 +116,12 @@ struct rnbd_clt_dev { u32 clt_device_id; struct mutex lock; enum rnbd_clt_dev_state dev_state; - char pathname[NAME_MAX]; + char *pathname; enum rnbd_access_mode access_mode; bool read_only; bool rotational; + bool wc; + bool fua; u32 max_hw_sectors; u32 max_write_same_sectors; u32 max_discard_sectors; @@ -126,7 +136,7 @@ struct rnbd_clt_dev { struct list_head list; struct gendisk *gd; struct kobject kobj; - char blk_symlink_name[NAME_MAX]; + char *blk_symlink_name; refcount_t refcount; struct work_struct unmap_on_rmmod_work; }; diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index ca166241452c..c1bc5c0fef71 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -108,6 +108,11 @@ struct rnbd_msg_close { __le32 device_id; }; +enum rnbd_cache_policy { + RNBD_FUA = 1 << 0, + RNBD_WRITEBACK = 1 << 1, +}; + /** * struct rnbd_msg_open_rsp - response message to RNBD_MSG_OPEN * @hdr: message header @@ -124,6 +129,7 @@ struct rnbd_msg_close { * @max_segments: max segments hardware support in one transfer * @secure_discard: supports secure discard * @rotation: is a rotational disc? + * @cache_policy: support write-back caching or FUA? */ struct rnbd_msg_open_rsp { struct rnbd_msg_hdr hdr; @@ -139,7 +145,8 @@ struct rnbd_msg_open_rsp { __le16 max_segments; __le16 secure_discard; u8 rotational; - u8 reserved[11]; + u8 cache_policy; + u8 reserved[10]; }; /** diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c index 106775c074d1..05ffe488ddc6 100644 --- a/drivers/block/rnbd/rnbd-srv-sysfs.c +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -47,13 +47,17 @@ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype, rnbd_devs_kobj, dev_name); - if (ret) + if (ret) { + kobject_put(&dev->dev_kobj); return ret; + } dev->dev_sessions_kobj = kobject_create_and_add("sessions", &dev->dev_kobj); - if (!dev->dev_sessions_kobj) - goto put_dev_kobj; + if (!dev->dev_sessions_kobj) { + ret = -ENOMEM; + goto free_dev_kobj; + } bdev_kobj = &disk_to_dev(bdev->bd_disk)->kobj; ret = sysfs_create_link(&dev->dev_kobj, bdev_kobj, "block_dev"); @@ -64,7 +68,8 @@ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, put_sess_kobj: kobject_put(dev->dev_sessions_kobj); -put_dev_kobj: +free_dev_kobj: + kobject_del(&dev->dev_kobj); kobject_put(&dev->dev_kobj); return ret; } @@ -120,10 +125,46 @@ static ssize_t mapping_path_show(struct kobject *kobj, static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr = __ATTR_RO(mapping_path); +static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + + if (!sysfs_streq(buf, "1")) { + rnbd_srv_err(sess_dev, "%s: invalid value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + + rnbd_srv_info(sess_dev, "force close requested\n"); + + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess_dev->kobj, &attr->attr); + rnbd_srv_sess_dev_force_close(sess_dev); + + return count; +} + +static struct kobj_attribute rnbd_srv_dev_session_force_close_attr = + __ATTR(force_close, 0644, + rnbd_srv_dev_session_force_close_show, + rnbd_srv_dev_session_force_close_store); + static struct attribute *rnbd_srv_default_dev_sessions_attrs[] = { &rnbd_srv_dev_session_access_mode_attr.attr, &rnbd_srv_dev_session_ro_attr.attr, &rnbd_srv_dev_session_mapping_path_attr.attr, + &rnbd_srv_dev_session_force_close_attr.attr, NULL, }; @@ -145,7 +186,7 @@ static void rnbd_srv_sess_dev_release(struct kobject *kobj) struct rnbd_srv_sess_dev *sess_dev; sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); - rnbd_destroy_sess_dev(sess_dev); + rnbd_destroy_sess_dev(sess_dev, sess_dev->keep_id); } static struct kobj_type rnbd_srv_sess_dev_ktype = { @@ -160,18 +201,17 @@ int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev) ret = kobject_init_and_add(&sess_dev->kobj, &rnbd_srv_sess_dev_ktype, sess_dev->dev->dev_sessions_kobj, "%s", sess_dev->sess->sessname); - if (ret) + if (ret) { + kobject_put(&sess_dev->kobj); return ret; + } ret = sysfs_create_group(&sess_dev->kobj, &rnbd_srv_default_dev_session_attr_group); - if (ret) - goto err; - - return 0; - -err: - kobject_put(&sess_dev->kobj); + if (ret) { + kobject_del(&sess_dev->kobj); + kobject_put(&sess_dev->kobj); + } return ret; } diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index e1bc8b4cd592..a6a68d44f517 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -212,12 +212,20 @@ static void rnbd_put_srv_dev(struct rnbd_srv_dev *dev) kref_put(&dev->kref, destroy_device_cb); } -void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev) +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) { DECLARE_COMPLETION_ONSTACK(dc); - xa_erase(&sess_dev->sess->index_idr, sess_dev->device_id); + if (keep_id) + /* free the resources for the id but don't */ + /* allow to re-use the id itself because it */ + /* is still used by the client */ + xa_cmpxchg(&sess_dev->sess->index_idr, sess_dev->device_id, + sess_dev, NULL, 0); + else + xa_erase(&sess_dev->sess->index_idr, sess_dev->device_id); synchronize_rcu(); + sess_dev->destroy_comp = &dc; rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ @@ -328,6 +336,16 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, } } +void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) +{ + struct rnbd_srv_session *sess = sess_dev->sess; + + sess_dev->keep_id = true; + mutex_lock(&sess->lock); + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess->lock); +} + static int process_msg_close(struct rtrs_srv *rtrs, struct rnbd_srv_session *srv_sess, void *data, size_t datalen, const void *usr, @@ -534,6 +552,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, struct rnbd_srv_sess_dev *sess_dev) { struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev; + struct request_queue *q = bdev_get_queue(rnbd_dev->bdev); rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = @@ -558,8 +577,12 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); - rsp->rotational = - !blk_queue_nonrot(bdev_get_queue(rnbd_dev->bdev)); + rsp->rotational = !blk_queue_nonrot(q); + rsp->cache_policy = 0; + if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + rsp->cache_policy |= RNBD_WRITEBACK; + if (blk_queue_fua(q)) + rsp->cache_policy |= RNBD_FUA; } static struct rnbd_srv_sess_dev * diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index 5a8544b5e74f..b157371c25ed 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -56,6 +56,7 @@ struct rnbd_srv_sess_dev { struct rnbd_srv_dev *dev; struct kobject kobj; u32 device_id; + bool keep_id; fmode_t open_flags; struct kref kref; struct completion *destroy_comp; @@ -63,6 +64,7 @@ struct rnbd_srv_sess_dev { enum rnbd_access_mode access_mode; }; +void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev); /* rnbd-srv-sysfs.c */ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, @@ -73,6 +75,6 @@ int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); int rnbd_srv_create_sysfs_files(void); void rnbd_srv_destroy_sysfs_files(void); -void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev); +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id); #endif /* RNBD_SRV_H */ diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 52dd1efa00f9..cc6a0bc6c005 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -745,18 +745,6 @@ static const struct block_device_operations floppy_fops = { .check_events = floppy_check_events, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - struct swim_priv *swd = data; - int drive = (*part & 3); - - if (drive >= swd->floppy_count) - return NULL; - - *part = 0; - return get_disk_and_module(swd->unit[drive].disk); -} - static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) { struct floppy_state *fs = &swd->unit[swd->floppy_count]; @@ -846,9 +834,6 @@ static int swim_floppy_init(struct swim_priv *swd) add_disk(swd->unit[drive].disk); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, swd); - return 0; exit_put_disks: @@ -932,8 +917,6 @@ static int swim_remove(struct platform_device *dev) int drive; struct resource *res; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); blk_cleanup_queue(swd->unit[drive].disk->queue); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a314b9382442..145606dc52db 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -470,7 +470,7 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) cap_str_10, cap_str_2); - set_capacity_revalidate_and_notify(vblk->disk, capacity, true); + set_capacity_and_notify(vblk->disk, capacity); } static void virtblk_config_changed_work(struct work_struct *work) @@ -598,7 +598,6 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; blk_queue_write_cache(vblk->disk->queue, writeback, false); - revalidate_disk_size(vblk->disk, true); } static const char *const virtblk_cache_types[] = { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index a1b9df2c4ef1..b0c71d3a81a0 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -356,9 +356,7 @@ struct pending_req { }; -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 1ee9545ea2f1..e1c6798889f4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2154,7 +2154,7 @@ static void blkfront_closing(struct blkfront_info *info) } if (info->gd) - bdev = bdget_disk(info->gd, 0); + bdev = bdgrab(info->gd->part0); mutex_unlock(&info->mutex); @@ -2364,7 +2364,7 @@ static void blkfront_connect(struct blkfront_info *info) return; printk(KERN_INFO "Setting capacity to %Lu\n", sectors); - set_capacity_revalidate_and_notify(info->gd, sectors, true); + set_capacity_and_notify(info->gd, sectors); return; case BLKIF_STATE_SUSPENDED: @@ -2513,7 +2513,7 @@ static int blkfront_remove(struct xenbus_device *xbdev) disk = info->gd; if (disk) - bdev = bdget_disk(disk, 0); + bdev = bdgrab(disk->part0); info->xbdev = NULL; mutex_unlock(&info->mutex); @@ -2590,19 +2590,11 @@ out: static void blkif_release(struct gendisk *disk, fmode_t mode) { struct blkfront_info *info = disk->private_data; - struct block_device *bdev; struct xenbus_device *xbdev; mutex_lock(&blkfront_mutex); - - bdev = bdget_disk(disk, 0); - - if (!bdev) { - WARN(1, "Block device %s yanked out from us!\n", disk->disk_name); + if (disk->part0->bd_openers) goto out_mutex; - } - if (bdev->bd_openers) - goto out; /* * Check if we have been instructed to close. We will have @@ -2614,7 +2606,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (xbdev && xbdev->state == XenbusStateClosing) { /* pending switch to state closed */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); xenbus_frontend_closed(info->xbdev); } @@ -2623,14 +2615,12 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (!xbdev) { /* sudden device removal */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; free_info(info); } -out: - bdput(bdev); out_mutex: mutex_unlock(&blkfront_mutex); } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 0e734802ee7c..c1d20818e649 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -42,7 +42,6 @@ #include <linux/zorro.h> - #define Z2MINOR_COMBINED (0) #define Z2MINOR_Z2ONLY (1) #define Z2MINOR_CHIPONLY (2) @@ -50,28 +49,28 @@ #define Z2MINOR_MEMLIST2 (5) #define Z2MINOR_MEMLIST3 (6) #define Z2MINOR_MEMLIST4 (7) -#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ +#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) static DEFINE_MUTEX(z2ram_mutex); -static u_long *z2ram_map = NULL; -static u_long z2ram_size = 0; -static int z2_count = 0; -static int chip_count = 0; -static int list_count = 0; -static int current_device = -1; +static u_long *z2ram_map = NULL; +static u_long z2ram_size = 0; +static int z2_count = 0; +static int chip_count = 0; +static int list_count = 0; +static int current_device = -1; static DEFINE_SPINLOCK(z2ram_lock); -static struct gendisk *z2ram_gendisk; +static struct gendisk *z2ram_gendisk[Z2MINOR_COUNT]; static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; unsigned long start = blk_rq_pos(req) << 9; - unsigned long len = blk_rq_cur_bytes(req); + unsigned long len = blk_rq_cur_bytes(req); blk_mq_start_request(req); @@ -92,7 +91,7 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, if (len < size) size = len; - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; + addr += z2ram_map[start >> Z2RAM_CHUNKSHIFT]; if (rq_data_dir(req) == READ) memcpy(buffer, (char *)addr, size); else @@ -106,323 +105,319 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static void -get_z2ram( void ) +static void get_z2ram(void) { - int i; - - for ( i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++ ) - { - if ( test_bit( i, zorro_unused_z2ram ) ) - { - z2_count++; - z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) + - (i << Z2RAM_CHUNKSHIFT); - clear_bit( i, zorro_unused_z2ram ); + int i; + + for (i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++) { + if (test_bit(i, zorro_unused_z2ram)) { + z2_count++; + z2ram_map[z2ram_size++] = + (unsigned long)ZTWO_VADDR(Z2RAM_START) + + (i << Z2RAM_CHUNKSHIFT); + clear_bit(i, zorro_unused_z2ram); + } } - } - return; + return; } -static void -get_chipram( void ) +static void get_chipram(void) { - while ( amiga_chip_avail() > ( Z2RAM_CHUNKSIZE * 4 ) ) - { - chip_count++; - z2ram_map[ z2ram_size ] = - (u_long)amiga_chip_alloc( Z2RAM_CHUNKSIZE, "z2ram" ); + while (amiga_chip_avail() > (Z2RAM_CHUNKSIZE * 4)) { + chip_count++; + z2ram_map[z2ram_size] = + (u_long) amiga_chip_alloc(Z2RAM_CHUNKSIZE, "z2ram"); + + if (z2ram_map[z2ram_size] == 0) { + break; + } - if ( z2ram_map[ z2ram_size ] == 0 ) - { - break; + z2ram_size++; } - z2ram_size++; - } - - return; + return; } static int z2_open(struct block_device *bdev, fmode_t mode) { - int device; - int max_z2_map = ( Z2RAM_SIZE / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int max_chip_map = ( amiga_chip_size / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int rc = -ENOMEM; - - device = MINOR(bdev->bd_dev); - - mutex_lock(&z2ram_mutex); - if ( current_device != -1 && current_device != device ) - { - rc = -EBUSY; - goto err_out; - } - - if ( current_device == -1 ) - { - z2_count = 0; - chip_count = 0; - list_count = 0; - z2ram_size = 0; - - /* Use a specific list entry. */ - if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { - int index = device - Z2MINOR_MEMLIST1 + 1; - unsigned long size, paddr, vaddr; - - if (index >= m68k_realnum_memory) { - printk( KERN_ERR DEVICE_NAME - ": no such entry in z2ram_map\n" ); - goto err_out; - } + int device; + int max_z2_map = (Z2RAM_SIZE / Z2RAM_CHUNKSIZE) * sizeof(z2ram_map[0]); + int max_chip_map = (amiga_chip_size / Z2RAM_CHUNKSIZE) * + sizeof(z2ram_map[0]); + int rc = -ENOMEM; - paddr = m68k_memory[index].addr; - size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE-1); + device = MINOR(bdev->bd_dev); -#ifdef __powerpc__ - /* FIXME: ioremap doesn't build correct memory tables. */ - { - vfree(vmalloc (size)); - } - - vaddr = (unsigned long)ioremap_wt(paddr, size); - -#else - vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size); -#endif - z2ram_map = - kmalloc_array(size / Z2RAM_CHUNKSIZE, - sizeof(z2ram_map[0]), - GFP_KERNEL); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } + mutex_lock(&z2ram_mutex); + if (current_device != -1 && current_device != device) { + rc = -EBUSY; + goto err_out; + } - while (size) { - z2ram_map[ z2ram_size++ ] = vaddr; - size -= Z2RAM_CHUNKSIZE; - vaddr += Z2RAM_CHUNKSIZE; - list_count++; - } + if (current_device == -1) { + z2_count = 0; + chip_count = 0; + list_count = 0; + z2ram_size = 0; - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK List Entry %d Memory\n", - list_count * Z2RAM_CHUNK1024, index ); - } else - - switch ( device ) - { - case Z2MINOR_COMBINED: - - z2ram_map = kmalloc( max_z2_map + max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } + /* Use a specific list entry. */ + if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { + int index = device - Z2MINOR_MEMLIST1 + 1; + unsigned long size, paddr, vaddr; - get_z2ram(); - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", - z2_count * Z2RAM_CHUNK1024, - chip_count * Z2RAM_CHUNK1024, - ( z2_count + chip_count ) * Z2RAM_CHUNK1024 ); - - break; - - case Z2MINOR_Z2ONLY: - z2ram_map = kmalloc( max_z2_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } + if (index >= m68k_realnum_memory) { + printk(KERN_ERR DEVICE_NAME + ": no such entry in z2ram_map\n"); + goto err_out; + } - get_z2ram(); + paddr = m68k_memory[index].addr; + size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE - 1); - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK of Zorro II RAM\n", - z2_count * Z2RAM_CHUNK1024 ); +#ifdef __powerpc__ + /* FIXME: ioremap doesn't build correct memory tables. */ + { + vfree(vmalloc(size)); + } - break; + vaddr = (unsigned long)ioremap_wt(paddr, size); - case Z2MINOR_CHIPONLY: - z2ram_map = kmalloc( max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; +#else + vaddr = + (unsigned long)z_remap_nocache_nonser(paddr, size); +#endif + z2ram_map = + kmalloc_array(size / Z2RAM_CHUNKSIZE, + sizeof(z2ram_map[0]), GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + while (size) { + z2ram_map[z2ram_size++] = vaddr; + size -= Z2RAM_CHUNKSIZE; + vaddr += Z2RAM_CHUNKSIZE; + list_count++; + } + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK List Entry %d Memory\n", + list_count * Z2RAM_CHUNK1024, index); + } else + switch (device) { + case Z2MINOR_COMBINED: + + z2ram_map = + kmalloc(max_z2_map + max_chip_map, + GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", + z2_count * Z2RAM_CHUNK1024, + chip_count * Z2RAM_CHUNK1024, + (z2_count + + chip_count) * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_Z2ONLY: + z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK of Zorro II RAM\n", + z2_count * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_CHIPONLY: + z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Chip RAM\n", + chip_count * Z2RAM_CHUNK1024); + + break; + + default: + rc = -ENODEV; + goto err_out; + + break; + } + + if (z2ram_size == 0) { + printk(KERN_NOTICE DEVICE_NAME + ": no unused ZII/Chip RAM found\n"); + goto err_out_kfree; } - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Chip RAM\n", - chip_count * Z2RAM_CHUNK1024 ); - - break; - - default: - rc = -ENODEV; - goto err_out; - - break; - } - - if ( z2ram_size == 0 ) - { - printk( KERN_NOTICE DEVICE_NAME - ": no unused ZII/Chip RAM found\n" ); - goto err_out_kfree; + current_device = device; + z2ram_size <<= Z2RAM_CHUNKSHIFT; + set_capacity(z2ram_gendisk[device], z2ram_size >> 9); } - current_device = device; - z2ram_size <<= Z2RAM_CHUNKSHIFT; - set_capacity(z2ram_gendisk, z2ram_size >> 9); - } - - mutex_unlock(&z2ram_mutex); - return 0; + mutex_unlock(&z2ram_mutex); + return 0; err_out_kfree: - kfree(z2ram_map); + kfree(z2ram_map); err_out: - mutex_unlock(&z2ram_mutex); - return rc; + mutex_unlock(&z2ram_mutex); + return rc; } -static void -z2_release(struct gendisk *disk, fmode_t mode) +static void z2_release(struct gendisk *disk, fmode_t mode) { - mutex_lock(&z2ram_mutex); - if ( current_device == -1 ) { - mutex_unlock(&z2ram_mutex); - return; - } - mutex_unlock(&z2ram_mutex); - /* - * FIXME: unmap memory - */ + mutex_lock(&z2ram_mutex); + if (current_device == -1) { + mutex_unlock(&z2ram_mutex); + return; + } + mutex_unlock(&z2ram_mutex); + /* + * FIXME: unmap memory + */ } -static const struct block_device_operations z2_fops = -{ - .owner = THIS_MODULE, - .open = z2_open, - .release = z2_release, +static const struct block_device_operations z2_fops = { + .owner = THIS_MODULE, + .open = z2_open, + .release = z2_release, }; -static struct kobject *z2_find(dev_t dev, int *part, void *data) -{ - *part = 0; - return get_disk_and_module(z2ram_gendisk); -} - -static struct request_queue *z2_queue; static struct blk_mq_tag_set tag_set; static const struct blk_mq_ops z2_mq_ops = { - .queue_rq = z2_queue_rq, + .queue_rq = z2_queue_rq, }; -static int __init -z2_init(void) +static int z2ram_register_disk(int minor) { - int ret; - - if (!MACH_IS_AMIGA) - return -ENODEV; - - ret = -EBUSY; - if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) - goto err; - - ret = -ENOMEM; - z2ram_gendisk = alloc_disk(1); - if (!z2ram_gendisk) - goto out_disk; - - z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(z2_queue)) { - ret = PTR_ERR(z2_queue); - z2_queue = NULL; - goto out_queue; - } - - z2ram_gendisk->major = Z2RAM_MAJOR; - z2ram_gendisk->first_minor = 0; - z2ram_gendisk->fops = &z2_fops; - sprintf(z2ram_gendisk->disk_name, "z2ram"); - - z2ram_gendisk->queue = z2_queue; - add_disk(z2ram_gendisk); - blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE, - z2_find, NULL, NULL); - - return 0; - -out_queue: - put_disk(z2ram_gendisk); -out_disk: - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); -err: - return ret; + struct request_queue *q; + struct gendisk *disk; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + q = blk_mq_init_queue(&tag_set); + if (IS_ERR(q)) { + put_disk(disk); + return PTR_ERR(q); + } + + disk->major = Z2RAM_MAJOR; + disk->first_minor = minor; + disk->fops = &z2_fops; + if (minor) + sprintf(disk->disk_name, "z2ram%d", minor); + else + sprintf(disk->disk_name, "z2ram"); + disk->queue = q; + + z2ram_gendisk[minor] = disk; + add_disk(disk); + return 0; } -static void __exit z2_exit(void) +static int __init z2_init(void) { - int i, j; - blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT); - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); - del_gendisk(z2ram_gendisk); - put_disk(z2ram_gendisk); - blk_cleanup_queue(z2_queue); - blk_mq_free_tag_set(&tag_set); - - if ( current_device != -1 ) - { - i = 0; - - for ( j = 0 ; j < z2_count; j++ ) - { - set_bit( i++, zorro_unused_z2ram ); + int ret, i; + + if (!MACH_IS_AMIGA) + return -ENODEV; + + if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) + return -EBUSY; + + tag_set.ops = &z2_mq_ops; + tag_set.nr_hw_queues = 1; + tag_set.nr_maps = 1; + tag_set.queue_depth = 16; + tag_set.numa_node = NUMA_NO_NODE; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&tag_set); + if (ret) + goto out_unregister_blkdev; + + for (i = 0; i < Z2MINOR_COUNT; i++) { + ret = z2ram_register_disk(i); + if (ret && i == 0) + goto out_free_tagset; } - for ( j = 0 ; j < chip_count; j++ ) - { - if ( z2ram_map[ i ] ) - { - amiga_chip_free( (void *) z2ram_map[ i++ ] ); - } + return 0; + +out_free_tagset: + blk_mq_free_tag_set(&tag_set); +out_unregister_blkdev: + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); + return ret; +} + +static void __exit z2_exit(void) +{ + int i, j; + + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); + + for (i = 0; i < Z2MINOR_COUNT; i++) { + del_gendisk(z2ram_gendisk[i]); + blk_cleanup_queue(z2ram_gendisk[i]->queue); + put_disk(z2ram_gendisk[i]); } + blk_mq_free_tag_set(&tag_set); + + if (current_device != -1) { + i = 0; - if ( z2ram_map != NULL ) - { - kfree( z2ram_map ); + for (j = 0; j < z2_count; j++) { + set_bit(i++, zorro_unused_z2ram); + } + + for (j = 0; j < chip_count; j++) { + if (z2ram_map[i]) { + amiga_chip_free((void *)z2ram_map[i++]); + } + } + + if (z2ram_map != NULL) { + kfree(z2ram_map); + } } - } - return; -} + return; +} module_init(z2_init); module_exit(z2_exit); diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index fe7a4b7d30cf..668c6bf2554d 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -2,7 +2,7 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO - select CRYPTO_LZO + depends on CRYPTO_LZO || CRYPTO_ZSTD || CRYPTO_LZ4 || CRYPTO_LZ4HC || CRYPTO_842 help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). Pages written to these disks are compressed and stored in memory @@ -14,6 +14,46 @@ config ZRAM See Documentation/admin-guide/blockdev/zram.rst for more information. +choice + prompt "Default zram compressor" + default ZRAM_DEF_COMP_LZORLE + depends on ZRAM + +config ZRAM_DEF_COMP_LZORLE + bool "lzo-rle" + depends on CRYPTO_LZO + +config ZRAM_DEF_COMP_ZSTD + bool "zstd" + depends on CRYPTO_ZSTD + +config ZRAM_DEF_COMP_LZ4 + bool "lz4" + depends on CRYPTO_LZ4 + +config ZRAM_DEF_COMP_LZO + bool "lzo" + depends on CRYPTO_LZO + +config ZRAM_DEF_COMP_LZ4HC + bool "lz4hc" + depends on CRYPTO_LZ4HC + +config ZRAM_DEF_COMP_842 + bool "842" + depends on CRYPTO_842 + +endchoice + +config ZRAM_DEF_COMP + string + default "lzo-rle" if ZRAM_DEF_COMP_LZORLE + default "zstd" if ZRAM_DEF_COMP_ZSTD + default "lz4" if ZRAM_DEF_COMP_LZ4 + default "lzo" if ZRAM_DEF_COMP_LZO + default "lz4hc" if ZRAM_DEF_COMP_LZ4HC + default "842" if ZRAM_DEF_COMP_842 + config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" depends on ZRAM diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 33e3b76c4fa9..052aa3f65514 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -15,8 +15,10 @@ #include "zcomp.h" static const char * const backends[] = { +#if IS_ENABLED(CONFIG_CRYPTO_LZO) "lzo", "lzo-rle", +#endif #if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1b697208d661..e2933cb7a82a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -42,7 +42,7 @@ static DEFINE_IDR(zram_index_idr); static DEFINE_MUTEX(zram_index_mutex); static int zram_major; -static const char *default_compressor = "lzo-rle"; +static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -403,13 +403,10 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - if (zram->old_block_size) - set_blocksize(bdev, zram->old_block_size); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->old_block_size = 0; zram->bdev = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); @@ -454,7 +451,7 @@ static ssize_t backing_dev_store(struct device *dev, struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; - unsigned int bitmap_sz, old_block_size = 0; + unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; struct block_device *bdev = NULL; int err; @@ -509,14 +506,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - old_block_size = block_size(bdev); - err = set_blocksize(bdev, PAGE_SIZE); - if (err) - goto out; - reset_bdev(zram); - zram->old_block_size = old_block_size; zram->bdev = bdev; zram->backing_dev = backing_dev; zram->bitmap = bitmap; @@ -620,15 +611,19 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } +#define PAGE_WB_SIG "page_index=" + +#define PAGE_WRITEBACK 0 #define HUGE_WRITEBACK 1 #define IDLE_WRITEBACK 2 + static ssize_t writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - unsigned long index; + unsigned long index = 0; struct bio bio; struct bio_vec bio_vec; struct page *page; @@ -640,8 +635,17 @@ static ssize_t writeback_store(struct device *dev, mode = IDLE_WRITEBACK; else if (sysfs_streq(buf, "huge")) mode = HUGE_WRITEBACK; - else - return -EINVAL; + else { + if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) + return -EINVAL; + + ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index); + if (ret || index >= nr_pages) + return -EINVAL; + + nr_pages = 1; + mode = PAGE_WRITEBACK; + } down_read(&zram->init_lock); if (!init_done(zram)) { @@ -660,7 +664,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - for (index = 0; index < nr_pages; index++) { + while (nr_pages--) { struct bio_vec bvec; bvec.bv_page = page; @@ -1071,7 +1075,7 @@ static ssize_t mm_stat_show(struct device *dev, max_used = atomic_long_read(&zram->stats.max_used_pages); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n", + "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), mem_used << PAGE_SHIFT, @@ -1079,7 +1083,8 @@ static ssize_t mm_stat_show(struct device *dev, max_used << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.same_pages), pool_stats.pages_compacted, - (u64)atomic64_read(&zram->stats.huge_pages)); + (u64)atomic64_read(&zram->stats.huge_pages), + (u64)atomic64_read(&zram->stats.huge_pages_since)); up_read(&zram->init_lock); return ret; @@ -1411,6 +1416,7 @@ out: if (comp_len == PAGE_SIZE) { zram_set_flag(zram, index, ZRAM_HUGE); atomic64_inc(&zram->stats.huge_pages); + atomic64_inc(&zram->stats.huge_pages_since); } if (flags) { @@ -1695,8 +1701,8 @@ static void zram_reset_device(struct zram *zram) disksize = zram->disksize; zram->disksize = 0; - set_capacity(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); + set_capacity_and_notify(zram->disk, 0); + part_stat_set_all(zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ @@ -1741,9 +1747,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - revalidate_disk_size(zram->disk, true); + set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; @@ -1771,15 +1775,12 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); /* Do not reset an active device or claimed device */ if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -1790,8 +1791,6 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk_size(zram->disk, true); - bdput(bdev); mutex_lock(&bdev->bd_mutex); zram->claim = false; @@ -1977,16 +1976,11 @@ out_free_dev: static int zram_remove(struct zram *zram) { - struct block_device *bdev; - - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + struct block_device *bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -1998,7 +1992,6 @@ static int zram_remove(struct zram *zram) /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - bdput(bdev); pr_info("Removed device: %s\n", zram->disk->disk_name); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index f2fd46daa760..419a7e8281ee 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -78,6 +78,7 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t same_pages; /* no. of same element filled pages */ atomic64_t huge_pages; /* no. of huge pages */ + atomic64_t huge_pages_since; /* no. of huge pages since zram set up */ atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. of write slow paths */ @@ -118,7 +119,6 @@ struct zram { bool wb_limit_enable; u64 bd_wb_limit; struct block_device *bdev; - unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; #endif |