diff options
Diffstat (limited to 'drivers/md')
47 files changed, 862 insertions, 238 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index f45fb372e51b..b5ea378e66cb 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -610,6 +610,7 @@ config DM_INTEGRITY select CRYPTO select CRYPTO_SKCIPHER select ASYNC_XOR + select DM_AUDIT if AUDIT help This device-mapper target emulates a block device that has additional per-sector tags that can be used for storing @@ -642,4 +643,13 @@ config DM_ZONED If unsure, say N. +config DM_AUDIT + bool "DM audit events" + depends on AUDIT + help + Generate audit events for device-mapper. + + Enables audit logging of several security relevant events in the + particular device-mapper targets, especially the integrity target. + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 816945eeed7f..0454b0885b01 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -107,3 +107,7 @@ endif ifeq ($(CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG),y) dm-verity-objs += dm-verity-verify-sig.o endif + +ifeq ($(CONFIG_DM_AUDIT),y) +dm-mod-objs += dm-audit.o +endif diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 5fc989a6d452..9ed9c955add7 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ -#include <linux/bcache.h> #include <linux/bio.h> #include <linux/kobject.h> #include <linux/list.h> @@ -190,6 +189,7 @@ #include <linux/workqueue.h> #include <linux/kthread.h> +#include "bcache_ondisk.h" #include "bset.h" #include "util.h" #include "closure.h" @@ -395,8 +395,6 @@ struct cached_dev { atomic_t io_errors; unsigned int error_limit; unsigned int offline_seconds; - - char backing_dev_name[BDEVNAME_SIZE]; }; enum alloc_reserve { @@ -470,8 +468,6 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; - - char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h new file mode 100644 index 000000000000..97413586195b --- /dev/null +++ b/drivers/md/bcache/bcache_ondisk.h @@ -0,0 +1,445 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_BCACHE_H +#define _LINUX_BCACHE_H + +/* + * Bcache on disk data structures + */ + +#include <linux/types.h> + +#define BITMASK(name, type, field, offset, size) \ +static inline __u64 name(const type *k) \ +{ return (k->field >> offset) & ~(~0ULL << size); } \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + k->field &= ~(~(~0ULL << size) << offset); \ + k->field |= (v & ~(~0ULL << size)) << offset; \ +} + +/* Btree keys - all units are in sectors */ + +struct bkey { + __u64 high; + __u64 low; + __u64 ptr[]; +}; + +#define KEY_FIELD(name, field, offset, size) \ + BITMASK(name, struct bkey, field, offset, size) + +#define PTR_FIELD(name, offset, size) \ +static inline __u64 name(const struct bkey *k, unsigned int i) \ +{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ + \ +static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ +{ \ + k->ptr[i] &= ~(~(~0ULL << size) << offset); \ + k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ +} + +#define KEY_SIZE_BITS 16 +#define KEY_MAX_U64S 8 + +KEY_FIELD(KEY_PTRS, high, 60, 3) +KEY_FIELD(__PAD0, high, 58, 2) +KEY_FIELD(KEY_CSUM, high, 56, 2) +KEY_FIELD(__PAD1, high, 55, 1) +KEY_FIELD(KEY_DIRTY, high, 36, 1) + +KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) +KEY_FIELD(KEY_INODE, high, 0, 20) + +/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ + +static inline __u64 KEY_OFFSET(const struct bkey *k) +{ + return k->low; +} + +static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) +{ + k->low = v; +} + +/* + * The high bit being set is a relic from when we used it to do binary + * searches - it told you where a key started. It's not used anymore, + * and can probably be safely dropped. + */ +#define KEY(inode, offset, size) \ +((struct bkey) { \ + .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ + .low = (offset) \ +}) + +#define ZERO_KEY KEY(0, 0, 0) + +#define MAX_KEY_INODE (~(~0 << 20)) +#define MAX_KEY_OFFSET (~0ULL >> 1) +#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) + +#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) +#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) + +#define PTR_DEV_BITS 12 + +PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) +PTR_FIELD(PTR_OFFSET, 8, 43) +PTR_FIELD(PTR_GEN, 0, 8) + +#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) + +#define MAKE_PTR(gen, offset, dev) \ + ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) + +/* Bkey utility code */ + +static inline unsigned long bkey_u64s(const struct bkey *k) +{ + return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); +} + +static inline unsigned long bkey_bytes(const struct bkey *k) +{ + return bkey_u64s(k) * sizeof(__u64); +} + +#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) + +static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) +{ + SET_KEY_INODE(dest, KEY_INODE(src)); + SET_KEY_OFFSET(dest, KEY_OFFSET(src)); +} + +static inline struct bkey *bkey_next(const struct bkey *k) +{ + __u64 *d = (void *) k; + + return (struct bkey *) (d + bkey_u64s(k)); +} + +static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys) +{ + __u64 *d = (void *) k; + + return (struct bkey *) (d + nr_keys); +} +/* Enough for a key with 6 pointers */ +#define BKEY_PAD 8 + +#define BKEY_PADDED(key) \ + union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } + +/* Superblock */ + +/* Version 0: Cache device + * Version 1: Backing device + * Version 2: Seed pointer into btree node checksum + * Version 3: Cache device with new UUID format + * Version 4: Backing device with data offset + */ +#define BCACHE_SB_VERSION_CDEV 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5 +#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6 +#define BCACHE_SB_MAX_VERSION 6 + +#define SB_SECTOR 8 +#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT) +#define SB_SIZE 4096 +#define SB_LABEL_SIZE 32 +#define SB_JOURNAL_BUCKETS 256U +/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ +#define MAX_CACHES_PER_SET 8 + +#define BDEV_DATA_START_DEFAULT 16 /* sectors */ + +struct cache_sb_disk { + __le64 csum; + __le64 offset; /* sector where this sb was written */ + __le64 version; + + __u8 magic[16]; + + __u8 uuid[16]; + union { + __u8 set_uuid[16]; + __le64 set_magic; + }; + __u8 label[SB_LABEL_SIZE]; + + __le64 flags; + __le64 seq; + + __le64 feature_compat; + __le64 feature_incompat; + __le64 feature_ro_compat; + + __le64 pad[5]; + + union { + struct { + /* Cache devices */ + __le64 nbuckets; /* device size */ + + __le16 block_size; /* sectors */ + __le16 bucket_size; /* sectors */ + + __le16 nr_in_set; + __le16 nr_this_dev; + }; + struct { + /* Backing devices */ + __le64 data_offset; + + /* + * block_size from the cache device section is still used by + * backing devices, so don't add anything here until we fix + * things to not need it for backing devices anymore + */ + }; + }; + + __le32 last_mount; /* time overflow in y2106 */ + + __le16 first_bucket; + union { + __le16 njournal_buckets; + __le16 keys; + }; + __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ + __le16 obso_bucket_size_hi; /* obsoleted */ +}; + +/* + * This is for in-memory bcache super block. + * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member + * size, ordering and even whole struct size may be different + * from cache_sb_disk. + */ +struct cache_sb { + __u64 offset; /* sector where this sb was written */ + __u64 version; + + __u8 magic[16]; + + __u8 uuid[16]; + union { + __u8 set_uuid[16]; + __u64 set_magic; + }; + __u8 label[SB_LABEL_SIZE]; + + __u64 flags; + __u64 seq; + + __u64 feature_compat; + __u64 feature_incompat; + __u64 feature_ro_compat; + + union { + struct { + /* Cache devices */ + __u64 nbuckets; /* device size */ + + __u16 block_size; /* sectors */ + __u16 nr_in_set; + __u16 nr_this_dev; + __u32 bucket_size; /* sectors */ + }; + struct { + /* Backing devices */ + __u64 data_offset; + + /* + * block_size from the cache device section is still used by + * backing devices, so don't add anything here until we fix + * things to not need it for backing devices anymore + */ + }; + }; + + __u32 last_mount; /* time overflow in y2106 */ + + __u16 first_bucket; + union { + __u16 njournal_buckets; + __u16 keys; + }; + __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +}; + +static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) +{ + return sb->version == BCACHE_SB_VERSION_BDEV + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES; +} + +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U + +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +/* + * Magic numbers + * + * The various other data structures have their own magic numbers, which are + * xored with the first part of the cache set's UUID + */ + +#define JSET_MAGIC 0x245235c1a3625032ULL +#define PSET_MAGIC 0x6750e15f87337f91ULL +#define BSET_MAGIC 0x90135c78b99e07f5ULL + +static inline __u64 jset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ JSET_MAGIC; +} + +static inline __u64 pset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ PSET_MAGIC; +} + +static inline __u64 bset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ BSET_MAGIC; +} + +/* + * Journal + * + * On disk format for a journal entry: + * seq is monotonically increasing; every journal entry has its own unique + * sequence number. + * + * last_seq is the oldest journal entry that still has keys the btree hasn't + * flushed to disk yet. + * + * version is for on disk format changes. + */ + +#define BCACHE_JSET_VERSION_UUIDv1 1 +#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ +#define BCACHE_JSET_VERSION 1 + +struct jset { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 keys; + + __u64 last_seq; + + BKEY_PADDED(uuid_bucket); + BKEY_PADDED(btree_root); + __u16 btree_level; + __u16 pad[3]; + + __u64 prio_bucket[MAX_CACHES_PER_SET]; + + union { + struct bkey start[0]; + __u64 d[0]; + }; +}; + +/* Bucket prios/gens */ + +struct prio_set { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 pad; + + __u64 next_bucket; + + struct bucket_disk { + __u16 prio; + __u8 gen; + } __attribute((packed)) data[]; +}; + +/* UUIDS - per backing device/flash only volume metadata */ + +struct uuid_entry { + union { + struct { + __u8 uuid[16]; + __u8 label[32]; + __u32 first_reg; /* time overflow in y2106 */ + __u32 last_reg; + __u32 invalidated; + + __u32 flags; + /* Size of flash only volumes */ + __u64 sectors; + }; + + __u8 pad[128]; + }; +}; + +BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); + +/* Btree nodes */ + +/* Version 1: Seed pointer into btree node checksum + */ +#define BCACHE_BSET_CSUM 1 +#define BCACHE_BSET_VERSION 1 + +/* + * Btree nodes + * + * On disk a btree node is a list/log of these; within each set the keys are + * sorted + */ +struct bset { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 keys; + + union { + struct bkey start[0]; + __u64 d[0]; + }; +}; + +/* OBSOLETE */ + +/* UUIDS - per backing device/flash only volume metadata */ + +struct uuid_entry_v0 { + __u8 uuid[16]; + __u8 label[32]; + __u32 first_reg; + __u32 last_reg; + __u32 invalidated; + __u32 pad; +}; + +#endif /* _LINUX_BCACHE_H */ diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index a50dcfda656f..d795c84246b0 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -2,10 +2,10 @@ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H -#include <linux/bcache.h> #include <linux/kernel.h> #include <linux/types.h> +#include "bcache_ondisk.h" #include "util.h" /* for time_stats */ /* diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 0595559de174..93b67b8d31c3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -141,7 +141,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) uint64_t crc = b->key.ptr[0]; void *data = (void *) i + 8, *end = bset_bkey_last(i); - crc = bch_crc64_update(crc, data, end - data); + crc = crc64_be(crc, data, end - data); return crc ^ 0xffffffffffffffffULL; } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 116edda845c3..6230dfdd9286 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -127,21 +127,20 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) citer.bi_size = UINT_MAX; bio_for_each_segment(bv, bio, iter) { - void *p1 = kmap_atomic(bv.bv_page); + void *p1 = bvec_kmap_local(&bv); void *p2; cbv = bio_iter_iovec(check, citer); - p2 = page_address(cbv.bv_page); + p2 = bvec_kmap_local(&cbv); - cache_set_err_on(memcmp(p1 + bv.bv_offset, - p2 + bv.bv_offset, - bv.bv_len), + cache_set_err_on(memcmp(p1, p2, bv.bv_len), dc->disk.c, - "verify failed at dev %s sector %llu", - dc->backing_dev_name, + "verify failed at dev %pg sector %llu", + dc->bdev, (uint64_t) bio->bi_iter.bi_sector); - kunmap_atomic(p1); + kunmap_local(p2); + kunmap_local(p1); bio_advance_iter(check, &citer, bv.bv_len); } diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6d2b7b84a7b7..634922c5601d 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -6,7 +6,7 @@ * Copyright 2020 Coly Li <colyli@suse.de> * */ -#include <linux/bcache.h> +#include "bcache_ondisk.h" #include "bcache.h" #include "features.h" diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index d1c8fd3977fc..09161b89c63e 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -2,10 +2,11 @@ #ifndef _BCACHE_FEATURES_H #define _BCACHE_FEATURES_H -#include <linux/bcache.h> #include <linux/kernel.h> #include <linux/types.h> +#include "bcache_ondisk.h" + #define BCH_FEATURE_COMPAT 0 #define BCH_FEATURE_RO_COMPAT 1 #define BCH_FEATURE_INCOMPAT 2 diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index e4388fe3ab7e..9c6f9ec55b72 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -65,15 +65,15 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. */ if (bio->bi_opf & REQ_RAHEAD) { - pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", - dc->backing_dev_name); + pr_warn_ratelimited("%pg: Read-ahead I/O failed on backing device, ignore\n", + dc->bdev); return; } errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) - pr_err("%s: IO error on backing device, unrecoverable\n", - dc->backing_dev_name); + pr_err("%pg: IO error on backing device, unrecoverable\n", + dc->bdev); else bch_cached_dev_error(dc); } @@ -123,13 +123,13 @@ void bch_count_io_errors(struct cache *ca, errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) - pr_err("%s: IO error on %s%s\n", - ca->cache_dev_name, m, + pr_err("%pg: IO error on %s%s\n", + ca->bdev, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, - "%s: too many IO errors %s\n", - ca->cache_dev_name, m); + "%pg: too many IO errors %s\n", + ca->bdev, m); } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 23b28edae90f..d15aae6c51c1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -46,7 +46,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) bio_for_each_segment(bv, bio, iter) { void *d = kmap(bv.bv_page) + bv.bv_offset; - csum = bch_crc64_update(csum, d, bv.bv_len); + csum = crc64_be(csum, d, bv.bv_len); kunmap(bv.bv_page); } @@ -651,8 +651,8 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - pr_err("Can't flush %s: returned bi_status %i\n", - dc->backing_dev_name, bio->bi_status); + pr_err("Can't flush %pg: returned bi_status %i\n", + dc->bdev, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ s->iop.status = bio->bi_status; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f2874c77ff79..4a9a65dff95e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1002,7 +1002,7 @@ static void calc_cached_dev_sectors(struct cache_set *c) struct cached_dev *dc; list_for_each_entry(dc, &c->cached_devs, list) - sectors += bdev_sectors(dc->bdev); + sectors += bdev_nr_sectors(dc->bdev); c->cached_dev_sectors = sectors; } @@ -1026,8 +1026,8 @@ static int cached_dev_status_update(void *arg) dc->offline_seconds = 0; if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { - pr_err("%s: device offline for %d seconds\n", - dc->backing_dev_name, + pr_err("%pg: device offline for %d seconds\n", + dc->bdev, BACKING_DEV_OFFLINE_TIMEOUT); pr_err("%s: disable I/O request due to backing device offline\n", dc->disk.name); @@ -1058,15 +1058,13 @@ int bch_cached_dev_run(struct cached_dev *dc) }; if (dc->io_disable) { - pr_err("I/O disabled on cached dev %s\n", - dc->backing_dev_name); + pr_err("I/O disabled on cached dev %pg\n", dc->bdev); ret = -EIO; goto out; } if (atomic_xchg(&dc->running, 1)) { - pr_info("cached dev %s is running already\n", - dc->backing_dev_name); + pr_info("cached dev %pg is running already\n", dc->bdev); ret = -EBUSY; goto out; } @@ -1082,7 +1080,9 @@ int bch_cached_dev_run(struct cached_dev *dc) closure_sync(&cl); } - add_disk(d->disk); + ret = add_disk(d->disk); + if (ret) + goto out; bd_link_disk_holder(dc->bdev, dc->disk.disk); /* * won't show up in the uevent file, use udevadm monitor -e instead @@ -1154,16 +1154,16 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); - calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); + calc_cached_dev_sectors(dc->disk.c); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s\n", dc->backing_dev_name); + pr_info("Caching disabled for %pg\n", dc->bdev); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -1203,29 +1203,27 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached\n", - dc->backing_dev_name); + pr_err("Can't attach %pg: already attached\n", dc->bdev); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down\n", - dc->backing_dev_name); + pr_err("Can't attach %pg: shutting down\n", dc->bdev); return -EINVAL; } if (dc->sb.block_size < c->cache->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size less than set's block size\n", - dc->backing_dev_name); + pr_err("Couldn't attach %pg: block size less than set's block size\n", + dc->bdev); return -EINVAL; } /* Check whether already attached */ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { - pr_err("Tried to attach %s but duplicate UUID already attached\n", - dc->backing_dev_name); + pr_err("Tried to attach %pg but duplicate UUID already attached\n", + dc->bdev); return -EINVAL; } @@ -1243,15 +1241,13 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set\n", - dc->backing_dev_name); + pr_err("Couldn't find uuid for %pg in set\n", dc->bdev); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID\n", - dc->backing_dev_name); + pr_err("Not caching %pg, no room for UUID\n", dc->bdev); return -EINVAL; } } @@ -1319,8 +1315,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, */ kthread_stop(dc->writeback_thread); cancel_writeback_rate_update_dwork(dc); - pr_err("Couldn't run cached device %s\n", - dc->backing_dev_name); + pr_err("Couldn't run cached device %pg\n", dc->bdev); return ret; } @@ -1336,8 +1331,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); - pr_info("Caching %s as %s on set %pU\n", - dc->backing_dev_name, + pr_info("Caching %pg as %s on set %pU\n", + dc->bdev, dc->disk.disk->disk_name, dc->disk.c->set_uuid); return 0; @@ -1461,7 +1456,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, struct cache_set *c; int ret = -ENOMEM; - bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -1476,7 +1470,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s\n", dc->backing_dev_name); + pr_info("registered backing device %pg\n", dc->bdev); list_add(&dc->list, &uncached_devices); /* attach to a matched cache set if it exists */ @@ -1493,7 +1487,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, return 0; err: - pr_notice("error %s: %s\n", dc->backing_dev_name, err); + pr_notice("error %pg: %s\n", dc->bdev, err); bcache_device_stop(&dc->disk); return ret; } @@ -1534,10 +1528,11 @@ static void flash_dev_flush(struct closure *cl) static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) { + int err = -ENOMEM; struct bcache_device *d = kzalloc(sizeof(struct bcache_device), GFP_KERNEL); if (!d) - return -ENOMEM; + goto err_ret; closure_init(&d->cl, NULL); set_closure_fn(&d->cl, flash_dev_flush, system_wq); @@ -1551,9 +1546,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_attach(d, c, u - c->uuids); bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); - add_disk(d->disk); + err = add_disk(d->disk); + if (err) + goto err; - if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) + err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"); + if (err) goto err; bcache_device_link(d, c, "volume"); @@ -1567,7 +1565,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) return 0; err: kobject_put(&d->kobj); - return -ENOMEM; +err_ret: + return err; } static int flash_devs_run(struct cache_set *c) @@ -1621,8 +1620,8 @@ bool bch_cached_dev_error(struct cached_dev *dc) /* make others know io_disable is true earlier */ smp_mb(); - pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, dc->backing_dev_name); + pr_err("stop %s: too many IO errors on backing device %pg\n", + dc->disk.disk->disk_name, dc->bdev); bcache_device_stop(&dc->disk); return true; @@ -2338,7 +2337,7 @@ err_btree_alloc: err_free: module_put(THIS_MODULE); if (err) - pr_notice("error %s: %s\n", ca->cache_dev_name, err); + pr_notice("error %pg: %s\n", ca->bdev, err); return ret; } @@ -2348,7 +2347,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, const char *err = NULL; /* must be set for any error case */ int ret = 0; - bdevname(bdev, ca->cache_dev_name); memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -2390,14 +2388,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto out; } - pr_info("registered cache device %s\n", ca->cache_dev_name); + pr_info("registered cache device %pg\n", ca->bdev); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s\n", ca->cache_dev_name, err); + pr_notice("error %pg: %s\n", ca->bdev, err); return ret; } @@ -2617,8 +2615,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) + if (!dc) { + ret = -ENOMEM; + err = "cannot allocate memory"; goto out_put_sb_page; + } mutex_lock(&bch_register_lock); ret = register_bdev(sb, sb_disk, bdev, dc); @@ -2629,11 +2630,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) + if (!ca) { + ret = -ENOMEM; + err = "cannot allocate memory"; goto out_put_sb_page; + } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(sb, sb_disk, bdev, ca) != 0) + ret = register_cache(sb, sb_disk, bdev, ca); + if (ret) goto out_free_sb; } @@ -2750,7 +2755,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) * The reason bch_register_lock is not held to call * bch_cache_set_stop() and bcache_device_stop() is to * avoid potential deadlock during reboot, because cache - * set or bcache device stopping process will acqurie + * set or bcache device stopping process will acquire * bch_register_lock too. * * We are safe here because bcache_is_reboot sets to diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 05ac1d6fbbf3..1f0dce30fa75 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -271,7 +271,7 @@ SHOW(__bch_cached_dev) } if (attr == &sysfs_backing_dev_name) { - snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name); + snprintf(buf, BDEVNAME_SIZE + 1, "%pg", dc->bdev); strcat(buf, "\n"); return strlen(buf); } diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 215df32f567b..c1752ba2e05b 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -51,13 +51,27 @@ STORE(fn) \ #define sysfs_printf(file, fmt, ...) \ do { \ if (attr == &sysfs_ ## file) \ - return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \ + return sysfs_emit(buf, fmt "\n", __VA_ARGS__); \ } while (0) #define sysfs_print(file, var) \ do { \ if (attr == &sysfs_ ## file) \ - return snprint(buf, PAGE_SIZE, var); \ + return sysfs_emit(buf, \ + __builtin_types_compatible_p(typeof(var), int) \ + ? "%i\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned int) \ + ? "%u\n" : \ + __builtin_types_compatible_p(typeof(var), long) \ + ? "%li\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned long)\ + ? "%lu\n" : \ + __builtin_types_compatible_p(typeof(var), int64_t) \ + ? "%lli\n" : \ + __builtin_types_compatible_p(typeof(var), uint64_t) \ + ? "%llu\n" : \ + __builtin_types_compatible_p(typeof(var), const char *) \ + ? "%s\n" : "%i\n", var); \ } while (0) #define sysfs_hprint(file, val) \ diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index b64460a76267..6f3cb7c92130 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -340,23 +340,6 @@ static inline int bch_strtoul_h(const char *cp, long *res) _r; \ }) -#define snprint(buf, size, var) \ - snprintf(buf, size, \ - __builtin_types_compatible_p(typeof(var), int) \ - ? "%i\n" : \ - __builtin_types_compatible_p(typeof(var), unsigned int) \ - ? "%u\n" : \ - __builtin_types_compatible_p(typeof(var), long) \ - ? "%li\n" : \ - __builtin_types_compatible_p(typeof(var), unsigned long)\ - ? "%lu\n" : \ - __builtin_types_compatible_p(typeof(var), int64_t) \ - ? "%lli\n" : \ - __builtin_types_compatible_p(typeof(var), uint64_t) \ - ? "%llu\n" : \ - __builtin_types_compatible_p(typeof(var), const char *) \ - ? "%s\n" : "%i\n", var) - ssize_t bch_hprint(char *buf, int64_t v); bool bch_is_zero(const char *p, size_t n); @@ -548,14 +531,6 @@ static inline uint64_t bch_crc64(const void *p, size_t len) return crc ^ 0xffffffffffffffffULL; } -static inline uint64_t bch_crc64_update(uint64_t crc, - const void *p, - size_t len) -{ - crc = crc64_be(crc, p, len); - return crc; -} - /* * A stepwise-linear pseudo-exponential. This returns 1 << (x >> * frac_bits), with the less-significant bits filled in by linear @@ -584,8 +559,4 @@ static inline unsigned int fract_exp_two(unsigned int x, void bch_bio_map(struct bio *bio, void *base); int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask); -static inline sector_t bdev_sectors(struct block_device *bdev) -{ - return bdev->bd_inode->i_size >> 9; -} #endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 8120da278161..c7560f66dca8 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -45,7 +45,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) * backing volume uses about 2% of the cache for dirty data. */ uint32_t bdev_share = - div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT, + div64_u64(bdev_nr_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT, c->cached_dev_sectors); uint64_t cache_dirty_target = diff --git a/drivers/md/dm-audit.c b/drivers/md/dm-audit.c new file mode 100644 index 000000000000..3049dfe67e50 --- /dev/null +++ b/drivers/md/dm-audit.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Creating audit records for mapped devices. + * + * Copyright (C) 2021 Fraunhofer AISEC. All rights reserved. + * + * Authors: Michael Weiß <michael.weiss@aisec.fraunhofer.de> + */ + +#include <linux/audit.h> +#include <linux/module.h> +#include <linux/device-mapper.h> +#include <linux/bio.h> +#include <linux/blkdev.h> + +#include "dm-audit.h" +#include "dm-core.h" + +static struct audit_buffer *dm_audit_log_start(int audit_type, + const char *dm_msg_prefix, + const char *op) +{ + struct audit_buffer *ab; + + if (audit_enabled == AUDIT_OFF) + return NULL; + + ab = audit_log_start(audit_context(), GFP_KERNEL, audit_type); + if (unlikely(!ab)) + return NULL; + + audit_log_format(ab, "module=%s op=%s", dm_msg_prefix, op); + return ab; +} + +void dm_audit_log_ti(int audit_type, const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result) +{ + struct audit_buffer *ab = NULL; + struct mapped_device *md = dm_table_get_md(ti->table); + int dev_major = dm_disk(md)->major; + int dev_minor = dm_disk(md)->first_minor; + + switch (audit_type) { + case AUDIT_DM_CTRL: + ab = dm_audit_log_start(audit_type, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + audit_log_task_info(ab); + audit_log_format(ab, " dev=%d:%d error_msg='%s'", dev_major, + dev_minor, !result ? ti->error : "success"); + break; + case AUDIT_DM_EVENT: + ab = dm_audit_log_start(audit_type, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + audit_log_format(ab, " dev=%d:%d sector=?", dev_major, + dev_minor); + break; + default: /* unintended use */ + return; + } + + audit_log_format(ab, " res=%d", result); + audit_log_end(ab); +} +EXPORT_SYMBOL_GPL(dm_audit_log_ti); + +void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, int result) +{ + struct audit_buffer *ab; + int dev_major = MAJOR(bio->bi_bdev->bd_dev); + int dev_minor = MINOR(bio->bi_bdev->bd_dev); + + ab = dm_audit_log_start(AUDIT_DM_EVENT, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + + audit_log_format(ab, " dev=%d:%d sector=%llu res=%d", + dev_major, dev_minor, sector, result); + audit_log_end(ab); +} +EXPORT_SYMBOL_GPL(dm_audit_log_bio); diff --git a/drivers/md/dm-audit.h b/drivers/md/dm-audit.h new file mode 100644 index 000000000000..2385f2b659be --- /dev/null +++ b/drivers/md/dm-audit.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Creating audit records for mapped devices. + * + * Copyright (C) 2021 Fraunhofer AISEC. All rights reserved. + * + * Authors: Michael Weiß <michael.weiss@aisec.fraunhofer.de> + */ + +#ifndef DM_AUDIT_H +#define DM_AUDIT_H + +#include <linux/device-mapper.h> +#include <linux/audit.h> + +#ifdef CONFIG_DM_AUDIT +void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, int result); + +/* + * dm_audit_log_ti() is not intended to be used directly in dm modules, + * the wrapper functions below should be called by dm modules instead. + */ +void dm_audit_log_ti(int audit_type, const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result); + +static inline void dm_audit_log_ctr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_CTRL, dm_msg_prefix, "ctr", ti, result); +} + +static inline void dm_audit_log_dtr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_CTRL, dm_msg_prefix, "dtr", ti, result); +} + +static inline void dm_audit_log_target(const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_EVENT, dm_msg_prefix, op, ti, result); +} +#else +static inline void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, + int result) +{ +} +static inline void dm_audit_log_target(const char *dm_msg_prefix, + const char *op, struct dm_target *ti, + int result) +{ +} +static inline void dm_audit_log_ctr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ +} + +static inline void dm_audit_log_dtr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ +} +#endif + +#endif diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 50f3e673729c..e9cbc70d5a0e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1525,7 +1525,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) { - sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; + sector_t s = bdev_nr_sectors(c->bdev); if (s >= c->start) s -= c->start; else @@ -2082,7 +2082,6 @@ static void __exit dm_bufio_exit(void) int bug = 0; cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); - flush_workqueue(dm_bufio_wq); destroy_workqueue(dm_bufio_wq); if (dm_bufio_client_count) { diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 89a73204dbf4..2874f222c313 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -334,7 +334,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) int r; struct dm_block *sblock; struct cache_disk_superblock *disk_super; - sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; + sector_t bdev_size = bdev_nr_sectors(cmd->bdev); /* FIXME: see if we can lose the max sectors limit */ if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index bdd500447dea..447d030036d1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1940,7 +1940,7 @@ static void cache_dtr(struct dm_target *ti) static sector_t get_dev_size(struct dm_dev *dev) { - return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(dev->bdev); } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index edd22e4d65df..4599632d7a84 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1514,7 +1514,7 @@ error: static sector_t get_dev_size(struct dm_dev *dev) { - return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(dev->bdev); } /*---------------------------------------------------------------------------*/ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 292f7896f733..d4ae31558826 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -42,6 +42,8 @@ #include <linux/device-mapper.h> +#include "dm-audit.h" + #define DM_MSG_PREFIX "crypt" /* @@ -1363,8 +1365,12 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b), - (unsigned long long)le64_to_cpu(*sector)); + sector_t s = le64_to_cpu(*sector); + + DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", + bio_devname(ctx->bio_in, b), s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -2174,8 +2180,12 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b), - (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); + sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); + + DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", + bio_devname(ctx->bio_in, b), s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -2735,6 +2745,8 @@ static void crypt_dtr(struct dm_target *ti) dm_crypt_clients_n--; crypt_calculate_pages_per_client(); spin_unlock(&dm_crypt_clients_lock); + + dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); } static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) @@ -3351,21 +3363,22 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&cc->write_thread_lock); cc->write_tree = RB_ROOT; - cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write/%s", devname); + cc->write_thread = kthread_run(dmcrypt_write, cc, "dmcrypt_write/%s", devname); if (IS_ERR(cc->write_thread)) { ret = PTR_ERR(cc->write_thread); cc->write_thread = NULL; ti->error = "Couldn't spawn write thread"; goto bad; } - wake_up_process(cc->write_thread); ti->num_flush_bios = 1; ti->limit_swap_bios = true; + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; bad: + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); crypt_dtr(ti); return ret; } diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index 3163e2b1418e..03672204b0e3 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -415,7 +415,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, char *result, unsigned int maxlen) { struct dust_device *dd = ti->private; - sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT; + sector_t size = bdev_nr_sectors(dd->dev->bdev); bool invalid_msg = false; int r = -EINVAL; unsigned long long tmp, block; @@ -544,8 +544,7 @@ static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) /* * Only pass ioctls through if the device sizes match exactly. */ - if (dd->start || - ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + if (dd->start || ti->len != bdev_nr_sectors(dev->bdev)) return 1; return 0; diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index d25989660a76..7ce5d509b940 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -416,7 +416,7 @@ static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) * Only pass ioctls through if the device sizes match exactly. */ *bdev = dev->bdev; - return !!(ec->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT); + return !!(ec->start || ti->len != bdev_nr_sectors(dev->bdev)); } static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 2a78f6874143..1f6bf152b3c7 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1681,7 +1681,7 @@ static int era_message(struct dm_target *ti, unsigned argc, char **argv, static sector_t get_dev_size(struct dm_dev *dev) { - return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(dev->bdev); } static int era_iterate_devices(struct dm_target *ti, diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 3f4139ac1f60..b5f20eba3641 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -168,7 +168,7 @@ static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) */ static inline sector_t get_dev_size(struct block_device *bdev) { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(bdev); } static inline chunk_t sector_to_chunk(struct dm_exception_store *store, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 4b94ffe6f2d4..345229d7e59c 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -456,8 +456,7 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev /* * Only pass ioctls through if the device sizes match exactly. */ - if (fc->start || - ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + if (fc->start || ti->len != bdev_nr_sectors((*bdev))) return 1; return 0; } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index dc03b70f6e65..6319deccbe09 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -23,6 +23,8 @@ #include <linux/async_tx.h> #include <linux/dm-bufio.h> +#include "dm-audit.h" + #define DM_MSG_PREFIX "integrity" #define DEFAULT_INTERLEAVE_SECTORS 32768 @@ -539,6 +541,7 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) } if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) { dm_integrity_io_error(ic, "superblock mac", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); return -EILSEQ; } } @@ -876,8 +879,10 @@ static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr) if (likely(wr)) memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR); else { - if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) + if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) { dm_integrity_io_error(ic, "journal mac", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0); + } } } } @@ -1765,7 +1770,7 @@ static void integrity_metadata(struct work_struct *w) char *mem, *checksums_ptr; again: - mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset; + mem = bvec_kmap_local(&bv); pos = 0; checksums_ptr = checksums; do { @@ -1775,17 +1780,22 @@ again: pos += ic->sectors_per_block << SECTOR_SHIFT; sector += ic->sectors_per_block; } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); - kunmap_atomic(mem); + kunmap_local(mem); r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", bio_devname(bio, b), - (sector - ((r + ic->tag_size - 1) / ic->tag_size))); + sector_t s; + + s = sector - ((r + ic->tag_size - 1) / ic->tag_size); + DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", + bio_devname(bio, b), s); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, s, 0); } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -1953,7 +1963,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, n_sectors -= bv.bv_len >> SECTOR_SHIFT; bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); retry_kmap: - mem = kmap_atomic(bv.bv_page); + mem = bvec_kmap_local(&bv); if (likely(dio->op == REQ_OP_WRITE)) flush_dcache_page(bv.bv_page); @@ -1967,7 +1977,7 @@ retry_kmap: if (unlikely(journal_entry_is_inprogress(je))) { flush_dcache_page(bv.bv_page); - kunmap_atomic(mem); + kunmap_local(mem); __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je)); goto retry_kmap; @@ -1991,6 +2001,8 @@ retry_kmap: if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", logical_sector); + dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum", + bio, logical_sector, 0); } } #endif @@ -2058,7 +2070,7 @@ retry_kmap: if (unlikely(dio->op == REQ_OP_READ)) flush_dcache_page(bv.bv_page); - kunmap_atomic(mem); + kunmap_local(mem); } while (n_sectors); if (likely(dio->op == REQ_OP_WRITE)) { @@ -2534,8 +2546,10 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); - if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) + if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0); + } } journal_entry_set_unused(je2); @@ -4113,11 +4127,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } - ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; + ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev); if (!ic->meta_dev) ic->meta_device_sectors = ic->data_device_sectors; else - ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT; + ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev); if (!journal_sectors) { journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, @@ -4367,7 +4381,7 @@ try_smaller_buffer: DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); DEBUG_print(" journal_entries %u\n", ic->journal_entries); DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); - DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT); + DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev)); DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); @@ -4514,9 +4528,11 @@ try_smaller_buffer: if (ic->discard) ti->num_discard_bios = 1; + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; bad: + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); dm_integrity_dtr(ti); return r; } @@ -4590,6 +4606,7 @@ static void dm_integrity_dtr(struct dm_target *ti) free_alg(&ic->journal_mac_alg); kfree(ic); + dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); } static struct target_type integrity_target = { diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 679b4c0a2eea..66ba16713f69 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -135,8 +135,7 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev /* * Only pass ioctls through if the device sizes match exactly. */ - if (lc->start || - ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + if (lc->start || ti->len != bdev_nr_sectors(dev->bdev)) return 1; return 0; } diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index d93a4db23512..0b3ef977ceeb 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -446,7 +446,7 @@ static int log_super(struct log_writes_c *lc) static inline sector_t logdev_last_sector(struct log_writes_c *lc) { - return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(lc->logdev->bdev); } static int log_writes_kthread(void *arg) @@ -753,7 +753,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) */ bio_for_each_segment(bv, bio, iter) { struct page *page; - void *src, *dst; + void *dst; page = alloc_page(GFP_NOIO); if (!page) { @@ -765,11 +765,9 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_KILL; } - src = kmap_atomic(bv.bv_page); dst = kmap_atomic(page); - memcpy(dst, src + bv.bv_offset, bv.bv_len); + memcpy_from_bvec(dst, &bv); kunmap_atomic(dst); - kunmap_atomic(src); block->vecs[i].bv_page = page; block->vecs[i].bv_len = bv.bv_len; block->vec_cnt++; @@ -851,7 +849,7 @@ static int log_writes_prepare_ioctl(struct dm_target *ti, /* * Only pass ioctls through if the device sizes match exactly. */ - if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + if (ti->len != bdev_nr_sectors(dev->bdev)) return 1; return 0; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 1ecf75ef276a..06f328928a7f 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -447,7 +447,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, bdev_logical_block_size(lc->header_location. bdev)); - if (buf_size > i_size_read(dev->bdev->bd_inode)) { + if (buf_size > bdev_nr_bytes(dev->bdev)) { DMWARN("log device %s too small: need %llu bytes", dev->name, (unsigned long long)buf_size); kfree(lc); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 694aaca4eea2..90dc9cc48881 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -530,7 +530,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, bdev = pgpath->path.dev->bdev; q = bdev_get_queue(bdev); - clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, + clone = blk_mq_alloc_request(q, rq->cmd_flags | REQ_NOMERGE, BLK_MQ_REQ_NOWAIT); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ @@ -579,7 +579,7 @@ static void multipath_release_clone(struct request *clone, clone->io_start_time_ns); } - blk_put_request(clone); + blk_mq_free_request(clone); } /* @@ -2061,7 +2061,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, /* * Only pass ioctls through if the device sizes match exactly. */ - if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + if (!r && ti->len != bdev_nr_sectors((*bdev))) return 1; return r; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d9ef52159a22..2b26435a6946 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1261,7 +1261,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, md_rdev_init(jdev); jdev->mddev = &rs->md; jdev->bdev = rs->journal_dev.dev->bdev; - jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode)); + jdev->sectors = bdev_nr_sectors(jdev->bdev); if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) { rs->ti->error = "No space for raid4/5/6 journal"; return -ENOSPC; @@ -1607,7 +1607,7 @@ static int _check_data_dev_sectors(struct raid_set *rs) rdev_for_each(rdev, &rs->md) if (!test_bit(Journal, &rdev->flags) && rdev->bdev) { - ds = min(ds, to_sector(i_size_read(rdev->bdev->bd_inode))); + ds = min(ds, bdev_nr_sectors(rdev->bdev)); if (ds < rs->md.dev_sectors) { rs->ti->error = "Component device(s) too small"; return -EINVAL; @@ -2662,7 +2662,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs) * Make sure we got a minimum amount of free sectors per device */ if (rs->data_offset && - to_sector(i_size_read(rdev->bdev->bd_inode)) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) { + bdev_nr_sectors(rdev->bdev) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) { rs->ti->error = data_offset ? "No space for forward reshape" : "No space for backward reshape"; return -ENOSPC; diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 028a92ff6d57..534dc2ca8bb0 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -529,7 +529,7 @@ static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev * Only pass ioctls through if the device sizes match exactly. */ if (ti->len + sctx->path_list[path_nr].start != - i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) + bdev_nr_sectors((*bdev))) return 1; return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 8b0f27a745d9..aa173f5bdc3d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -227,8 +227,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; - sector_t dev_size = - i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + sector_t dev_size = bdev_nr_sectors(bdev); unsigned short logical_block_size_sectors = limits->logical_block_size >> SECTOR_SHIFT; char b[BDEVNAME_SIZE]; @@ -707,7 +706,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, r = dm_split_args(&argc, &argv, params); if (r) { - tgt->error = "couldn't split parameters (insufficient memory)"; + tgt->error = "couldn't split parameters"; goto bad; } @@ -725,7 +724,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, return 0; bad: - DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error); + DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r)); dm_put_target_type(tgt->type); return r; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index c88ed14d49e6..1a96a07cbf44 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -549,7 +549,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) int r; struct dm_block *sblock; struct thin_disk_superblock *disk_super; - sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; + sector_t bdev_size = bdev_nr_sectors(pmd->bdev); if (bdev_size > THIN_METADATA_MAX_SECTORS) bdev_size = THIN_METADATA_MAX_SECTORS; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4c67b77c23c1..ec119d2422d5 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3212,7 +3212,7 @@ static int metadata_pre_commit_callback(void *context) static sector_t get_dev_size(struct block_device *bdev) { - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + return bdev_nr_sectors(bdev); } static void warn_if_metadata_device_too_big(struct block_device *bdev) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index aae48a8b1a04..80133aae0db3 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -428,14 +428,14 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, unsigned len; struct bio_vec bv = bio_iter_iovec(bio, *iter); - page = kmap_atomic(bv.bv_page); + page = bvec_kmap_local(&bv); len = bv.bv_len; if (likely(len >= todo)) len = todo; - r = process(v, io, page + bv.bv_offset, len); - kunmap_atomic(page); + r = process(v, io, page, len); + kunmap_local(page); if (r < 0) return r; @@ -834,8 +834,7 @@ static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev *bdev = v->data_dev->bdev; - if (v->data_start || - ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) + if (v->data_start || ti->len != bdev_nr_sectors(v->data_dev->bdev)) return 1; return 0; } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 18320444fb0a..4b8991cde223 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2264,14 +2264,13 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) raw_spin_lock_init(&wc->endio_list_lock); INIT_LIST_HEAD(&wc->endio_list); - wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio"); + wc->endio_thread = kthread_run(writecache_endio_thread, wc, "writecache_endio"); if (IS_ERR(wc->endio_thread)) { r = PTR_ERR(wc->endio_thread); wc->endio_thread = NULL; ti->error = "Couldn't spawn endio thread"; goto bad; } - wake_up_process(wc->endio_thread); /* * Parse the mode (pmem or ssd) @@ -2341,7 +2340,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Cache data device lookup failed"; goto bad; } - wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); + wc->memory_map_size = bdev_nr_bytes(wc->ssd_dev->bdev); /* * Parse the cache block size @@ -2493,14 +2492,13 @@ invalid_optional: wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; bio_list_init(&wc->flush_list); - wc->flush_thread = kthread_create(writecache_flush_thread, wc, "dm_writecache_flush"); + wc->flush_thread = kthread_run(writecache_flush_thread, wc, "dm_writecache_flush"); if (IS_ERR(wc->flush_thread)) { r = PTR_ERR(wc->flush_thread); wc->flush_thread = NULL; ti->error = "Couldn't spawn flush thread"; goto bad; } - wake_up_process(wc->flush_thread); r = calculate_memory_size(wc->memory_map_size, wc->block_size, &n_blocks, &n_metadata_blocks); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ae1bc48c0043..166c4e9d99c9 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -733,7 +733,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path, dev->dev_idx = idx; (void)bdevname(dev->bdev, dev->name); - dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + dev->capacity = bdev_nr_sectors(bdev); if (ti->begin) { ti->error = "Partial mapping is not supported"; goto err; @@ -967,7 +967,6 @@ static void dmz_dtr(struct dm_target *ti) struct dmz_target *dmz = ti->private; int i; - flush_workqueue(dmz->chunk_wq); destroy_workqueue(dmz->chunk_wq); for (i = 0; i < dmz->nr_ddevs; i++) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index be0eb2e1dd11..662742a310cb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1792,7 +1792,7 @@ static struct mapped_device *alloc_dev(int minor) format_dev_t(md->name, MKDEV(_major, minor)); - md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0); + md->wq = alloc_workqueue("kdmflush/%s", WQ_MEM_RECLAIM, 0, md->name); if (!md->wq) goto bad; @@ -2068,7 +2068,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; - add_disk(md->disk); + r = add_disk(md->disk); + if (r) + return r; r = dm_sysfs_init(md); if (r) { diff --git a/drivers/md/md.c b/drivers/md/md.c index 22310d5d8d41..5111ed966947 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -354,7 +354,7 @@ static bool create_on_open = true; */ static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); static atomic_t md_event_count; -void md_new_event(struct mddev *mddev) +void md_new_event(void) { atomic_inc(&md_event_count); wake_up(&md_event_waiters); @@ -888,8 +888,7 @@ static struct md_personality *find_pers(int level, char *clevel) /* return the offset of the super block in 512byte sectors */ static inline sector_t calc_dev_sboffset(struct md_rdev *rdev) { - sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512; - return MD_NEW_SIZE_SECTORS(num_sectors); + return MD_NEW_SIZE_SECTORS(bdev_nr_sectors(rdev->bdev)); } static int alloc_disk_sb(struct md_rdev *rdev) @@ -1631,8 +1630,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ */ switch(minor_version) { case 0: - sb_start = i_size_read(rdev->bdev->bd_inode) >> 9; - sb_start -= 8*2; + sb_start = bdev_nr_sectors(rdev->bdev) - 8 * 2; sb_start &= ~(sector_t)(4*2-1); break; case 1: @@ -1787,10 +1785,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ else ret = 0; } - if (minor_version) { - sectors = (i_size_read(rdev->bdev->bd_inode) >> 9); - sectors -= rdev->data_offset; - } else + if (minor_version) + sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset; + else sectors = rdev->sb_start; if (sectors < le64_to_cpu(sb->data_size)) return -EINVAL; @@ -2168,8 +2165,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) return 0; /* too confusing */ if (rdev->sb_start < rdev->data_offset) { /* minor versions 1 and 2; superblock before data */ - max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; - max_sectors -= rdev->data_offset; + max_sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset; if (!num_sectors || num_sectors > max_sectors) num_sectors = max_sectors; } else if (rdev->mddev->bitmap_info.offset) { @@ -2178,7 +2174,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } else { /* minor version 0; superblock after data */ sector_t sb_start, bm_space; - sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9; + sector_t dev_size = bdev_nr_sectors(rdev->bdev); /* 8K is for superblock */ sb_start = dev_size - 8*2; @@ -2886,7 +2882,7 @@ static int add_bound_rdev(struct md_rdev *rdev) if (mddev->degraded) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_new_event(mddev); + md_new_event(); md_wakeup_thread(mddev->thread); return 0; } @@ -2976,7 +2972,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) * -write_error - clears WriteErrorSeen * {,-}failfast - set/clear FailFast */ + + struct mddev *mddev = rdev->mddev; int err = -EINVAL; + bool need_update_sb = false; + if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); if (test_bit(Faulty, &rdev->flags)) @@ -2991,7 +2991,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->raid_disk >= 0) err = -EBUSY; else { - struct mddev *mddev = rdev->mddev; err = 0; if (mddev_is_clustered(mddev)) err = md_cluster_ops->remove_disk(mddev, rdev); @@ -3002,16 +3001,18 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread); } - md_new_event(mddev); + md_new_event(); } } } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); mddev_create_serial_pool(rdev->mddev, rdev, false); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-writemostly")) { mddev_destroy_serial_pool(rdev->mddev, rdev, false); clear_bit(WriteMostly, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "blocked")) { set_bit(Blocked, &rdev->flags); @@ -3037,9 +3038,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "failfast")) { set_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-failfast")) { clear_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags)) { @@ -3118,6 +3121,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(ExternalBbl, &rdev->flags); err = 0; } + if (need_update_sb) + md_update_sb(mddev, 1); if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); return err ? err : len; @@ -3382,7 +3387,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) if (!sectors) return -EBUSY; } else if (!sectors) - sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - + sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset; if (!my_mddev->pers->resize) /* Cannot change size for RAID0 or Linear etc */ @@ -3709,7 +3714,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe kobject_init(&rdev->kobj, &rdev_ktype); - size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS; + size = bdev_nr_bytes(rdev->bdev) >> BLOCK_SIZE_BITS; if (!size) { pr_warn("md: %s has zero or unknown size, marking faulty!\n", bdevname(rdev->bdev,b)); @@ -4099,7 +4104,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) if (!mddev->thread) md_update_sb(mddev, 1); sysfs_notify_dirent_safe(mddev->sysfs_level); - md_new_event(mddev); + md_new_event(); rv = len; out_unlock: mddev_unlock(mddev); @@ -4620,7 +4625,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) export_rdev(rdev); mddev_unlock(mddev); if (!err) - md_new_event(mddev); + md_new_event(); return err ? err : len; } @@ -5490,6 +5495,10 @@ static struct attribute *md_default_attrs[] = { NULL, }; +static const struct attribute_group md_default_group = { + .attrs = md_default_attrs, +}; + static struct attribute *md_redundancy_attrs[] = { &md_scan_mode.attr, &md_last_scan_mode.attr, @@ -5512,6 +5521,12 @@ static const struct attribute_group md_redundancy_group = { .attrs = md_redundancy_attrs, }; +static const struct attribute_group *md_attr_groups[] = { + &md_default_group, + &md_bitmap_group, + NULL, +}; + static ssize_t md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { @@ -5587,7 +5602,7 @@ static const struct sysfs_ops md_sysfs_ops = { static struct kobj_type md_ktype = { .release = md_free, .sysfs_ops = &md_sysfs_ops, - .default_attrs = md_default_attrs, + .default_groups = md_attr_groups, }; int mdp_major = 0; @@ -5596,7 +5611,6 @@ static void mddev_delayed_delete(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - sysfs_remove_group(&mddev->kobj, &md_bitmap_group); kobject_del(&mddev->kobj); kobject_put(&mddev->kobj); } @@ -5663,7 +5677,7 @@ static int md_alloc(dev_t dev, char *name) strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); error = -EEXIST; - goto abort; + goto out_unlock_disks_mutex; } spin_unlock(&all_mddevs_lock); } @@ -5676,7 +5690,7 @@ static int md_alloc(dev_t dev, char *name) error = -ENOMEM; disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) - goto abort; + goto out_unlock_disks_mutex; disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; @@ -5700,27 +5714,25 @@ static int md_alloc(dev_t dev, char *name) disk->flags |= GENHD_FL_EXT_DEVT; disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; - add_disk(disk); + error = add_disk(disk); + if (error) + goto out_cleanup_disk; error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); - if (error) { - /* This isn't possible, but as kobject_init_and_add is marked - * __must_check, we must do something with the result - */ - pr_debug("md: cannot register %s/md - name in use\n", - disk->disk_name); - error = 0; - } - if (mddev->kobj.sd && - sysfs_create_group(&mddev->kobj, &md_bitmap_group)) - pr_debug("pointless warning\n"); - abort: + if (error) + goto out_del_gendisk; + + kobject_uevent(&mddev->kobj, KOBJ_ADD); + mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); + mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); + goto out_unlock_disks_mutex; + +out_del_gendisk: + del_gendisk(disk); +out_cleanup_disk: + blk_cleanup_disk(disk); +out_unlock_disks_mutex: mutex_unlock(&disks_mutex); - if (!error && mddev->kobj.sd) { - kobject_uevent(&mddev->kobj, KOBJ_ADD); - mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); - mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); - } mddev_put(mddev); return error; } @@ -6034,7 +6046,7 @@ int md_run(struct mddev *mddev) if (mddev->sb_flags) md_update_sb(mddev, 0); - md_new_event(mddev); + md_new_event(); return 0; bitmap_abort: @@ -6424,7 +6436,7 @@ static int do_md_stop(struct mddev *mddev, int mode, if (mddev->hold_active == UNTIL_STOP) mddev->hold_active = 0; } - md_new_event(mddev); + md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); return 0; } @@ -6880,7 +6892,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) if (!mddev->persistent) { pr_debug("md: nonpersistent superblock ...\n"); - rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; + rdev->sb_start = bdev_nr_sectors(rdev->bdev); } else rdev->sb_start = calc_dev_sboffset(rdev); rdev->sectors = rdev->sb_start; @@ -6928,7 +6940,7 @@ kick_rdev: md_wakeup_thread(mddev->thread); else md_update_sb(mddev, 1); - md_new_event(mddev); + md_new_event(); return 0; busy: @@ -6967,7 +6979,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) if (mddev->persistent) rdev->sb_start = calc_dev_sboffset(rdev); else - rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; + rdev->sb_start = bdev_nr_sectors(rdev->bdev); rdev->sectors = rdev->sb_start; @@ -7001,7 +7013,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - md_new_event(mddev); + md_new_event(); return 0; abort_export: @@ -7975,7 +7987,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) md_wakeup_thread(mddev->thread); if (mddev->event_work.func) queue_work(md_misc_wq, &mddev->event_work); - md_new_event(mddev); + md_new_event(); } EXPORT_SYMBOL(md_error); @@ -8859,7 +8871,7 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync = 3; /* no longer delayed */ mddev->curr_resync_completed = j; sysfs_notify_dirent_safe(mddev->sysfs_completed); - md_new_event(mddev); + md_new_event(); update_time = jiffies; blk_start_plug(&plug); @@ -8930,7 +8942,7 @@ void md_do_sync(struct md_thread *thread) /* this is the earliest that rebuild will be * visible in /proc/mdstat */ - md_new_event(mddev); + md_new_event(); if (last_check + window > io_sectors || j == max_sectors) continue; @@ -9154,7 +9166,7 @@ static int remove_and_add_spares(struct mddev *mddev, sysfs_link_rdev(mddev, rdev); if (!test_bit(Journal, &rdev->flags)) spares++; - md_new_event(mddev); + md_new_event(); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); } } @@ -9188,7 +9200,7 @@ static void md_start_sync(struct work_struct *ws) } else md_wakeup_thread(mddev->sync_thread); sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); + md_new_event(); } /* @@ -9447,7 +9459,7 @@ void md_reap_sync_thread(struct mddev *mddev) /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); + md_new_event(); if (mddev->event_work.func) queue_work(md_misc_wq, &mddev->event_work); } diff --git a/drivers/md/md.h b/drivers/md/md.h index 4c96c36bd01a..53ea7a6961de 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -731,7 +731,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int op, int op_flags, bool metadata_op); extern void md_do_sync(struct md_thread *thread); -extern void md_new_event(struct mddev *mddev); +extern void md_new_event(void); extern void md_allow_write(struct mddev *mddev); extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19598bd38939..7dc8026cf6ee 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1496,7 +1496,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!r1_bio->bios[i]) continue; - if (first_clone) { + if (first_clone && test_bit(WriteMostly, &rdev->flags)) { /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly @@ -1529,13 +1529,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->bios[i] = mbio; - mbio->bi_iter.bi_sector = (r1_bio->sector + - conf->mirrors[i].rdev->data_offset); - bio_set_dev(mbio, conf->mirrors[i].rdev->bdev); + mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); + bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid1_end_write_request; mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); - if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && - !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) && + if (test_bit(FailFast, &rdev->flags) && + !test_bit(WriteMostly, &rdev->flags) && conf->raid_disks - mddev->degraded > 1) mbio->bi_opf |= MD_FAILFAST; mbio->bi_private = r1_bio; @@ -1546,7 +1545,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void *)conf->mirrors[i].rdev; + mbio->bi_bdev = (void *)rdev; cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); if (cb) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index aa2636582841..dde98f65bd04 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4647,7 +4647,7 @@ out: } conf->reshape_checkpoint = jiffies; md_wakeup_thread(mddev->sync_thread); - md_new_event(mddev); + md_new_event(); return 0; abort: diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 02ed53b20654..9c1a5877cf9f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7732,10 +7732,7 @@ static int raid5_run(struct mddev *mddev) * discard data disk but write parity disk */ stripe = stripe * PAGE_SIZE; - /* Round up to power of 2, as discard handling - * currently assumes that */ - while ((stripe-1) & stripe) - stripe = (stripe | (stripe-1)) + 1; + stripe = roundup_pow_of_two(stripe); mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; @@ -8282,7 +8279,7 @@ static int raid5_start_reshape(struct mddev *mddev) } conf->reshape_checkpoint = jiffies; md_wakeup_thread(mddev->sync_thread); - md_new_event(mddev); + md_new_event(); return 0; } |