diff options
Diffstat (limited to 'fs')
370 files changed, 11779 insertions, 11093 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 0429c8ee58f1..89bac3d2f05b 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -343,7 +343,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) v9ses->uid = make_kuid(current_user_ns(), uid); if (!uid_valid(v9ses->uid)) { ret = -EINVAL; - pr_info("Uknown uid %s\n", s); + pr_info("Unknown uid %s\n", s); } } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 03c9e325bfbc..5f2e48d41d72 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -533,7 +533,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) return retval; } -static int +static vm_fault_t v9fs_vm_page_mkwrite(struct vm_fault *vmf) { struct v9fs_inode *v9inode; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 42e102e2e74a..85ff859d3af5 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) + struct file *file, unsigned flags, umode_t mode) { int err; u32 perm; @@ -917,7 +916,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - err = finish_open(file, dentry, generic_file_open, opened); + err = finish_open(file, dentry, generic_file_open); if (err) goto error; @@ -925,7 +924,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(d_inode(dentry), file); - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; out: dput(res); return err; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 7f6ae21a27b3..4823e1c46999 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t omode, - int *opened) + struct file *file, unsigned flags, umode_t omode) { int err = 0; kgid_t gid; @@ -352,13 +351,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - err = finish_open(file, dentry, generic_file_open, opened); + err = finish_open(file, dentry, generic_file_open); if (err) goto err_clunk_old_fid; file->private_data = ofid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; out: v9fs_put_acl(dacl, pacl); dput(res); diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index f329eee6dc93..352abc39e891 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -105,7 +105,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, { struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len}; struct iov_iter from; - int retval; + int retval, err; iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); @@ -126,7 +126,9 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, retval); else p9_client_write(fid, 0, &from, &retval); - p9_client_clunk(fid); + err = p9_client_clunk(fid); + if (!retval && err) + retval = err; return retval; } diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 56df483de619..b795f8da81f3 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -1,3 +1,6 @@ + +menu "Executable file formats" + config BINFMT_ELF bool "Kernel support for ELF binaries" depends on MMU @@ -187,3 +190,5 @@ config COREDUMP This option enables support for performing core dumps. You almost certainly want to say Y here. Not necessary on systems that never need debugging or only ever run flawless code. + +endmenu diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index c836c425ca94..e91028d4340a 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -287,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->mmu_private = inode->i_size; } - insert_inode_hash(inode); + inode_fake_hash(inode); out: return inode; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 71fa525d63a0..7e099a7a4eb1 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -291,6 +291,7 @@ static void destroy_inodecache(void) static const struct super_operations adfs_sops = { .alloc_inode = adfs_alloc_inode, .destroy_inode = adfs_destroy_inode, + .drop_inode = generic_delete_inode, .write_inode = adfs_write_inode, .put_super = adfs_put_super, .statfs = adfs_statfs, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 7d623008157f..855bf2b79fed 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -822,6 +822,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, { struct afs_vnode *dvnode = AFS_FS_I(dir); struct inode *inode; + struct dentry *d; struct key *key; int ret; @@ -862,43 +863,17 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, afs_stat_v(dvnode, n_lookup); inode = afs_do_lookup(dir, dentry, key); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - inode = afs_try_auto_mntpt(dentry, dir); - if (!IS_ERR(inode)) { - key_put(key); - goto success; - } - - ret = PTR_ERR(inode); - } - - key_put(key); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version; - - /* instantiate the dentry */ key_put(key); - if (IS_ERR(inode)) { - _leave(" = %ld", PTR_ERR(inode)); - return ERR_CAST(inode); + if (inode == ERR_PTR(-ENOENT)) { + inode = afs_try_auto_mntpt(dentry, dir); + } else { + dentry->d_fsdata = + (void *)(unsigned long)dvnode->status.data_version; } - -success: - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, - d_inode(dentry)->i_generation); - - return NULL; + d = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(d)) + d->d_fsdata = dentry->d_fsdata; + return d; } /* diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 174e843f0633..1cde710a8013 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -83,7 +83,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) out: _leave("= %d", ret); - return ERR_PTR(ret); + return ret == -ENOENT ? NULL : ERR_PTR(ret); } /* @@ -141,12 +141,6 @@ out_p: static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode; - struct inode *inode; - int ret; - - vnode = AFS_FS_I(dir); - _enter("%pd", dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -160,22 +154,7 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr memcmp(dentry->d_name.name, "@cell", 5) == 0) return afs_lookup_atcell(dentry); - inode = afs_try_auto_mntpt(dentry, dir); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); - return NULL; + return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry); } const struct inode_operations afs_dynroot_inode_operations = { diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a1b18082991b..35f2ae30f31f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -346,7 +346,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; - size_t offset; s64 tx_total_len; int ret; @@ -433,10 +432,10 @@ error_do_abort: rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { - offset = 0; - rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, - 0, &offset, false, &call->abort_code, - &call->service_id); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0); + rxrpc_kernel_recv_data(call->net->socket, rxcall, + &msg.msg_iter, false, + &call->abort_code, &call->service_id); ac->abort_code = call->abort_code; ac->responded = true; } @@ -467,13 +466,14 @@ static void afs_deliver_to_call(struct afs_call *call) state == AFS_CALL_SV_AWAIT_ACK ) { if (state == AFS_CALL_SV_AWAIT_ACK) { - size_t offset = 0; + struct iov_iter iter; + + iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, - call->rxcall, - NULL, 0, &offset, false, + call->rxcall, &iter, false, &remote_abort, &call->service_id); - trace_afs_recv_data(call, 0, offset, false, ret); + trace_afs_recv_data(call, 0, 0, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; @@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, trace_afs_notify_call(rxcall, call); call->need_attention = true; - u = __atomic_add_unless(&call->usage, 1, 0); + u = atomic_fetch_add_unless(&call->usage, 1, 0); if (u != 0) { trace_afs_call(call, afs_call_trace_wake, u, atomic_read(&call->net->nr_outstanding_calls), @@ -894,6 +894,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, bool want_more) { struct afs_net *net = call->net; + struct iov_iter iter; + struct kvec iov; enum afs_call_state state; u32 remote_abort = 0; int ret; @@ -903,10 +905,14 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, ASSERTCMP(call->offset, <=, count); - ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, - buf, count, &call->offset, + iov.iov_base = buf + call->offset; + iov.iov_len = count - call->offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset); + + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter, want_more, &remote_abort, &call->service_id); + call->offset += (count - call->offset) - iov_iter_count(&iter); trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; @@ -5,6 +5,7 @@ * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. + * Copyright 2018 Christoph Hellwig. * * See ../COPYING for licensing terms. */ @@ -18,6 +19,7 @@ #include <linux/export.h> #include <linux/syscalls.h> #include <linux/backing-dev.h> +#include <linux/refcount.h> #include <linux/uio.h> #include <linux/sched/signal.h> @@ -164,10 +166,21 @@ struct fsync_iocb { bool datasync; }; +struct poll_iocb { + struct file *file; + struct wait_queue_head *head; + __poll_t events; + bool woken; + bool cancelled; + struct wait_queue_entry wait; + struct work_struct work; +}; + struct aio_kiocb { union { struct kiocb rw; struct fsync_iocb fsync; + struct poll_iocb poll; }; struct kioctx *ki_ctx; @@ -178,6 +191,7 @@ struct aio_kiocb { struct list_head ki_list; /* the aio core uses this * for cancellation */ + refcount_t ki_refcnt; /* * If the aio_resfd field of the userspace iocb is not zero, @@ -202,9 +216,7 @@ static const struct address_space_operations aio_ctx_aops; static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { - struct qstr this = QSTR_INIT("[aio]", 5); struct file *file; - struct path path; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -213,31 +225,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->private_data = ctx; inode->i_size = PAGE_SIZE * nr_pages; - path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); - if (!path.dentry) { + file = alloc_file_pseudo(inode, aio_mnt, "[aio]", + O_RDWR, &aio_ring_fops); + if (IS_ERR(file)) iput(inode); - return ERR_PTR(-ENOMEM); - } - path.mnt = mntget(aio_mnt); - - d_instantiate(path.dentry, inode); - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); - if (IS_ERR(file)) { - path_put(&path); - return file; - } - - file->f_flags = O_RDWR; return file; } static struct dentry *aio_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - static const struct dentry_operations ops = { - .d_dname = simple_dname, - }; - struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, AIO_RING_MAGIC); if (!IS_ERR(root)) @@ -1015,6 +1013,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); INIT_LIST_HEAD(&req->ki_list); + refcount_set(&req->ki_refcnt, 0); req->ki_ctx = ctx; return req; out_put: @@ -1049,6 +1048,15 @@ out: return ret; } +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_read(&iocb->ki_refcnt) == 0 || + refcount_dec_and_test(&iocb->ki_refcnt)) { + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); + } +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1118,8 +1126,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) eventfd_ctx_put(iocb->ki_eventfd); } - kmem_cache_free(kiocb_cachep, iocb); - /* * We have to order our ring_info tail store above and test * of the wait list below outside the wait lock. This is @@ -1130,8 +1136,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - - percpu_ref_put(&ctx->reqs); + iocb_put(iocb); } /* aio_read_events_ring @@ -1592,6 +1597,182 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) return 0; } +static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) +{ + struct file *file = iocb->poll.file; + + aio_complete(iocb, mangle_poll(mask), 0); + fput(file); +} + +static void aio_poll_complete_work(struct work_struct *work) +{ + struct poll_iocb *req = container_of(work, struct poll_iocb, work); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + struct poll_table_struct pt = { ._key = req->events }; + struct kioctx *ctx = iocb->ki_ctx; + __poll_t mask = 0; + + if (!READ_ONCE(req->cancelled)) + mask = vfs_poll(req->file, &pt) & req->events; + + /* + * Note that ->ki_cancel callers also delete iocb from active_reqs after + * calling ->ki_cancel. We need the ctx_lock roundtrip here to + * synchronize with them. In the cancellation case the list_del_init + * itself is not actually needed, but harmless so we keep it in to + * avoid further branches in the fast path. + */ + spin_lock_irq(&ctx->ctx_lock); + if (!mask && !READ_ONCE(req->cancelled)) { + add_wait_queue(req->head, &req->wait); + spin_unlock_irq(&ctx->ctx_lock); + return; + } + list_del_init(&iocb->ki_list); + spin_unlock_irq(&ctx->ctx_lock); + + aio_poll_complete(iocb, mask); +} + +/* assumes we are called with irqs disabled */ +static int aio_poll_cancel(struct kiocb *iocb) +{ + struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); + struct poll_iocb *req = &aiocb->poll; + + spin_lock(&req->head->lock); + WRITE_ONCE(req->cancelled, true); + if (!list_empty(&req->wait.entry)) { + list_del_init(&req->wait.entry); + schedule_work(&aiocb->poll.work); + } + spin_unlock(&req->head->lock); + + return 0; +} + +static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + void *key) +{ + struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + __poll_t mask = key_to_poll(key); + + req->woken = true; + + /* for instances that support it check for an event match first: */ + if (mask) { + if (!(mask & req->events)) + return 0; + + /* try to complete the iocb inline if we can: */ + if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { + list_del(&iocb->ki_list); + spin_unlock(&iocb->ki_ctx->ctx_lock); + + list_del_init(&req->wait.entry); + aio_poll_complete(iocb, mask); + return 1; + } + } + + list_del_init(&req->wait.entry); + schedule_work(&req->work); + return 1; +} + +struct aio_poll_table { + struct poll_table_struct pt; + struct aio_kiocb *iocb; + int error; +}; + +static void +aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) +{ + struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); + + /* multiple wait queues per file are not supported */ + if (unlikely(pt->iocb->poll.head)) { + pt->error = -EINVAL; + return; + } + + pt->error = 0; + pt->iocb->poll.head = head; + add_wait_queue(head, &pt->iocb->poll.wait); +} + +static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) +{ + struct kioctx *ctx = aiocb->ki_ctx; + struct poll_iocb *req = &aiocb->poll; + struct aio_poll_table apt; + __poll_t mask; + + /* reject any unknown events outside the normal event mask. */ + if ((u16)iocb->aio_buf != iocb->aio_buf) + return -EINVAL; + /* reject fields that are not defined for poll */ + if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) + return -EINVAL; + + INIT_WORK(&req->work, aio_poll_complete_work); + req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; + req->file = fget(iocb->aio_fildes); + if (unlikely(!req->file)) + return -EBADF; + + apt.pt._qproc = aio_poll_queue_proc; + apt.pt._key = req->events; + apt.iocb = aiocb; + apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ + + /* initialized the list so that we can do list_empty checks */ + INIT_LIST_HEAD(&req->wait.entry); + init_waitqueue_func_entry(&req->wait, aio_poll_wake); + + /* one for removal from waitqueue, one for this function */ + refcount_set(&aiocb->ki_refcnt, 2); + + mask = vfs_poll(req->file, &apt.pt) & req->events; + if (unlikely(!req->head)) { + /* we did not manage to set up a waitqueue, done */ + goto out; + } + + spin_lock_irq(&ctx->ctx_lock); + spin_lock(&req->head->lock); + if (req->woken) { + /* wake_up context handles the rest */ + mask = 0; + apt.error = 0; + } else if (mask || apt.error) { + /* if we get an error or a mask we are done */ + WARN_ON_ONCE(list_empty(&req->wait.entry)); + list_del_init(&req->wait.entry); + } else { + /* actually waiting for an event */ + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } + spin_unlock(&req->head->lock); + spin_unlock_irq(&ctx->ctx_lock); + +out: + if (unlikely(apt.error)) { + fput(req->file); + return apt.error; + } + + if (mask) + aio_poll_complete(aiocb, mask); + iocb_put(aiocb); + return 0; +} + static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { @@ -1665,6 +1846,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, case IOCB_CMD_FDSYNC: ret = aio_fsync(&req->fsync, &iocb, true); break; + case IOCB_CMD_POLL: + ret = aio_poll(req, &iocb); + break; default: pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); ret = -EINVAL; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 3168ee4e77f4..91262c34b797 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -71,8 +71,6 @@ struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags) { - struct qstr this; - struct path path; struct file *file; if (IS_ERR(anon_inode_inode)) @@ -82,39 +80,23 @@ struct file *anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); /* - * Link the inode to a directory entry by creating a unique name - * using the inode sequence number. - */ - file = ERR_PTR(-ENOMEM); - this.name = name; - this.len = strlen(name); - this.hash = 0; - path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); - if (!path.dentry) - goto err_module; - - path.mnt = mntget(anon_inode_mnt); - /* * We know the anon_inode inode count is always greater than zero, * so ihold() is safe. */ ihold(anon_inode_inode); - - d_instantiate(path.dentry, anon_inode_inode); - - file = alloc_file(&path, OPEN_FMODE(flags), fops); + file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name, + flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) - goto err_dput; + goto err; + file->f_mapping = anon_inode_inode->i_mapping; - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; -err_dput: - path_put(&path); -err_module: +err: + iput(anon_inode_inode); module_put(fops->owner); return file; } diff --git a/fs/attr.c b/fs/attr.c index e3d53bf12240..d22e8187477f 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -120,7 +120,6 @@ EXPORT_SYMBOL(setattr_prepare); * inode_newsize_ok - may this inode be truncated to a given size * @inode: the inode to be truncated * @offset: the new size to assign to the inode - * @Returns: 0 on success, -ve errno on failure * * inode_newsize_ok must be called with i_mutex held. * @@ -130,6 +129,8 @@ EXPORT_SYMBOL(setattr_prepare); * returned. @inode must be a file (not directory), with appropriate * permissions to allow truncate (inode_newsize_ok does NOT check these * conditions). + * + * Return: 0 on success, -ve errno on failure */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { @@ -205,7 +206,7 @@ EXPORT_SYMBOL(setattr_copy); /** * notify_change - modify attributes of a filesytem object * @dentry: object affected - * @iattr: new attributes + * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated * * The caller must hold the i_mutex on the affected object. diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 125e8bbd22a2..8035d2a44561 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time, static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, struct file *file, unsigned int open_flag, - umode_t create_mode, int *opened) + umode_t create_mode) { return -EIO; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 816cc921cf36..efae2fb0930a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1751,7 +1751,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); if (regset->core_note_type && regset->get && - (!regset->active || regset->active(t->task, regset))) { + (!regset->active || regset->active(t->task, regset) > 0)) { int ret; size_t size = regset_size(t->task, regset); void *data = kmalloc(size, GFP_KERNEL); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 4b5fff31ef27..aa4a7a23ff99 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm) goto error; if (fmt->flags & MISC_FMT_OPEN_FILE) { - interp_file = filp_clone_open(fmt->interp_file); + interp_file = file_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..38b8ce05cbc7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, ret = bio_iov_iter_get_pages(&bio, iter); if (unlikely(ret)) - return ret; + goto out; ret = bio.bi_iter.bi_size; if (iov_iter_rw(iter) == READ) { @@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, put_page(bvec->bv_page); } - if (vecs != inline_vecs) - kfree(vecs); - if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); +out: + if (vecs != inline_vecs) + kfree(vecs); + bio_uninit(&bio); return ret; @@ -665,7 +666,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); blk_queue_exit(bdev->bd_queue); return result; } @@ -703,7 +705,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, return result; set_page_writeback(page); - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_WRITE); if (result) { end_page_writeback(page); } else { diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 15e1dfef56a5..3b66c957ea6f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -30,23 +30,22 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) name = XATTR_NAME_POSIX_ACL_DEFAULT; break; default: - BUG(); + return ERR_PTR(-EINVAL); } - size = btrfs_getxattr(inode, name, "", 0); + size = btrfs_getxattr(inode, name, NULL, 0); if (size > 0) { value = kzalloc(size, GFP_KERNEL); if (!value) return ERR_PTR(-ENOMEM); size = btrfs_getxattr(inode, name, value, size); } - if (size > 0) { + if (size > 0) acl = posix_acl_from_xattr(&init_user_ns, value, size); - } else if (size == -ERANGE || size == -ENODATA || size == 0) { + else if (size == -ENODATA || size == 0) acl = NULL; - } else { - acl = ERR_PTR(-EIO); - } + else + acl = ERR_PTR(size); kfree(value); return acl; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 0a8e2e29a66b..ae750b1574a2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -925,7 +925,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY); if (type == BTRFS_REF_TYPE_INVALID) - return -EINVAL; + return -EUCLEAN; offset = btrfs_extent_inline_ref_offset(leaf, iref); @@ -1793,7 +1793,7 @@ static int get_extent_inline_ref(unsigned long *ptr, *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, BTRFS_REF_TYPE_ANY); if (*out_type == BTRFS_REF_TYPE_INVALID) - return -EINVAL; + return -EUCLEAN; *ptr += btrfs_extent_inline_ref_size(*out_type); WARN_ON(*ptr > end); @@ -2225,7 +2225,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, fspath = init_data_container(total_bytes); if (IS_ERR(fspath)) - return (void *)fspath; + return ERR_CAST(fspath); ifp = kmalloc(sizeof(*ifp), GFP_KERNEL); if (!ifp) { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7e075343daa5..1343ac57b438 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -178,7 +178,7 @@ struct btrfs_inode { struct btrfs_delayed_node *delayed_node; /* File creation time. */ - struct timespec i_otime; + struct timespec64 i_otime; /* Hook into fs_info->delayed_iputs */ struct list_head delayed_iput; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index a3fdb4fe967d..833cf3c35b4d 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1539,7 +1539,12 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, } device = multi->stripes[0].dev; - block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev); + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) || + !device->bdev || !device->name) + block_ctx_out->dev = NULL; + else + block_ctx_out->dev = btrfsic_dev_state_lookup( + device->bdev->bd_dev); block_ctx_out->dev_bytenr = multi->stripes[0].physical; block_ctx_out->start = bytenr; block_ctx_out->len = len; @@ -1624,7 +1629,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, bio = btrfs_io_bio_alloc(num_pages - i); bio_set_dev(bio, block_ctx->dev->bdev); bio->bi_iter.bi_sector = dev_bytenr >> 9; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d3e447b45bf7..9bfa66592aa7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -5,7 +5,6 @@ #include <linux/kernel.h> #include <linux/bio.h> -#include <linux/buffer_head.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/pagemap.h> @@ -14,10 +13,7 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> -#include <linux/mpage.h> -#include <linux/swap.h> #include <linux/writeback.h> -#include <linux/bit_spinlock.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/log2.h> @@ -303,7 +299,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, struct bio *bio = NULL; struct compressed_bio *cb; unsigned long bytes_left; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; int pg_index = 0; struct page *page; u64 first_byte = disk_start; @@ -342,9 +337,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, page = compressed_pages[pg_index]; page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) - submit = io_tree->ops->merge_bio_hook(page, 0, - PAGE_SIZE, - bio, 0); + submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); page->mapping = NULL; if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < @@ -613,7 +606,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->len = bio->bi_iter.bi_size; comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); - bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); + comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; refcount_set(&cb->pending_bios, 1); @@ -626,9 +619,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - submit = tree->ops->merge_bio_hook(page, 0, - PAGE_SIZE, - comp_bio, 0); + submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, + comp_bio, 0); page->mapping = NULL; if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < @@ -660,7 +652,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); - bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); + comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4bc326df472e..d436fb4c002e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -888,11 +888,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, btrfs_root_last_snapshot(&root->root_item) || btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) return 1; -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && - btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) - return 1; -#endif + return 0; } @@ -3128,8 +3124,7 @@ again: * higher levels * */ -static void fixup_low_keys(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static void fixup_low_keys(struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; @@ -3181,7 +3176,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, btrfs_set_item_key(eb, &disk_key, slot); btrfs_mark_buffer_dirty(eb); if (slot == 0) - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); } /* @@ -3359,17 +3354,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, root_add_used(root, fs_info->nodesize); - memzero_extent_buffer(c, 0, sizeof(struct btrfs_header)); btrfs_set_header_nritems(c, 1); - btrfs_set_header_level(c, level); - btrfs_set_header_bytenr(c, c->start); - btrfs_set_header_generation(c, trans->transid); - btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(c, root->root_key.objectid); - - write_extent_buffer_fsid(c, fs_info->fsid); - write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid); - btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); @@ -3498,15 +3483,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, return PTR_ERR(split); root_add_used(root, fs_info->nodesize); - - memzero_extent_buffer(split, 0, sizeof(struct btrfs_header)); - btrfs_set_header_level(split, btrfs_header_level(c)); - btrfs_set_header_bytenr(split, split->start); - btrfs_set_header_generation(split, trans->transid); - btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(split, root->root_key.objectid); - write_extent_buffer_fsid(split, fs_info->fsid); - write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid); + ASSERT(btrfs_header_level(c) == level); ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); if (ret) { @@ -3945,7 +3922,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, clean_tree_block(fs_info, right); btrfs_item_key(right, &disk_key, 0); - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { @@ -4292,15 +4269,6 @@ again: root_add_used(root, fs_info->nodesize); - memzero_extent_buffer(right, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(right, right->start); - btrfs_set_header_generation(right, trans->transid); - btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(right, root->root_key.objectid); - btrfs_set_header_level(right, 0); - write_extent_buffer_fsid(right, fs_info->fsid); - write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid); - if (split == 0) { if (mid <= slot) { btrfs_set_header_nritems(right, 0); @@ -4320,7 +4288,7 @@ again: path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); } /* * We create a new leaf 'right' for the required ins_len and @@ -4642,7 +4610,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info, btrfs_set_disk_key_offset(&disk_key, offset + size_diff); btrfs_set_item_key(leaf, &disk_key, slot); if (slot == 0) - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); } item = btrfs_item_nr(slot); @@ -4744,7 +4712,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, if (path->slots[0] == 0) { btrfs_cpu_key_to_disk(&disk_key, cpu_key); - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); } btrfs_unlock_up_safe(path, 1); @@ -4886,7 +4854,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret; @@ -4919,7 +4886,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key disk_key; btrfs_node_key(parent, &disk_key, 0); - fixup_low_keys(fs_info, path, &disk_key, level + 1); + fixup_low_keys(path, &disk_key, level + 1); } btrfs_mark_buffer_dirty(parent); } @@ -5022,7 +4989,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_disk_key disk_key; btrfs_item_key(leaf, &disk_key, 0); - fixup_low_keys(fs_info, path, &disk_key, 1); + fixup_low_keys(path, &disk_key, 1); } /* delete the leaf if it is mostly empty */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 118346aceea9..318be7864072 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -55,8 +55,6 @@ struct btrfs_ordered_sum; #define BTRFS_OLDEST_GENERATION 0ULL -#define BTRFS_COMPAT_EXTENT_TREE_V0 - /* * the max metadata block size. This limit is somewhat artificial, * but the memmove costs go through the roof for larger blocks. @@ -86,6 +84,14 @@ static const int btrfs_csum_sizes[] = { 4 }; #define BTRFS_DIRTY_METADATA_THRESH SZ_32M +/* + * Use large batch size to reduce overhead of metadata updates. On the reader + * side, we only read it when we are close to ENOSPC and the read overhead is + * mostly related to the number of CPUs, so it is OK to use arbitrary large + * value here. + */ +#define BTRFS_TOTAL_BYTES_PINNED_BATCH SZ_128M + #define BTRFS_MAX_EXTENT_SIZE SZ_128M @@ -342,8 +348,8 @@ struct btrfs_path { sizeof(struct btrfs_item)) struct btrfs_dev_replace { u64 replace_state; /* see #define above */ - u64 time_started; /* seconds since 1-Jan-1970 */ - u64 time_stopped; /* seconds since 1-Jan-1970 */ + time64_t time_started; /* seconds since 1-Jan-1970 */ + time64_t time_stopped; /* seconds since 1-Jan-1970 */ atomic64_t num_write_errors; atomic64_t num_uncorrectable_read_errors; @@ -359,8 +365,6 @@ struct btrfs_dev_replace { struct btrfs_device *srcdev; struct btrfs_device *tgtdev; - pid_t lock_owner; - atomic_t nesting_level; struct mutex lock_finishing_cancel_unmount; rwlock_t lock; atomic_t read_locks; @@ -1213,7 +1217,6 @@ struct btrfs_root { u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; - char *name; /* the dirty list is only used by non-reference counted roots */ struct list_head dirty_list; @@ -2428,32 +2431,6 @@ static inline u32 btrfs_file_extent_inline_item_len( return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; } -/* this returns the number of file bytes represented by the inline item. - * If an item is compressed, this is the uncompressed size - */ -static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, - int slot, - const struct btrfs_file_extent_item *fi) -{ - struct btrfs_map_token token; - - btrfs_init_map_token(&token); - /* - * return the space used on disk if this item isn't - * compressed or encoded - */ - if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 && - btrfs_token_file_extent_encryption(eb, fi, &token) == 0 && - btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) { - return btrfs_file_extent_inline_item_len(eb, - btrfs_item_nr(slot)); - } - - /* otherwise use the ram bytes field */ - return btrfs_token_file_extent_ram_bytes(eb, fi, &token); -} - - /* btrfs_dev_stats_item */ static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, const struct btrfs_dev_stats_item *ptr, @@ -2676,7 +2653,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, u64 offset, u64 ram_bytes, struct btrfs_key *ins); int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, @@ -2716,15 +2692,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_fs_info *info); int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytes_used, - u64 type, u64 chunk_offset, u64 size); + u64 bytes_used, u64 type, u64 chunk_offset, + u64 size); void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 group_start, - struct extent_map *em); + u64 group_start, struct extent_map *em); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); @@ -2786,7 +2761,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, unsigned short type); void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv); int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes, enum btrfs_reserve_flush_enum flush); @@ -2803,8 +2777,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache); +int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache); void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); @@ -2812,8 +2785,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 *actual_bytes); -int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 type); +int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type); int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); @@ -2822,10 +2794,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, int btrfs_start_write_no_snapshotting(struct btrfs_root *root); void btrfs_end_write_no_snapshotting(struct btrfs_root *root); void btrfs_wait_for_snapshot_creation(struct btrfs_root *root); -void check_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, const u64 type); +void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type); u64 add_new_free_space(struct btrfs_block_group_cache *block_group, u64 start, u64 end); +void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, @@ -3011,16 +2983,14 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); /* root-item.c */ -int btrfs_add_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 root_id, u64 ref_id, u64 dirid, u64 sequence, - const char *name, int name_len); -int btrfs_del_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, - const char *name, int name_len); +int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 sequence, const char *name, + int name_len); +int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 *sequence, const char *name, + int name_len); int btrfs_del_root(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, const struct btrfs_key *key); + const struct btrfs_key *key); int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_root_item *item); @@ -3196,7 +3166,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -void btrfs_set_range_writeback(void *private_data, u64 start, u64 end); +void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end); vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); @@ -3452,7 +3422,7 @@ do { \ #ifdef CONFIG_BTRFS_ASSERT __cold -static inline void assfail(char *expr, char *file, int line) +static inline void assfail(const char *expr, const char *file, int line) { pr_err("assertion failed: %s, file: %s, line: %d\n", expr, file, line); @@ -3465,6 +3435,13 @@ static inline void assfail(char *expr, char *file, int line) #define ASSERT(expr) ((void)0) #endif +__cold +static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info) +{ + btrfs_err(fs_info, +"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel"); +} + __printf(5, 6) __cold void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index fe6caa7e698b..f51b509f2d9b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1222,7 +1222,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_trans_handle *trans; struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); struct btrfs_path *path; @@ -1418,7 +1418,6 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) /* Will return 0 or -ENOMEM */ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, const char *name, int name_len, struct btrfs_inode *dir, struct btrfs_disk_key *disk_key, u8 type, @@ -1458,11 +1457,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, */ BUG_ON(ret); - mutex_lock(&delayed_node->mutex); ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); if (unlikely(ret)) { - btrfs_err(fs_info, + btrfs_err(trans->fs_info, "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", name_len, name, delayed_node->root->objectid, delayed_node->inode_id, ret); @@ -1495,7 +1493,6 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info, } int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_inode *dir, u64 index) { struct btrfs_delayed_node *node; @@ -1511,7 +1508,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, item_key.type = BTRFS_DIR_INDEX_KEY; item_key.offset = index; - ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key); + ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, + &item_key); if (!ret) goto end; @@ -1533,7 +1531,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&node->mutex); ret = __btrfs_add_delayed_deletion_item(node, item); if (unlikely(ret)) { - btrfs_err(fs_info, + btrfs_err(trans->fs_info, "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", index, node->root->objectid, node->inode_id, ret); BUG(); @@ -1837,7 +1835,7 @@ release_node: int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_delayed_node *delayed_node; /* diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index ca7a97f3ab6b..33536cd681d4 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -86,14 +86,12 @@ static inline void btrfs_init_delayed_root( } int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, const char *name, int name_len, struct btrfs_inode *dir, struct btrfs_disk_key *disk_key, u8 type, u64 index); int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_inode *dir, u64 index); int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 03dec673d12a..62ff545ba1f7 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -709,13 +709,13 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, * to make sure the delayed ref is eventually processed before this * transaction commits. */ -int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; @@ -730,27 +730,33 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, if (!ref) return -ENOMEM; + head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); + if (!head_ref) { + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); + return -ENOMEM; + } + + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && + is_fstree(ref_root)) { + record = kmalloc(sizeof(*record), GFP_NOFS); + if (!record) { + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); + return -ENOMEM; + } + } + if (parent) ref_type = BTRFS_SHARED_BLOCK_REF_KEY; else ref_type = BTRFS_TREE_BLOCK_REF_KEY; + init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, ref_root, action, ref_type); ref->root = ref_root; ref->parent = parent; ref->level = level; - head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); - if (!head_ref) - goto free_ref; - - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - is_fstree(ref_root)) { - record = kmalloc(sizeof(*record), GFP_NOFS); - if (!record) - goto free_head_ref; - } - init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root, 0, action, false, is_system); head_ref->extent_op = extent_op; @@ -779,25 +785,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, btrfs_qgroup_trace_extent_post(fs_info, record); return 0; - -free_head_ref: - kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); -free_ref: - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - - return -ENOMEM; } /* * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. */ -int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, u64 reserved, int action, int *old_ref_mod, int *new_ref_mod) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ea1aecb6a50d..d9f2a4ebd5db 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -234,14 +234,12 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea kmem_cache_free(btrfs_delayed_ref_head_cachep, head); } -int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, struct btrfs_delayed_extent_op *extent_op, int *old_ref_mod, int *new_ref_mod); -int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, +int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, u64 reserved, int action, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e2ba0419297a..dec01970d8c5 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -6,14 +6,9 @@ #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include <linux/blkdev.h> -#include <linux/random.h> -#include <linux/iocontext.h> -#include <linux/capability.h> #include <linux/kthread.h> #include <linux/math64.h> -#include <asm/div64.h> #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -465,7 +460,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; - dev_replace->time_started = get_seconds(); + dev_replace->time_started = ktime_get_real_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; @@ -511,7 +506,7 @@ leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; btrfs_dev_replace_write_unlock(dev_replace); - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); + btrfs_destroy_dev_replace_tgtdev(tgt_device); return ret; } @@ -618,7 +613,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; dev_replace->tgtdev = NULL; dev_replace->srcdev = NULL; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = ktime_get_real_seconds(); dev_replace->item_needs_writeback = 1; /* replace old device with new one in mapping tree */ @@ -637,7 +632,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_rm_dev_replace_blocked(fs_info); if (tgt_device) - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); + btrfs_destroy_dev_replace_tgtdev(tgt_device); btrfs_rm_dev_replace_unblocked(fs_info); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); @@ -663,7 +658,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->commit_bytes_used = src_device->bytes_used; - btrfs_assign_next_active_device(fs_info, src_device, tgt_device); + btrfs_assign_next_active_device(src_device, tgt_device); list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); fs_info->fs_devices->rw_devices++; @@ -672,11 +667,17 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_rm_dev_replace_blocked(fs_info); - btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); + btrfs_rm_dev_replace_remove_srcdev(src_device); btrfs_rm_dev_replace_unblocked(fs_info); /* + * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will + * update on-disk dev stats value during commit transaction + */ + atomic_inc(&tgt_device->dev_stats_ccnt); + + /* * this is again a consistent state where no dev_replace procedure * is running, the target device is part of the filesystem, the * source device is not part of the filesystem anymore and its 1st @@ -807,7 +808,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) break; } dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = ktime_get_real_seconds(); dev_replace->item_needs_writeback = 1; btrfs_dev_replace_write_unlock(dev_replace); btrfs_scrub_cancel(fs_info); @@ -826,7 +827,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) btrfs_dev_name(tgt_device)); if (tgt_device) - btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); + btrfs_destroy_dev_replace_tgtdev(tgt_device); leave: mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); @@ -848,7 +849,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = ktime_get_real_seconds(); dev_replace->item_needs_writeback = 1; btrfs_info(fs_info, "suspending dev_replace for unmount"); break; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 39e9766d1cbd..a678b07fcf01 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -160,8 +160,8 @@ second_insert: } btrfs_release_path(path); - ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, - name_len, dir, &disk_key, type, index); + ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir, + &disk_key, type, index); out_free: btrfs_free_path(path); if (ret) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 205092dc9390..5124c15705ce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,8 +5,6 @@ #include <linux/fs.h> #include <linux/blkdev.h> -#include <linux/scatterlist.h> -#include <linux/swap.h> #include <linux/radix-tree.h> #include <linux/writeback.h> #include <linux/buffer_head.h> @@ -54,7 +52,6 @@ static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); -static void free_fs_root(struct btrfs_root *root); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info); @@ -108,12 +105,9 @@ void __cold btrfs_end_io_wq_exit(void) */ struct async_submit_bio { void *private_data; - struct btrfs_fs_info *fs_info; struct bio *bio; extent_submit_bio_start_t *submit_bio_start; - extent_submit_bio_done_t *submit_bio_done; int mirror_num; - unsigned long bio_flags; /* * bio_offset is optional, can be used if the pages in the bio * can't tell us where in the file the bio should go @@ -212,7 +206,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; int ret; @@ -615,8 +609,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - btrfs_err_rl(fs_info, "bad tree block start %llu %llu", - found_start, eb->start); + btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu", + eb->start, found_start); ret = -EIO; goto err; } @@ -628,8 +622,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { - btrfs_err(fs_info, "bad tree block level %d", - (int)btrfs_header_level(eb)); + btrfs_err(fs_info, "bad tree block level %d on %llu", + (int)btrfs_header_level(eb), eb->start); ret = -EIO; goto err; } @@ -779,7 +773,7 @@ static void run_one_async_done(struct btrfs_work *work) return; } - async->submit_bio_done(async->private_data, async->bio, async->mirror_num); + btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num); } static void run_one_async_free(struct btrfs_work *work) @@ -793,8 +787,7 @@ static void run_one_async_free(struct btrfs_work *work) blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, void *private_data, - extent_submit_bio_start_t *submit_bio_start, - extent_submit_bio_done_t *submit_bio_done) + extent_submit_bio_start_t *submit_bio_start) { struct async_submit_bio *async; @@ -803,16 +796,13 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, return BLK_STS_RESOURCE; async->private_data = private_data; - async->fs_info = fs_info; async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_start = submit_bio_start; - async->submit_bio_done = submit_bio_done; btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, run_one_async_done, run_one_async_free); - async->bio_flags = bio_flags; async->bio_offset = bio_offset; async->status = 0; @@ -851,24 +841,6 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio, return btree_csum_one_bio(bio); } -static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio, - int mirror_num) -{ - struct inode *inode = private_data; - blk_status_t ret; - - /* - * when we're called for a write, we're already in the async - * submission context. Just jump into btrfs_map_bio - */ - ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); - if (ret) { - bio->bi_status = ret; - bio_endio(bio); - } - return ret; -} - static int check_async_write(struct btrfs_inode *bi) { if (atomic_read(&bi->sync_writers)) @@ -911,8 +883,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, bio_offset, private_data, - btree_submit_bio_start, - btree_submit_bio_done); + btree_submit_bio_start); } if (ret) @@ -961,8 +932,9 @@ static int btree_writepages(struct address_space *mapping, fs_info = BTRFS_I(mapping->host)->root->fs_info; /* this is a bit racy, but that's ok */ - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + fs_info->dirty_metadata_batch); if (ret < 0) return 0; } @@ -1181,7 +1153,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->highest_objectid = 0; root->nr_delalloc_inodes = 0; root->nr_ordered_extents = 0; - root->name = NULL; root->inode_tree = RB_ROOT; INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); root->block_rsv = NULL; @@ -1292,15 +1263,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, goto fail; } - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(leaf, objectid); root->node = leaf; - - write_extent_buffer_fsid(leaf, fs_info->fsid); - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); btrfs_mark_buffer_dirty(leaf); root->commit_root = btrfs_root_node(root); @@ -1374,14 +1337,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, return ERR_CAST(leaf); } - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); root->node = leaf; - write_extent_buffer_fsid(root->node, fs_info->fsid); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1546,7 +1503,7 @@ int btrfs_init_fs_root(struct btrfs_root *root) return 0; fail: - /* the caller is responsible to call free_fs_root */ + /* The caller is responsible to call btrfs_free_fs_root */ return ret; } @@ -1651,14 +1608,14 @@ again: ret = btrfs_insert_fs_root(fs_info, root); if (ret) { if (ret == -EEXIST) { - free_fs_root(root); + btrfs_free_fs_root(root); goto again; } goto fail; } return root; fail: - free_fs_root(root); + btrfs_free_fs_root(root); return ERR_PTR(ret); } @@ -1803,7 +1760,7 @@ static int transaction_kthread(void *arg) struct btrfs_trans_handle *trans; struct btrfs_transaction *cur; u64 transid; - unsigned long now; + time64_t now; unsigned long delay; bool cannot_commit; @@ -1819,7 +1776,7 @@ static int transaction_kthread(void *arg) goto sleep; } - now = get_seconds(); + now = ktime_get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && (now < cur->start_time || @@ -2196,8 +2153,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) { - fs_info->dev_replace.lock_owner = 0; - atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); rwlock_init(&fs_info->dev_replace.lock); atomic_set(&fs_info->dev_replace.read_locks, 0); @@ -3075,6 +3030,13 @@ retry_root_backup: fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_verify_dev_extents(fs_info); + if (ret) { + btrfs_err(fs_info, + "failed to verify dev extents against chunks: %d", + ret); + goto fail_block_groups; + } ret = btrfs_recover_balance(fs_info); if (ret) { btrfs_err(fs_info, "failed to recover balance: %d", ret); @@ -3875,10 +3837,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, __btrfs_remove_free_space_cache(root->free_ino_pinned); if (root->free_ino_ctl) __btrfs_remove_free_space_cache(root->free_ino_ctl); - free_fs_root(root); + btrfs_free_fs_root(root); } -static void free_fs_root(struct btrfs_root *root) +void btrfs_free_fs_root(struct btrfs_root *root) { iput(root->ino_cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); @@ -3890,15 +3852,9 @@ static void free_fs_root(struct btrfs_root *root) free_extent_buffer(root->commit_root); kfree(root->free_ino_ctl); kfree(root->free_ino_pinned); - kfree(root->name); btrfs_put_fs_root(root); } -void btrfs_free_fs_root(struct btrfs_root *root) -{ - free_fs_root(root); -} - int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { u64 root_objectid = 0; @@ -4104,10 +4060,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* * This is a fast path so only do this check if we have sanity tests - * enabled. Normal people shouldn't be marking dummy buffers as dirty + * enabled. Normal people shouldn't be using umapped buffers as dirty * outside of the sanity tests. */ - if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) + if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif root = BTRFS_I(buf->pages[0]->mapping->host)->root; @@ -4150,8 +4106,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info, if (flush_delayed) btrfs_balance_delayed_items(fs_info); - ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, - BTRFS_DIRTY_METADATA_THRESH); + ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, + BTRFS_DIRTY_METADATA_THRESH, + fs_info->dirty_metadata_batch); if (ret > 0) { balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); } @@ -4563,21 +4520,11 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) return 0; } -static struct btrfs_fs_info *btree_fs_info(void *private_data) -{ - struct inode *inode = private_data; - return btrfs_sb(inode->i_sb); -} - static const struct extent_io_ops btree_extent_io_ops = { /* mandatory callbacks */ .submit_bio_hook = btree_submit_bio_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, - /* note we're sharing with inode.c for the merge bio hook */ - .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_failed_hook = btree_io_failed_hook, - .set_range_writeback = btrfs_set_range_writeback, - .tree_fs_info = btree_fs_info, /* optional callbacks */ }; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1a3d277b027b..4cccba22640f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -120,8 +120,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, void *private_data, - extent_submit_bio_start_t *submit_bio_start, - extent_submit_bio_done_t *submit_bio_done); + extent_submit_bio_start_t *submit_bio_start); +blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, + int mirror_num); int btrfs_write_tree_block(struct extent_buffer *buf); void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3d9fe58c0080..de6f75f5547b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -52,24 +52,21 @@ enum { }; static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *node, u64 parent, - u64 root_objectid, u64 owner_objectid, - u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extra_op); + struct btrfs_delayed_ref_node *node, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop, + struct btrfs_delayed_extent_op *extra_op); static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, struct extent_buffer *leaf, struct btrfs_extent_item *ei); static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 parent, u64 root_objectid, u64 flags, u64 owner, u64 offset, struct btrfs_key *ins, int ref_mod); static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op); -static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 flags, +static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, int force); static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); @@ -220,9 +217,9 @@ static int add_excluded_extent(struct btrfs_fs_info *fs_info, return 0; } -static void free_excluded_extents(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache) +static void free_excluded_extents(struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; u64 start, end; start = cache->key.objectid; @@ -234,9 +231,9 @@ static void free_excluded_extents(struct btrfs_fs_info *fs_info, start, end, EXTENT_UPTODATE); } -static int exclude_super_stripes(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache) +static int exclude_super_stripes(struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; u64 bytenr; u64 *logical; int stripe_len; @@ -558,7 +555,7 @@ static noinline void caching_thread(struct btrfs_work *work) caching_ctl->progress = (u64)-1; up_read(&fs_info->commit_root_sem); - free_excluded_extents(fs_info, block_group); + free_excluded_extents(block_group); mutex_unlock(&caching_ctl->mutex); wake_up(&caching_ctl->wait); @@ -666,7 +663,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, wake_up(&caching_ctl->wait); if (ret == 1) { put_caching_control(caching_ctl); - free_excluded_extents(fs_info, cache); + free_excluded_extents(cache); return 0; } } else { @@ -758,7 +755,8 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes, space_info = __find_space_info(fs_info, flags); ASSERT(space_info); - percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); + percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH); } /* @@ -870,18 +868,16 @@ search_again: num_refs = btrfs_extent_refs(leaf, ei); extent_flags = btrfs_extent_flags(leaf, ei); } else { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - struct btrfs_extent_item_v0 *ei0; - BUG_ON(item_size != sizeof(*ei0)); - ei0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item_v0); - num_refs = btrfs_extent_refs_v0(leaf, ei0); - /* FIXME: this isn't correct for data */ - extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; -#else - BUG(); -#endif + ret = -EINVAL; + btrfs_print_v0_err(fs_info); + if (trans) + btrfs_abort_transaction(trans, ret); + else + btrfs_handle_fs_error(fs_info, ret, NULL); + + goto out_free; } + BUG_ON(num_refs == 0); } else { num_refs = 0; @@ -1039,89 +1035,6 @@ out_free: * tree block info structure. */ -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -static int convert_extent_item_v0(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_path *path, - u64 owner, u32 extra_size) -{ - struct btrfs_root *root = fs_info->extent_root; - struct btrfs_extent_item *item; - struct btrfs_extent_item_v0 *ei0; - struct btrfs_extent_ref_v0 *ref0; - struct btrfs_tree_block_info *bi; - struct extent_buffer *leaf; - struct btrfs_key key; - struct btrfs_key found_key; - u32 new_size = sizeof(*item); - u64 refs; - int ret; - - leaf = path->nodes[0]; - BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - ei0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item_v0); - refs = btrfs_extent_refs_v0(leaf, ei0); - - if (owner == (u64)-1) { - while (1) { - if (path->slots[0] >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - return ret; - BUG_ON(ret > 0); /* Corruption */ - leaf = path->nodes[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, - path->slots[0]); - BUG_ON(key.objectid != found_key.objectid); - if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { - path->slots[0]++; - continue; - } - ref0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref_v0); - owner = btrfs_ref_objectid_v0(leaf, ref0); - break; - } - } - btrfs_release_path(path); - - if (owner < BTRFS_FIRST_FREE_OBJECTID) - new_size += sizeof(*bi); - - new_size -= sizeof(*ei0); - ret = btrfs_search_slot(trans, root, &key, path, - new_size + extra_size, 1); - if (ret < 0) - return ret; - BUG_ON(ret); /* Corruption */ - - btrfs_extend_item(fs_info, path, new_size); - - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - btrfs_set_extent_refs(leaf, item, refs); - /* FIXME: get real generation */ - btrfs_set_extent_generation(leaf, item, 0); - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - btrfs_set_extent_flags(leaf, item, - BTRFS_EXTENT_FLAG_TREE_BLOCK | - BTRFS_BLOCK_FLAG_FULL_BACKREF); - bi = (struct btrfs_tree_block_info *)(item + 1); - /* FIXME: get first key of the block */ - memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi)); - btrfs_set_tree_block_level(leaf, bi, (int)owner); - } else { - btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); - } - btrfs_mark_buffer_dirty(leaf); - return 0; -} -#endif - /* * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, * is_data == BTRFS_REF_TYPE_DATA, data type is requried, @@ -1216,13 +1129,12 @@ static int match_extent_data_ref(struct extent_buffer *leaf, } static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset) { - struct btrfs_root *root = fs_info->extent_root; + struct btrfs_root *root = trans->fs_info->extent_root; struct btrfs_key key; struct btrfs_extent_data_ref *ref; struct extent_buffer *leaf; @@ -1251,17 +1163,6 @@ again: if (parent) { if (!ret) return 0; -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - key.type = BTRFS_EXTENT_REF_V0_KEY; - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto fail; - } - if (!ret) - return 0; -#endif goto fail; } @@ -1304,13 +1205,12 @@ fail: } static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add) { - struct btrfs_root *root = fs_info->extent_root; + struct btrfs_root *root = trans->fs_info->extent_root; struct btrfs_key key; struct extent_buffer *leaf; u32 size; @@ -1384,7 +1284,6 @@ fail: } static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, int refs_to_drop, int *last_ref) { @@ -1406,13 +1305,10 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, ref2 = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_shared_data_ref); num_refs = btrfs_shared_data_ref_count(leaf, ref2); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { - struct btrfs_extent_ref_v0 *ref0; - ref0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref_v0); - num_refs = btrfs_ref_count_v0(leaf, ref0); -#endif + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { + btrfs_print_v0_err(trans->fs_info); + btrfs_abort_transaction(trans, -EINVAL); + return -EINVAL; } else { BUG(); } @@ -1421,21 +1317,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, num_refs -= refs_to_drop; if (num_refs == 0) { - ret = btrfs_del_item(trans, fs_info->extent_root, path); + ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); *last_ref = 1; } else { if (key.type == BTRFS_EXTENT_DATA_REF_KEY) btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); else if (key.type == BTRFS_SHARED_DATA_REF_KEY) btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - else { - struct btrfs_extent_ref_v0 *ref0; - ref0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref_v0); - btrfs_set_ref_count_v0(leaf, ref0, num_refs); - } -#endif btrfs_mark_buffer_dirty(leaf); } return ret; @@ -1453,6 +1341,8 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path, leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); if (iref) { /* * If type is invalid, we should have bailed out earlier than @@ -1475,13 +1365,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path, ref2 = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_shared_data_ref); num_refs = btrfs_shared_data_ref_count(leaf, ref2); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { - struct btrfs_extent_ref_v0 *ref0; - ref0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref_v0); - num_refs = btrfs_ref_count_v0(leaf, ref0); -#endif } else { WARN_ON(1); } @@ -1489,12 +1372,11 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path, } static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid) { - struct btrfs_root *root = fs_info->extent_root; + struct btrfs_root *root = trans->fs_info->extent_root; struct btrfs_key key; int ret; @@ -1510,20 +1392,10 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) ret = -ENOENT; -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (ret == -ENOENT && parent) { - btrfs_release_path(path); - key.type = BTRFS_EXTENT_REF_V0_KEY; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret > 0) - ret = -ENOENT; - } -#endif return ret; } static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid) @@ -1540,7 +1412,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, key.offset = root_objectid; } - ret = btrfs_insert_empty_item(trans, fs_info->extent_root, + ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root, path, &key, 0); btrfs_release_path(path); return ret; @@ -1599,13 +1471,13 @@ static int find_next_key(struct btrfs_path *path, int level, */ static noinline_for_stack int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref **ref_ret, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int insert) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->extent_root; struct btrfs_key key; struct extent_buffer *leaf; @@ -1635,8 +1507,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, extra_size = -1; /* - * Owner is our parent level, so we can just add one to get the level - * for the block we are interested in. + * Owner is our level, so we can just add one to get the level for the + * block we are interested in. */ if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) { key.type = BTRFS_METADATA_ITEM_KEY; @@ -1684,23 +1556,12 @@ again: leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (item_size < sizeof(*ei)) { - if (!insert) { - err = -ENOENT; - goto out; - } - ret = convert_extent_item_v0(trans, fs_info, path, owner, - extra_size); - if (ret < 0) { - err = ret; - goto out; - } - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if (unlikely(item_size < sizeof(*ei))) { + err = -EINVAL; + btrfs_print_v0_err(fs_info); + btrfs_abort_transaction(trans, err); + goto out; } -#endif - BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); @@ -1727,7 +1588,7 @@ again: iref = (struct btrfs_extent_inline_ref *)ptr; type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); if (type == BTRFS_REF_TYPE_INVALID) { - err = -EINVAL; + err = -EUCLEAN; goto out; } @@ -1863,7 +1724,6 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, } static int lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref **ref_ret, u64 bytenr, u64 num_bytes, u64 parent, @@ -1871,9 +1731,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, { int ret; - ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret, - bytenr, num_bytes, parent, - root_objectid, owner, offset, 0); + ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr, + num_bytes, parent, root_objectid, + owner, offset, 0); if (ret != -ENOENT) return ret; @@ -1881,12 +1741,11 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, *ref_ret = NULL; if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = lookup_tree_block_ref(trans, fs_info, path, bytenr, - parent, root_objectid); + ret = lookup_tree_block_ref(trans, path, bytenr, parent, + root_objectid); } else { - ret = lookup_extent_data_ref(trans, fs_info, path, bytenr, - parent, root_objectid, owner, - offset); + ret = lookup_extent_data_ref(trans, path, bytenr, parent, + root_objectid, owner, offset); } return ret; } @@ -1895,14 +1754,14 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, * helper to update/remove inline back ref */ static noinline_for_stack -void update_inline_extent_backref(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +void update_inline_extent_backref(struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_mod, struct btrfs_delayed_extent_op *extent_op, int *last_ref) { - struct extent_buffer *leaf; + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_extent_item *ei; struct btrfs_extent_data_ref *dref = NULL; struct btrfs_shared_data_ref *sref = NULL; @@ -1913,7 +1772,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info, int type; u64 refs; - leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); @@ -1965,7 +1823,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info, static noinline_for_stack int insert_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, @@ -1975,15 +1832,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref; int ret; - ret = lookup_inline_extent_backref(trans, fs_info, path, &iref, - bytenr, num_bytes, parent, - root_objectid, owner, offset, 1); + ret = lookup_inline_extent_backref(trans, path, &iref, bytenr, + num_bytes, parent, root_objectid, + owner, offset, 1); if (ret == 0) { BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); - update_inline_extent_backref(fs_info, path, iref, - refs_to_add, extent_op, NULL); + update_inline_extent_backref(path, iref, refs_to_add, + extent_op, NULL); } else if (ret == -ENOENT) { - setup_inline_extent_backref(fs_info, path, iref, parent, + setup_inline_extent_backref(trans->fs_info, path, iref, parent, root_objectid, owner, offset, refs_to_add, extent_op); ret = 0; @@ -1992,7 +1849,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, } static int insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add) @@ -2000,18 +1856,17 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans, int ret; if (owner < BTRFS_FIRST_FREE_OBJECTID) { BUG_ON(refs_to_add != 1); - ret = insert_tree_block_ref(trans, fs_info, path, bytenr, - parent, root_objectid); + ret = insert_tree_block_ref(trans, path, bytenr, parent, + root_objectid); } else { - ret = insert_extent_data_ref(trans, fs_info, path, bytenr, - parent, root_objectid, - owner, offset, refs_to_add); + ret = insert_extent_data_ref(trans, path, bytenr, parent, + root_objectid, owner, offset, + refs_to_add); } return ret; } static int remove_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_drop, int is_data, int *last_ref) @@ -2020,14 +1875,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, BUG_ON(!is_data && refs_to_drop != 1); if (iref) { - update_inline_extent_backref(fs_info, path, iref, - -refs_to_drop, NULL, last_ref); + update_inline_extent_backref(path, iref, -refs_to_drop, NULL, + last_ref); } else if (is_data) { - ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop, + ret = remove_extent_data_ref(trans, path, refs_to_drop, last_ref); } else { *last_ref = 1; - ret = btrfs_del_item(trans, fs_info->extent_root, path); + ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); } return ret; } @@ -2185,13 +2040,13 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, owner, offset, BTRFS_ADD_DELAYED_REF); if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, + ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, parent, root_objectid, (int)owner, BTRFS_ADD_DELAYED_REF, NULL, &old_ref_mod, &new_ref_mod); } else { - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, + ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, parent, root_objectid, owner, offset, 0, BTRFS_ADD_DELAYED_REF, @@ -2207,8 +2062,41 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return ret; } +/* + * __btrfs_inc_extent_ref - insert backreference for a given extent + * + * @trans: Handle of transaction + * + * @node: The delayed ref node used to get the bytenr/length for + * extent whose references are incremented. + * + * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/ + * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical + * bytenr of the parent block. Since new extents are always + * created with indirect references, this will only be the case + * when relocating a shared extent. In that case, root_objectid + * will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must + * be 0 + * + * @root_objectid: The id of the root where this modification has originated, + * this can be either one of the well-known metadata trees or + * the subvolume id which references this extent. + * + * @owner: For data extents it is the inode number of the owning file. + * For metadata extents this parameter holds the level in the + * tree of the extent. + * + * @offset: For metadata extents the offset is ignored and is currently + * always passed as 0. For data extents it is the fileoffset + * this extent belongs to. + * + * @refs_to_add Number of references to add + * + * @extent_op Pointer to a structure, holding information necessary when + * updating a tree block's flags + * + */ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add, @@ -2230,10 +2118,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path->reada = READA_FORWARD; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ - ret = insert_inline_extent_backref(trans, fs_info, path, bytenr, - num_bytes, parent, root_objectid, - owner, offset, - refs_to_add, extent_op); + ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes, + parent, root_objectid, owner, + offset, refs_to_add, extent_op); if ((ret < 0 && ret != -EAGAIN) || !ret) goto out; @@ -2256,8 +2143,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path->reada = READA_FORWARD; path->leave_spinning = 1; /* now insert the actual backref */ - ret = insert_extent_backref(trans, fs_info, path, bytenr, parent, - root_objectid, owner, offset, refs_to_add); + ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid, + owner, offset, refs_to_add); if (ret) btrfs_abort_transaction(trans, ret); out: @@ -2266,7 +2153,6 @@ out: } static int run_delayed_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, int insert_reserved) @@ -2283,7 +2169,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); - trace_run_delayed_data_ref(fs_info, node, ref, node->action); + trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action); if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; @@ -2292,17 +2178,16 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { if (extent_op) flags |= extent_op->flags_to_set; - ret = alloc_reserved_file_extent(trans, fs_info, - parent, ref_root, flags, - ref->objectid, ref->offset, - &ins, node->ref_mod); + ret = alloc_reserved_file_extent(trans, parent, ref_root, + flags, ref->objectid, + ref->offset, &ins, + node->ref_mod); } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent, - ref_root, ref->objectid, - ref->offset, node->ref_mod, - extent_op); + ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, + ref->objectid, ref->offset, + node->ref_mod, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { - ret = __btrfs_free_extent(trans, fs_info, node, parent, + ret = __btrfs_free_extent(trans, node, parent, ref_root, ref->objectid, ref->offset, node->ref_mod, extent_op); @@ -2331,10 +2216,10 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, } static int run_delayed_extent_op(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head, struct btrfs_delayed_extent_op *extent_op) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_key key; struct btrfs_path *path; struct btrfs_extent_item *ei; @@ -2400,18 +2285,14 @@ again: leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (item_size < sizeof(*ei)) { - ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0); - if (ret < 0) { - err = ret; - goto out; - } - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + if (unlikely(item_size < sizeof(*ei))) { + err = -EINVAL; + btrfs_print_v0_err(fs_info); + btrfs_abort_transaction(trans, err); + goto out; } -#endif - BUG_ON(item_size < sizeof(*ei)); + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); __run_delayed_extent_op(extent_op, leaf, ei); @@ -2422,7 +2303,6 @@ out: } static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, int insert_reserved) @@ -2433,14 +2313,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, u64 ref_root = 0; ref = btrfs_delayed_node_to_tree_ref(node); - trace_run_delayed_tree_ref(fs_info, node, ref, node->action); + trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action); if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; ref_root = ref->root; if (node->ref_mod != 1) { - btrfs_err(fs_info, + btrfs_err(trans->fs_info, "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", node->bytenr, node->ref_mod, node->action, ref_root, parent); @@ -2450,13 +2330,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, node, extent_op); } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, fs_info, node, - parent, ref_root, - ref->level, 0, 1, - extent_op); + ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, + ref->level, 0, 1, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { - ret = __btrfs_free_extent(trans, fs_info, node, - parent, ref_root, + ret = __btrfs_free_extent(trans, node, parent, ref_root, ref->level, 0, 1, extent_op); } else { BUG(); @@ -2466,7 +2343,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, /* helper function to actually process a single delayed ref entry */ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, int insert_reserved) @@ -2475,18 +2351,18 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, if (trans->aborted) { if (insert_reserved) - btrfs_pin_extent(fs_info, node->bytenr, + btrfs_pin_extent(trans->fs_info, node->bytenr, node->num_bytes, 1); return 0; } if (node->type == BTRFS_TREE_BLOCK_REF_KEY || node->type == BTRFS_SHARED_BLOCK_REF_KEY) - ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, + ret = run_delayed_tree_ref(trans, node, extent_op, insert_reserved); else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || node->type == BTRFS_SHARED_DATA_REF_KEY) - ret = run_delayed_data_ref(trans, fs_info, node, extent_op, + ret = run_delayed_data_ref(trans, node, extent_op, insert_reserved); else BUG(); @@ -2528,7 +2404,6 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref } static int cleanup_extent_op(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head) { struct btrfs_delayed_extent_op *extent_op = head->extent_op; @@ -2542,21 +2417,22 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans, return 0; } spin_unlock(&head->lock); - ret = run_delayed_extent_op(trans, fs_info, head, extent_op); + ret = run_delayed_extent_op(trans, head, extent_op); btrfs_free_delayed_extent_op(extent_op); return ret ? ret : 1; } static int cleanup_ref_head(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head) { + + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; int ret; delayed_refs = &trans->transaction->delayed_refs; - ret = cleanup_extent_op(trans, fs_info, head); + ret = cleanup_extent_op(trans, head); if (ret < 0) { unselect_delayed_ref_head(delayed_refs, head); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); @@ -2598,8 +2474,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, flags = BTRFS_BLOCK_GROUP_METADATA; space_info = __find_space_info(fs_info, flags); ASSERT(space_info); - percpu_counter_add(&space_info->total_bytes_pinned, - -head->num_bytes); + percpu_counter_add_batch(&space_info->total_bytes_pinned, + -head->num_bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH); if (head->is_data) { spin_lock(&delayed_refs->lock); @@ -2705,7 +2582,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, * up and move on to the next ref_head. */ if (!ref) { - ret = cleanup_ref_head(trans, fs_info, locked_ref); + ret = cleanup_ref_head(trans, locked_ref); if (ret > 0 ) { /* We dropped our lock, we need to loop. */ ret = 0; @@ -2752,7 +2629,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, locked_ref->extent_op = NULL; spin_unlock(&locked_ref->lock); - ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, + ret = run_one_delayed_ref(trans, ref, extent_op, must_insert_reserved); btrfs_free_delayed_extent_op(extent_op); @@ -3227,12 +3104,6 @@ static noinline int check_committed_ref(struct btrfs_root *root, ret = 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (item_size < sizeof(*ei)) { - WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); - goto out; - } -#endif ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); if (item_size != sizeof(*ei) + @@ -4060,11 +3931,7 @@ static void update_space_info(struct btrfs_fs_info *info, u64 flags, struct btrfs_space_info *found; int factor; - if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10)) - factor = 2; - else - factor = 1; + factor = btrfs_bg_type_to_factor(flags); found = __find_space_info(info, flags); ASSERT(found); @@ -4289,7 +4156,7 @@ again: if (IS_ERR(trans)) return PTR_ERR(trans); - ret = do_chunk_alloc(trans, fs_info, alloc_target, + ret = do_chunk_alloc(trans, alloc_target, CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans); if (ret < 0) { @@ -4309,9 +4176,10 @@ again: * allocation, and no removed chunk in current transaction, * don't bother committing the transaction. */ - have_pinned_space = percpu_counter_compare( + have_pinned_space = __percpu_counter_compare( &data_sinfo->total_bytes_pinned, - used + bytes - data_sinfo->total_bytes); + used + bytes - data_sinfo->total_bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH); spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ @@ -4358,7 +4226,7 @@ commit_trans: data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); - return ret; + return 0; } int btrfs_check_data_free_space(struct inode *inode, @@ -4511,9 +4379,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) * for allocating a chunk, otherwise if it's false, reserve space necessary for * removing a chunk. */ -void check_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 type) +void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; u64 thresh; @@ -4552,7 +4420,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). */ - ret = btrfs_alloc_chunk(trans, fs_info, flags); + ret = btrfs_alloc_chunk(trans, flags); } if (!ret) { @@ -4573,11 +4441,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. */ -static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 flags, int force) +static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, + int force) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; - int wait_for_alloc = 0; + bool wait_for_alloc = false; + bool should_alloc = false; int ret = 0; /* Don't re-enter if we're already allocating a chunk */ @@ -4587,45 +4457,44 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info = __find_space_info(fs_info, flags); ASSERT(space_info); -again: - spin_lock(&space_info->lock); - if (force < space_info->force_alloc) - force = space_info->force_alloc; - if (space_info->full) { - if (should_alloc_chunk(fs_info, space_info, force)) - ret = -ENOSPC; - else - ret = 0; - spin_unlock(&space_info->lock); - return ret; - } - - if (!should_alloc_chunk(fs_info, space_info, force)) { - spin_unlock(&space_info->lock); - return 0; - } else if (space_info->chunk_alloc) { - wait_for_alloc = 1; - } else { - space_info->chunk_alloc = 1; - } - - spin_unlock(&space_info->lock); - - mutex_lock(&fs_info->chunk_mutex); + do { + spin_lock(&space_info->lock); + if (force < space_info->force_alloc) + force = space_info->force_alloc; + should_alloc = should_alloc_chunk(fs_info, space_info, force); + if (space_info->full) { + /* No more free physical space */ + if (should_alloc) + ret = -ENOSPC; + else + ret = 0; + spin_unlock(&space_info->lock); + return ret; + } else if (!should_alloc) { + spin_unlock(&space_info->lock); + return 0; + } else if (space_info->chunk_alloc) { + /* + * Someone is already allocating, so we need to block + * until this someone is finished and then loop to + * recheck if we should continue with our allocation + * attempt. + */ + wait_for_alloc = true; + spin_unlock(&space_info->lock); + mutex_lock(&fs_info->chunk_mutex); + mutex_unlock(&fs_info->chunk_mutex); + } else { + /* Proceed with allocation */ + space_info->chunk_alloc = 1; + wait_for_alloc = false; + spin_unlock(&space_info->lock); + } - /* - * The chunk_mutex is held throughout the entirety of a chunk - * allocation, so once we've acquired the chunk_mutex we know that the - * other guy is done and we need to recheck and see if we should - * allocate. - */ - if (wait_for_alloc) { - mutex_unlock(&fs_info->chunk_mutex); - wait_for_alloc = 0; cond_resched(); - goto again; - } + } while (wait_for_alloc); + mutex_lock(&fs_info->chunk_mutex); trans->allocating_chunk = true; /* @@ -4651,9 +4520,9 @@ again: * Check if we have enough space in SYSTEM chunk because we may need * to update devices. */ - check_system_chunk(trans, fs_info, flags); + check_system_chunk(trans, flags); - ret = btrfs_alloc_chunk(trans, fs_info, flags); + ret = btrfs_alloc_chunk(trans, flags); trans->allocating_chunk = false; spin_lock(&space_info->lock); @@ -4703,6 +4572,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info, u64 space_size; u64 avail; u64 used; + int factor; /* Don't overcommit when in mixed mode. */ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) @@ -4737,10 +4607,8 @@ static int can_overcommit(struct btrfs_fs_info *fs_info, * doesn't include the parity drive, so we don't have to * change the math */ - if (profile & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10)) - avail >>= 1; + factor = btrfs_bg_type_to_factor(profile); + avail = div_u64(avail, factor); /* * If we aren't flushing all things, let us overcommit up to @@ -4912,8 +4780,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, return 0; /* See if there is enough pinned space to make this reservation */ - if (percpu_counter_compare(&space_info->total_bytes_pinned, - bytes) >= 0) + if (__percpu_counter_compare(&space_info->total_bytes_pinned, + bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) goto commit; /* @@ -4930,8 +4799,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, bytes -= delayed_rsv->size; spin_unlock(&delayed_rsv->lock); - if (percpu_counter_compare(&space_info->total_bytes_pinned, - bytes) < 0) { + if (__percpu_counter_compare(&space_info->total_bytes_pinned, + bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) { return -ENOSPC; } @@ -4984,7 +4854,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, ret = PTR_ERR(trans); break; } - ret = do_chunk_alloc(trans, fs_info, + ret = do_chunk_alloc(trans, btrfs_metadata_alloc_profile(fs_info), CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans); @@ -5659,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, kfree(rsv); } -void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv) -{ - kfree(rsv); -} - int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes, enum btrfs_reserve_flush_enum flush) @@ -6019,7 +5884,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; @@ -6092,7 +5957,7 @@ out_fail: void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, bool qgroup_free) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); @@ -6121,7 +5986,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, bool qgroup_free) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned num_extents; spin_lock(&inode->lock); @@ -6219,12 +6084,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(info, bytenr); if (!cache) return -ENOENT; - if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10)) - factor = 2; - else - factor = 1; + factor = btrfs_bg_type_to_factor(cache->flags); + /* * If this block group has free space cache written out, we * need to make sure to load it if we are removing space. This @@ -6268,8 +6129,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, trace_btrfs_space_reservation(info, "pinned", cache->space_info->flags, num_bytes, 1); - percpu_counter_add(&cache->space_info->total_bytes_pinned, - num_bytes); + percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, + num_bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH); set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); @@ -6279,7 +6141,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (list_empty(&cache->dirty_list)) { list_add_tail(&cache->dirty_list, &trans->transaction->dirty_bgs); - trans->transaction->num_dirty_bgs++; + trans->transaction->num_dirty_bgs++; btrfs_get_block_group(cache); } spin_unlock(&trans->transaction->dirty_bgs_lock); @@ -6290,16 +6152,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, * dirty list to avoid races between cleaner kthread and space * cache writeout. */ - if (!alloc && old_val == 0) { - spin_lock(&info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - trace_btrfs_add_unused_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); - } + if (!alloc && old_val == 0) + btrfs_mark_bg_unused(cache); btrfs_put_block_group(cache); total -= num_bytes; @@ -6347,7 +6201,8 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info, trace_btrfs_space_reservation(fs_info, "pinned", cache->space_info->flags, num_bytes, 1); - percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes); + percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, + num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); set_extent_dirty(fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); return 0; @@ -6711,7 +6566,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, trace_btrfs_space_reservation(fs_info, "pinned", space_info->flags, len, 0); space_info->max_extent_size = 0; - percpu_counter_add(&space_info->total_bytes_pinned, -len); + percpu_counter_add_batch(&space_info->total_bytes_pinned, + -len, BTRFS_TOTAL_BYTES_PINNED_BATCH); if (cache->ro) { space_info->bytes_readonly += len; readonly = true; @@ -6815,12 +6671,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) } static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, - struct btrfs_delayed_ref_node *node, u64 parent, - u64 root_objectid, u64 owner_objectid, - u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extent_op) + struct btrfs_delayed_ref_node *node, u64 parent, + u64 root_objectid, u64 owner_objectid, + u64 owner_offset, int refs_to_drop, + struct btrfs_delayed_extent_op *extent_op) { + struct btrfs_fs_info *info = trans->fs_info; struct btrfs_key key; struct btrfs_path *path; struct btrfs_root *extent_root = info->extent_root; @@ -6852,9 +6708,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (is_data) skinny_metadata = false; - ret = lookup_extent_backref(trans, info, path, &iref, - bytenr, num_bytes, parent, - root_objectid, owner_objectid, + ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes, + parent, root_objectid, owner_objectid, owner_offset); if (ret == 0) { extent_slot = path->slots[0]; @@ -6877,14 +6732,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, break; extent_slot--; } -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); - if (found_extent && item_size < sizeof(*ei)) - found_extent = 0; -#endif + if (!found_extent) { BUG_ON(iref); - ret = remove_extent_backref(trans, info, path, NULL, + ret = remove_extent_backref(trans, path, NULL, refs_to_drop, is_data, &last_ref); if (ret) { @@ -6957,42 +6808,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, extent_slot); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (item_size < sizeof(*ei)) { - BUG_ON(found_extent || extent_slot != path->slots[0]); - ret = convert_extent_item_v0(trans, info, path, owner_objectid, - 0); - if (ret < 0) { - btrfs_abort_transaction(trans, ret); - goto out; - } - - btrfs_release_path(path); - path->leave_spinning = 1; - - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; - - ret = btrfs_search_slot(trans, extent_root, &key, path, - -1, 1); - if (ret) { - btrfs_err(info, - "umm, got %d back from search, was looking for %llu", - ret, bytenr); - btrfs_print_leaf(path->nodes[0]); - } - if (ret < 0) { - btrfs_abort_transaction(trans, ret); - goto out; - } - - extent_slot = path->slots[0]; - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, extent_slot); + if (unlikely(item_size < sizeof(*ei))) { + ret = -EINVAL; + btrfs_print_v0_err(info); + btrfs_abort_transaction(trans, ret); + goto out; } -#endif - BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && @@ -7028,9 +6849,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); } if (found_extent) { - ret = remove_extent_backref(trans, info, path, - iref, refs_to_drop, - is_data, &last_ref); + ret = remove_extent_backref(trans, path, iref, + refs_to_drop, is_data, + &last_ref); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -7172,7 +6993,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, root->root_key.objectid, btrfs_header_level(buf), 0, BTRFS_DROP_DELAYED_REF); - ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, + ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, parent, root->root_key.objectid, btrfs_header_level(buf), @@ -7251,13 +7072,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, old_ref_mod = new_ref_mod = 0; ret = 0; } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, + ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, parent, root_objectid, (int)owner, BTRFS_DROP_DELAYED_REF, NULL, &old_ref_mod, &new_ref_mod); } else { - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, + ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, parent, root_objectid, owner, offset, 0, BTRFS_DROP_DELAYED_REF, @@ -7534,7 +7355,7 @@ search: * for the proper type. */ if (!block_group_bits(block_group, flags)) { - u64 extra = BTRFS_BLOCK_GROUP_DUP | + u64 extra = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | @@ -7738,7 +7559,7 @@ unclustered_alloc: goto loop; } checks: - search_start = ALIGN(offset, fs_info->stripesize); + search_start = round_up(offset, fs_info->stripesize); /* move on to the next group */ if (search_start + num_bytes > @@ -7750,7 +7571,6 @@ checks: if (offset < search_start) btrfs_add_free_space(block_group, offset, search_start - offset); - BUG_ON(offset > search_start); ret = btrfs_add_reserved_bytes(block_group, ram_bytes, num_bytes, delalloc); @@ -7826,8 +7646,7 @@ loop: goto out; } - ret = do_chunk_alloc(trans, fs_info, flags, - CHUNK_ALLOC_FORCE); + ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE); /* * If we can't allocate a new chunk we've already looped @@ -8053,11 +7872,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, } static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 parent, u64 root_objectid, u64 flags, u64 owner, u64 offset, struct btrfs_key *ins, int ref_mod) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_extent_item *extent_item; struct btrfs_extent_inline_ref *iref; @@ -8231,7 +8050,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, u64 offset, u64 ram_bytes, struct btrfs_key *ins) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret; BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); @@ -8240,7 +8058,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, root->root_key.objectid, owner, offset, BTRFS_ADD_DELAYED_EXTENT); - ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, + ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 0, root->root_key.objectid, owner, offset, ram_bytes, @@ -8254,10 +8072,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, * space cache bits as well */ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_block_group_cache *block_group; struct btrfs_space_info *space_info; @@ -8285,15 +8103,15 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); - ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid, - 0, owner, offset, ins, 1); + ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, + offset, ins, 1); btrfs_put_block_group(block_group); return ret; } static struct extent_buffer * btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, int level) + u64 bytenr, int level, u64 owner) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *buf; @@ -8302,7 +8120,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (IS_ERR(buf)) return buf; - btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(fs_info, buf); @@ -8311,6 +8128,14 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_lock_blocking(buf); set_extent_buffer_uptodate(buf); + memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header)); + btrfs_set_header_level(buf, level); + btrfs_set_header_bytenr(buf, buf->start); + btrfs_set_header_generation(buf, trans->transid); + btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(buf, owner); + write_extent_buffer_fsid(buf, fs_info->fsid); + write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid); if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { buf->log_index = root->log_transid % 2; /* @@ -8419,7 +8244,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (btrfs_is_testing(fs_info)) { buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, - level); + level, root_objectid); if (!IS_ERR(buf)) root->alloc_bytenr += blocksize; return buf; @@ -8435,7 +8260,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, if (ret) goto out_unuse; - buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); + buf = btrfs_init_new_buffer(trans, root, ins.objectid, level, + root_objectid); if (IS_ERR(buf)) { ret = PTR_ERR(buf); goto out_free_reserved; @@ -8467,7 +8293,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, root_objectid, level, 0, BTRFS_ADD_DELAYED_EXTENT); - ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, + ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, @@ -8499,7 +8325,6 @@ struct walk_control { int keep_locks; int reada_slot; int reada_count; - int for_reloc; }; #define DROP_REFERENCE 1 @@ -8819,7 +8644,7 @@ skip: } if (need_account) { - ret = btrfs_qgroup_trace_subtree(trans, root, next, + ret = btrfs_qgroup_trace_subtree(trans, next, generation, level - 1); if (ret) { btrfs_err_rl(fs_info, @@ -8919,7 +8744,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, else ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb); + ret = btrfs_qgroup_trace_leaf_items(trans, eb); if (ret) { btrfs_err_rl(fs_info, "error %d accounting leaf items. Quota is out of sync, rescan required.", @@ -9136,7 +8961,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, wc->stage = DROP_REFERENCE; wc->update_ref = update_ref; wc->keep_locks = 0; - wc->for_reloc = for_reloc; wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); while (1) { @@ -9199,7 +9023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, if (err) goto out_end_trans; - ret = btrfs_del_root(trans, fs_info, &root->root_key); + ret = btrfs_del_root(trans, &root->root_key); if (ret) { btrfs_abort_transaction(trans, ret); err = ret; @@ -9302,7 +9126,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, wc->stage = DROP_REFERENCE; wc->update_ref = 0; wc->keep_locks = 1; - wc->for_reloc = 1; wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); while (1) { @@ -9417,10 +9240,10 @@ out: return ret; } -int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *cache) +int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_trans_handle *trans; u64 alloc_flags; int ret; @@ -9454,7 +9277,7 @@ again: */ alloc_flags = update_block_group_flags(fs_info, cache->flags); if (alloc_flags != cache->flags) { - ret = do_chunk_alloc(trans, fs_info, alloc_flags, + ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); /* * ENOSPC is allowed here, we may have enough space @@ -9471,8 +9294,7 @@ again: if (!ret) goto out; alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); - ret = do_chunk_alloc(trans, fs_info, alloc_flags, - CHUNK_ALLOC_FORCE); + ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); @@ -9480,7 +9302,7 @@ out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = update_block_group_flags(fs_info, cache->flags); mutex_lock(&fs_info->chunk_mutex); - check_system_chunk(trans, fs_info, alloc_flags); + check_system_chunk(trans, alloc_flags); mutex_unlock(&fs_info->chunk_mutex); } mutex_unlock(&fs_info->ro_block_group_mutex); @@ -9489,12 +9311,11 @@ out: return ret; } -int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 type) +int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) { - u64 alloc_flags = get_alloc_profile(fs_info, type); + u64 alloc_flags = get_alloc_profile(trans->fs_info, type); - return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE); + return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } /* @@ -9520,13 +9341,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) continue; } - if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_DUP)) - factor = 2; - else - factor = 1; - + factor = btrfs_bg_type_to_factor(block_group->flags); free_bytes += (block_group->key.offset - btrfs_block_group_used(&block_group->item)) * factor; @@ -9717,6 +9532,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9751,8 +9568,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, "logical %llu len %llu found bg but no related chunk", found_key.objectid, found_key.offset); ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; } else { - ret = 0; + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } } free_extent_map(em); goto out; @@ -9847,7 +9688,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) */ if (block_group->cached == BTRFS_CACHE_NO || block_group->cached == BTRFS_CACHE_ERROR) - free_excluded_extents(info, block_group); + free_excluded_extents(block_group); btrfs_remove_free_space_cache(block_group); ASSERT(block_group->cached != BTRFS_CACHE_STARTED); @@ -10003,6 +9844,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_fs_info *info) { struct btrfs_path *path; @@ -10089,13 +9986,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) * info has super bytes accounted for, otherwise we'll think * we have more space than we actually do. */ - ret = exclude_super_stripes(info, cache); + ret = exclude_super_stripes(cache); if (ret) { /* * We may have excluded something, so call this just in * case. */ - free_excluded_extents(info, cache); + free_excluded_extents(cache); btrfs_put_block_group(cache); goto error; } @@ -10110,14 +10007,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) if (found_key.offset == btrfs_block_group_used(&cache->item)) { cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - free_excluded_extents(info, cache); + free_excluded_extents(cache); } else if (btrfs_block_group_used(&cache->item) == 0) { cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; add_new_free_space(cache, found_key.objectid, found_key.objectid + found_key.offset); - free_excluded_extents(info, cache); + free_excluded_extents(cache); } ret = btrfs_add_block_group_cache(info, cache); @@ -10140,15 +10037,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) if (btrfs_chunk_readonly(info, cache->key.objectid)) { inc_block_group_ro(cache, 1); } else if (btrfs_block_group_used(&cache->item) == 0) { - spin_lock(&info->unused_bgs_lock); - /* Should always be true but just in case. */ - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - trace_btrfs_add_unused_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); + ASSERT(list_empty(&cache->bg_list)); + btrfs_mark_bg_unused(cache); } } @@ -10176,7 +10066,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_add_raid_kobjects(info); init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; @@ -10206,8 +10096,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) sizeof(item)); if (ret) btrfs_abort_transaction(trans, ret); - ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid, - key.offset); + ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset); if (ret) btrfs_abort_transaction(trans, ret); add_block_group_free_space(trans, block_group); @@ -10218,10 +10107,10 @@ next: trans->can_flush_pending_bgs = can_flush_pending_bgs; } -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytes_used, +int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, u64 type, u64 chunk_offset, u64 size) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group_cache *cache; int ret; @@ -10240,20 +10129,20 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; cache->needs_free_space = 1; - ret = exclude_super_stripes(fs_info, cache); + ret = exclude_super_stripes(cache); if (ret) { /* * We may have excluded something, so call this just in * case. */ - free_excluded_extents(fs_info, cache); + free_excluded_extents(cache); btrfs_put_block_group(cache); return ret; } add_new_free_space(cache, chunk_offset, chunk_offset + size); - free_excluded_extents(fs_info, cache); + free_excluded_extents(cache); #ifdef CONFIG_BTRFS_DEBUG if (btrfs_should_fragment_free_space(cache)) { @@ -10311,9 +10200,9 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } int btrfs_remove_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 group_start, - struct extent_map *em) + u64 group_start, struct extent_map *em) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->extent_root; struct btrfs_path *path; struct btrfs_block_group_cache *block_group; @@ -10337,18 +10226,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * Free the reserved super bytes from this block group before * remove it. */ - free_excluded_extents(fs_info, block_group); + free_excluded_extents(block_group); btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, block_group->key.offset); memcpy(&key, &block_group->key, sizeof(key)); index = btrfs_bg_flags_to_raid_index(block_group->flags); - if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10)) - factor = 2; - else - factor = 1; + factor = btrfs_bg_type_to_factor(block_group->flags); /* make sure this block group isn't part of an allocation cluster */ cluster = &fs_info->data_alloc_cluster; @@ -10687,7 +10571,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->pinned || btrfs_block_group_used(&block_group->item) || block_group->ro || list_is_singular(&block_group->list)) { @@ -10764,8 +10648,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) space_info->bytes_pinned -= block_group->pinned; space_info->bytes_readonly += block_group->pinned; - percpu_counter_add(&space_info->total_bytes_pinned, - -block_group->pinned); + percpu_counter_add_batch(&space_info->total_bytes_pinned, + -block_group->pinned, + BTRFS_TOTAL_BYTES_PINNED_BATCH); block_group->pinned = 0; spin_unlock(&block_group->lock); @@ -10782,8 +10667,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Btrfs_remove_chunk will abort the transaction if things go * horribly wrong. */ - ret = btrfs_remove_chunk(trans, fs_info, - block_group->key.objectid); + ret = btrfs_remove_chunk(trans, block_group->key.objectid); if (ret) { if (trimming) @@ -11066,3 +10950,16 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) !atomic_read(&root->will_be_snapshotted)); } } + +void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + trace_btrfs_add_unused_block_group(bg); + list_add_tail(&bg->bg_list, &fs_info->unused_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); +} diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b3e45714d28f..4dd6faab02bb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -140,14 +140,6 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits, static void flush_write_bio(struct extent_page_data *epd); -static inline struct btrfs_fs_info * -tree_fs_info(struct extent_io_tree *tree) -{ - if (tree->ops) - return tree->ops->tree_fs_info(tree->private_data); - return NULL; -} - int __init extent_io_init(void) { extent_state_cache = kmem_cache_create("btrfs_extent_state", @@ -564,8 +556,10 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - btrfs_panic(tree_fs_info(tree), err, - "Locking error: Extent tree was modified by another thread while locked."); + struct inode *inode = tree->private_data; + + btrfs_panic(btrfs_sb(inode->i_sb), err, + "locking error: extent tree was modified by another thread while locked"); } /* @@ -1386,14 +1380,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) } } -/* - * helper function to set both pages and extents in the tree writeback - */ -static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) -{ - tree->ops->set_range_writeback(tree->private_data, start, end); -} - /* find the first state struct with 'bits' set after 'start', and * return it. tree->lock must be held. NULL will returned if * nothing was found after 'start' @@ -2059,7 +2045,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int mirror_num) { u64 start = eb->start; - unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); + int i, num_pages = num_extent_pages(eb); int ret = 0; if (sb_rdonly(fs_info->sb)) @@ -2398,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, start - page_offset(page), (int)phy_offset, failed_bio->bi_end_io, NULL); - bio_set_op_attrs(bio, REQ_OP_READ, read_mode); + bio->bi_opf = REQ_OP_READ | read_mode; btrfs_debug(btrfs_sb(inode->i_sb), "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", @@ -2790,8 +2776,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, else contig = bio_end_sector(bio) == sector; - if (tree->ops && tree->ops->merge_bio_hook(page, offset, - page_size, bio, bio_flags)) + if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size, + bio, bio_flags)) can_merge = false; if (prev_bio_flags != bio_flags || !contig || !can_merge || @@ -3116,7 +3102,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, - bio, 0, bio_flags, 0, prev_em_start); + bio, 0, bio_flags, REQ_RAHEAD, prev_em_start); put_page(pages[index]); } } @@ -3422,7 +3408,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, continue; } - set_range_writeback(tree, cur, cur + iosize - 1); + btrfs_set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(BTRFS_I(inode)->root->fs_info, "page %lu not writeback, cur %llu end %llu", @@ -3538,7 +3524,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, struct btrfs_fs_info *fs_info, struct extent_page_data *epd) { - unsigned long i, num_pages; + int i, num_pages; int flush = 0; int ret = 0; @@ -3588,7 +3574,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!ret) return ret; - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; @@ -3712,13 +3698,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; u64 offset = eb->start; u32 nritems; - unsigned long i, num_pages; + int i, num_pages; unsigned long start, end; unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); atomic_set(&eb->io_pages, num_pages); /* set btree blocks beyond nritems with 0 to avoid stale content. */ @@ -4643,23 +4629,20 @@ int extent_buffer_under_io(struct extent_buffer *eb) } /* - * Helper for releasing extent buffer page. + * Release all pages attached to the extent buffer. */ -static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) +static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) { - unsigned long index; - struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + int i; + int num_pages; + int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); BUG_ON(extent_buffer_under_io(eb)); - index = num_extent_pages(eb->start, eb->len); - if (index == 0) - return; + num_pages = num_extent_pages(eb); + for (i = 0; i < num_pages; i++) { + struct page *page = eb->pages[i]; - do { - index--; - page = eb->pages[index]; if (!page) continue; if (mapped) @@ -4691,7 +4674,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) /* One for when we allocated the page */ put_page(page); - } while (index != 0); + } } /* @@ -4699,7 +4682,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) */ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) { - btrfs_release_extent_buffer_page(eb); + btrfs_release_extent_buffer_pages(eb); __free_extent_buffer(eb); } @@ -4743,10 +4726,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) { - unsigned long i; + int i; struct page *p; struct extent_buffer *new; - unsigned long num_pages = num_extent_pages(src->start, src->len); + int num_pages = num_extent_pages(src); new = __alloc_extent_buffer(src->fs_info, src->start, src->len); if (new == NULL) @@ -4766,7 +4749,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) } set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); - set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); + set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags); return new; } @@ -4775,15 +4758,14 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len) { struct extent_buffer *eb; - unsigned long num_pages; - unsigned long i; - - num_pages = num_extent_pages(start, len); + int num_pages; + int i; eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) return NULL; + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { eb->pages[i] = alloc_page(GFP_NOFS); if (!eb->pages[i]) @@ -4791,7 +4773,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, } set_extent_buffer_uptodate(eb); btrfs_set_header_nritems(eb, 0); - set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); return eb; err: @@ -4843,11 +4825,11 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) static void mark_extent_buffer_accessed(struct extent_buffer *eb, struct page *accessed) { - unsigned long num_pages, i; + int num_pages, i; check_buffer_tree_ref(eb); - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; @@ -4944,8 +4926,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start) { unsigned long len = fs_info->nodesize; - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; + int num_pages; + int i; unsigned long index = start >> PAGE_SHIFT; struct extent_buffer *eb; struct extent_buffer *exists = NULL; @@ -4967,6 +4949,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, if (!eb) return ERR_PTR(-ENOMEM); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); if (!p) { @@ -5009,8 +4992,11 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, uptodate = 0; /* - * see below about how we avoid a nasty race with release page - * and why we unlock later + * We can't unlock the pages just yet since the extent buffer + * hasn't been properly inserted in the radix tree, this + * opens a race with btree_releasepage which can free a page + * while we are still filling in all pages for the buffer and + * we could crash. */ } if (uptodate) @@ -5039,21 +5025,12 @@ again: set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); /* - * there is a race where release page may have - * tried to find this extent buffer in the radix - * but failed. It will tell the VM it is safe to - * reclaim the, and it will clear the page private bit. - * We must make sure to set the page private bit properly - * after the extent buffer is in the radix tree so - * it doesn't get lost + * Now it's safe to unlock the pages because any calls to + * btree_releasepage will correctly detect that a page belongs to a + * live buffer and won't free them prematurely. */ - SetPageChecked(eb->pages[0]); - for (i = 1; i < num_pages; i++) { - p = eb->pages[i]; - ClearPageChecked(p); - unlock_page(p); - } - unlock_page(eb->pages[0]); + for (i = 0; i < num_pages; i++) + unlock_page(eb->pages[i]); return eb; free_eb: @@ -5075,9 +5052,10 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) __free_extent_buffer(eb); } -/* Expects to have eb->eb_lock already held */ static int release_extent_buffer(struct extent_buffer *eb) { + lockdep_assert_held(&eb->refs_lock); + WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) { @@ -5094,9 +5072,9 @@ static int release_extent_buffer(struct extent_buffer *eb) } /* Should be safe to release our pages at this point */ - btrfs_release_extent_buffer_page(eb); + btrfs_release_extent_buffer_pages(eb); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { + if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) { __free_extent_buffer(eb); return 1; } @@ -5127,7 +5105,7 @@ void free_extent_buffer(struct extent_buffer *eb) spin_lock(&eb->refs_lock); if (atomic_read(&eb->refs) == 2 && - test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) + test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) atomic_dec(&eb->refs); if (atomic_read(&eb->refs) == 2 && @@ -5159,11 +5137,11 @@ void free_extent_buffer_stale(struct extent_buffer *eb) void clear_extent_buffer_dirty(struct extent_buffer *eb) { - unsigned long i; - unsigned long num_pages; + int i; + int num_pages; struct page *page; - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; @@ -5189,15 +5167,15 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) int set_extent_buffer_dirty(struct extent_buffer *eb) { - unsigned long i; - unsigned long num_pages; + int i; + int num_pages; int was_dirty = 0; check_buffer_tree_ref(eb); was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); WARN_ON(atomic_read(&eb->refs) == 0); WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); @@ -5208,12 +5186,12 @@ int set_extent_buffer_dirty(struct extent_buffer *eb) void clear_extent_buffer_uptodate(struct extent_buffer *eb) { - unsigned long i; + int i; struct page *page; - unsigned long num_pages; + int num_pages; clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (page) @@ -5223,12 +5201,12 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb) void set_extent_buffer_uptodate(struct extent_buffer *eb) { - unsigned long i; + int i; struct page *page; - unsigned long num_pages; + int num_pages; set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; SetPageUptodate(page); @@ -5238,13 +5216,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb) int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, int wait, int mirror_num) { - unsigned long i; + int i; struct page *page; int err; int ret = 0; int locked_pages = 0; int all_uptodate = 1; - unsigned long num_pages; + int num_pages; unsigned long num_reads = 0; struct bio *bio = NULL; unsigned long bio_flags = 0; @@ -5252,7 +5230,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - num_pages = num_extent_pages(eb->start, eb->len); + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; if (wait == WAIT_NONE) { @@ -5576,11 +5554,11 @@ void copy_extent_buffer_full(struct extent_buffer *dst, struct extent_buffer *src) { int i; - unsigned num_pages; + int num_pages; ASSERT(dst->len == src->len); - num_pages = num_extent_pages(dst->start, dst->len); + num_pages = num_extent_pages(dst); for (i = 0; i < num_pages; i++) copy_page(page_address(dst->pages[i]), page_address(src->pages[i])); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0bfd4aeb822d..b4d03e677e1d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -46,7 +46,7 @@ #define EXTENT_BUFFER_STALE 6 #define EXTENT_BUFFER_WRITEBACK 7 #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ -#define EXTENT_BUFFER_DUMMY 9 +#define EXTENT_BUFFER_UNMAPPED 9 #define EXTENT_BUFFER_IN_TREE 10 #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ @@ -92,9 +92,6 @@ typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio * typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, struct bio *bio, u64 bio_offset); -typedef blk_status_t (extent_submit_bio_done_t)(void *private_data, - struct bio *bio, int mirror_num); - struct extent_io_ops { /* * The following callbacks must be allways defined, the function @@ -104,12 +101,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); - int (*merge_bio_hook)(struct page *page, unsigned long offset, - size_t size, struct bio *bio, - unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - struct btrfs_fs_info *(*tree_fs_info)(void *private_data); - void (*set_range_writeback)(void *private_data, u64 start, u64 end); /* * Optional hooks, called if the pointer is not NULL @@ -440,10 +432,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, int mirror_num); void wait_on_extent_buffer_writeback(struct extent_buffer *eb); -static inline unsigned long num_extent_pages(u64 start, u64 len) +static inline int num_extent_pages(const struct extent_buffer *eb) { - return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - - (start >> PAGE_SHIFT); + return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) - + (eb->start >> PAGE_SHIFT); } static inline void extent_buffer_get(struct extent_buffer *eb) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f9dd6d1836a3..ba74827beb32 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -922,7 +922,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const bool new_inline, struct extent_map *em) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf = path->nodes[0]; const int slot = path->slots[0]; @@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, btrfs_file_extent_num_bytes(leaf, fi); } else if (type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, slot, fi); + size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 51e77d72068a..2be00e873e92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -5,14 +5,11 @@ #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> -#include <linux/mpage.h> #include <linux/falloc.h> -#include <linux/swap.h> #include <linux/writeback.h> #include <linux/compat.h> #include <linux/slab.h> @@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; @@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct inode_defrag *defrag; u64 transid; int ret; @@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int ret; if (!__need_auto_defrag(fs_info)) @@ -833,8 +830,7 @@ next_slot: btrfs_file_extent_num_bytes(leaf, fi); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); } else { /* can't happen */ BUG(); @@ -1133,7 +1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_path *path; @@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 start_pos; u64 last_pos; int i; @@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; @@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, return ret; } -static noinline ssize_t __btrfs_buffered_write(struct file *file, - struct iov_iter *i, - loff_t pos) +static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, + struct iov_iter *i) { + struct file *file = iocb->ki_filp; + loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - loff_t pos = iocb->ki_pos; + loff_t pos; ssize_t written; ssize_t written_buffered; loff_t endbyte; @@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (written < 0 || !iov_iter_count(from)) return written; - pos += written; - written_buffered = __btrfs_buffered_write(file, from, pos); + pos = iocb->ki_pos; + written_buffered = btrfs_buffered_write(iocb, from); if (written_buffered < 0) { err = written_buffered; goto out; @@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from); } else { - num_written = __btrfs_buffered_write(file, from, pos); + num_written = btrfs_buffered_write(iocb, from); if (num_written > 0) iocb->ki_pos = pos + num_written; if (clean_page) @@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; - bool full_sync = false; u64 len; /* @@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) inode_lock(inode); atomic_inc(&root->log_batch); - full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + /* - * We might have have had more pages made dirty after calling - * start_ordered_ops and before acquiring the inode's i_mutex. + * We have to do this here to avoid the priority inversion of waiting on + * IO of a lower priority task while holding a transaciton open. */ - if (full_sync) { - /* - * For a full sync, we need to make sure any ordered operations - * start and finish before we start logging the inode, so that - * all extents are persisted and the respective file extent - * items are in the fs/subvol btree. - */ - ret = btrfs_wait_ordered_range(inode, start, len); - } else { - /* - * Start any new ordered operations before starting to log the - * inode. We will wait for them to finish in btrfs_sync_log(). - * - * Right before acquiring the inode's mutex, we might have new - * writes dirtying pages, which won't immediately start the - * respective ordered operations - that is done through the - * fill_delalloc callbacks invoked from the writepage and - * writepages address space operations. So make sure we start - * all ordered operations before starting to log our inode. Not - * doing this means that while logging the inode, writeback - * could start and invoke writepage/writepages, which would call - * the fill_delalloc callbacks (cow_file_range, - * submit_compressed_extents). These callbacks add first an - * extent map to the modified list of extents and then create - * the respective ordered operation, which means in - * tree-log.c:btrfs_log_inode() we might capture all existing - * ordered operations (with btrfs_get_logged_extents()) before - * the fill_delalloc callback adds its ordered operation, and by - * the time we visit the modified list of extent maps (with - * btrfs_log_changed_extents()), we see and process the extent - * map they created. We then use the extent map to construct a - * file extent item for logging without waiting for the - * respective ordered operation to finish - this file extent - * item points to a disk location that might not have yet been - * written to, containing random data - so after a crash a log - * replay will make our inode have file extent items that point - * to disk locations containing invalid data, as we returned - * success to userspace without waiting for the respective - * ordered operation to finish, because it wasn't captured by - * btrfs_get_logged_extents(). - */ - ret = start_ordered_ops(inode, start, end); - } + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { inode_unlock(inode); goto out; } atomic_inc(&root->log_batch); - /* - * If the last transaction that changed this file was before the current - * transaction and we have the full sync flag set in our inode, we can - * bail out now without any syncing. - * - * Note that we can't bail out if the full sync flag isn't set. This is - * because when the full sync flag is set we start all ordered extents - * and wait for them to fully complete - when they complete they update - * the inode's last_trans field through: - * - * btrfs_finish_ordered_io() -> - * btrfs_update_inode_fallback() -> - * btrfs_update_inode() -> - * btrfs_set_inode_last_trans() - * - * So we are sure that last_trans is up to date and can do this check to - * bail out safely. For the fast path, when the full sync flag is not - * set in our inode, we can not do it because we start only our ordered - * extents and don't wait for them to complete (that is when - * btrfs_finish_ordered_io runs), so here at this point their last_trans - * value might be less than or equals to fs_info->last_trans_committed, - * and setting a speculative last_trans for an inode when a buffered - * write is made (such as fs_info->generation + 1 for example) would not - * be reliable since after setting the value and before fsync is called - * any number of transactions can start and commit (transaction kthread - * commits the current transaction periodically), and a transaction - * commit does not start nor waits for ordered extents to complete. - */ smp_mb(); if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || - (full_sync && BTRFS_I(inode)->last_trans <= - fs_info->last_trans_committed) || - (!btrfs_have_ordered_extents_in_range(inode, start, len) && - BTRFS_I(inode)->last_trans - <= fs_info->last_trans_committed)) { + BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } } - if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, len); - if (ret) { - btrfs_end_transaction(trans); - goto out; - } - } ret = btrfs_commit_transaction(trans); } else { ret = btrfs_end_transaction(trans); @@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, u64 offset, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d5f80cb300be..0adf38b00fa0 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -71,10 +71,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, inode = btrfs_iget(fs_info->sb, &location, root, NULL); if (IS_ERR(inode)) return inode; - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); - } mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_constraint(inode->i_mapping, @@ -300,9 +296,9 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) check_crcs = 1; - /* Make sure we can fit our crcs into the first page */ + /* Make sure we can fit our crcs and generation into the first page */ if (write && check_crcs && - (num_pages * sizeof(u32)) >= PAGE_SIZE) + (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) return -ENOSPC; memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); @@ -547,7 +543,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) io_ctl_map_page(io_ctl, 0); } - memcpy(io_ctl->cur, bitmap, PAGE_SIZE); + copy_page(io_ctl->cur, bitmap); io_ctl_set_crc(io_ctl, io_ctl->index - 1); if (io_ctl->index < io_ctl->num_pages) io_ctl_map_page(io_ctl, 0); @@ -607,7 +603,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, if (ret) return ret; - memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); + copy_page(entry->bitmap, io_ctl->cur); io_ctl_unmap_page(io_ctl); return 0; @@ -655,7 +651,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_path *path, u64 offset) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct btrfs_io_ctl io_ctl; @@ -1123,13 +1119,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root, { int ret; struct inode *inode = io_ctl->inode; - struct btrfs_fs_info *fs_info; if (!inode) return 0; - fs_info = btrfs_sb(inode->i_sb); - /* Flush the dirty pages in the cache file. */ ret = flush_dirty_cache(inode); if (ret) @@ -1145,7 +1138,7 @@ out: BTRFS_I(inode)->generation = 0; if (block_group) { #ifdef DEBUG - btrfs_err(fs_info, + btrfs_err(root->fs_info, "failed to write free space cache for block group %llu", block_group->key.objectid); #endif diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index b5950aacd697..d6736595ec57 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1236,7 +1236,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) if (ret) goto abort; - ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key); + ret = btrfs_del_root(trans, &free_space_root->root_key); if (ret) goto abort; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 12fcd8897c33..ffca2abf13d0 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -3,7 +3,6 @@ * Copyright (C) 2007 Oracle. All rights reserved. */ -#include <linux/delay.h> #include <linux/kthread.h> #include <linux/pagemap.h> @@ -244,8 +243,6 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { - bool add_to_ctl = true; - spin_lock(rbroot_lock); n = rb_first(rbroot); if (!n) { @@ -257,15 +254,14 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - add_to_ctl = false; - else if (info->offset + info->bytes > root->ino_cache_progress) - count = root->ino_cache_progress - info->offset + 1; + count = 0; else - count = info->bytes; + count = min(root->ino_cache_progress - info->offset + 1, + info->bytes); rb_erase(&info->offset_index, rbroot); spin_unlock(rbroot_lock); - if (add_to_ctl) + if (count) __btrfs_add_free_space(root->fs_info, ctl, info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eba61bcb9bb3..9357a19d2bff 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -14,17 +14,13 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> -#include <linux/mpage.h> -#include <linux/swap.h> #include <linux/writeback.h> #include <linux/compat.h> -#include <linux/bit_spinlock.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/falloc.h> #include <linux/slab.h> #include <linux/ratelimit.h> -#include <linux/mount.h> #include <linux/btrfs.h> #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> @@ -1443,8 +1439,7 @@ next_slot: nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_end, fs_info->sectorsize); } else { @@ -1752,7 +1747,7 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = root->fs_info; if (!list_empty(&inode->delalloc_inodes)) { list_del_init(&inode->delalloc_inodes); @@ -1903,8 +1898,8 @@ static void btrfs_clear_bit_hook(void *private_data, } /* - * extent_io.c merge_bio_hook, this must check the chunk tree to make sure - * we don't create bios that span stripes or chunks + * Merge bio hook, this must check the chunk tree to make sure we don't create + * bios that span stripes or chunks * * return 1 if page cannot be merged to bio * return 0 if page can be merged to bio @@ -1962,7 +1957,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, +blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, int mirror_num) { struct inode *inode = private_data; @@ -2035,8 +2030,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, bio_offset, inode, - btrfs_submit_bio_start, - btrfs_submit_bio_done); + btrfs_submit_bio_start); goto out; } else if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, 0, 0); @@ -3610,18 +3604,15 @@ static int btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto make_bad; - } + if (!path) + return -ENOMEM; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { - if (ret > 0) - ret = -ENOENT; - goto make_bad; + btrfs_free_path(path); + return ret; } leaf = path->nodes[0]; @@ -3774,11 +3765,6 @@ cache_acl: btrfs_sync_inode_flags_to_i_flags(inode); return 0; - -make_bad: - btrfs_free_path(path); - make_bad_inode(inode); - return ret; } /* @@ -3984,7 +3970,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } skip_backref: - ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index); + ret = btrfs_delete_delayed_dir_index(trans, dir, index); if (ret) { btrfs_abort_transaction(trans, ret); goto err; @@ -4087,11 +4073,10 @@ out: } static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, u64 objectid, - const char *name, int name_len) + struct inode *dir, u64 objectid, + const char *name, int name_len) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_dir_item *di; @@ -4124,9 +4109,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - ret = btrfs_del_root_ref(trans, fs_info, objectid, - root->root_key.objectid, dir_ino, - &index, name, name_len); + ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, + dir_ino, &index, name, name_len); if (ret < 0) { if (ret != -ENOENT) { btrfs_abort_transaction(trans, ret); @@ -4145,12 +4129,11 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_release_path(path); index = key.offset; } btrfs_release_path(path); - ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index); + ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -4243,9 +4226,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode))) + if (objectid < btrfs_ino(entry)) node = node->rb_left; - else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode))) + else if (objectid > btrfs_ino(entry)) node = node->rb_right; else break; @@ -4253,7 +4236,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) { + if (objectid <= btrfs_ino(entry)) { node = prev; break; } @@ -4262,7 +4245,7 @@ again: } while (node) { entry = rb_entry(node, struct btrfs_inode, rb_node); - objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1; + objectid = btrfs_ino(entry) + 1; inode = igrab(&entry->vfs_inode); if (inode) { spin_unlock(&root->inode_lock); @@ -4343,10 +4326,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); - ret = btrfs_unlink_subvol(trans, root, dir, - dest->root_key.objectid, - dentry->d_name.name, - dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid, + dentry->d_name.name, dentry->d_name.len); if (ret) { err = ret; btrfs_abort_transaction(trans, ret); @@ -4441,7 +4422,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return PTR_ERR(trans); if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - err = btrfs_unlink_subvol(trans, root, dir, + err = btrfs_unlink_subvol(trans, dir, BTRFS_I(inode)->location.objectid, dentry->d_name.name, dentry->d_name.len); @@ -4643,8 +4624,8 @@ search_again: BTRFS_I(inode), leaf, fi, found_key.offset); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + item_end += btrfs_file_extent_ram_bytes(leaf, + fi); trace_btrfs_truncate_show_fi_inline( BTRFS_I(inode), leaf, fi, path->slots[0], @@ -5615,9 +5596,9 @@ static void inode_tree_add(struct inode *inode) parent = *p; entry = rb_entry(parent, struct btrfs_inode, rb_node); - if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode))) + if (ino < btrfs_ino(entry)) p = &parent->rb_left; - else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode))) + else if (ino > btrfs_ino(entry)) p = &parent->rb_right; else { WARN_ON(!(entry->vfs_inode.i_state & @@ -5708,16 +5689,21 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, int ret; ret = btrfs_read_locked_inode(inode); - if (!is_bad_inode(inode)) { + if (!ret) { inode_tree_add(inode); unlock_new_inode(inode); if (new) *new = 1; } else { - unlock_new_inode(inode); - iput(inode); - ASSERT(ret < 0); - inode = ERR_PTR(ret < 0 ? ret : -ESTALE); + iget_failed(inode); + /* + * ret > 0 can come from btrfs_search_slot called by + * btrfs_read_locked_inode, this means the inode item + * was not found. + */ + if (ret > 0) + ret = -ENOENT; + inode = ERR_PTR(ret); } } @@ -5745,7 +5731,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; - BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); + BTRFS_I(inode)->i_otime = inode->i_mtime; return inode; } @@ -6027,32 +6013,6 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - bool nolock = false; - - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) - return 0; - - if (btrfs_fs_closing(root->fs_info) && - btrfs_is_free_space_inode(BTRFS_I(inode))) - nolock = true; - - if (wbc->sync_mode == WB_SYNC_ALL) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans); - } - return ret; -} - /* * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. @@ -6335,8 +6295,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location->type = BTRFS_INODE_ITEM_KEY; ret = btrfs_insert_inode_locked(inode); - if (ret < 0) + if (ret < 0) { + iput(inode); goto fail; + } path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); @@ -6349,7 +6311,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; - BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); + BTRFS_I(inode)->i_otime = inode->i_mtime; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); @@ -6395,12 +6357,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; fail_unlock: - unlock_new_inode(inode); + discard_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; btrfs_free_path(path); - iput(inode); return ERR_PTR(ret); } @@ -6419,7 +6380,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret = 0; struct btrfs_key key; struct btrfs_root *root = parent_inode->root; @@ -6435,7 +6395,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { - ret = btrfs_add_root_ref(trans, fs_info, key.objectid, + ret = btrfs_add_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, index, name, name_len); } else if (add_backref) { @@ -6471,7 +6431,7 @@ fail_dir_item: if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { u64 local_index; int err; - err = btrfs_del_root_ref(trans, fs_info, key.objectid, + err = btrfs_del_root_ref(trans, key.objectid, root->root_key.objectid, parent_ino, &local_index, name, name_len); @@ -6505,7 +6465,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; @@ -6527,6 +6486,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -6541,31 +6501,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - goto out_unlock_inode; - } else { - btrfs_update_inode(trans, root, inode); - d_instantiate_new(dentry, inode); - } + if (err) + goto out_unlock; + + btrfs_update_inode(trans, root, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -6575,7 +6528,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; - int drop_inode_on_err = 0; int err; u64 objectid; u64 index = 0; @@ -6598,9 +6550,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } - drop_inode_on_err = 1; /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -6613,33 +6565,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_update_inode(trans, root, inode); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); if (err) - goto out_unlock_inode; + goto out_unlock; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (err && drop_inode_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6748,6 +6695,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_fail; } @@ -6758,34 +6706,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail_inode; + goto out_fail; btrfs_i_size_write(BTRFS_I(inode), 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail_inode; + goto out_fail; err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail_inode; + goto out_fail; d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: btrfs_end_transaction(trans); - if (drop_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_fail_inode: - unlock_new_inode(inode); - goto out_fail; } static noinline int uncompress_inline(struct btrfs_path *path, @@ -6847,7 +6791,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, size_t pg_offset, u64 start, u64 len, int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int ret; int err = 0; u64 extent_start = 0; @@ -6943,7 +6887,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_start); } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + + size = btrfs_file_extent_ram_bytes(leaf, item); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); @@ -6994,7 +6939,7 @@ next: if (new_inline) goto out; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + size = btrfs_file_extent_ram_bytes(leaf, item); extent_offset = page_offset(page) + pg_offset - extent_start; copy_size = min_t(u64, PAGE_SIZE - pg_offset, size - extent_offset); @@ -7865,7 +7810,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, isector >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, isector, repair_endio, repair_arg); - bio_set_op_attrs(bio, REQ_OP_READ, read_mode); + bio->bi_opf = REQ_OP_READ | read_mode; btrfs_debug(BTRFS_I(inode)->root->fs_info, "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", @@ -8299,8 +8244,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, if (write && async_submit) { ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0, file_offset, inode, - btrfs_submit_bio_start_direct_io, - btrfs_submit_bio_done); + btrfs_submit_bio_start_direct_io); goto err; } else if (write) { /* @@ -9540,8 +9484,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* src is a subvolume */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, root, old_dir, - root_objectid, + ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, old_dentry->d_name.name, old_dentry->d_name.len); } else { /* src is an inode */ @@ -9560,8 +9503,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* dest is a subvolume */ if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, dest, new_dir, - root_objectid, + ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, new_dentry->d_name.name, new_dentry->d_name.len); } else { /* dest is an inode */ @@ -9821,7 +9763,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, + ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, old_dentry->d_name.name, old_dentry->d_name.len); } else { @@ -9843,8 +9785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; - ret = btrfs_unlink_subvol(trans, dest, new_dir, - root_objectid, + ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, new_dentry->d_name.name, new_dentry->d_name.len); BUG_ON(new_inode->i_nlink == 0); @@ -10115,7 +10056,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, struct btrfs_key key; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; int name_len; @@ -10148,6 +10088,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, objectid, S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -10164,12 +10105,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - goto out_unlock_inode; + goto out_unlock; } key.objectid = btrfs_ino(BTRFS_I(inode)); key.offset = 0; @@ -10179,7 +10120,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize); if (err) { btrfs_free_path(path); - goto out_unlock_inode; + goto out_unlock; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -10211,26 +10152,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (!err) err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - drop_inode = 1; - goto out_unlock_inode; - } + if (err) + goto out_unlock; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -10439,14 +10373,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) ret = btrfs_init_inode_security(trans, inode, dir, NULL); if (ret) - goto out_inode; + goto out; ret = btrfs_update_inode(trans, root, inode); if (ret) - goto out_inode; + goto out; ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) - goto out_inode; + goto out; /* * We set number of links to 0 in btrfs_new_inode(), and here we set @@ -10456,21 +10390,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() */ set_nlink(inode, 1); - unlock_new_inode(inode); d_tmpfile(dentry, inode); + unlock_new_inode(inode); mark_inode_dirty(inode); - out: btrfs_end_transaction(trans); - if (ret) - iput(inode); + if (ret && inode) + discard_new_inode(inode); btrfs_btree_balance_dirty(fs_info); return ret; - -out_inode: - unlock_new_inode(inode); - goto out; - } __attribute__((const)) @@ -10479,12 +10407,6 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) return -EAGAIN; } -static struct btrfs_fs_info *iotree_fs_info(void *private_data) -{ - struct inode *inode = private_data; - return btrfs_sb(inode->i_sb); -} - static void btrfs_check_extent_io_range(void *private_data, const char *caller, u64 start, u64 end) { @@ -10499,9 +10421,9 @@ static void btrfs_check_extent_io_range(void *private_data, const char *caller, } } -void btrfs_set_range_writeback(void *private_data, u64 start, u64 end) +void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { - struct inode *inode = private_data; + struct inode *inode = tree->private_data; unsigned long index = start >> PAGE_SHIFT; unsigned long end_index = end >> PAGE_SHIFT; struct page *page; @@ -10557,10 +10479,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { /* mandatory callbacks */ .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, - .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, - .tree_fs_info = iotree_fs_info, - .set_range_writeback = btrfs_set_range_writeback, /* optional callbacks */ .fill_delalloc = run_delalloc_range, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b077544b5232..d3a5d2a41e5f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5,23 +5,18 @@ #include <linux/kernel.h> #include <linux/bio.h> -#include <linux/buffer_head.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> -#include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> -#include <linux/mpage.h> #include <linux/namei.h> -#include <linux/swap.h> #include <linux/writeback.h> #include <linux/compat.h> -#include <linux/bit_spinlock.h> #include <linux/security.h> #include <linux/xattr.h> #include <linux/mm.h> @@ -606,7 +601,7 @@ static noinline int create_subvol(struct inode *dir, trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; - ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit); + ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit); if (ret) goto fail; @@ -616,14 +611,6 @@ static noinline int create_subvol(struct inode *dir, goto fail; } - memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(leaf, objectid); - - write_extent_buffer_fsid(leaf, fs_info->fsid); - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); btrfs_mark_buffer_dirty(leaf); inode_item = &root_item->inode; @@ -711,8 +698,7 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); - ret = btrfs_add_root_ref(trans, fs_info, - objectid, root->root_key.objectid, + ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid, btrfs_ino(BTRFS_I(dir)), index, name, namelen); BUG_ON(ret); @@ -2507,8 +2493,8 @@ out: static noinline int btrfs_ioctl_ino_lookup(struct file *file, void __user *argp) { - struct btrfs_ioctl_ino_lookup_args *args; - struct inode *inode; + struct btrfs_ioctl_ino_lookup_args *args; + struct inode *inode; int ret = 0; args = memdup_user(argp, sizeof(*args)); @@ -2941,8 +2927,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = btrfs_defrag_root(root); break; case S_IFREG: - if (!(file->f_mode & FMODE_WRITE)) { - ret = -EINVAL; + /* + * Note that this does not check the file descriptor for write + * access. This prevents defragmenting executables that are + * running and allows defrag on files open in read-only mode. + */ + if (!capable(CAP_SYS_ADMIN) && + inode_permission(inode, MAY_WRITE)) { + ret = -EPERM; goto out; } @@ -3165,10 +3157,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); if (dev->name) { - struct rcu_string *name; - - name = rcu_dereference(dev->name); - strncpy(di_args->path, name->str, sizeof(di_args->path) - 1); + strncpy(di_args->path, rcu_str_deref(dev->name), + sizeof(di_args->path) - 1); di_args->path[sizeof(di_args->path) - 1] = 0; } else { di_args->path[0] = '\0'; @@ -5118,9 +5108,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_quota_ctl_args *sa; - struct btrfs_trans_handle *trans = NULL; int ret; - int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -5136,28 +5124,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) } down_write(&fs_info->subvol_sem); - trans = btrfs_start_transaction(fs_info->tree_root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } switch (sa->cmd) { case BTRFS_QUOTA_CTL_ENABLE: - ret = btrfs_quota_enable(trans, fs_info); + ret = btrfs_quota_enable(fs_info); break; case BTRFS_QUOTA_CTL_DISABLE: - ret = btrfs_quota_disable(trans, fs_info); + ret = btrfs_quota_disable(fs_info); break; default: ret = -EINVAL; break; } - err = btrfs_commit_transaction(trans); - if (err && !ret) - ret = err; -out: kfree(sa); up_write(&fs_info->subvol_sem); drop_write: @@ -5195,15 +5174,13 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) } if (sa->assign) { - ret = btrfs_add_qgroup_relation(trans, fs_info, - sa->src, sa->dst); + ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst); } else { - ret = btrfs_del_qgroup_relation(trans, fs_info, - sa->src, sa->dst); + ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); } /* update qgroup status and info */ - err = btrfs_run_qgroups(trans, fs_info); + err = btrfs_run_qgroups(trans); if (err < 0) btrfs_handle_fs_error(fs_info, err, "failed to update qgroup status and info"); @@ -5221,7 +5198,6 @@ drop_write: static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_create_args *sa; struct btrfs_trans_handle *trans; @@ -5253,9 +5229,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) } if (sa->create) { - ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); + ret = btrfs_create_qgroup(trans, sa->qgroupid); } else { - ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid); + ret = btrfs_remove_qgroup(trans, sa->qgroupid); } err = btrfs_end_transaction(trans); @@ -5272,7 +5248,6 @@ drop_write: static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_limit_args *sa; struct btrfs_trans_handle *trans; @@ -5305,7 +5280,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) qgroupid = root->root_key.objectid; } - ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); + ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim); err = btrfs_end_transaction(trans); if (err && !ret) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2e1a1694a33d..0c4ef208b8b9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -6,7 +6,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/writeback.h> -#include <linux/pagevec.h> #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -421,129 +420,6 @@ out: return ret == 0; } -/* Needs to either be called under a log transaction or the log_mutex */ -void btrfs_get_logged_extents(struct btrfs_inode *inode, - struct list_head *logged_list, - const loff_t start, - const loff_t end) -{ - struct btrfs_ordered_inode_tree *tree; - struct btrfs_ordered_extent *ordered; - struct rb_node *n; - struct rb_node *prev; - - tree = &inode->ordered_tree; - spin_lock_irq(&tree->lock); - n = __tree_search(&tree->tree, end, &prev); - if (!n) - n = prev; - for (; n; n = rb_prev(n)) { - ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); - if (ordered->file_offset > end) - continue; - if (entry_end(ordered) <= start) - break; - if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) - continue; - list_add(&ordered->log_list, logged_list); - refcount_inc(&ordered->refs); - } - spin_unlock_irq(&tree->lock); -} - -void btrfs_put_logged_extents(struct list_head *logged_list) -{ - struct btrfs_ordered_extent *ordered; - - while (!list_empty(logged_list)) { - ordered = list_first_entry(logged_list, - struct btrfs_ordered_extent, - log_list); - list_del_init(&ordered->log_list); - btrfs_put_ordered_extent(ordered); - } -} - -void btrfs_submit_logged_extents(struct list_head *logged_list, - struct btrfs_root *log) -{ - int index = log->log_transid % 2; - - spin_lock_irq(&log->log_extents_lock[index]); - list_splice_tail(logged_list, &log->logged_list[index]); - spin_unlock_irq(&log->log_extents_lock[index]); -} - -void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *log, u64 transid) -{ - struct btrfs_ordered_extent *ordered; - int index = transid % 2; - - spin_lock_irq(&log->log_extents_lock[index]); - while (!list_empty(&log->logged_list[index])) { - struct inode *inode; - ordered = list_first_entry(&log->logged_list[index], - struct btrfs_ordered_extent, - log_list); - list_del_init(&ordered->log_list); - inode = ordered->inode; - spin_unlock_irq(&log->log_extents_lock[index]); - - if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && - !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { - u64 start = ordered->file_offset; - u64 end = ordered->file_offset + ordered->len - 1; - - WARN_ON(!inode); - filemap_fdatawrite_range(inode->i_mapping, start, end); - } - wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, - &ordered->flags)); - - /* - * In order to keep us from losing our ordered extent - * information when committing the transaction we have to make - * sure that any logged extents are completed when we go to - * commit the transaction. To do this we simply increase the - * current transactions pending_ordered counter and decrement it - * when the ordered extent completes. - */ - if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) { - struct btrfs_ordered_inode_tree *tree; - - tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); - if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) { - set_bit(BTRFS_ORDERED_PENDING, &ordered->flags); - atomic_inc(&trans->transaction->pending_ordered); - } - spin_unlock_irq(&tree->lock); - } - btrfs_put_ordered_extent(ordered); - spin_lock_irq(&log->log_extents_lock[index]); - } - spin_unlock_irq(&log->log_extents_lock[index]); -} - -void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid) -{ - struct btrfs_ordered_extent *ordered; - int index = transid % 2; - - spin_lock_irq(&log->log_extents_lock[index]); - while (!list_empty(&log->logged_list[index])) { - ordered = list_first_entry(&log->logged_list[index], - struct btrfs_ordered_extent, - log_list); - list_del_init(&ordered->log_list); - spin_unlock_irq(&log->log_extents_lock[index]); - btrfs_put_ordered_extent(ordered); - spin_lock_irq(&log->log_extents_lock[index]); - } - spin_unlock_irq(&log->log_extents_lock[index]); -} - /* * used to drop a reference on an ordered extent. This will free * the extent if the last reference is dropped @@ -913,20 +789,6 @@ out: return entry; } -bool btrfs_have_ordered_extents_in_range(struct inode *inode, - u64 file_offset, - u64 len) -{ - struct btrfs_ordered_extent *oe; - - oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len); - if (oe) { - btrfs_put_ordered_extent(oe); - return true; - } - return false; -} - /* * lookup and return any extent before 'file_offset'. NULL is returned * if none is found diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3be443fb3001..02d813aaa261 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -54,15 +54,11 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent * has done its due diligence in updating * the isize. */ -#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered - ordered extent */ -#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ +#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */ -#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent - * in the logging code. */ -#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to +#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to * complete in the current transaction. */ -#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */ +#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */ struct btrfs_ordered_extent { /* logical offset in the file */ @@ -182,9 +178,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( struct btrfs_inode *inode, u64 file_offset, u64 len); -bool btrfs_have_ordered_extents_in_range(struct inode *inode, - u64 file_offset, - u64 len); int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, @@ -193,16 +186,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, const u64 range_start, const u64 range_len); u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, const u64 range_start, const u64 range_len); -void btrfs_get_logged_extents(struct btrfs_inode *inode, - struct list_head *logged_list, - const loff_t start, - const loff_t end); -void btrfs_put_logged_extents(struct list_head *logged_list); -void btrfs_submit_logged_extents(struct list_head *logged_list, - struct btrfs_root *log); -void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *log, u64 transid); -void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); int __init ordered_data_init(void); void __cold ordered_data_exit(void); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a4e11cf04671..df49931ffe92 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -52,17 +52,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) u64 offset; int ref_index = 0; - if (item_size < sizeof(*ei)) { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - struct btrfs_extent_item_v0 *ei0; - BUG_ON(item_size != sizeof(*ei0)); - ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); - pr_info("\t\textent refs %u\n", - btrfs_extent_refs_v0(eb, ei0)); - return; -#else - BUG(); -#endif + if (unlikely(item_size < sizeof(*ei))) { + btrfs_print_v0_err(eb->fs_info); + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); } ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); @@ -133,20 +125,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) WARN_ON(ptr > end); } -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -static void print_extent_ref_v0(struct extent_buffer *eb, int slot) -{ - struct btrfs_extent_ref_v0 *ref0; - - ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); - printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n", - btrfs_ref_root_v0(eb, ref0), - btrfs_ref_generation_v0(eb, ref0), - btrfs_ref_objectid_v0(eb, ref0), - (unsigned long)btrfs_ref_count_v0(eb, ref0)); -} -#endif - static void print_uuid_item(struct extent_buffer *l, unsigned long offset, u32 item_size) { @@ -267,8 +245,8 @@ void btrfs_print_leaf(struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - pr_info("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, i, fi)); + pr_info("\t\tinline extent data size %llu\n", + btrfs_file_extent_ram_bytes(l, fi)); break; } pr_info("\t\textent data disk bytenr %llu nr %llu\n", @@ -280,11 +258,8 @@ void btrfs_print_leaf(struct extent_buffer *l) btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_EXTENT_REF_V0_KEY: -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - print_extent_ref_v0(l, i); -#else - BUG(); -#endif + btrfs_print_v0_err(fs_info); + btrfs_handle_fs_error(fs_info, -EINVAL, NULL); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c25dc47210a3..4353bb69bb86 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -530,11 +530,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) fs_info->qgroup_ulist = NULL; } -static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, - u64 src, u64 dst) +static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, + u64 dst) { int ret; + struct btrfs_root *quota_root = trans->fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; @@ -554,11 +554,11 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, return ret; } -static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, - u64 src, u64 dst) +static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, + u64 dst) { int ret; + struct btrfs_root *quota_root = trans->fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; @@ -653,10 +653,10 @@ out: return ret; } -static int del_qgroup_item(struct btrfs_trans_handle *trans, - struct btrfs_root *quota_root, u64 qgroupid) +static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) { int ret; + struct btrfs_root *quota_root = trans->fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; @@ -700,9 +700,9 @@ out: } static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_qgroup *qgroup) { + struct btrfs_root *quota_root = trans->fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; struct extent_buffer *l; @@ -718,7 +718,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); if (ret > 0) ret = -ENOENT; @@ -742,9 +742,10 @@ out: } static int update_qgroup_info_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_qgroup *qgroup) { + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_root *quota_root = fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; struct extent_buffer *l; @@ -752,7 +753,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, int ret; int slot; - if (btrfs_is_testing(root->fs_info)) + if (btrfs_is_testing(fs_info)) return 0; key.objectid = 0; @@ -763,7 +764,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); if (ret > 0) ret = -ENOENT; @@ -786,10 +787,10 @@ out: return ret; } -static int update_qgroup_status_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_root *root) +static int update_qgroup_status_item(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_root *quota_root = fs_info->quota_root; struct btrfs_path *path; struct btrfs_key key; struct extent_buffer *l; @@ -805,7 +806,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); if (ret > 0) ret = -ENOENT; @@ -875,8 +876,7 @@ out: return ret; } -int btrfs_quota_enable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_quota_enable(struct btrfs_fs_info *fs_info) { struct btrfs_root *quota_root; struct btrfs_root *tree_root = fs_info->tree_root; @@ -886,6 +886,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_key found_key; struct btrfs_qgroup *qgroup = NULL; + struct btrfs_trans_handle *trans = NULL; int ret = 0; int slot; @@ -893,9 +894,25 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, if (fs_info->quota_root) goto out; + /* + * 1 for quota root item + * 1 for BTRFS_QGROUP_STATUS item + * + * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items + * per subvolume. However those are not currently reserved since it + * would be a lot of overkill. + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); if (!fs_info->qgroup_ulist) { ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); goto out; } @@ -906,12 +923,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, BTRFS_QUOTA_TREE_OBJECTID); if (IS_ERR(quota_root)) { ret = PTR_ERR(quota_root); + btrfs_abort_transaction(trans, ret); goto out; } path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); goto out_free_root; } @@ -921,8 +940,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*ptr)); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out_free_path; + } leaf = path->nodes[0]; ptr = btrfs_item_ptr(leaf, path->slots[0], @@ -944,9 +965,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); if (ret > 0) goto out_add_root; - if (ret < 0) + if (ret < 0) { + btrfs_abort_transaction(trans, ret); goto out_free_path; - + } while (1) { slot = path->slots[0]; @@ -956,18 +978,23 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, if (found_key.type == BTRFS_ROOT_REF_KEY) { ret = add_qgroup_item(trans, quota_root, found_key.offset); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out_free_path; + } qgroup = add_qgroup_rb(fs_info, found_key.offset); if (IS_ERR(qgroup)) { ret = PTR_ERR(qgroup); + btrfs_abort_transaction(trans, ret); goto out_free_path; } } ret = btrfs_next_item(tree_root, path); - if (ret < 0) + if (ret < 0) { + btrfs_abort_transaction(trans, ret); goto out_free_path; + } if (ret) break; } @@ -975,18 +1002,28 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, out_add_root: btrfs_release_path(path); ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out_free_path; + } qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); if (IS_ERR(qgroup)) { ret = PTR_ERR(qgroup); + btrfs_abort_transaction(trans, ret); goto out_free_path; } spin_lock(&fs_info->qgroup_lock); fs_info->quota_root = quota_root; set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); spin_unlock(&fs_info->qgroup_lock); + + ret = btrfs_commit_transaction(trans); + if (ret) { + trans = NULL; + goto out_free_path; + } + ret = qgroup_rescan_init(fs_info, 0, 1); if (!ret) { qgroup_rescan_zero_tracking(fs_info); @@ -1006,20 +1043,35 @@ out: if (ret) { ulist_free(fs_info->qgroup_ulist); fs_info->qgroup_ulist = NULL; + if (trans) + btrfs_end_transaction(trans); } mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } -int btrfs_quota_disable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_quota_disable(struct btrfs_fs_info *fs_info) { struct btrfs_root *quota_root; + struct btrfs_trans_handle *trans = NULL; int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + + /* + * 1 For the root item + * + * We should also reserve enough items for the quota tree deletion in + * btrfs_clean_quota_tree but this is not done. + */ + trans = btrfs_start_transaction(fs_info->tree_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); @@ -1031,12 +1083,16 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, btrfs_free_qgroup_config(fs_info); ret = btrfs_clean_quota_tree(trans, quota_root); - if (ret) - goto out; + if (ret) { + btrfs_abort_transaction(trans, ret); + goto end_trans; + } - ret = btrfs_del_root(trans, fs_info, "a_root->root_key); - if (ret) - goto out; + ret = btrfs_del_root(trans, "a_root->root_key); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto end_trans; + } list_del("a_root->dirty_list); @@ -1048,6 +1104,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, free_extent_buffer(quota_root->node); free_extent_buffer(quota_root->commit_root); kfree(quota_root); + +end_trans: + ret = btrfs_end_transaction(trans); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; @@ -1177,9 +1236,10 @@ out: return ret; } -int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst) +int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, + u64 dst) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *parent; struct btrfs_qgroup *member; @@ -1216,13 +1276,13 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, } } - ret = add_qgroup_relation_item(trans, quota_root, src, dst); + ret = add_qgroup_relation_item(trans, src, dst); if (ret) goto out; - ret = add_qgroup_relation_item(trans, quota_root, dst, src); + ret = add_qgroup_relation_item(trans, dst, src); if (ret) { - del_qgroup_relation_item(trans, quota_root, src, dst); + del_qgroup_relation_item(trans, src, dst); goto out; } @@ -1240,9 +1300,10 @@ out: return ret; } -static int __del_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst) +static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, + u64 dst) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *parent; struct btrfs_qgroup *member; @@ -1276,8 +1337,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, ret = -ENOENT; goto out; exist: - ret = del_qgroup_relation_item(trans, quota_root, src, dst); - err = del_qgroup_relation_item(trans, quota_root, dst, src); + ret = del_qgroup_relation_item(trans, src, dst); + err = del_qgroup_relation_item(trans, dst, src); if (err && !ret) ret = err; @@ -1290,21 +1351,22 @@ out: return ret; } -int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst) +int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, + u64 dst) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); - ret = __del_qgroup_relation(trans, fs_info, src, dst); + ret = __del_qgroup_relation(trans, src, dst); mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } -int btrfs_create_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid) +int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; @@ -1336,9 +1398,9 @@ out: return ret; } -int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid) +int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; struct btrfs_qgroup_list *list; @@ -1362,16 +1424,15 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, goto out; } } - ret = del_qgroup_item(trans, quota_root, qgroupid); + ret = del_qgroup_item(trans, qgroupid); if (ret && ret != -ENOENT) goto out; while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, struct btrfs_qgroup_list, next_group); - ret = __del_qgroup_relation(trans, fs_info, - qgroupid, - list->group->qgroupid); + ret = __del_qgroup_relation(trans, qgroupid, + list->group->qgroupid); if (ret) goto out; } @@ -1384,10 +1445,10 @@ out: return ret; } -int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid, +int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, struct btrfs_qgroup_limit *limit) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; @@ -1451,7 +1512,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, spin_unlock(&fs_info->qgroup_lock); - ret = update_qgroup_limit_item(trans, quota_root, qgroup); + ret = update_qgroup_limit_item(trans, qgroup); if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; btrfs_info(fs_info, "unable to update quota limit for %llu", @@ -1519,10 +1580,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, - gfp_t gfp_flag) +int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, + u64 num_bytes, gfp_t gfp_flag) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_qgroup_extent_record *record; struct btrfs_delayed_ref_root *delayed_refs; int ret; @@ -1530,8 +1591,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || bytenr == 0 || num_bytes == 0) return 0; - if (WARN_ON(trans == NULL)) - return -EINVAL; record = kmalloc(sizeof(*record), gfp_flag); if (!record) return -ENOMEM; @@ -1552,9 +1611,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { + struct btrfs_fs_info *fs_info = trans->fs_info; int nr = btrfs_header_nritems(eb); int i, extent_type, ret; struct btrfs_key key; @@ -1584,8 +1643,8 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, - num_bytes, GFP_NOFS); + ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes, + GFP_NOFS); if (ret) return ret; } @@ -1655,11 +1714,10 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level) } int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *root_eb, u64 root_gen, int root_level) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; int ret = 0; int level; struct extent_buffer *eb = root_eb; @@ -1678,7 +1736,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, } if (root_level == 0) { - ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); + ret = btrfs_qgroup_trace_leaf_items(trans, root_eb); goto out; } @@ -1736,8 +1794,7 @@ walk_down: btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - ret = btrfs_qgroup_trace_extent(trans, fs_info, - child_bytenr, + ret = btrfs_qgroup_trace_extent(trans, child_bytenr, fs_info->nodesize, GFP_NOFS); if (ret) @@ -1745,8 +1802,8 @@ walk_down: } if (level == 0) { - ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, - path->nodes[level]); + ret = btrfs_qgroup_trace_leaf_items(trans, + path->nodes[level]); if (ret) goto out; @@ -1981,12 +2038,11 @@ static int maybe_fs_roots(struct ulist *roots) return is_fstree(unode->val); } -int -btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 bytenr, u64 num_bytes, - struct ulist *old_roots, struct ulist *new_roots) +int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, + u64 num_bytes, struct ulist *old_roots, + struct ulist *new_roots) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct ulist *qgroups = NULL; struct ulist *tmp = NULL; u64 seq; @@ -2116,9 +2172,10 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) ulist_del(record->old_roots, qgroup_to_skip, 0); } - ret = btrfs_qgroup_account_extent(trans, fs_info, - record->bytenr, record->num_bytes, - record->old_roots, new_roots); + ret = btrfs_qgroup_account_extent(trans, record->bytenr, + record->num_bytes, + record->old_roots, + new_roots); record->old_roots = NULL; new_roots = NULL; } @@ -2136,9 +2193,9 @@ cleanup: /* * called from commit_transaction. Writes all changed qgroups to disk. */ -int btrfs_run_qgroups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_run_qgroups(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root = fs_info->quota_root; int ret = 0; @@ -2152,11 +2209,11 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, struct btrfs_qgroup, dirty); list_del_init(&qgroup->dirty); spin_unlock(&fs_info->qgroup_lock); - ret = update_qgroup_info_item(trans, quota_root, qgroup); + ret = update_qgroup_info_item(trans, qgroup); if (ret) fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - ret = update_qgroup_limit_item(trans, quota_root, qgroup); + ret = update_qgroup_limit_item(trans, qgroup); if (ret) fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; @@ -2168,7 +2225,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; spin_unlock(&fs_info->qgroup_lock); - ret = update_qgroup_status_item(trans, fs_info, quota_root); + ret = update_qgroup_status_item(trans); if (ret) fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; @@ -2181,13 +2238,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, * cause a transaction abort so we take extra care here to only error * when a readonly fs is a reasonable outcome. */ -int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, - struct btrfs_qgroup_inherit *inherit) +int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, + u64 objectid, struct btrfs_qgroup_inherit *inherit) { int ret = 0; int i; u64 *i_qgroups; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root = fs_info->quota_root; struct btrfs_qgroup *srcgroup; struct btrfs_qgroup *dstgroup; @@ -2229,22 +2286,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, if (ret) goto out; - if (srcid) { - struct btrfs_root *srcroot; - struct btrfs_key srckey; - - srckey.objectid = srcid; - srckey.type = BTRFS_ROOT_ITEM_KEY; - srckey.offset = (u64)-1; - srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); - if (IS_ERR(srcroot)) { - ret = PTR_ERR(srcroot); - goto out; - } - - level_size = fs_info->nodesize; - } - /* * add qgroup to all inherited groups */ @@ -2253,12 +2294,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { if (*i_qgroups == 0) continue; - ret = add_qgroup_relation_item(trans, quota_root, - objectid, *i_qgroups); + ret = add_qgroup_relation_item(trans, objectid, + *i_qgroups); if (ret && ret != -EEXIST) goto out; - ret = add_qgroup_relation_item(trans, quota_root, - *i_qgroups, objectid); + ret = add_qgroup_relation_item(trans, *i_qgroups, + objectid); if (ret && ret != -EEXIST) goto out; } @@ -2281,7 +2322,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, quota_root, dstgroup); + ret = update_qgroup_limit_item(trans, dstgroup); if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; btrfs_info(fs_info, @@ -2301,6 +2342,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, * our counts don't go crazy, so at this point the only * difference between the two roots should be the root node. */ + level_size = fs_info->nodesize; dstgroup->rfer = srcgroup->rfer; dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; dstgroup->excl = level_size; @@ -2598,10 +2640,10 @@ static bool is_last_leaf(struct btrfs_path *path) * returns < 0 on error, 0 when more leafs are to be scanned. * returns 1 when done. */ -static int -qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - struct btrfs_trans_handle *trans) +static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, + struct btrfs_path *path) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_key found; struct extent_buffer *scratch_leaf = NULL; struct ulist *roots = NULL; @@ -2669,8 +2711,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (ret < 0) goto out; /* For rescan, just pass old_roots as NULL */ - ret = btrfs_qgroup_account_extent(trans, fs_info, - found.objectid, num_bytes, NULL, roots); + ret = btrfs_qgroup_account_extent(trans, found.objectid, + num_bytes, NULL, roots); if (ret < 0) goto out; } @@ -2716,7 +2758,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { err = -EINTR; } else { - err = qgroup_rescan_leaf(fs_info, path, trans); + err = qgroup_rescan_leaf(trans, path); } if (err > 0) btrfs_commit_transaction(trans); @@ -2751,7 +2793,7 @@ out: err); goto done; } - ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); + ret = update_qgroup_status_item(trans); if (ret < 0) { err = ret; btrfs_err(fs_info, "fail to update qgroup status: %d", err); diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index d60dd06445ce..54b8bb282c0e 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -141,24 +141,19 @@ struct btrfs_qgroup { #define QGROUP_RELEASE (1<<1) #define QGROUP_FREE (1<<2) -int btrfs_quota_enable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); -int btrfs_quota_disable(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +int btrfs_quota_enable(struct btrfs_fs_info *fs_info); +int btrfs_quota_disable(struct btrfs_fs_info *fs_info); int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, bool interruptible); -int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst); -int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 src, u64 dst); -int btrfs_create_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid); -int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid); -int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid, +int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, + u64 dst); +int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, + u64 dst); +int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); +int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); +int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, struct btrfs_qgroup_limit *limit); int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); @@ -217,9 +212,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, * Return <0 for error, like memory allocation failure or invalid parameter * (NULL trans) */ -int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, - gfp_t gfp_flag); +int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, + u64 num_bytes, gfp_t gfp_flag); /* * Inform qgroup to trace all leaf items of data @@ -228,7 +222,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, * Return <0 for error(ENOMEM) */ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *eb); /* * Inform qgroup to trace a whole subtree, including all its child tree @@ -241,20 +234,15 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, * Return <0 for error(ENOMEM or tree search error) */ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *root_eb, u64 root_gen, int root_level); -int -btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 bytenr, u64 num_bytes, - struct ulist *old_roots, struct ulist *new_roots); +int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, + u64 num_bytes, struct ulist *old_roots, + struct ulist *new_roots); int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); -int btrfs_run_qgroups(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); -int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, - struct btrfs_qgroup_inherit *inherit); +int btrfs_run_qgroups(struct btrfs_trans_handle *trans); +int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, + u64 objectid, struct btrfs_qgroup_inherit *inherit); void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes, enum btrfs_qgroup_rsv_type type); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5e4ad134b9ad..df41d7049936 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -5,32 +5,19 @@ */ #include <linux/sched.h> -#include <linux/wait.h> #include <linux/bio.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include <linux/blkdev.h> -#include <linux/random.h> -#include <linux/iocontext.h> -#include <linux/capability.h> -#include <linux/ratelimit.h> -#include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/hash.h> #include <linux/list_sort.h> #include <linux/raid/xor.h> #include <linux/mm.h> -#include <asm/div64.h> #include "ctree.h" -#include "extent_map.h" #include "disk-io.h" -#include "transaction.h" -#include "print-tree.h" #include "volumes.h" #include "raid56.h" #include "async-thread.h" -#include "check-integrity.h" -#include "rcu-string.h" /* set when additional merges to this rbio are not allowed */ #define RBIO_RMW_LOCKED_BIT 1 @@ -175,8 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); static noinline void finish_rmw(struct btrfs_raid_bio *rbio); static void rmw_work(struct btrfs_work *work); static void read_rebuild_work(struct btrfs_work *work); -static void async_rmw_stripe(struct btrfs_raid_bio *rbio); -static void async_read_rebuild(struct btrfs_raid_bio *rbio); static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio); static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed); static void __free_raid_bio(struct btrfs_raid_bio *rbio); @@ -185,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); -static void async_scrub_parity(struct btrfs_raid_bio *rbio); +static void scrub_parity_work(struct btrfs_work *work); + +static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) +{ + btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL); + btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); +} /* * the stripe hash table is used for locking, and to collect @@ -260,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) s = kmap(rbio->bio_pages[i]); d = kmap(rbio->stripe_pages[i]); - memcpy(d, s, PAGE_SIZE); + copy_page(d, s); kunmap(rbio->bio_pages[i]); kunmap(rbio->stripe_pages[i]); @@ -516,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len) } /* - * returns true if the bio list inside this rbio - * covers an entire stripe (no rmw required). - * Must be called with the bio list lock held, or - * at a time when you know it is impossible to add - * new bios into the list + * Returns true if the bio list inside this rbio covers an entire stripe (no + * rmw required). */ -static int __rbio_is_full(struct btrfs_raid_bio *rbio) +static int rbio_is_full(struct btrfs_raid_bio *rbio) { + unsigned long flags; unsigned long size = rbio->bio_list_bytes; int ret = 1; + spin_lock_irqsave(&rbio->bio_list_lock, flags); if (size != rbio->nr_data * rbio->stripe_len) ret = 0; - BUG_ON(size > rbio->nr_data * rbio->stripe_len); - return ret; -} - -static int rbio_is_full(struct btrfs_raid_bio *rbio) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&rbio->bio_list_lock, flags); - ret = __rbio_is_full(rbio); spin_unlock_irqrestore(&rbio->bio_list_lock, flags); + return ret; } @@ -812,16 +792,16 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) spin_unlock_irqrestore(&h->lock, flags); if (next->operation == BTRFS_RBIO_READ_REBUILD) - async_read_rebuild(next); + start_async_work(next, read_rebuild_work); else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) { steal_rbio(rbio, next); - async_read_rebuild(next); + start_async_work(next, read_rebuild_work); } else if (next->operation == BTRFS_RBIO_WRITE) { steal_rbio(rbio, next); - async_rmw_stripe(next); + start_async_work(next, rmw_work); } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { steal_rbio(rbio, next); - async_scrub_parity(next); + start_async_work(next, scrub_parity_work); } goto done_nolock; @@ -1275,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) pointers); } else { /* raid5 */ - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); + copy_page(pointers[nr_data], pointers[0]); run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -1343,7 +1323,7 @@ write_data: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; submit_bio(bio); } @@ -1508,20 +1488,6 @@ cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); } -static void async_rmw_stripe(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL); - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - -static void async_read_rebuild(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, - read_rebuild_work, NULL, NULL); - - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - /* * the stripe must be locked by the caller. It will * unlock after all the writes are done @@ -1599,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_rmw_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -1652,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio) ret = lock_stripe_add(rbio); if (ret == 0) - async_rmw_stripe(rbio); + start_async_work(rbio, rmw_work); return 0; } @@ -1720,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug) list_del_init(&cur->plug_list); if (rbio_is_full(cur)) { + int ret; + /* we have a full stripe, send it down */ - full_stripe_write(cur); + ret = full_stripe_write(cur); + BUG_ON(ret); continue; } if (last) { @@ -1941,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) BUG_ON(failb != -1); pstripe: /* Copy parity block into failed block to start with */ - memcpy(pointers[faila], - pointers[rbio->nr_data], - PAGE_SIZE); + copy_page(pointers[faila], pointers[rbio->nr_data]); /* rearrange the pointer array */ p = pointers[faila]; @@ -2145,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_recover_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -2448,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers); } else { /* raid5 */ - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); + copy_page(pointers[nr_data], pointers[0]); run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -2456,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); parity = kmap(p); if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) - memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); + copy_page(parity, pointers[rbio->scrubp]); else /* Parity is right, needn't writeback */ bitmap_clear(rbio->dbitmap, pagenr, 1); @@ -2517,7 +2484,7 @@ submit_write: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; submit_bio(bio); } @@ -2699,7 +2666,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid56_parity_scrub_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -2728,18 +2695,10 @@ static void scrub_parity_work(struct btrfs_work *work) raid56_parity_scrub_stripe(rbio); } -static void async_scrub_parity(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, - scrub_parity_work, NULL, NULL); - - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) - async_scrub_parity(rbio); + start_async_work(rbio, scrub_parity_work); } /* The following code is used for dev replace of a missing RAID 5/6 device. */ @@ -2781,5 +2740,5 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) - async_read_rebuild(rbio); + start_async_work(rbio, read_rebuild_work); } diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 40f1bcef394d..dec14b739b10 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -7,7 +7,6 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/blkdev.h> -#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/workqueue.h> #include "ctree.h" @@ -355,7 +354,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, dev = bbio->stripes[nzones].dev; /* cannot read ahead on missing device. */ - if (!dev->bdev) + if (!dev->bdev) continue; zone = reada_find_zone(dev, logical, bbio); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 879b76fa881a..8783a1776540 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -586,29 +586,6 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, return btrfs_get_fs_root(fs_info, &key, false); } -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -static noinline_for_stack -struct btrfs_root *find_tree_root(struct reloc_control *rc, - struct extent_buffer *leaf, - struct btrfs_extent_ref_v0 *ref0) -{ - struct btrfs_root *root; - u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); - u64 generation = btrfs_ref_generation_v0(leaf, ref0); - - BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); - - root = read_fs_root(rc->extent_root->fs_info, root_objectid); - BUG_ON(IS_ERR(root)); - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && - generation != btrfs_root_generation(&root->root_item)) - return NULL; - - return root; -} -#endif - static noinline_for_stack int find_inline_backref(struct extent_buffer *leaf, int slot, unsigned long *ptr, unsigned long *end) @@ -621,12 +598,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, btrfs_item_key_to_cpu(leaf, &key, slot); item_size = btrfs_item_size_nr(leaf, slot); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { - WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); + btrfs_print_v0_err(leaf->fs_info); + btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL); return 1; } -#endif ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); WARN_ON(!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)); @@ -792,7 +768,7 @@ again: type = btrfs_get_extent_inline_ref_type(eb, iref, BTRFS_REF_TYPE_BLOCK); if (type == BTRFS_REF_TYPE_INVALID) { - err = -EINVAL; + err = -EUCLEAN; goto out; } key.type = type; @@ -811,29 +787,7 @@ again: goto next; } -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || - key.type == BTRFS_EXTENT_REF_V0_KEY) { - if (key.type == BTRFS_EXTENT_REF_V0_KEY) { - struct btrfs_extent_ref_v0 *ref0; - ref0 = btrfs_item_ptr(eb, path1->slots[0], - struct btrfs_extent_ref_v0); - if (key.objectid == key.offset) { - root = find_tree_root(rc, eb, ref0); - if (root && !should_ignore_root(root)) - cur->root = root; - else - list_add(&cur->list, &useless); - break; - } - if (is_cowonly_root(btrfs_ref_root_v0(eb, - ref0))) - cur->cowonly = 1; - } -#else - ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { -#endif if (key.objectid == key.offset) { /* * only root blocks of reloc trees use @@ -876,6 +830,12 @@ again: edge->node[UPPER] = upper; goto next; + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { + err = -EINVAL; + btrfs_print_v0_err(rc->extent_root->fs_info); + btrfs_handle_fs_error(rc->extent_root->fs_info, err, + NULL); + goto out; } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { goto next; } @@ -1321,18 +1281,19 @@ static void __del_reloc_root(struct btrfs_root *root) struct mapping_node *node = NULL; struct reloc_control *rc = fs_info->reloc_ctl; - spin_lock(&rc->reloc_root_tree.lock); - rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); - if (rb_node) { - node = rb_entry(rb_node, struct mapping_node, rb_node); - rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + if (rc) { + spin_lock(&rc->reloc_root_tree.lock); + rb_node = tree_search(&rc->reloc_root_tree.rb_root, + root->node->start); + if (rb_node) { + node = rb_entry(rb_node, struct mapping_node, rb_node); + rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + } + spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return; + BUG_ON((struct btrfs_root *)node->data != root); } - spin_unlock(&rc->reloc_root_tree.lock); - - if (!node) - return; - BUG_ON((struct btrfs_root *)node->data != root); spin_lock(&fs_info->trans_lock); list_del_init(&root->root_list); @@ -1918,13 +1879,12 @@ again: * and tree block numbers, if current trans doesn't free * data reloc tree inode. */ - ret = btrfs_qgroup_trace_subtree(trans, src, parent, + ret = btrfs_qgroup_trace_subtree(trans, parent, btrfs_header_generation(parent), btrfs_header_level(parent)); if (ret < 0) break; - ret = btrfs_qgroup_trace_subtree(trans, dest, - path->nodes[level], + ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level], btrfs_header_generation(path->nodes[level]), btrfs_header_level(path->nodes[level])); if (ret < 0) @@ -3333,48 +3293,6 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, return 0; } -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -static int get_ref_objectid_v0(struct reloc_control *rc, - struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 *ref_objectid, int *path_change) -{ - struct btrfs_key key; - struct extent_buffer *leaf; - struct btrfs_extent_ref_v0 *ref0; - int ret; - int slot; - - leaf = path->nodes[0]; - slot = path->slots[0]; - while (1) { - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(rc->extent_root, path); - if (ret < 0) - return ret; - BUG_ON(ret > 0); - leaf = path->nodes[0]; - slot = path->slots[0]; - if (path_change) - *path_change = 1; - } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != extent_key->objectid) - return -ENOENT; - - if (key.type != BTRFS_EXTENT_REF_V0_KEY) { - slot++; - continue; - } - ref0 = btrfs_item_ptr(leaf, slot, - struct btrfs_extent_ref_v0); - *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); - break; - } - return 0; -} -#endif - /* * helper to add a tree block to the list. * the major work is getting the generation and level of the block @@ -3407,23 +3325,12 @@ static int add_tree_block(struct reloc_control *rc, level = (int)extent_key->offset; } generation = btrfs_extent_generation(eb, ei); + } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { + btrfs_print_v0_err(eb->fs_info); + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); + return -EINVAL; } else { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - u64 ref_owner; - int ret; - - BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); - ret = get_ref_objectid_v0(rc, path, extent_key, - &ref_owner, NULL); - if (ret < 0) - return ret; - BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); - level = (int)ref_owner; - /* FIXME: get real generation */ - generation = 0; -#else BUG(); -#endif } btrfs_release_path(path); @@ -3563,11 +3470,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, root, NULL); - if (IS_ERR(inode) || is_bad_inode(inode)) { - if (!IS_ERR(inode)) - iput(inode); + if (IS_ERR(inode)) return -ENOENT; - } truncate: ret = btrfs_check_trunc_cache_free_space(fs_info, @@ -3781,12 +3685,7 @@ int add_data_references(struct reloc_control *rc, eb = path->nodes[0]; ptr = btrfs_item_ptr_offset(eb, path->slots[0]); end = ptr + btrfs_item_size_nr(eb, path->slots[0]); -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (ptr + sizeof(struct btrfs_extent_item_v0) == end) - ptr = end; - else -#endif - ptr += sizeof(struct btrfs_extent_item); + ptr += sizeof(struct btrfs_extent_item); while (ptr < end) { iref = (struct btrfs_extent_inline_ref *)ptr; @@ -3801,7 +3700,7 @@ int add_data_references(struct reloc_control *rc, ret = find_data_references(rc, extent_key, eb, dref, blocks); } else { - ret = -EINVAL; + ret = -EUCLEAN; btrfs_err(rc->extent_root->fs_info, "extent %llu slot %d has an invalid inline ref type", eb->start, path->slots[0]); @@ -3832,13 +3731,7 @@ int add_data_references(struct reloc_control *rc, if (key.objectid != extent_key->objectid) break; -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - if (key.type == BTRFS_SHARED_DATA_REF_KEY || - key.type == BTRFS_EXTENT_REF_V0_KEY) { -#else - BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); if (key.type == BTRFS_SHARED_DATA_REF_KEY) { -#endif ret = __add_tree_block(rc, key.offset, blocksize, blocks); } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { @@ -3846,6 +3739,10 @@ int add_data_references(struct reloc_control *rc, struct btrfs_extent_data_ref); ret = find_data_references(rc, extent_key, eb, dref, blocks); + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { + btrfs_print_v0_err(eb->fs_info); + btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); + ret = -EINVAL; } else { ret = 0; } @@ -4084,41 +3981,13 @@ restart: flags = btrfs_extent_flags(path->nodes[0], ei); ret = check_extent_flags(flags); BUG_ON(ret); - + } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { + err = -EINVAL; + btrfs_print_v0_err(trans->fs_info); + btrfs_abort_transaction(trans, err); + break; } else { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - u64 ref_owner; - int path_change = 0; - - BUG_ON(item_size != - sizeof(struct btrfs_extent_item_v0)); - ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, - &path_change); - if (ret < 0) { - err = ret; - break; - } - if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) - flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; - else - flags = BTRFS_EXTENT_FLAG_DATA; - - if (path_change) { - btrfs_release_path(path); - - path->search_commit_root = 1; - path->skip_locking = 1; - ret = btrfs_search_slot(NULL, rc->extent_root, - &key, path, 0, 0); - if (ret < 0) { - err = ret; - break; - } - BUG_ON(ret > 0); - } -#else BUG(); -#endif } if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { @@ -4169,8 +4038,7 @@ restart: } } if (trans && progress && err == -ENOSPC) { - ret = btrfs_force_chunk_alloc(trans, fs_info, - rc->block_group->flags); + ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags); if (ret == 1) { err = 0; progress = 0; @@ -4284,7 +4152,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, root, NULL); - BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); + BUG_ON(IS_ERR(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; err = btrfs_orphan_add(trans, BTRFS_I(inode)); @@ -4375,7 +4243,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) rc->block_group = btrfs_lookup_block_group(fs_info, group_start); BUG_ON(!rc->block_group); - ret = btrfs_inc_block_group_ro(fs_info, rc->block_group); + ret = btrfs_inc_block_group_ro(rc->block_group); if (ret) { err = ret; goto out; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index c451285976ac..65bda0682928 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -320,9 +320,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) /* drop the root item for 'key' from the tree root */ int btrfs_del_root(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, const struct btrfs_key *key) + const struct btrfs_key *key) { - struct btrfs_root *root = fs_info->tree_root; + struct btrfs_root *root = trans->fs_info->tree_root; struct btrfs_path *path; int ret; @@ -341,13 +341,12 @@ out: return ret; } -int btrfs_del_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, - const char *name, int name_len) +int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 *sequence, const char *name, + int name_len) { - struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *tree_root = trans->fs_info->tree_root; struct btrfs_path *path; struct btrfs_root_ref *ref; struct extent_buffer *leaf; @@ -413,12 +412,11 @@ out: * * Will return 0, -ENOMEM, or anything from the CoW path */ -int btrfs_add_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 root_id, u64 ref_id, u64 dirid, u64 sequence, - const char *name, int name_len) +int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, + u64 ref_id, u64 dirid, u64 sequence, const char *name, + int name_len) { - struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *tree_root = trans->fs_info->tree_root; struct btrfs_key key; int ret; struct btrfs_path *path; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6702896cdb8f..3be1456b5116 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -188,32 +188,6 @@ struct scrub_ctx { refcount_t refs; }; -struct scrub_fixup_nodatasum { - struct scrub_ctx *sctx; - struct btrfs_device *dev; - u64 logical; - struct btrfs_root *root; - struct btrfs_work work; - int mirror_num; -}; - -struct scrub_nocow_inode { - u64 inum; - u64 offset; - u64 root; - struct list_head list; -}; - -struct scrub_copy_nocow_ctx { - struct scrub_ctx *sctx; - u64 logical; - u64 len; - int mirror_num; - u64 physical_for_dev_replace; - struct list_head inodes; - struct btrfs_work work; -}; - struct scrub_warning { struct btrfs_path *path; u64 extent_item_size; @@ -232,8 +206,6 @@ struct full_stripe_lock { static void scrub_pending_bio_inc(struct scrub_ctx *sctx); static void scrub_pending_bio_dec(struct scrub_ctx *sctx); -static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); -static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct scrub_block *sblocks_for_recheck); @@ -277,13 +249,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, static void scrub_wr_submit(struct scrub_ctx *sctx); static void scrub_wr_bio_end_io(struct bio *bio); static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); -static int write_page_nocow(struct scrub_ctx *sctx, - u64 physical_for_dev_replace, struct page *page); -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct scrub_copy_nocow_ctx *ctx); -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, - int mirror_num, u64 physical_for_dev_replace); -static void copy_nocow_pages_worker(struct btrfs_work *work); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_put_ctx(struct scrub_ctx *sctx); @@ -555,60 +520,6 @@ out: return ret; } -/* - * used for workers that require transaction commits (i.e., for the - * NOCOW case) - */ -static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) -{ - struct btrfs_fs_info *fs_info = sctx->fs_info; - - refcount_inc(&sctx->refs); - /* - * increment scrubs_running to prevent cancel requests from - * completing as long as a worker is running. we must also - * increment scrubs_paused to prevent deadlocking on pause - * requests used for transactions commits (as the worker uses a - * transaction context). it is safe to regard the worker - * as paused for all matters practical. effectively, we only - * avoid cancellation requests from completing. - */ - mutex_lock(&fs_info->scrub_lock); - atomic_inc(&fs_info->scrubs_running); - atomic_inc(&fs_info->scrubs_paused); - mutex_unlock(&fs_info->scrub_lock); - - /* - * check if @scrubs_running=@scrubs_paused condition - * inside wait_event() is not an atomic operation. - * which means we may inc/dec @scrub_running/paused - * at any time. Let's wake up @scrub_pause_wait as - * much as we can to let commit transaction blocked less. - */ - wake_up(&fs_info->scrub_pause_wait); - - atomic_inc(&sctx->workers_pending); -} - -/* used for workers that require transaction commits */ -static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) -{ - struct btrfs_fs_info *fs_info = sctx->fs_info; - - /* - * see scrub_pending_trans_workers_inc() why we're pretending - * to be paused in the scrub counters - */ - mutex_lock(&fs_info->scrub_lock); - atomic_dec(&fs_info->scrubs_running); - atomic_dec(&fs_info->scrubs_paused); - mutex_unlock(&fs_info->scrub_lock); - atomic_dec(&sctx->workers_pending); - wake_up(&fs_info->scrub_pause_wait); - wake_up(&sctx->list_wait); - scrub_put_ctx(sctx); -} - static void scrub_free_csums(struct scrub_ctx *sctx) { while (!list_empty(&sctx->csum_list)) { @@ -882,194 +793,6 @@ out: btrfs_free_path(path); } -static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) -{ - struct page *page = NULL; - unsigned long index; - struct scrub_fixup_nodatasum *fixup = fixup_ctx; - int ret; - int corrected = 0; - struct btrfs_key key; - struct inode *inode = NULL; - struct btrfs_fs_info *fs_info; - u64 end = offset + PAGE_SIZE - 1; - struct btrfs_root *local_root; - int srcu_index; - - key.objectid = root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - fs_info = fixup->root->fs_info; - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); - - local_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(local_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return PTR_ERR(local_root); - } - - key.type = BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - index = offset >> PAGE_SHIFT; - - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - goto out; - } - - if (PageUptodate(page)) { - if (PageDirty(page)) { - /* - * we need to write the data to the defect sector. the - * data that was in that sector is not in memory, - * because the page was modified. we must not write the - * modified page to that sector. - * - * TODO: what could be done here: wait for the delalloc - * runner to write out that page (might involve - * COW) and see whether the sector is still - * referenced afterwards. - * - * For the meantime, we'll treat this error - * incorrectable, although there is a chance that a - * later scrub will find the bad sector again and that - * there's no dirty page in memory, then. - */ - ret = -EIO; - goto out; - } - ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE, - fixup->logical, page, - offset - page_offset(page), - fixup->mirror_num); - unlock_page(page); - corrected = !ret; - } else { - /* - * we need to get good data first. the general readpage path - * will call repair_io_failure for us, we just have to make - * sure we read the bad mirror. - */ - ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED); - if (ret) { - /* set_extent_bits should give proper error */ - WARN_ON(ret > 0); - if (ret > 0) - ret = -EFAULT; - goto out; - } - - ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page, - btrfs_get_extent, - fixup->mirror_num); - wait_on_page_locked(page); - - corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, - end, EXTENT_DAMAGED, 0, NULL); - if (!corrected) - clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, - EXTENT_DAMAGED); - } - -out: - if (page) - put_page(page); - - iput(inode); - - if (ret < 0) - return ret; - - if (ret == 0 && corrected) { - /* - * we only need to call readpage for one of the inodes belonging - * to this extent. so make iterate_extent_inodes stop - */ - return 1; - } - - return -EIO; -} - -static void scrub_fixup_nodatasum(struct btrfs_work *work) -{ - struct btrfs_fs_info *fs_info; - int ret; - struct scrub_fixup_nodatasum *fixup; - struct scrub_ctx *sctx; - struct btrfs_trans_handle *trans = NULL; - struct btrfs_path *path; - int uncorrectable = 0; - - fixup = container_of(work, struct scrub_fixup_nodatasum, work); - sctx = fixup->sctx; - fs_info = fixup->root->fs_info; - - path = btrfs_alloc_path(); - if (!path) { - spin_lock(&sctx->stat_lock); - ++sctx->stat.malloc_errors; - spin_unlock(&sctx->stat_lock); - uncorrectable = 1; - goto out; - } - - trans = btrfs_join_transaction(fixup->root); - if (IS_ERR(trans)) { - uncorrectable = 1; - goto out; - } - - /* - * the idea is to trigger a regular read through the standard path. we - * read a page from the (failed) logical address by specifying the - * corresponding copynum of the failed sector. thus, that readpage is - * expected to fail. - * that is the point where on-the-fly error correction will kick in - * (once it's finished) and rewrite the failed sector if a good copy - * can be found. - */ - ret = iterate_inodes_from_logical(fixup->logical, fs_info, path, - scrub_fixup_readpage, fixup, false); - if (ret < 0) { - uncorrectable = 1; - goto out; - } - WARN_ON(ret != 1); - - spin_lock(&sctx->stat_lock); - ++sctx->stat.corrected_errors; - spin_unlock(&sctx->stat_lock); - -out: - if (trans && !IS_ERR(trans)) - btrfs_end_transaction(trans); - if (uncorrectable) { - spin_lock(&sctx->stat_lock); - ++sctx->stat.uncorrectable_errors; - spin_unlock(&sctx->stat_lock); - btrfs_dev_replace_stats_inc( - &fs_info->dev_replace.num_uncorrectable_read_errors); - btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (nodatasum) error at logical %llu on dev %s", - fixup->logical, rcu_str_deref(fixup->dev->name)); - } - - btrfs_free_path(path); - kfree(fixup); - - scrub_pending_trans_workers_dec(sctx); -} - static inline void scrub_get_recover(struct scrub_recover *recover) { refcount_inc(&recover->refs); @@ -1264,42 +987,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) } /* - * NOTE: Even for nodatasum case, it's still possible that it's a - * compressed data extent, thus scrub_fixup_nodatasum(), which write - * inode page cache onto disk, could cause serious data corruption. - * - * So here we could only read from disk, and hope our recovery could - * reach disk before the newer write. - */ - if (0 && !is_metadata && !have_csum) { - struct scrub_fixup_nodatasum *fixup_nodatasum; - - WARN_ON(sctx->is_dev_replace); - - /* - * !is_metadata and !have_csum, this means that the data - * might not be COWed, that it might be modified - * concurrently. The general strategy to work on the - * commit root does not help in the case when COW is not - * used. - */ - fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); - if (!fixup_nodatasum) - goto did_not_correct_error; - fixup_nodatasum->sctx = sctx; - fixup_nodatasum->dev = dev; - fixup_nodatasum->logical = logical; - fixup_nodatasum->root = fs_info->extent_root; - fixup_nodatasum->mirror_num = failed_mirror_index + 1; - scrub_pending_trans_workers_inc(sctx); - btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, - scrub_fixup_nodatasum, NULL, NULL); - btrfs_queue_work(fs_info->scrub_workers, - &fixup_nodatasum->work); - goto out; - } - - /* * now build and submit the bios for the other mirrors, check * checksums. * First try to pick the mirror which is completely without I/O @@ -1866,7 +1553,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, bio = btrfs_io_bio_alloc(1); bio_set_dev(bio, page_bad->dev->bdev); bio->bi_iter.bi_sector = page_bad->physical >> 9; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { @@ -1961,7 +1648,7 @@ again: bio->bi_end_io = scrub_wr_bio_end_io; bio_set_dev(bio, sbio->dev->bdev); bio->bi_iter.bi_sector = sbio->physical >> 9; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || @@ -2361,7 +2048,7 @@ again: bio->bi_end_io = scrub_bio_end_io; bio_set_dev(bio, sbio->dev->bdev); bio->bi_iter.bi_sector = sbio->physical >> 9; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || @@ -2800,17 +2487,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) ++sctx->stat.no_csum; - if (0 && sctx->is_dev_replace && !have_csum) { - ret = copy_nocow_pages(sctx, logical, l, - mirror_num, - physical_for_dev_replace); - goto behind_scrub_pages; - } } ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, mirror_num, have_csum ? csum : NULL, 0, physical_for_dev_replace); -behind_scrub_pages: if (ret) return ret; len -= l; @@ -3863,7 +3543,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * -> btrfs_scrub_pause() */ scrub_pause_on(fs_info); - ret = btrfs_inc_block_group_ro(fs_info, cache); + ret = btrfs_inc_block_group_ro(cache); if (!ret && is_dev_replace) { /* * If we are doing a device replace wait for any tasks @@ -3982,14 +3662,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (!cache->removed && !cache->ro && cache->reserved == 0 && btrfs_block_group_used(&cache->item) == 0) { spin_unlock(&cache->lock); - spin_lock(&fs_info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - trace_btrfs_add_unused_block_group(cache); - list_add_tail(&cache->bg_list, - &fs_info->unused_bgs); - } - spin_unlock(&fs_info->unused_bgs_lock); + btrfs_mark_bg_unused(cache); } else { spin_unlock(&cache->lock); } @@ -4072,10 +3745,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, if (!fs_info->scrub_wr_completion_workers) goto fail_scrub_wr_completion_workers; - fs_info->scrub_nocow_workers = - btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) - goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue(fs_info, "scrubparity", flags, max_active, 2); @@ -4086,8 +3755,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, return 0; fail_scrub_parity_workers: - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); -fail_scrub_nocow_workers: btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); fail_scrub_wr_completion_workers: btrfs_destroy_workqueue(fs_info->scrub_workers); @@ -4100,7 +3767,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) if (--fs_info->scrub_workers_refcnt == 0) { btrfs_destroy_workqueue(fs_info->scrub_workers); btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); - btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); btrfs_destroy_workqueue(fs_info->scrub_parity_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); @@ -4113,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; - struct rcu_string *name; if (btrfs_fs_closing(fs_info)) return -EINVAL; @@ -4167,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (!is_dev_replace && !readonly && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); - rcu_read_lock(); - name = rcu_dereference(dev->name); - btrfs_err(fs_info, "scrub: device %s is not writable", - name->str); - rcu_read_unlock(); + btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", + rcu_str_deref(dev->name)); return -EROFS; } @@ -4359,330 +4021,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, *extent_dev = bbio->stripes[0].dev; btrfs_put_bbio(bbio); } - -static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, - int mirror_num, u64 physical_for_dev_replace) -{ - struct scrub_copy_nocow_ctx *nocow_ctx; - struct btrfs_fs_info *fs_info = sctx->fs_info; - - nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); - if (!nocow_ctx) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - return -ENOMEM; - } - - scrub_pending_trans_workers_inc(sctx); - - nocow_ctx->sctx = sctx; - nocow_ctx->logical = logical; - nocow_ctx->len = len; - nocow_ctx->mirror_num = mirror_num; - nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; - btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, - copy_nocow_pages_worker, NULL, NULL); - INIT_LIST_HEAD(&nocow_ctx->inodes); - btrfs_queue_work(fs_info->scrub_nocow_workers, - &nocow_ctx->work); - - return 0; -} - -static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx) -{ - struct scrub_copy_nocow_ctx *nocow_ctx = ctx; - struct scrub_nocow_inode *nocow_inode; - - nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS); - if (!nocow_inode) - return -ENOMEM; - nocow_inode->inum = inum; - nocow_inode->offset = offset; - nocow_inode->root = root; - list_add_tail(&nocow_inode->list, &nocow_ctx->inodes); - return 0; -} - -#define COPY_COMPLETE 1 - -static void copy_nocow_pages_worker(struct btrfs_work *work) -{ - struct scrub_copy_nocow_ctx *nocow_ctx = - container_of(work, struct scrub_copy_nocow_ctx, work); - struct scrub_ctx *sctx = nocow_ctx->sctx; - struct btrfs_fs_info *fs_info = sctx->fs_info; - struct btrfs_root *root = fs_info->extent_root; - u64 logical = nocow_ctx->logical; - u64 len = nocow_ctx->len; - int mirror_num = nocow_ctx->mirror_num; - u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; - int ret; - struct btrfs_trans_handle *trans = NULL; - struct btrfs_path *path; - int not_written = 0; - - path = btrfs_alloc_path(); - if (!path) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - not_written = 1; - goto out; - } - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - not_written = 1; - goto out; - } - - ret = iterate_inodes_from_logical(logical, fs_info, path, - record_inode_for_nocow, nocow_ctx, false); - if (ret != 0 && ret != -ENOENT) { - btrfs_warn(fs_info, - "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", - logical, physical_for_dev_replace, len, mirror_num, - ret); - not_written = 1; - goto out; - } - - btrfs_end_transaction(trans); - trans = NULL; - while (!list_empty(&nocow_ctx->inodes)) { - struct scrub_nocow_inode *entry; - entry = list_first_entry(&nocow_ctx->inodes, - struct scrub_nocow_inode, - list); - list_del_init(&entry->list); - ret = copy_nocow_pages_for_inode(entry->inum, entry->offset, - entry->root, nocow_ctx); - kfree(entry); - if (ret == COPY_COMPLETE) { - ret = 0; - break; - } else if (ret) { - break; - } - } -out: - while (!list_empty(&nocow_ctx->inodes)) { - struct scrub_nocow_inode *entry; - entry = list_first_entry(&nocow_ctx->inodes, - struct scrub_nocow_inode, - list); - list_del_init(&entry->list); - kfree(entry); - } - if (trans && !IS_ERR(trans)) - btrfs_end_transaction(trans); - if (not_written) - btrfs_dev_replace_stats_inc(&fs_info->dev_replace. - num_uncorrectable_read_errors); - - btrfs_free_path(path); - kfree(nocow_ctx); - - scrub_pending_trans_workers_dec(sctx); -} - -static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, - u64 logical) -{ - struct extent_state *cached_state = NULL; - struct btrfs_ordered_extent *ordered; - struct extent_io_tree *io_tree; - struct extent_map *em; - u64 lockstart = start, lockend = start + len - 1; - int ret = 0; - - io_tree = &inode->io_tree; - - lock_extent_bits(io_tree, lockstart, lockend, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, lockstart, len); - if (ordered) { - btrfs_put_ordered_extent(ordered); - ret = 1; - goto out_unlock; - } - - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out_unlock; - } - - /* - * This extent does not actually cover the logical extent anymore, - * move on to the next inode. - */ - if (em->block_start > logical || - em->block_start + em->block_len < logical + len || - test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { - free_extent_map(em); - ret = 1; - goto out_unlock; - } - free_extent_map(em); - -out_unlock: - unlock_extent_cached(io_tree, lockstart, lockend, &cached_state); - return ret; -} - -static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, - struct scrub_copy_nocow_ctx *nocow_ctx) -{ - struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info; - struct btrfs_key key; - struct inode *inode; - struct page *page; - struct btrfs_root *local_root; - struct extent_io_tree *io_tree; - u64 physical_for_dev_replace; - u64 nocow_ctx_logical; - u64 len = nocow_ctx->len; - unsigned long index; - int srcu_index; - int ret = 0; - int err = 0; - - key.objectid = root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - srcu_index = srcu_read_lock(&fs_info->subvol_srcu); - - local_root = btrfs_read_fs_root_no_name(fs_info, &key); - if (IS_ERR(local_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return PTR_ERR(local_root); - } - - key.type = BTRFS_INODE_ITEM_KEY; - key.objectid = inum; - key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - /* Avoid truncate/dio/punch hole.. */ - inode_lock(inode); - inode_dio_wait(inode); - - physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; - io_tree = &BTRFS_I(inode)->io_tree; - nocow_ctx_logical = nocow_ctx->logical; - - ret = check_extent_to_block(BTRFS_I(inode), offset, len, - nocow_ctx_logical); - if (ret) { - ret = ret > 0 ? 0 : ret; - goto out; - } - - while (len >= PAGE_SIZE) { - index = offset >> PAGE_SHIFT; -again: - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - btrfs_err(fs_info, "find_or_create_page() failed"); - ret = -ENOMEM; - goto out; - } - - if (PageUptodate(page)) { - if (PageDirty(page)) - goto next_page; - } else { - ClearPageError(page); - err = extent_read_full_page(io_tree, page, - btrfs_get_extent, - nocow_ctx->mirror_num); - if (err) { - ret = err; - goto next_page; - } - - lock_page(page); - /* - * If the page has been remove from the page cache, - * the data on it is meaningless, because it may be - * old one, the new data may be written into the new - * page in the page cache. - */ - if (page->mapping != inode->i_mapping) { - unlock_page(page); - put_page(page); - goto again; - } - if (!PageUptodate(page)) { - ret = -EIO; - goto next_page; - } - } - - ret = check_extent_to_block(BTRFS_I(inode), offset, len, - nocow_ctx_logical); - if (ret) { - ret = ret > 0 ? 0 : ret; - goto next_page; - } - - err = write_page_nocow(nocow_ctx->sctx, - physical_for_dev_replace, page); - if (err) - ret = err; -next_page: - unlock_page(page); - put_page(page); - - if (ret) - break; - - offset += PAGE_SIZE; - physical_for_dev_replace += PAGE_SIZE; - nocow_ctx_logical += PAGE_SIZE; - len -= PAGE_SIZE; - } - ret = COPY_COMPLETE; -out: - inode_unlock(inode); - iput(inode); - return ret; -} - -static int write_page_nocow(struct scrub_ctx *sctx, - u64 physical_for_dev_replace, struct page *page) -{ - struct bio *bio; - struct btrfs_device *dev; - - dev = sctx->wr_tgtdev; - if (!dev) - return -EIO; - if (!dev->bdev) { - btrfs_warn_rl(dev->fs_info, - "scrub write_page_nocow(bdev == NULL) is unexpected"); - return -EIO; - } - bio = btrfs_io_bio_alloc(1); - bio->bi_iter.bi_size = 0; - bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; - bio_set_dev(bio, dev->bdev); - bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; - /* bio_add_page won't fail on a freshly allocated bio */ - bio_add_page(bio, page, PAGE_SIZE, 0); - - if (btrfsic_submit_bio_wait(bio)) { - bio_put(bio); - btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - return -EIO; - } - - bio_put(bio); - return 0; -} diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c47f62b19226..ba8950bfd9c7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -100,6 +100,7 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; + bool ignore_cur_inode; u64 send_progress; @@ -1500,7 +1501,7 @@ static int read_symlink(struct btrfs_root *root, BUG_ON(compression); off = btrfs_file_extent_inline_start(ei); - len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); @@ -5006,6 +5007,15 @@ static int send_hole(struct send_ctx *sctx, u64 end) u64 len; int ret = 0; + /* + * A hole that starts at EOF or beyond it. Since we do not yet support + * fallocate (for extent preallocation and hole punching), sending a + * write of zeroes starting at EOF or beyond would later require issuing + * a truncate operation which would undo the write and achieve nothing. + */ + if (offset >= sctx->cur_inode_size) + return 0; + if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, end - offset); @@ -5160,7 +5170,7 @@ static int clone_range(struct send_ctx *sctx, ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); + ext_len = btrfs_file_extent_ram_bytes(leaf, ei); ext_len = PAGE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); @@ -5236,8 +5246,7 @@ static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); /* * it is possible the inline item won't cover the whole page, * but there may be items after this page. Make @@ -5375,7 +5384,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, } if (right_type == BTRFS_FILE_EXTENT_INLINE) { - right_len = btrfs_file_extent_inline_len(eb, slot, ei); + right_len = btrfs_file_extent_ram_bytes(eb, ei); right_len = PAGE_ALIGN(right_len); } else { right_len = btrfs_file_extent_num_bytes(eb, ei); @@ -5496,8 +5505,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key.offset + size, sctx->send_root->fs_info->sectorsize); } else { @@ -5560,7 +5568,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); + u64 size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(key.offset + size, root->fs_info->sectorsize); @@ -5606,8 +5614,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key->offset + size, sctx->send_root->fs_info->sectorsize); } else { @@ -5799,6 +5806,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) int pending_move = 0; int refs_processed = 0; + if (sctx->ignore_cur_inode) + return 0; + ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, &refs_processed); if (ret < 0) @@ -5917,6 +5927,93 @@ out: return ret; } +struct parent_paths_ctx { + struct list_head *refs; + struct send_ctx *sctx; +}; + +static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name, + void *ctx) +{ + struct parent_paths_ctx *ppctx = ctx; + + return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx, + ppctx->refs); +} + +/* + * Issue unlink operations for all paths of the current inode found in the + * parent snapshot. + */ +static int btrfs_unlink_all_paths(struct send_ctx *sctx) +{ + LIST_HEAD(deleted_refs); + struct btrfs_path *path; + struct btrfs_key key; + struct parent_paths_ctx ctx; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ctx.refs = &deleted_refs; + ctx.sctx = sctx; + + while (true) { + struct extent_buffer *eb = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(sctx->parent_root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != sctx->cur_ino) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + ret = iterate_inode_ref(sctx->parent_root, path, &key, 1, + record_parent_ref, &ctx); + if (ret < 0) + goto out; + + path->slots[0]++; + } + + while (!list_empty(&deleted_refs)) { + struct recorded_ref *ref; + + ref = list_first_entry(&deleted_refs, struct recorded_ref, list); + ret = send_unlink(sctx, ref->full_path); + if (ret < 0) + goto out; + fs_path_free(ref->full_path); + list_del(&ref->list); + kfree(ref); + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) + __free_recorded_refs(&deleted_refs); + return ret; +} + static int changed_inode(struct send_ctx *sctx, enum btrfs_compare_tree_result result) { @@ -5931,6 +6028,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 0; sctx->cur_inode_last_extent = (u64)-1; sctx->cur_inode_next_write_offset = 0; + sctx->ignore_cur_inode = false; /* * Set send_progress to current inode. This will tell all get_cur_xxx @@ -5971,6 +6069,33 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 1; } + /* + * Normally we do not find inodes with a link count of zero (orphans) + * because the most common case is to create a snapshot and use it + * for a send operation. However other less common use cases involve + * using a subvolume and send it after turning it to RO mode just + * after deleting all hard links of a file while holding an open + * file descriptor against it or turning a RO snapshot into RW mode, + * keep an open file descriptor against a file, delete it and then + * turn the snapshot back to RO mode before using it for a send + * operation. So if we find such cases, ignore the inode and all its + * items completely if it's a new inode, or if it's a changed inode + * make sure all its previous paths (from the parent snapshot) are all + * unlinked and all other the inode items are ignored. + */ + if (result == BTRFS_COMPARE_TREE_NEW || + result == BTRFS_COMPARE_TREE_CHANGED) { + u32 nlinks; + + nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii); + if (nlinks == 0) { + sctx->ignore_cur_inode = true; + if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = btrfs_unlink_all_paths(sctx); + goto out; + } + } + if (result == BTRFS_COMPARE_TREE_NEW) { sctx->cur_inode_gen = left_gen; sctx->cur_inode_new = 1; @@ -6309,15 +6434,17 @@ static int changed_cb(struct btrfs_path *left_path, key->objectid == BTRFS_FREE_SPACE_OBJECTID) goto out; - if (key->type == BTRFS_INODE_ITEM_KEY) + if (key->type == BTRFS_INODE_ITEM_KEY) { ret = changed_inode(sctx, result); - else if (key->type == BTRFS_INODE_REF_KEY || - key->type == BTRFS_INODE_EXTREF_KEY) - ret = changed_ref(sctx, result); - else if (key->type == BTRFS_XATTR_ITEM_KEY) - ret = changed_xattr(sctx, result); - else if (key->type == BTRFS_EXTENT_DATA_KEY) - ret = changed_extent(sctx, result); + } else if (!sctx->ignore_cur_inode) { + if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) + ret = changed_ref(sctx, result); + else if (key->type == BTRFS_XATTR_ITEM_KEY) + ret = changed_xattr(sctx, result); + else if (key->type == BTRFS_EXTENT_DATA_KEY) + ret = changed_extent(sctx, result); + } out: return ret; @@ -6328,7 +6455,6 @@ static int full_send_tree(struct send_ctx *sctx) int ret; struct btrfs_root *send_root = sctx->send_root; struct btrfs_key key; - struct btrfs_key found_key; struct btrfs_path *path; struct extent_buffer *eb; int slot; @@ -6350,17 +6476,13 @@ static int full_send_tree(struct send_ctx *sctx) while (1) { eb = path->nodes[0]; slot = path->slots[0]; - btrfs_item_key_to_cpu(eb, &found_key, slot); + btrfs_item_key_to_cpu(eb, &key, slot); - ret = changed_cb(path, NULL, &found_key, + ret = changed_cb(path, NULL, &key, BTRFS_COMPARE_TREE_NEW, sctx); if (ret < 0) goto out; - key.objectid = found_key.objectid; - key.type = found_key.type; - key.offset = found_key.offset + 1; - ret = btrfs_next_item(send_root, path); if (ret < 0) goto out; diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index b7b4acb12833..4c13b737f568 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -3,7 +3,6 @@ * Copyright (C) 2007 Oracle. All rights reserved. */ -#include <linux/highmem.h> #include <asm/unaligned.h> #include "ctree.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 81107ad49f3a..6601c9aa5e35 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -5,7 +5,6 @@ #include <linux/blkdev.h> #include <linux/module.h> -#include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> @@ -15,8 +14,6 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> -#include <linux/mpage.h> -#include <linux/swap.h> #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> @@ -468,9 +465,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_subvolrootid: case Opt_device: /* - * These are parsed by btrfs_parse_subvol_options - * and btrfs_parse_early_options - * and can be happily ignored here. + * These are parsed by btrfs_parse_subvol_options or + * btrfs_parse_device_options and can be ignored here. */ break; case Opt_nodatasum: @@ -760,6 +756,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_recovery: btrfs_warn(info, "'recovery' is deprecated, use 'usebackuproot' instead"); + /* fall through */ case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); @@ -885,13 +882,16 @@ out: * All other options will be parsed on much later in the mount process and * only when we need to allocate a new super block. */ -static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, struct btrfs_fs_devices **fs_devices) +static int btrfs_parse_device_options(const char *options, fmode_t flags, + void *holder) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + struct btrfs_device *device = NULL; int error = 0; + lockdep_assert_held(&uuid_mutex); + if (!options) return 0; @@ -917,11 +917,13 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, error = -ENOMEM; goto out; } - error = btrfs_scan_one_device(device_name, - flags, holder, fs_devices); + device = btrfs_scan_one_device(device_name, flags, + holder); kfree(device_name); - if (error) + if (IS_ERR(device)) { + error = PTR_ERR(device); goto out; + } } } @@ -935,8 +937,8 @@ out: * * The value is later passed to mount_subvol() */ -static int btrfs_parse_subvol_options(const char *options, fmode_t flags, - char **subvol_name, u64 *subvol_objectid) +static int btrfs_parse_subvol_options(const char *options, char **subvol_name, + u64 *subvol_objectid) { substring_t args[MAX_OPT_ARGS]; char *opts, *orig, *p; @@ -948,7 +950,7 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags, /* * strsep changes the string, duplicate it because - * btrfs_parse_early_options gets called later + * btrfs_parse_device_options gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) @@ -1517,6 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, { struct block_device *bdev = NULL; struct super_block *s; + struct btrfs_device *device = NULL; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; @@ -1526,12 +1529,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - error = btrfs_parse_early_options(data, mode, fs_type, - &fs_devices); - if (error) { - return ERR_PTR(error); - } - security_init_mnt_opts(&new_sec_opts); if (data) { error = parse_security_options(data, &new_sec_opts); @@ -1539,10 +1536,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, return ERR_PTR(error); } - error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); - if (error) - goto error_sec_opts; - /* * Setup a dummy root and fs_info for test/set super. This is because * we don't actually fill this stuff out until open_ctree, but we need @@ -1555,8 +1548,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, goto error_sec_opts; } - fs_info->fs_devices = fs_devices; - fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); security_init_mnt_opts(&fs_info->security_opts); @@ -1565,7 +1556,25 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, goto error_fs_info; } + mutex_lock(&uuid_mutex); + error = btrfs_parse_device_options(data, mode, fs_type); + if (error) { + mutex_unlock(&uuid_mutex); + goto error_fs_info; + } + + device = btrfs_scan_one_device(device_name, mode, fs_type); + if (IS_ERR(device)) { + mutex_unlock(&uuid_mutex); + error = PTR_ERR(device); + goto error_fs_info; + } + + fs_devices = device->fs_devices; + fs_info->fs_devices = fs_devices; + error = btrfs_open_devices(fs_devices, mode, fs_type); + mutex_unlock(&uuid_mutex); if (error) goto error_fs_info; @@ -1650,8 +1659,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - error = btrfs_parse_subvol_options(data, mode, - &subvol_name, &subvol_objectid); + error = btrfs_parse_subvol_options(data, &subvol_name, + &subvol_objectid); if (error) { kfree(subvol_name); return ERR_PTR(error); @@ -2098,14 +2107,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) btrfs_account_ro_block_groups_free_space(found); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { - if (!list_empty(&found->block_groups[i])) { - switch (i) { - case BTRFS_RAID_DUP: - case BTRFS_RAID_RAID1: - case BTRFS_RAID_RAID10: - factor = 2; - } - } + if (!list_empty(&found->block_groups[i])) + factor = btrfs_bg_type_to_factor( + btrfs_raid_array[i].bg_flag); } } @@ -2222,7 +2226,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct btrfs_ioctl_vol_args *vol; - struct btrfs_fs_devices *fs_devices; + struct btrfs_device *device = NULL; int ret = -ENOTTY; if (!capable(CAP_SYS_ADMIN)) @@ -2234,15 +2238,24 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case BTRFS_IOC_SCAN_DEV: - ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_root_fs_type, &fs_devices); + mutex_lock(&uuid_mutex); + device = btrfs_scan_one_device(vol->name, FMODE_READ, + &btrfs_root_fs_type); + ret = PTR_ERR_OR_ZERO(device); + mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_DEVICES_READY: - ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_root_fs_type, &fs_devices); - if (ret) + mutex_lock(&uuid_mutex); + device = btrfs_scan_one_device(vol->name, FMODE_READ, + &btrfs_root_fs_type); + if (IS_ERR(device)) { + mutex_unlock(&uuid_mutex); + ret = PTR_ERR(device); break; - ret = !(fs_devices->num_devices == fs_devices->total_devices); + } + ret = !(device->fs_devices->num_devices == + device->fs_devices->total_devices); + mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_GET_SUPPORTED_FEATURES: ret = btrfs_ioctl_get_supported_features((void __user*)arg); @@ -2290,7 +2303,6 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) struct btrfs_fs_devices *cur_devices; struct btrfs_device *dev, *first_dev = NULL; struct list_head *head; - struct rcu_string *name; /* * Lightweight locking of the devices. We should not need @@ -2314,12 +2326,10 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) cur_devices = cur_devices->seed; } - if (first_dev) { - name = rcu_dereference(first_dev->name); - seq_escape(m, name->str, " \t\n\\"); - } else { + if (first_dev) + seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\"); + else WARN_ON(1); - } rcu_read_unlock(); return 0; } @@ -2331,7 +2341,6 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, - .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, @@ -2369,7 +2378,7 @@ static __cold void btrfs_interface_exit(void) static void __init btrfs_print_mod_info(void) { - pr_info("Btrfs loaded, crc32c=%s" + static const char options[] = "" #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif @@ -2382,8 +2391,8 @@ static void __init btrfs_print_mod_info(void) #ifdef CONFIG_BTRFS_FS_REF_VERIFY ", ref-verify=on" #endif - "\n", - crc32c_impl()); + ; + pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4a4e960c7c66..3717c864ba23 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -7,10 +7,8 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> -#include <linux/buffer_head.h> #include <linux/kobject.h> #include <linux/bug.h> -#include <linux/genhd.h> #include <linux/debugfs.h> #include "ctree.h" diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index ace94db09d29..412b910b04cc 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -216,7 +216,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, btrfs_init_dummy_trans(&trans, fs_info); test_msg("qgroup basic add"); - ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); + ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID); if (ret) { test_err("couldn't create a qgroup %d", ret); return ret; @@ -249,8 +249,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, - nodesize, old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, + new_roots); if (ret) { test_err("couldn't account space for a qgroup %d", ret); return ret; @@ -285,8 +285,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, - nodesize, old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, + new_roots); if (ret) { test_err("couldn't account space for a qgroup %d", ret); return -EINVAL; @@ -322,7 +322,7 @@ static int test_multiple_refs(struct btrfs_root *root, * We have BTRFS_FS_TREE_OBJECTID created already from the * previous test. */ - ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); + ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID); if (ret) { test_err("couldn't create a qgroup %d", ret); return ret; @@ -350,8 +350,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, - nodesize, old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, + new_roots); if (ret) { test_err("couldn't account space for a qgroup %d", ret); return ret; @@ -385,8 +385,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, - nodesize, old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, + new_roots); if (ret) { test_err("couldn't account space for a qgroup %d", ret); return ret; @@ -426,8 +426,8 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, - nodesize, old_roots, new_roots); + ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, + new_roots); if (ret) { test_err("couldn't account space for a qgroup %d", ret); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ff5f6c719976..3b84f5015029 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -241,7 +241,7 @@ loop: refcount_set(&cur_trans->use_count, 2); atomic_set(&cur_trans->pending_ordered, 0); cur_trans->flags = 0; - cur_trans->start_time = get_seconds(); + cur_trans->start_time = ktime_get_seconds(); memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); @@ -680,7 +680,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) trans = start_transaction(root, 0, TRANS_ATTACH, BTRFS_RESERVE_NO_FLUSH, true); - if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) + if (trans == ERR_PTR(-ENOENT)) btrfs_wait_for_commit(root->fs_info, 0); return trans; @@ -1152,7 +1152,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) ret = btrfs_run_dev_replace(trans, fs_info); if (ret) return ret; - ret = btrfs_run_qgroups(trans, fs_info); + ret = btrfs_run_qgroups(trans); if (ret) return ret; @@ -1355,8 +1355,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, goto out; /* Now qgroup are all updated, we can inherit it to new qgroups */ - ret = btrfs_qgroup_inherit(trans, fs_info, - src->root_key.objectid, dst_objectid, + ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid, inherit); if (ret < 0) goto out; @@ -1574,7 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* * insert root back/forward references */ - ret = btrfs_add_root_ref(trans, fs_info, objectid, + ret = btrfs_add_root_ref(trans, objectid, parent_root->root_key.objectid, btrfs_ino(BTRFS_I(parent_inode)), index, dentry->d_name.name, dentry->d_name.len); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 94439482a0ec..4cbb1b55387d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -48,7 +48,7 @@ struct btrfs_transaction { int aborted; struct list_head list; struct extent_io_tree dirty_pages; - unsigned long start_time; + time64_t start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; wait_queue_head_t pending_wait; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 8d40e7dd8c30..db835635372f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -19,6 +19,7 @@ #include "tree-checker.h" #include "disk-io.h" #include "compression.h" +#include "volumes.h" /* * Error message should follow the following format: @@ -353,6 +354,102 @@ static int check_dir_item(struct btrfs_fs_info *fs_info, return 0; } +__printf(4, 5) +__cold +static void block_group_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, key.offset, &vaf); + va_end(args); +} + +static int check_block_group_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_block_group_item bgi; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u64 flags; + u64 type; + + /* + * Here we don't really care about alignment since extent allocator can + * handle it. We care more about the size, as if one block group is + * larger than maximum size, it's must be some obvious corruption. + */ + if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) { + block_group_err(fs_info, leaf, slot, + "invalid block group size, have %llu expect (0, %llu]", + key->offset, BTRFS_MAX_DATA_CHUNK_SIZE); + return -EUCLEAN; + } + + if (item_size != sizeof(bgi)) { + block_group_err(fs_info, leaf, slot, + "invalid item size, have %u expect %zu", + item_size, sizeof(bgi)); + return -EUCLEAN; + } + + read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), + sizeof(bgi)); + if (btrfs_block_group_chunk_objectid(&bgi) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID) { + block_group_err(fs_info, leaf, slot, + "invalid block group chunk objectid, have %llu expect %llu", + btrfs_block_group_chunk_objectid(&bgi), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + + if (btrfs_block_group_used(&bgi) > key->offset) { + block_group_err(fs_info, leaf, slot, + "invalid block group used, have %llu expect [0, %llu)", + btrfs_block_group_used(&bgi), key->offset); + return -EUCLEAN; + } + + flags = btrfs_block_group_flags(&bgi); + if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { + block_group_err(fs_info, leaf, slot, +"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", + flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, + hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); + return -EUCLEAN; + } + + type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + if (type != BTRFS_BLOCK_GROUP_DATA && + type != BTRFS_BLOCK_GROUP_METADATA && + type != BTRFS_BLOCK_GROUP_SYSTEM && + type != (BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA)) { + block_group_err(fs_info, leaf, slot, +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", + type, hweight64(type), + BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, + BTRFS_BLOCK_GROUP_SYSTEM, + BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -374,6 +471,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_XATTR_ITEM_KEY: ret = check_dir_item(fs_info, leaf, key, slot); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + ret = check_block_group_item(fs_info, leaf, key, slot); + break; } return ret; } @@ -396,9 +496,22 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, * skip this check for relocation trees. */ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { + u64 owner = btrfs_header_owner(leaf); struct btrfs_root *check_root; - key.objectid = btrfs_header_owner(leaf); + /* These trees must never be empty */ + if (owner == BTRFS_ROOT_TREE_OBJECTID || + owner == BTRFS_CHUNK_TREE_OBJECTID || + owner == BTRFS_EXTENT_TREE_OBJECTID || + owner == BTRFS_DEV_TREE_OBJECTID || + owner == BTRFS_FS_TREE_OBJECTID || + owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { + generic_err(fs_info, leaf, 0, + "invalid root, root %llu must never be empty", + owner); + return -EUCLEAN; + } + key.objectid = owner; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f8220ec02036..1650dc44a5e3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); - if (IS_ERR(inode)) { + if (IS_ERR(inode)) inode = NULL; - } else if (is_bad_inode(inode)) { - iput(inode); - inode = NULL; - } return inode; } @@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (btrfs_file_extent_disk_bytenr(eb, item) == 0) nbytes = 0; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - size = btrfs_file_extent_inline_len(eb, slot, item); + size = btrfs_file_extent_ram_bytes(eb, item); nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, fs_info->sectorsize); @@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * as the owner of the file extent changed from log tree * (doesn't affect qgroup) to fs/file tree(affects qgroup) */ - ret = btrfs_qgroup_trace_extent(trans, fs_info, + ret = btrfs_qgroup_trace_extent(trans, btrfs_file_extent_disk_bytenr(eb, item), btrfs_file_extent_disk_num_bytes(eb, item), GFP_NOFS); @@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * allocation tree */ ret = btrfs_alloc_logged_file_extent(trans, - fs_info, root->root_key.objectid, key->objectid, offset, &ins); if (ret) @@ -1291,6 +1286,46 @@ again: return ret; } +static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir, + const u8 ref_type, const char *name, + const int namelen) +{ + struct btrfs_key key; + struct btrfs_path *path; + const u64 parent_id = btrfs_ino(BTRFS_I(dir)); + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = btrfs_ino(BTRFS_I(inode)); + key.type = ref_type; + if (key.type == BTRFS_INODE_REF_KEY) + key.offset = parent_id; + else + key.offset = btrfs_extref_hash(parent_id, name, namelen); + + ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + if (key.type == BTRFS_INODE_EXTREF_KEY) + ret = btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], parent_id, + name, namelen, NULL); + else + ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], + name, namelen, NULL); + +out: + btrfs_free_path(path); + return ret; +} + /* * replay one inode back reference item found in the log tree. * eb, slot and key refer to the buffer and key found in the log tree. @@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } } + /* + * If a reference item already exists for this inode + * with the same parent and name, but different index, + * drop it and the corresponding directory index entries + * from the parent before adding the new reference item + * and dir index entries, otherwise we would fail with + * -EEXIST returned from btrfs_add_link() below. + */ + ret = btrfs_inode_ref_exists(inode, dir, key->type, + name, namelen); + if (ret > 0) { + ret = btrfs_unlink_inode(trans, root, + BTRFS_I(dir), + BTRFS_I(inode), + name, namelen); + /* + * If we dropped the link count to 0, bump it so + * that later the iput() on the inode will not + * free it. We will fixup the link count later. + */ + if (!ret && inode->i_nlink == 0) + inc_nlink(inode); + } + if (ret < 0) + goto out; + /* insert our name */ ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), @@ -2120,7 +2181,7 @@ again: dir_key->offset, name, name_len, 0); } - if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { + if (!log_di || log_di == ERR_PTR(-ENOENT)) { btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); @@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* bail out if we need to do a full commit */ if (btrfs_need_log_full_commit(fs_info, trans)) { ret = -EAGAIN; - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&root->log_mutex); goto out; } @@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (ret) { blk_finish_plug(&plug); btrfs_abort_transaction(trans, ret); - btrfs_free_logged_extents(log, log_transid); btrfs_set_log_full_commit(fs_info, trans); mutex_unlock(&root->log_mutex); goto out; @@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } btrfs_wait_tree_log_extents(log, mark); - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out; @@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (atomic_read(&log_root_tree->log_commit[index2])) { blk_finish_plug(&plug); ret = btrfs_wait_tree_log_extents(log, mark); - btrfs_wait_logged_extents(trans, log, log_transid); wait_log_commit(log_root_tree, root_log_ctx.log_transid); mutex_unlock(&log_root_tree->log_mutex); @@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (btrfs_need_log_full_commit(fs_info, trans)) { blk_finish_plug(&plug); btrfs_wait_tree_log_extents(log, mark); - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out_wake_log_root; @@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (ret) { btrfs_set_log_full_commit(fs_info, trans); btrfs_abort_transaction(trans, ret); - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } @@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, EXTENT_NEW | EXTENT_DIRTY); if (ret) { btrfs_set_log_full_commit(fs_info, trans); - btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } - btrfs_wait_logged_extents(trans, log, log_transid); btrfs_set_super_log_root(fs_info->super_for_commit, log_root_tree->node->start); @@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); } - /* - * We may have short-circuited the log tree with the full commit logic - * and left ordered extents on our list, so clear these out to keep us - * from leaking inodes and memory. - */ - btrfs_free_logged_extents(log, 0); - btrfs_free_logged_extents(log, 1); - free_extent_buffer(log->node); kfree(log); } @@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int start_slot, int nr, int inode_only, u64 logged_isize) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; unsigned long src_offset; unsigned long dst_offset; struct btrfs_root *log = inode->root->log_root; @@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(src, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, - src_path->slots[0], - extent); + len = btrfs_file_extent_ram_bytes(src, extent); *last_extent = ALIGN(key.offset + len, fs_info->sectorsize); } else { @@ -4004,7 +4047,7 @@ fill_holes: extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); if (btrfs_file_extent_type(src, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, i, extent); + len = btrfs_file_extent_ram_bytes(src, extent); extent_end = ALIGN(key.offset + len, fs_info->sectorsize); } else { @@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) return 0; } -static int wait_ordered_extents(struct btrfs_trans_handle *trans, - struct inode *inode, - struct btrfs_root *root, - const struct extent_map *em, - const struct list_head *logged_list, - bool *ordered_io_error) +static int log_extent_csums(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_root *log_root, + const struct extent_map *em) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_ordered_extent *ordered; - struct btrfs_root *log = root->log_root; - u64 mod_start = em->mod_start; - u64 mod_len = em->mod_len; - const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; u64 csum_offset; u64 csum_len; LIST_HEAD(ordered_sums); int ret = 0; - *ordered_io_error = false; - - if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || + if (inode->flags & BTRFS_INODE_NODATASUM || + test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || em->block_start == EXTENT_MAP_HOLE) return 0; - /* - * Wait far any ordered extent that covers our extent map. If it - * finishes without an error, first check and see if our csums are on - * our outstanding ordered extents. - */ - list_for_each_entry(ordered, logged_list, log_list) { - struct btrfs_ordered_sum *sum; - - if (!mod_len) - break; - - if (ordered->file_offset + ordered->len <= mod_start || - mod_start + mod_len <= ordered->file_offset) - continue; - - if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && - !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && - !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { - const u64 start = ordered->file_offset; - const u64 end = ordered->file_offset + ordered->len - 1; - - WARN_ON(ordered->inode != inode); - filemap_fdatawrite_range(inode->i_mapping, start, end); - } - - wait_event(ordered->wait, - (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || - test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); - - if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { - /* - * Clear the AS_EIO/AS_ENOSPC flags from the inode's - * i_mapping flags, so that the next fsync won't get - * an outdated io error too. - */ - filemap_check_errors(inode->i_mapping); - *ordered_io_error = true; - break; - } - /* - * We are going to copy all the csums on this ordered extent, so - * go ahead and adjust mod_start and mod_len in case this - * ordered extent has already been logged. - */ - if (ordered->file_offset > mod_start) { - if (ordered->file_offset + ordered->len >= - mod_start + mod_len) - mod_len = ordered->file_offset - mod_start; - /* - * If we have this case - * - * |--------- logged extent ---------| - * |----- ordered extent ----| - * - * Just don't mess with mod_start and mod_len, we'll - * just end up logging more csums than we need and it - * will be ok. - */ - } else { - if (ordered->file_offset + ordered->len < - mod_start + mod_len) { - mod_len = (mod_start + mod_len) - - (ordered->file_offset + ordered->len); - mod_start = ordered->file_offset + - ordered->len; - } else { - mod_len = 0; - } - } - - if (skip_csum) - continue; - - /* - * To keep us from looping for the above case of an ordered - * extent that falls inside of the logged extent. - */ - if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, - &ordered->flags)) - continue; - - list_for_each_entry(sum, &ordered->list, list) { - ret = btrfs_csum_file_blocks(trans, log, sum); - if (ret) - break; - } - } - - if (*ordered_io_error || !mod_len || ret || skip_csum) - return ret; - + /* If we're compressed we have to save the entire range of csums. */ if (em->compress_type) { csum_offset = 0; csum_len = max(em->block_len, em->orig_block_len); } else { - csum_offset = mod_start - em->start; - csum_len = mod_len; + csum_offset = em->mod_start - em->start; + csum_len = em->mod_len; } /* block start is already adjusted for the file extent offset. */ - ret = btrfs_lookup_csums_range(fs_info->csum_root, + ret = btrfs_lookup_csums_range(trans->fs_info->csum_root, em->block_start + csum_offset, em->block_start + csum_offset + csum_len - 1, &ordered_sums, 0); @@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum, list); if (!ret) - ret = btrfs_csum_file_blocks(trans, log, sums); + ret = btrfs_csum_file_blocks(trans, log_root, sums); list_del(&sums->list); kfree(sums); } @@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_root *root, const struct extent_map *em, struct btrfs_path *path, - const struct list_head *logged_list, struct btrfs_log_ctx *ctx) { struct btrfs_root *log = root->log_root; @@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans, u64 block_len; int ret; int extent_inserted = 0; - bool ordered_io_err = false; - ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, - logged_list, &ordered_io_err); + ret = log_extent_csums(trans, inode, log, em); if (ret) return ret; - if (ordered_io_err) { - ctx->io_err = -EIO; - return ctx->io_err; - } - btrfs_init_map_token(&token); ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, @@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, - struct list_head *logged_list, struct btrfs_log_ctx *ctx, const u64 start, const u64 end) @@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); - /* - * Some ordered extents started by fsync might have completed - * before we could collect them into the list logged_list, which - * means they're gone, not in our logged_list nor in the inode's - * ordered tree. We want the application/user space to know an - * error happened while attempting to persist file data so that - * it can take proper action. If such error happened, we leave - * without writing to the log tree and the fsync must report the - * file data write error and not commit the current transaction. - */ - ret = filemap_check_errors(inode->vfs_inode.i_mapping); - if (ret) - ctx->io_err = ret; process: while (!list_empty(&extents)) { em = list_entry(extents.next, struct extent_map, list); @@ -4512,8 +4433,7 @@ process: write_unlock(&tree->lock); - ret = log_one_extent(trans, inode, root, em, path, logged_list, - ctx); + ret = log_one_extent(trans, inode, root, em, path, ctx); write_lock(&tree->lock); clear_em_logging(tree, em); free_extent_map(em); @@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(leaf, - path->slots[0], - extent); + len = btrfs_file_extent_ram_bytes(leaf, extent); ASSERT(len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != @@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = root->log_root; - LIST_HEAD(logged_list); u64 last_extent = 0; int err = 0; int ret; @@ -5094,8 +5011,7 @@ again: * we don't need to do more work nor fallback to * a transaction commit. */ - if (IS_ERR(other_inode) && - PTR_ERR(other_inode) == -ENOENT) { + if (other_inode == ERR_PTR(-ENOENT)) { goto next_key; } else if (IS_ERR(other_inode)) { err = PTR_ERR(other_inode); @@ -5235,7 +5151,7 @@ log_extents: } if (fast_search) { ret = btrfs_log_changed_extents(trans, root, inode, dst_path, - &logged_list, ctx, start, end); + ctx, start, end); if (ret) { err = ret; goto out_unlock; @@ -5286,10 +5202,6 @@ log_extents: inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); out_unlock: - if (unlikely(err)) - btrfs_put_logged_extents(&logged_list); - else - btrfs_submit_logged_extents(&logged_list, log); mutex_unlock(&inode->log_mutex); btrfs_free_path(path); @@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; struct btrfs_path *path; struct btrfs_key key; @@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; /* * this will force the logging code to walk the dentry chain diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1da162928d1a..da86706123ff 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -8,15 +8,12 @@ #include <linux/slab.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> -#include <linux/iocontext.h> -#include <linux/capability.h> #include <linux/ratelimit.h> #include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/semaphore.h> #include <linux/uuid.h> #include <linux/list_sort.h> -#include <asm/div64.h> #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -634,44 +631,48 @@ static void pending_bios_fn(struct btrfs_work *work) * devices. */ static void btrfs_free_stale_devices(const char *path, - struct btrfs_device *skip_dev) + struct btrfs_device *skip_device) { - struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; - struct btrfs_device *dev, *tmp_dev; + struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; + struct btrfs_device *device, *tmp_device; - list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) { - - if (fs_devs->opened) + list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { + mutex_lock(&fs_devices->device_list_mutex); + if (fs_devices->opened) { + mutex_unlock(&fs_devices->device_list_mutex); continue; + } - list_for_each_entry_safe(dev, tmp_dev, - &fs_devs->devices, dev_list) { + list_for_each_entry_safe(device, tmp_device, + &fs_devices->devices, dev_list) { int not_found = 0; - if (skip_dev && skip_dev == dev) + if (skip_device && skip_device == device) continue; - if (path && !dev->name) + if (path && !device->name) continue; rcu_read_lock(); if (path) - not_found = strcmp(rcu_str_deref(dev->name), + not_found = strcmp(rcu_str_deref(device->name), path); rcu_read_unlock(); if (not_found) continue; /* delete the stale device */ - if (fs_devs->num_devices == 1) { - btrfs_sysfs_remove_fsid(fs_devs); - list_del(&fs_devs->fs_list); - free_fs_devices(fs_devs); + fs_devices->num_devices--; + list_del(&device->dev_list); + btrfs_free_device(device); + + if (fs_devices->num_devices == 0) break; - } else { - fs_devs->num_devices--; - list_del(&dev->dev_list); - btrfs_free_device(dev); - } + } + mutex_unlock(&fs_devices->device_list_mutex); + if (fs_devices->num_devices == 0) { + btrfs_sysfs_remove_fsid(fs_devices); + list_del(&fs_devices->fs_list); + free_fs_devices(fs_devices); } } } @@ -750,7 +751,8 @@ error_brelse: * error pointer when failed */ static noinline struct btrfs_device *device_list_add(const char *path, - struct btrfs_super_block *disk_super) + struct btrfs_super_block *disk_super, + bool *new_device_added) { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; @@ -764,21 +766,26 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (IS_ERR(fs_devices)) return ERR_CAST(fs_devices); + mutex_lock(&fs_devices->device_list_mutex); list_add(&fs_devices->fs_list, &fs_uuids); device = NULL; } else { + mutex_lock(&fs_devices->device_list_mutex); device = find_device(fs_devices, devid, disk_super->dev_item.uuid); } if (!device) { - if (fs_devices->opened) + if (fs_devices->opened) { + mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); + } device = btrfs_alloc_device(NULL, &devid, disk_super->dev_item.uuid); if (IS_ERR(device)) { + mutex_unlock(&fs_devices->device_list_mutex); /* we can safely leave the fs_devices entry around */ return device; } @@ -786,17 +793,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, name = rcu_string_strdup(path, GFP_NOFS); if (!name) { btrfs_free_device(device); + mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-ENOMEM); } rcu_assign_pointer(device->name, name); - mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); fs_devices->num_devices++; - mutex_unlock(&fs_devices->device_list_mutex); device->fs_devices = fs_devices; - btrfs_free_stale_devices(path, device); + *new_device_added = true; if (disk_super->label[0]) pr_info("BTRFS: device label %s devid %llu transid %llu %s\n", @@ -840,12 +846,15 @@ static noinline struct btrfs_device *device_list_add(const char *path, * with larger generation number or the last-in if * generation are equal. */ + mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EEXIST); } name = rcu_string_strdup(path, GFP_NOFS); - if (!name) + if (!name) { + mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-ENOMEM); + } rcu_string_free(device->name); rcu_assign_pointer(device->name, name); if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { @@ -865,6 +874,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, fs_devices->total_devices = btrfs_super_num_devices(disk_super); + mutex_unlock(&fs_devices->device_list_mutex); return device; } @@ -1004,7 +1014,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) blkdev_put(device->bdev, device->mode); } -static void btrfs_prepare_close_one_device(struct btrfs_device *device) +static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; struct btrfs_device *new_device; @@ -1022,6 +1032,8 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device) if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) fs_devices->missing_devices--; + btrfs_close_bdev(device); + new_device = btrfs_alloc_device(NULL, &device->devid, device->uuid); BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ @@ -1035,39 +1047,23 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device) list_replace_rcu(&device->dev_list, &new_device->dev_list); new_device->fs_devices = device->fs_devices; + + call_rcu(&device->rcu, free_device_rcu); } static int close_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *tmp; - struct list_head pending_put; - - INIT_LIST_HEAD(&pending_put); if (--fs_devices->opened > 0) return 0; mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { - btrfs_prepare_close_one_device(device); - list_add(&device->dev_list, &pending_put); + btrfs_close_one_device(device); } mutex_unlock(&fs_devices->device_list_mutex); - /* - * btrfs_show_devname() is using the device_list_mutex, - * sometimes call to blkdev_put() leads vfs calling - * into this func. So do put outside of device_list_mutex, - * as of now. - */ - while (!list_empty(&pending_put)) { - device = list_first_entry(&pending_put, - struct btrfs_device, dev_list); - list_del(&device->dev_list); - btrfs_close_bdev(device); - call_rcu(&device->rcu, free_device_rcu); - } - WARN_ON(fs_devices->open_devices); WARN_ON(fs_devices->rw_devices); fs_devices->opened = 0; @@ -1146,7 +1142,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { int ret; - mutex_lock(&uuid_mutex); + lockdep_assert_held(&uuid_mutex); + mutex_lock(&fs_devices->device_list_mutex); if (fs_devices->opened) { fs_devices->opened++; @@ -1156,7 +1153,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = open_fs_devices(fs_devices, flags, holder); } mutex_unlock(&fs_devices->device_list_mutex); - mutex_unlock(&uuid_mutex); return ret; } @@ -1217,16 +1213,18 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, * and we are not allowed to call set_blocksize during the scan. The superblock * is read via pagecache */ -int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, - struct btrfs_fs_devices **fs_devices_ret) +struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, + void *holder) { struct btrfs_super_block *disk_super; - struct btrfs_device *device; + bool new_device_added = false; + struct btrfs_device *device = NULL; struct block_device *bdev; struct page *page; - int ret = 0; u64 bytenr; + lockdep_assert_held(&uuid_mutex); + /* * we would like to check all the supers, but that would make * a btrfs mount succeed after a mkfs from a different FS. @@ -1238,112 +1236,25 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) - return PTR_ERR(bdev); + return ERR_CAST(bdev); if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) { - ret = -EINVAL; + device = ERR_PTR(-EINVAL); goto error_bdev_put; } - mutex_lock(&uuid_mutex); - device = device_list_add(path, disk_super); - if (IS_ERR(device)) - ret = PTR_ERR(device); - else - *fs_devices_ret = device->fs_devices; - mutex_unlock(&uuid_mutex); + device = device_list_add(path, disk_super, &new_device_added); + if (!IS_ERR(device)) { + if (new_device_added) + btrfs_free_stale_devices(path, device); + } btrfs_release_disk_super(page); error_bdev_put: blkdev_put(bdev, flags); - return ret; -} - -/* helper to account the used device space in the range */ -int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, - u64 end, u64 *length) -{ - struct btrfs_key key; - struct btrfs_root *root = device->fs_info->dev_root; - struct btrfs_dev_extent *dev_extent; - struct btrfs_path *path; - u64 extent_end; - int ret; - int slot; - struct extent_buffer *l; - - *length = 0; - - if (start >= device->total_bytes || - test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) - return 0; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = READA_FORWARD; - - key.objectid = device->devid; - key.offset = start; - key.type = BTRFS_DEV_EXTENT_KEY; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - if (ret > 0) { - ret = btrfs_previous_item(root, path, key.objectid, key.type); - if (ret < 0) - goto out; - } - - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto out; - - break; - } - btrfs_item_key_to_cpu(l, &key, slot); - - if (key.objectid < device->devid) - goto next; - - if (key.objectid > device->devid) - break; - - if (key.type != BTRFS_DEV_EXTENT_KEY) - goto next; - - dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); - extent_end = key.offset + btrfs_dev_extent_length(l, - dev_extent); - if (key.offset <= start && extent_end > end) { - *length = end - start + 1; - break; - } else if (key.offset <= start && extent_end > start) - *length += extent_end - start; - else if (key.offset > start && extent_end <= end) - *length += extent_end - key.offset; - else if (key.offset > start && key.offset <= end) { - *length += end - key.offset + 1; - break; - } else if (key.offset > end) - break; - -next: - path->slots[0]++; - } - ret = 0; -out: - btrfs_free_path(path); - return ret; + return device; } static int contains_pending_extent(struct btrfs_transaction *transaction, @@ -1755,10 +1666,8 @@ error: * the btrfs_device struct should be fully filled in */ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_device *device) { - struct btrfs_root *root = fs_info->chunk_root; int ret; struct btrfs_path *path; struct btrfs_dev_item *dev_item; @@ -1774,8 +1683,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(*dev_item)); + ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path, + &key, sizeof(*dev_item)); if (ret) goto out; @@ -1800,7 +1709,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); ptr = btrfs_device_fsid(dev_item); - write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE); + write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(leaf); ret = 0; @@ -1924,9 +1833,10 @@ static struct btrfs_device * btrfs_find_next_active_device( * where this function called, there should be always be another device (or * this_dev) which is active. */ -void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, - struct btrfs_device *device, struct btrfs_device *this_dev) +void btrfs_assign_next_active_device(struct btrfs_device *device, + struct btrfs_device *this_dev) { + struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_device *next_device; if (this_dev) @@ -2029,11 +1939,14 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, cur_devices->num_devices--; cur_devices->total_devices--; + /* Update total_devices of the parent fs_devices if it's seed */ + if (cur_devices != fs_devices) + fs_devices->total_devices--; if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) cur_devices->missing_devices--; - btrfs_assign_next_active_device(fs_info, device, NULL); + btrfs_assign_next_active_device(device, NULL); if (device->bdev) { cur_devices->open_devices--; @@ -2084,12 +1997,11 @@ error_undo: goto out; } -void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev) +void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) { struct btrfs_fs_devices *fs_devices; - lockdep_assert_held(&fs_info->fs_devices->device_list_mutex); + lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex); /* * in case of fs with no seed, srcdev->fs_devices will point @@ -2151,10 +2063,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, } } -void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *tgtdev) +void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) { - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices; WARN_ON(!tgtdev); mutex_lock(&fs_devices->device_list_mutex); @@ -2166,7 +2077,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, fs_devices->num_devices--; - btrfs_assign_next_active_device(fs_info, tgtdev, NULL); + btrfs_assign_next_active_device(tgtdev, NULL); list_del_rcu(&tgtdev->dev_list); @@ -2297,7 +2208,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&seed_devices->alloc_list); mutex_init(&seed_devices->device_list_mutex); - mutex_lock(&fs_info->fs_devices->device_list_mutex); + mutex_lock(&fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); list_for_each_entry(device, &seed_devices->devices, dev_list) @@ -2317,7 +2228,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) generate_random_uuid(fs_devices->fsid); memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); - mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_devices->device_list_mutex); super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; @@ -2407,15 +2318,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; - struct list_head *devices; struct super_block *sb = fs_info->sb; struct rcu_string *name; - u64 tmp; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + u64 orig_super_total_bytes; + u64 orig_super_num_devices; int seeding_dev = 0; int ret = 0; bool unlocked = false; - if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) + if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, @@ -2423,7 +2335,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (IS_ERR(bdev)) return PTR_ERR(bdev); - if (fs_info->fs_devices->seeding) { + if (fs_devices->seeding) { seeding_dev = 1; down_write(&sb->s_umount); mutex_lock(&uuid_mutex); @@ -2431,18 +2343,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path filemap_write_and_wait(bdev->bd_inode->i_mapping); - devices = &fs_info->fs_devices->devices; - - mutex_lock(&fs_info->fs_devices->device_list_mutex); - list_for_each_entry(device, devices, dev_list) { + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev == bdev) { ret = -EEXIST; mutex_unlock( - &fs_info->fs_devices->device_list_mutex); + &fs_devices->device_list_mutex); goto error; } } - mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_devices->device_list_mutex); device = btrfs_alloc_device(fs_info, NULL, NULL); if (IS_ERR(device)) { @@ -2491,33 +2401,34 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } } - device->fs_devices = fs_info->fs_devices; + device->fs_devices = fs_devices; - mutex_lock(&fs_info->fs_devices->device_list_mutex); + mutex_lock(&fs_devices->device_list_mutex); mutex_lock(&fs_info->chunk_mutex); - list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices); - list_add(&device->dev_alloc_list, - &fs_info->fs_devices->alloc_list); - fs_info->fs_devices->num_devices++; - fs_info->fs_devices->open_devices++; - fs_info->fs_devices->rw_devices++; - fs_info->fs_devices->total_devices++; - fs_info->fs_devices->total_rw_bytes += device->total_bytes; + list_add_rcu(&device->dev_list, &fs_devices->devices); + list_add(&device->dev_alloc_list, &fs_devices->alloc_list); + fs_devices->num_devices++; + fs_devices->open_devices++; + fs_devices->rw_devices++; + fs_devices->total_devices++; + fs_devices->total_rw_bytes += device->total_bytes; atomic64_add(device->total_bytes, &fs_info->free_chunk_space); if (!blk_queue_nonrot(q)) - fs_info->fs_devices->rotating = 1; + fs_devices->rotating = 1; - tmp = btrfs_super_total_bytes(fs_info->super_copy); + orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy); btrfs_set_super_total_bytes(fs_info->super_copy, - round_down(tmp + device->total_bytes, fs_info->sectorsize)); + round_down(orig_super_total_bytes + device->total_bytes, + fs_info->sectorsize)); - tmp = btrfs_super_num_devices(fs_info->super_copy); - btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1); + orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy); + btrfs_set_super_num_devices(fs_info->super_copy, + orig_super_num_devices + 1); /* add sysfs device entry */ - btrfs_sysfs_add_device_link(fs_info->fs_devices, device); + btrfs_sysfs_add_device_link(fs_devices, device); /* * we've got more storage, clear any full flags on the space @@ -2526,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_clear_space_info_full(fs_info); mutex_unlock(&fs_info->chunk_mutex); - mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_devices->device_list_mutex); if (seeding_dev) { mutex_lock(&fs_info->chunk_mutex); @@ -2538,7 +2449,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } } - ret = btrfs_add_dev_item(trans, fs_info, device); + ret = btrfs_add_dev_item(trans, device); if (ret) { btrfs_abort_transaction(trans, ret); goto error_sysfs; @@ -2558,7 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path */ snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", fs_info->fsid); - if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf)) + if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf)) btrfs_warn(fs_info, "sysfs: failed to create fsid for sprout"); } @@ -2593,7 +2504,23 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path return ret; error_sysfs: - btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); + btrfs_sysfs_rm_device_link(fs_devices, device); + mutex_lock(&fs_info->fs_devices->device_list_mutex); + mutex_lock(&fs_info->chunk_mutex); + list_del_rcu(&device->dev_list); + list_del(&device->dev_alloc_list); + fs_info->fs_devices->num_devices--; + fs_info->fs_devices->open_devices--; + fs_info->fs_devices->rw_devices--; + fs_info->fs_devices->total_devices--; + fs_info->fs_devices->total_rw_bytes -= device->total_bytes; + atomic64_sub(device->total_bytes, &fs_info->free_chunk_space); + btrfs_set_super_total_bytes(fs_info->super_copy, + orig_super_total_bytes); + btrfs_set_super_num_devices(fs_info->super_copy, + orig_super_num_devices); + mutex_unlock(&fs_info->chunk_mutex); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); error_trans: if (seeding_dev) sb->s_flags |= SB_RDONLY; @@ -2697,9 +2624,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, return btrfs_update_device(trans, device); } -static int btrfs_free_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 chunk_offset) +static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; int ret; struct btrfs_path *path; @@ -2808,9 +2735,9 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info, return em; } -int btrfs_remove_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 chunk_offset) +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_map *em; struct map_lookup *map; u64 dev_extent_len = 0; @@ -2829,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, } map = em->map_lookup; mutex_lock(&fs_info->chunk_mutex); - check_system_chunk(trans, fs_info, map->type); + check_system_chunk(trans, map->type); mutex_unlock(&fs_info->chunk_mutex); /* @@ -2869,7 +2796,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, } mutex_unlock(&fs_devices->device_list_mutex); - ret = btrfs_free_chunk(trans, fs_info, chunk_offset); + ret = btrfs_free_chunk(trans, chunk_offset); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -2885,7 +2812,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, } } - ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em); + ret = btrfs_remove_block_group(trans, chunk_offset, em); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -2950,7 +2877,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) * step two, delete the device extents and the * chunk tree entries */ - ret = btrfs_remove_chunk(trans, fs_info, chunk_offset); + ret = btrfs_remove_chunk(trans, chunk_offset); btrfs_end_transaction(trans); return ret; } @@ -3059,7 +2986,7 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_force_chunk_alloc(trans, fs_info, + ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA); btrfs_end_transaction(trans); if (ret < 0) @@ -4692,7 +4619,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = SZ_1G; - max_chunk_size = 10 * max_stripe_size; + max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { @@ -4900,7 +4827,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, refcount_inc(&em->refs); write_unlock(&em_tree->lock); - ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); + ret = btrfs_make_block_group(trans, 0, type, start, num_bytes); if (ret) goto error_del_extent; @@ -4934,9 +4861,9 @@ error: } int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 chunk_offset, u64 chunk_size) + u64 chunk_offset, u64 chunk_size) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_root *chunk_root = fs_info->chunk_root; struct btrfs_key key; @@ -5038,13 +4965,12 @@ out: * require modifying the chunk tree. This division is important for the * bootstrap process of adding storage to a seed btrfs. */ -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 type) +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type) { u64 chunk_offset; - lockdep_assert_held(&fs_info->chunk_mutex); - chunk_offset = find_next_chunk(fs_info); + lockdep_assert_held(&trans->fs_info->chunk_mutex); + chunk_offset = find_next_chunk(trans->fs_info); return __btrfs_alloc_chunk(trans, chunk_offset, type); } @@ -5175,7 +5101,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) /* * There could be two corrupted data stripes, we need * to loop retry in order to rebuild the correct data. - * + * * Fail a stripe at a time on every retry except the * stripe under reconstruction. */ @@ -6187,21 +6113,11 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, btrfs_io_bio(bio)->stripe_index = dev_nr; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; -#ifdef DEBUG - { - struct rcu_string *name; - - rcu_read_lock(); - name = rcu_dereference(dev->name); - btrfs_debug(fs_info, - "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", - bio_op(bio), bio->bi_opf, - (u64)bio->bi_iter.bi_sector, - (u_long)dev->bdev->bd_dev, name->str, dev->devid, - bio->bi_iter.bi_size); - rcu_read_unlock(); - } -#endif + btrfs_debug_in_rcu(fs_info, + "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", + bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector, + (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid, + bio->bi_iter.bi_size); bio_set_dev(bio, dev->bdev); btrfs_bio_counter_inc_noblocked(fs_info); @@ -6403,6 +6319,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, u16 num_stripes; u16 sub_stripes; u64 type; + u64 features; + bool mixed = false; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); @@ -6441,6 +6359,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, btrfs_chunk_type(leaf, chunk)); return -EIO; } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || @@ -6527,6 +6471,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = btrfs_chunk_type(leaf, chunk); map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + map->verified_stripes = 0; for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); @@ -6563,10 +6508,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, write_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em, 0); write_unlock(&map_tree->map_tree.lock); - BUG_ON(ret); /* Tree corruption */ + if (ret < 0) { + btrfs_err(fs_info, + "failed to add chunk map, start=%llu len=%llu: %d", + em->start, em->len, ret); + } free_extent_map(em); - return 0; + return ret; } static void fill_device_from_item(struct extent_buffer *leaf, @@ -7108,9 +7057,9 @@ out: } static int update_dev_stat_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_device *device) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_path *path; struct btrfs_key key; @@ -7203,7 +7152,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, */ smp_rmb(); - ret = update_dev_stat_item(trans, fs_info, device); + ret = update_dev_stat_item(trans, device); if (!ret) atomic_sub(stats_cnt, &device->dev_stats_ccnt); } @@ -7382,3 +7331,197 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) fs_devices = fs_devices->seed; } } + +/* + * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10. + */ +int btrfs_bg_type_to_factor(u64 flags) +{ + if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + return 2; + return 1; +} + + +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) +{ + int index = btrfs_bg_flags_to_raid_index(type); + int ncopies = btrfs_raid_array[index].ncopies; + int data_stripes; + + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case BTRFS_BLOCK_GROUP_RAID5: + data_stripes = num_stripes - 1; + break; + case BTRFS_BLOCK_GROUP_RAID6: + data_stripes = num_stripes - 2; + break; + default: + data_stripes = num_stripes / ncopies; + break; + } + return div_u64(chunk_len, data_stripes); +} + +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, + u64 chunk_offset, u64 devid, + u64 physical_offset, u64 physical_len) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + u64 stripe_len; + bool found = false; + int ret = 0; + int i; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + + if (!em) { + btrfs_err(fs_info, +"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk", + physical_offset, devid); + ret = -EUCLEAN; + goto out; + } + + map = em->map_lookup; + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); + if (physical_len != stripe_len) { + btrfs_err(fs_info, +"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu", + physical_offset, devid, em->start, physical_len, + stripe_len); + ret = -EUCLEAN; + goto out; + } + + for (i = 0; i < map->num_stripes; i++) { + if (map->stripes[i].dev->devid == devid && + map->stripes[i].physical == physical_offset) { + found = true; + if (map->verified_stripes >= map->num_stripes) { + btrfs_err(fs_info, + "too many dev extents for chunk %llu found", + em->start); + ret = -EUCLEAN; + goto out; + } + map->verified_stripes++; + break; + } + } + if (!found) { + btrfs_err(fs_info, + "dev extent physical offset %llu devid %llu has no corresponding chunk", + physical_offset, devid); + ret = -EUCLEAN; + } +out: + free_extent_map(em); + return ret; +} + +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct rb_node *node; + int ret = 0; + + read_lock(&em_tree->lock); + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { + em = rb_entry(node, struct extent_map, rb_node); + if (em->map_lookup->num_stripes != + em->map_lookup->verified_stripes) { + btrfs_err(fs_info, + "chunk %llu has missing dev extent, have %d expect %d", + em->start, em->map_lookup->verified_stripes, + em->map_lookup->num_stripes); + ret = -EUCLEAN; + goto out; + } + } +out: + read_unlock(&em_tree->lock); + return ret; +} + +/* + * Ensure that all dev extents are mapped to correct chunk, otherwise + * later chunk allocation/free would cause unexpected behavior. + * + * NOTE: This will iterate through the whole device tree, which should be of + * the same size level as the chunk tree. This slightly increases mount time. + */ +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) +{ + struct btrfs_path *path; + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_key key; + int ret = 0; + + key.objectid = 1; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = READA_FORWARD; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + /* No dev extents at all? Not good */ + if (ret > 0) { + ret = -EUCLEAN; + goto out; + } + } + while (1) { + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_dev_extent *dext; + int slot = path->slots[0]; + u64 chunk_offset; + u64 physical_offset; + u64 physical_len; + u64 devid; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.type != BTRFS_DEV_EXTENT_KEY) + break; + devid = key.objectid; + physical_offset = key.offset; + + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); + physical_len = btrfs_dev_extent_length(leaf, dext); + + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, + physical_offset, physical_len); + if (ret < 0) + goto out; + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + } + + /* Ensure all chunks have corresponding dev extents */ + ret = verify_chunk_dev_extent_mapping(fs_info); +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139ec8daf4c..23e9285d88de 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -11,6 +11,8 @@ #include <linux/btrfs.h> #include "async-thread.h" +#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) + extern struct mutex uuid_mutex; #define BTRFS_STRIPE_LEN SZ_64K @@ -343,6 +345,7 @@ struct map_lookup { u64 stripe_len; int num_stripes; int sub_stripes; + int verified_stripes; /* For mount time dev extent verification */ struct btrfs_bio_stripe stripes[]; }; @@ -382,8 +385,6 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) } } -int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, - u64 end, u64 *length); void btrfs_get_bbio(struct btrfs_bio *bbio); void btrfs_put_bbio(struct btrfs_bio *bbio); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, @@ -396,20 +397,19 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 type); +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); -int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, - struct btrfs_fs_devices **fs_devices_ret); +struct btrfs_device *btrfs_scan_one_device(const char *path, + fmode_t flags, void *holder); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); -void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, - struct btrfs_device *device, struct btrfs_device *this_dev); +void btrfs_assign_next_active_device(struct btrfs_device *device, + struct btrfs_device *this_dev); int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, const char *device_path, struct btrfs_device **device); @@ -453,22 +453,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *srcdev); +void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev); -void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - struct btrfs_device *tgtdev); +void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len); unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, u64 logical); int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - u64 chunk_offset, u64 chunk_size); -int btrfs_remove_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 chunk_offset); + u64 chunk_offset, u64 chunk_size); +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, int index) @@ -560,4 +556,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); +int btrfs_bg_type_to_factor(u64 flags); +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); + #endif diff --git a/fs/buffer.c b/fs/buffer.c index cabc045f483d..4cc679d5bf58 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -45,6 +45,7 @@ #include <linux/mpage.h> #include <linux/bit_spinlock.h> #include <linux/pagevec.h> +#include <linux/sched/mm.h> #include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -813,12 +814,16 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry) { struct buffer_head *bh, *head; - gfp_t gfp = GFP_NOFS; + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; long offset; + struct mem_cgroup *memcg; if (retry) gfp |= __GFP_NOFAIL; + memcg = get_mem_cgroup_from_page(page); + memalloc_use_memcg(memcg); + head = NULL; offset = PAGE_SIZE; while ((offset -= size) >= 0) { @@ -835,6 +840,9 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, /* Link the buffer to its page */ set_bh_page(bh, page, offset); } +out: + memalloc_unuse_memcg(); + mem_cgroup_put(memcg); return head; /* * In case anything failed, we just free everything we got. @@ -848,7 +856,7 @@ no_grow: } while (head); } - return NULL; + goto out; } EXPORT_SYMBOL_GPL(alloc_page_buffers); @@ -1900,15 +1908,16 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, break; case IOMAP_UNWRITTEN: /* - * For unwritten regions, we always need to ensure that - * sub-block writes cause the regions in the block we are not - * writing to are zeroed. Set the buffer as new to ensure this. + * For unwritten regions, we always need to ensure that regions + * in the block we are not writing to are zeroed. Mark the + * buffer as new to ensure this. */ set_buffer_new(bh); set_buffer_unwritten(bh); /* FALLTHRU */ case IOMAP_MAPPED: - if (offset >= i_size_read(inode)) + if ((iomap->flags & IOMAP_F_NEW) || + offset >= i_size_read(inode)) set_buffer_new(bh); bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; @@ -2076,6 +2085,40 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, } EXPORT_SYMBOL(block_write_begin); +int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, + struct page *page) +{ + loff_t old_size = inode->i_size; + bool i_size_changed = false; + + /* + * No need to use i_size_read() here, the i_size cannot change under us + * because we hold i_rwsem. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = true; + } + + unlock_page(page); + put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + mark_inode_dirty(inode); + return copied; +} + int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -2116,39 +2159,8 @@ int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct inode *inode = mapping->host; - loff_t old_size = inode->i_size; - int i_size_changed = 0; - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); - i_size_changed = 1; - } - - unlock_page(page); - put_page(page); - - if (old_size < pos) - pagecache_isize_extended(inode, old_size, pos); - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - if (i_size_changed) - mark_inode_dirty(inode); - - return copied; + return __generic_write_end(mapping->host, pos, copied, page); } EXPORT_SYMBOL(generic_write_end); diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d9f001078e08..4a717d400807 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -218,7 +218,8 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) "%s", fsdef->dentry->d_sb->s_id); - fscache_object_init(&fsdef->fscache, NULL, &cache->cache); + fscache_object_init(&fsdef->fscache, &fscache_fsdef_index, + &cache->cache); ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); if (ret < 0) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ab0bbe93b398..af2b17b21b94 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -186,12 +186,12 @@ try_again: * need to wait for it to be destroyed */ wait_for_old_object: trace_cachefiles_wait_active(object, dentry, xobject); + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); if (fscache_object_is_live(&xobject->fscache)) { pr_err("\n"); pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); - BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); @@ -248,7 +248,6 @@ wait_for_old_object: goto try_again; requeue: - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo); _leave(" = -ETIMEDOUT"); return -ETIMEDOUT; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5082c8a49686..40f7595aad10 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_one_read *monitor = container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; + struct fscache_retrieval *op = monitor->op; struct wait_bit_key *key = _key; struct page *page = wait->private; @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); + ASSERT(op); - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); + /* We need to temporarily bump the usage count as we don't own a ref + * here otherwise cachefiles_read_copier() may free the op between the + * monitor being enqueued on the op->to_do list and the op getting + * enqueued on the work queue. + */ + fscache_get_retrieval(op); + object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); + list_add_tail(&monitor->op_link, &op->to_do); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(monitor->op); + fscache_enqueue_retrieval(op); + fscache_put_retrieval(op); return 0; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ad0bed99b1d5..e2679e8a2535 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -429,8 +429,7 @@ out: * file or symlink, return 1 so the VFS can retry. */ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) + struct file *file, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -507,9 +506,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, dout("atomic_open finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { ceph_init_inode_acls(d_inode(dentry), &acls); - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; } - err = finish_open(file, dentry, ceph_open, opened); + err = finish_open(file, dentry, ceph_open); } out_req: if (!req->r_err && req->r_target_inode) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a7077a0c989f..971328b99ede 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops; extern int ceph_renew_caps(struct inode *inode); extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened); + struct file *file, unsigned flags, umode_t mode); extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 5f132d59dfc2..35c83fe7dba0 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -16,24 +16,28 @@ config CIFS select CRYPTO_DES help This is the client VFS module for the SMB3 family of NAS protocols, - as well as for earlier dialects such as SMB2.1, SMB2 and the + (including support for the most recent, most secure dialect SMB3.1.1) + as well as for earlier dialects such as SMB2.1, SMB2 and the older Common Internet File System (CIFS) protocol. CIFS was the successor to the original dialect, the Server Message Block (SMB) protocol, the native file sharing mechanism for most early PC operating systems. - The SMB3 protocol is supported by most modern operating systems and - NAS appliances (e.g. Samba, Windows 8, Windows 2012, MacOS). + The SMB3 protocol is supported by most modern operating systems + and NAS appliances (e.g. Samba, Windows 10, Windows Server 2016, + MacOS) and even in the cloud (e.g. Microsoft Azure). The older CIFS protocol was included in Windows NT4, 2000 and XP (and later) as well by Samba (which provides excellent CIFS and SMB3 - server support for Linux and many other operating systems). Limited - support for OS/2 and Windows ME and similar very old servers is - provided as well. + server support for Linux and many other operating systems). Use of + dialects older than SMB2.1 is often discouraged on public networks. + This module also provides limited support for OS/2 and Windows ME + and similar very old servers. - The cifs module provides an advanced network file system client + This module provides an advanced network file system client for mounting to SMB3 (and CIFS) compliant servers. It includes support for DFS (hierarchical name space), secure per-user - session establishment via Kerberos or NTLM or NTLMv2, - safe distributed caching (oplock), optional packet + session establishment via Kerberos or NTLM or NTLMv2, RDMA + (smbdirect), advanced security features, per-share encryption, + directory leases, safe distributed caching (oplock), optional packet signing, Unicode and other internationalization improvements. In general, the default dialects, SMB3 and later, enable better @@ -43,18 +47,11 @@ config CIFS than SMB3 mounts. SMB2/SMB3 mount options are also slightly simpler (compared to CIFS) due to protocol improvements. - If you need to mount to Samba, Macs or Windows from this machine, say Y. - -config CIFS_STATS - bool "CIFS statistics" - depends on CIFS - help - Enabling this option will cause statistics for each server share - mounted by the cifs client to be displayed in /proc/fs/cifs/Stats + If you need to mount to Samba, Azure, Macs or Windows from this machine, say Y. config CIFS_STATS2 bool "Extended statistics" - depends on CIFS_STATS + depends on CIFS help Enabling this option will allow more detailed statistics on SMB request timing to be displayed in /proc/fs/cifs/DebugData and also @@ -66,9 +63,24 @@ config CIFS_STATS2 Unless you are a developer or are doing network performance analysis or tuning, say N. +config CIFS_ALLOW_INSECURE_LEGACY + bool "Support legacy servers which use less secure dialects" + depends on CIFS + default y + help + Modern dialects, SMB2.1 and later (including SMB3 and 3.1.1), have + additional security features, including protection against + man-in-the-middle attacks and stronger crypto hashes, so the use + of legacy dialects (SMB1/CIFS and SMB2.0) is discouraged. + + Disabling this option prevents users from using vers=1.0 or vers=2.0 + on mounts with cifs.ko + + If unsure, say Y. + config CIFS_WEAK_PW_HASH bool "Support legacy servers which use weaker LANMAN security" - depends on CIFS + depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY help Modern CIFS servers including Samba and most Windows versions (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) @@ -186,15 +198,6 @@ config CIFS_NFSD_EXPORT help Allows NFS server to export a CIFS mounted share (nfsd over cifs) -config CIFS_SMB311 - bool "SMB3.1.1 network file system support" - depends on CIFS - select CRYPTO_SHA512 - - help - This enables support for the newest, and most secure dialect, SMB3.11. - If unsure, say Y - config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index e1553d1e0e50..b7420e605b28 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -128,8 +128,10 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); - auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); + auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; + auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; + auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; + auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; if (memcmp(data, &auxdata, datalen) != 0) return FSCACHE_CHECKAUX_OBSOLETE; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index bfe999505815..f1fbea947fef 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -160,25 +160,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); seq_printf(m, "Features:"); #ifdef CONFIG_CIFS_DFS_UPCALL - seq_printf(m, " dfs"); + seq_printf(m, " DFS"); #endif #ifdef CONFIG_CIFS_FSCACHE - seq_printf(m, " fscache"); + seq_printf(m, ",FSCACHE"); +#endif +#ifdef CONFIG_CIFS_SMB_DIRECT + seq_printf(m, ",SMB_DIRECT"); +#endif +#ifdef CONFIG_CIFS_STATS2 + seq_printf(m, ",STATS2"); +#else + seq_printf(m, ",STATS"); +#endif +#ifdef CONFIG_CIFS_DEBUG2 + seq_printf(m, ",DEBUG2"); +#elif defined(CONFIG_CIFS_DEBUG) + seq_printf(m, ",DEBUG"); +#endif +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + seq_printf(m, ",ALLOW_INSECURE_LEGACY"); #endif #ifdef CONFIG_CIFS_WEAK_PW_HASH - seq_printf(m, " lanman"); + seq_printf(m, ",WEAK_PW_HASH"); #endif #ifdef CONFIG_CIFS_POSIX - seq_printf(m, " posix"); + seq_printf(m, ",CIFS_POSIX"); #endif #ifdef CONFIG_CIFS_UPCALL - seq_printf(m, " spnego"); + seq_printf(m, ",UPCALL(SPNEGO)"); #endif #ifdef CONFIG_CIFS_XATTR - seq_printf(m, " xattr"); + seq_printf(m, ",XATTR"); #endif #ifdef CONFIG_CIFS_ACL - seq_printf(m, " acl"); + seq_printf(m, ",ACL"); #endif seq_putc(m, '\n'); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); @@ -259,10 +275,9 @@ skip_rdma: server->credits, server->dialect); if (server->sign) seq_printf(m, " signed"); -#ifdef CONFIG_CIFS_SMB311 if (server->posix_ext_supported) seq_printf(m, " posix"); -#endif /* 3.1.1 */ + i++; list_for_each(tmp2, &server->smb_ses_list) { ses = list_entry(tmp2, struct cifs_ses, @@ -350,7 +365,6 @@ skip_rdma: return 0; } -#ifdef CONFIG_CIFS_STATS static ssize_t cifs_stats_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { @@ -364,13 +378,23 @@ static ssize_t cifs_stats_proc_write(struct file *file, rc = kstrtobool_from_user(buffer, count, &bv); if (rc == 0) { #ifdef CONFIG_CIFS_STATS2 + int i; + atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); #endif /* CONFIG_CIFS_STATS2 */ + spin_lock(&GlobalMid_Lock); + GlobalMaxActiveXid = 0; + GlobalCurrentXid = 0; + spin_unlock(&GlobalMid_Lock); spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp1, &cifs_tcp_ses_list) { server = list_entry(tmp1, struct TCP_Server_Info, tcp_ses_list); +#ifdef CONFIG_CIFS_STATS2 + for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) + atomic_set(&server->smb2slowcmd[i], 0); +#endif /* CONFIG_CIFS_STATS2 */ list_for_each(tmp2, &server->smb_ses_list) { ses = list_entry(tmp2, struct cifs_ses, smb_ses_list); @@ -379,6 +403,10 @@ static ssize_t cifs_stats_proc_write(struct file *file, struct cifs_tcon, tcon_list); atomic_set(&tcon->num_smbs_sent, 0); + spin_lock(&tcon->stat_lock); + tcon->bytes_read = 0; + tcon->bytes_written = 0; + spin_unlock(&tcon->stat_lock); if (server->ops->clear_stats) server->ops->clear_stats(tcon); } @@ -395,13 +423,15 @@ static ssize_t cifs_stats_proc_write(struct file *file, static int cifs_stats_proc_show(struct seq_file *m, void *v) { int i; +#ifdef CONFIG_CIFS_STATS2 + int j; +#endif /* STATS2 */ struct list_head *tmp1, *tmp2, *tmp3; struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - seq_printf(m, - "Resources in use\nCIFS Session: %d\n", + seq_printf(m, "Resources in use\nCIFS Session: %d\n", sesInfoAllocCount.counter); seq_printf(m, "Share (unique mount targets): %d\n", tconInfoAllocCount.counter); @@ -430,6 +460,13 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) list_for_each(tmp1, &cifs_tcp_ses_list) { server = list_entry(tmp1, struct TCP_Server_Info, tcp_ses_list); +#ifdef CONFIG_CIFS_STATS2 + for (j = 0; j < NUMBER_OF_SMB2_COMMANDS; j++) + if (atomic_read(&server->smb2slowcmd[j])) + seq_printf(m, "%d slow responses from %s for command %d\n", + atomic_read(&server->smb2slowcmd[j]), + server->hostname, j); +#endif /* STATS2 */ list_for_each(tmp2, &server->smb_ses_list) { ses = list_entry(tmp2, struct cifs_ses, smb_ses_list); @@ -466,7 +503,6 @@ static const struct file_operations cifs_stats_proc_fops = { .release = single_release, .write = cifs_stats_proc_write, }; -#endif /* STATS */ #ifdef CONFIG_CIFS_SMB_DIRECT #define PROC_FILE_DEFINE(name) \ @@ -524,9 +560,7 @@ cifs_proc_init(void) proc_create_single("DebugData", 0, proc_fs_cifs, cifs_debug_data_proc_show); -#ifdef CONFIG_CIFS_STATS proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops); -#endif /* STATS */ proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops); proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops); proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs, @@ -564,9 +598,7 @@ cifs_proc_clean(void) remove_proc_entry("DebugData", proc_fs_cifs); remove_proc_entry("cifsFYI", proc_fs_cifs); remove_proc_entry("traceSMB", proc_fs_cifs); -#ifdef CONFIG_CIFS_STATS remove_proc_entry("Stats", proc_fs_cifs); -#endif remove_proc_entry("SecurityFlags", proc_fs_cifs); remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index ee2a8ec70056..85b31cfa2f3c 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -83,7 +83,13 @@ int __cifs_calc_signature(struct smb_rqst *rqst, kaddr = (char *) kmap(rqst->rq_pages[i]) + offset; - crypto_shash_update(shash, kaddr, len); + rc = crypto_shash_update(shash, kaddr, len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + kunmap(rqst->rq_pages[i]); + return rc; + } kunmap(rqst->rq_pages[i]); } @@ -452,7 +458,7 @@ find_timestamp(struct cifs_ses *ses) unsigned char *blobptr; unsigned char *blobend; struct ntlmssp2_name *attrptr; - struct timespec ts; + struct timespec64 ts; if (!ses->auth_key.len || !ses->auth_key.response) return 0; @@ -477,7 +483,7 @@ find_timestamp(struct cifs_ses *ses) blobptr += attrsize; /* advance attr value */ } - ktime_get_real_ts(&ts); + ktime_get_real_ts64(&ts); return cpu_to_le64(cifs_UnixTimeToNT(ts)); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d5aa7ae917bf..7065426b3280 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -139,6 +139,9 @@ cifs_read_super(struct super_block *sb) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) sb->s_flags |= SB_POSIXACL; + if (tcon->snapshot_time) + sb->s_flags |= SB_RDONLY; + if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) sb->s_maxbytes = MAX_LFS_FILESIZE; else @@ -209,14 +212,16 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) xid = get_xid(); - /* - * PATH_MAX may be too long - it would presumably be total path, - * but note that some servers (includinng Samba 3) have a shorter - * maximum path. - * - * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO. - */ - buf->f_namelen = PATH_MAX; + if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0) + buf->f_namelen = + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength); + else + buf->f_namelen = PATH_MAX; + + buf->f_fsid.val[0] = tcon->vol_serial_number; + /* are using part of create time for more randomness, see man statfs */ + buf->f_fsid.val[1] = (int)le64_to_cpu(tcon->vol_create_time); + buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ @@ -427,7 +432,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) else if (tcon->ses->user_name) seq_show_option(s, "username", tcon->ses->user_name); - if (tcon->ses->domainName) + if (tcon->ses->domainName && tcon->ses->domainName[0] != 0) seq_show_option(s, "domain", tcon->ses->domainName); if (srcaddr->sa_family != AF_UNSPEC) { @@ -481,20 +486,12 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",persistenthandles"); else if (tcon->use_resilient) seq_puts(s, ",resilienthandles"); - -#ifdef CONFIG_CIFS_SMB311 if (tcon->posix_extensions) seq_puts(s, ",posix"); else if (tcon->unix_ext) seq_puts(s, ",unix"); else seq_puts(s, ",nounix"); -#else - if (tcon->unix_ext) - seq_puts(s, ",unix"); - else - seq_puts(s, ",nounix"); -#endif /* SMB311 */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) seq_puts(s, ",posixpaths"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) @@ -546,6 +543,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",wsize=%u", cifs_sb->wsize); seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); + if (tcon->snapshot_time) + seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); /* convert actimeo and display it in seconds */ seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 5f0231803431..f3a78efc3109 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, bool excl); extern int cifs_atomic_open(struct inode *, struct dentry *, - struct file *, unsigned, umode_t, - int *); + struct file *, unsigned, umode_t); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, unsigned int); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c923c7854027..0c9ab62c3df4 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -76,6 +76,9 @@ #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 +/* maximum number of PDUs in one compound */ +#define MAX_COMPOUND 5 + /* * Default number of credits to keep available for SMB3. * This value is chosen somewhat arbitrarily. The Windows client @@ -191,9 +194,7 @@ enum smb_version { Smb_21, Smb_30, Smb_302, -#ifdef CONFIG_CIFS_SMB311 Smb_311, -#endif /* SMB311 */ Smb_3any, Smb_default, Smb_version_err @@ -456,13 +457,11 @@ struct smb_version_operations { long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, loff_t); /* init transform request - used for encryption for now */ - int (*init_transform_rq)(struct TCP_Server_Info *, struct smb_rqst *, - struct smb_rqst *); - /* free transform request */ - void (*free_transform_rq)(struct smb_rqst *); + int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst, + struct smb_rqst *, struct smb_rqst *); int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, - struct mid_q_entry **); + struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); int (*next_header)(char *); @@ -684,15 +683,14 @@ struct TCP_Server_Info { #ifdef CONFIG_CIFS_STATS2 atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ -#endif + atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */ +#endif /* STATS2 */ unsigned int max_read; unsigned int max_write; -#ifdef CONFIG_CIFS_SMB311 __le16 cipher_type; /* save initital negprot hash */ __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; bool posix_ext_supported; -#endif /* 3.1.1 */ struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; @@ -886,9 +884,7 @@ struct cifs_ses { __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; -#ifdef CONFIG_CIFS_SMB311 __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; -#endif /* 3.1.1 */ /* * Network interfaces available on the server this session is @@ -913,6 +909,7 @@ cap_unix(struct cifs_ses *ses) struct cached_fid { bool is_valid:1; /* Do we have a useable root fid */ + struct kref refcount; struct cifs_fid *fid; struct mutex fid_mutex; struct cifs_tcon *tcon; @@ -936,7 +933,6 @@ struct cifs_tcon { __u32 tid; /* The 4 byte tree id */ __u16 Flags; /* optional support bits */ enum statusEnum tidStatus; -#ifdef CONFIG_CIFS_STATS atomic_t num_smbs_sent; union { struct { @@ -967,24 +963,9 @@ struct cifs_tcon { atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS]; } smb2_stats; } stats; -#ifdef CONFIG_CIFS_STATS2 - unsigned long long time_writes; - unsigned long long time_reads; - unsigned long long time_opens; - unsigned long long time_deletes; - unsigned long long time_closes; - unsigned long long time_mkdirs; - unsigned long long time_rmdirs; - unsigned long long time_renames; - unsigned long long time_t2renames; - unsigned long long time_ffirst; - unsigned long long time_fnext; - unsigned long long time_fclose; -#endif /* CONFIG_CIFS_STATS2 */ __u64 bytes_read; __u64 bytes_written; spinlock_t stat_lock; /* protects the two fields above */ -#endif /* CONFIG_CIFS_STATS */ FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; @@ -997,9 +978,7 @@ struct cifs_tcon { bool seal:1; /* transport encryption for this mounted share */ bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ -#ifdef CONFIG_CIFS_SMB311 bool posix_extensions; /* if true SMB3.11 posix extensions enabled */ -#endif /* CIFS_311 */ bool local_lease:1; /* check leases (only) on local system not remote */ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ @@ -1046,6 +1025,7 @@ struct tcon_link { }; extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); +extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst); static inline struct cifs_tcon * tlink_tcon(struct tcon_link *tlink) @@ -1352,7 +1332,6 @@ convert_delimiter(char *path, char delim) *pos = delim; } -#ifdef CONFIG_CIFS_STATS #define cifs_stats_inc atomic_inc static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon, @@ -1372,13 +1351,6 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, tcon->bytes_read += bytes; spin_unlock(&tcon->stat_lock); } -#else - -#define cifs_stats_inc(field) do {} while (0) -#define cifs_stats_bytes_written(tcon, bytes) do {} while (0) -#define cifs_stats_bytes_read(tcon, bytes) do {} while (0) - -#endif /* @@ -1544,9 +1516,9 @@ struct cifs_fattr { dev_t cf_rdev; unsigned int cf_nlink; unsigned int cf_dtype; - struct timespec cf_atime; - struct timespec cf_mtime; - struct timespec cf_ctime; + struct timespec64 cf_atime; + struct timespec64 cf_mtime; + struct timespec64 cf_ctime; }; static inline void free_dfs_info_param(struct dfs_info3_param *param) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1890f534c88b..20adda4de83b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -94,6 +94,10 @@ extern int cifs_call_async(struct TCP_Server_Info *server, extern int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, struct smb_rqst *rqst, int *resp_buf_type, const int flags, struct kvec *resp_iov); +extern int compound_send_recv(const unsigned int xid, struct cifs_ses *ses, + const int flags, const int num_rqst, + struct smb_rqst *rqst, int *resp_buf_type, + struct kvec *resp_iov); extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , @@ -143,9 +147,9 @@ extern enum securityEnum select_sectype(struct TCP_Server_Info *server, enum securityEnum requested); extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); -extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); -extern u64 cifs_UnixTimeToNT(struct timespec); -extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, +extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); +extern u64 cifs_UnixTimeToNT(struct timespec64); +extern struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern int cifs_get_writer(struct cifsInodeInfo *cinode); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 93408eab92e7..dc2f4cf08fe9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -508,13 +508,13 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) * this requirement. */ int val, seconds, remain, result; - struct timespec ts; - unsigned long utc = ktime_get_real_seconds(); + struct timespec64 ts; + time64_t utc = ktime_get_real_seconds(); ts = cnvrtDosUnixTm(rsp->SrvTime.Date, rsp->SrvTime.Time, 0); - cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", - (int)ts.tv_sec, (int)utc, - (int)(utc - ts.tv_sec)); + cifs_dbg(FYI, "SrvTime %lld sec since 1970 (utc: %lld) diff: %lld\n", + ts.tv_sec, utc, + utc - ts.tv_sec); val = (int)(utc - ts.tv_sec); seconds = abs(val); result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; @@ -4082,7 +4082,7 @@ QInfRetry: if (rc) { cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc); } else if (data) { - struct timespec ts; + struct timespec64 ts; __u32 time = le32_to_cpu(pSMBr->last_write_time); /* decode response */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5df2c0698cda..c832a8a1970a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -303,10 +303,8 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_21, SMB21_VERSION_STRING }, { Smb_30, SMB30_VERSION_STRING }, { Smb_302, SMB302_VERSION_STRING }, -#ifdef CONFIG_CIFS_SMB311 { Smb_311, SMB311_VERSION_STRING }, { Smb_311, ALT_SMB311_VERSION_STRING }, -#endif /* SMB311 */ { Smb_3any, SMB3ANY_VERSION_STRING }, { Smb_default, SMBDEFAULT_VERSION_STRING }, { Smb_version_err, NULL } @@ -350,6 +348,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->max_read = 0; cifs_dbg(FYI, "Reconnecting tcp session\n"); + trace_smb3_reconnect(server->CurrentMid, server->hostname); /* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they are not used until reconnected */ @@ -851,13 +850,14 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid) static int cifs_demultiplex_thread(void *p) { - int length; + int i, num_mids, length; struct TCP_Server_Info *server = p; unsigned int pdu_length; unsigned int next_offset; char *buf = NULL; struct task_struct *task_to_wake = NULL; - struct mid_q_entry *mid_entry; + struct mid_q_entry *mids[MAX_COMPOUND]; + char *bufs[MAX_COMPOUND]; current->flags |= PF_MEMALLOC; cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current)); @@ -924,58 +924,75 @@ next_pdu: server->pdu_size = next_offset; } - mid_entry = NULL; + memset(mids, 0, sizeof(mids)); + memset(bufs, 0, sizeof(bufs)); + num_mids = 0; + if (server->ops->is_transform_hdr && server->ops->receive_transform && server->ops->is_transform_hdr(buf)) { length = server->ops->receive_transform(server, - &mid_entry); + mids, + bufs, + &num_mids); } else { - mid_entry = server->ops->find_mid(server, buf); + mids[0] = server->ops->find_mid(server, buf); + bufs[0] = buf; + if (mids[0]) + num_mids = 1; - if (!mid_entry || !mid_entry->receive) - length = standard_receive3(server, mid_entry); + if (!mids[0] || !mids[0]->receive) + length = standard_receive3(server, mids[0]); else - length = mid_entry->receive(server, mid_entry); + length = mids[0]->receive(server, mids[0]); } if (length < 0) { - if (mid_entry) - cifs_mid_q_entry_release(mid_entry); + for (i = 0; i < num_mids; i++) + if (mids[i]) + cifs_mid_q_entry_release(mids[i]); continue; } if (server->large_buf) buf = server->bigbuf; + server->lstrp = jiffies; - if (mid_entry != NULL) { - mid_entry->resp_buf_size = server->pdu_size; - if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) && - mid_entry->mid_state == MID_RESPONSE_RECEIVED && - server->ops->handle_cancelled_mid) - server->ops->handle_cancelled_mid( - mid_entry->resp_buf, - server); - if (!mid_entry->multiRsp || mid_entry->multiEnd) - mid_entry->callback(mid_entry); + for (i = 0; i < num_mids; i++) { + if (mids[i] != NULL) { + mids[i]->resp_buf_size = server->pdu_size; + if ((mids[i]->mid_flags & MID_WAIT_CANCELLED) && + mids[i]->mid_state == MID_RESPONSE_RECEIVED && + server->ops->handle_cancelled_mid) + server->ops->handle_cancelled_mid( + mids[i]->resp_buf, + server); - cifs_mid_q_entry_release(mid_entry); - } else if (server->ops->is_oplock_break && - server->ops->is_oplock_break(buf, server)) { - cifs_dbg(FYI, "Received oplock break\n"); - } else { - cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n", - atomic_read(&midCount)); - cifs_dump_mem("Received Data is: ", buf, - HEADER_SIZE(server)); + if (!mids[i]->multiRsp || mids[i]->multiEnd) + mids[i]->callback(mids[i]); + + cifs_mid_q_entry_release(mids[i]); + } else if (server->ops->is_oplock_break && + server->ops->is_oplock_break(bufs[i], + server)) { + cifs_dbg(FYI, "Received oplock break\n"); + } else { + cifs_dbg(VFS, "No task to wake, unknown frame " + "received! NumMids %d\n", + atomic_read(&midCount)); + cifs_dump_mem("Received Data is: ", bufs[i], + HEADER_SIZE(server)); #ifdef CONFIG_CIFS_DEBUG2 - if (server->ops->dump_detail) - server->ops->dump_detail(buf, server); - cifs_dump_mids(server); + if (server->ops->dump_detail) + server->ops->dump_detail(bufs[i], + server); + cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ + } } + if (pdu_length > server->pdu_size) { if (!allocate_buffers(server)) continue; @@ -1174,6 +1191,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3) substring_t args[MAX_OPT_ARGS]; switch (match_token(value, cifs_smb_version_tokens, args)) { +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY case Smb_1: if (disable_legacy_dialects) { cifs_dbg(VFS, "mount with legacy dialect disabled\n"); @@ -1198,6 +1216,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3) vol->ops = &smb20_operations; vol->vals = &smb20_values; break; +#else + case Smb_1: + cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n"); + return 1; + case Smb_20: + cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n"); + return 1; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ case Smb_21: vol->ops = &smb21_operations; vol->vals = &smb21_values; @@ -1210,12 +1236,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3) vol->ops = &smb30_operations; /* currently identical with 3.0 */ vol->vals = &smb302_values; break; -#ifdef CONFIG_CIFS_SMB311 case Smb_311: vol->ops = &smb311_operations; vol->vals = &smb311_values; break; -#endif /* SMB311 */ case Smb_3any: vol->ops = &smb30_operations; /* currently identical with 3.0 */ vol->vals = &smb3any_values; @@ -3030,15 +3054,17 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) } } -#ifdef CONFIG_CIFS_SMB311 - if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) { - if (ses->server->vals->protocol_id == SMB311_PROT_ID) { + if (volume_info->linux_ext) { + if (ses->server->posix_ext_supported) { tcon->posix_extensions = true; printk_once(KERN_WARNING "SMB3.11 POSIX Extensions are experimental\n"); + } else { + cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions.\n"); + rc = -EOPNOTSUPP; + goto out_fail; } } -#endif /* 311 */ /* * BB Do we need to wrap session_mutex around this TCon call and Unix @@ -3992,11 +4018,9 @@ try_mount_again: goto remote_path_check; } -#ifdef CONFIG_CIFS_SMB311 /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ if (tcon->posix_extensions) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; -#endif /* SMB3.11 */ /* tell server which Unix caps we support */ if (cap_unix(tcon->ses)) { @@ -4459,11 +4483,10 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } -#ifdef CONFIG_CIFS_SMB311 /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ if (tcon->posix_extensions) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; -#endif /* SMB3.11 */ + if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, vol_info); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ddae52bd1993..3713d22b95a7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -465,8 +465,7 @@ out_err: int cifs_atomic_open(struct inode *inode, struct dentry *direntry, - struct file *file, unsigned oflags, umode_t mode, - int *opened) + struct file *file, unsigned oflags, umode_t mode) { int rc; unsigned int xid; @@ -539,9 +538,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; - rc = finish_open(file, direntry, generic_file_open, opened); + rc = finish_open(file, direntry, generic_file_open); if (rc) { if (server->ops->close) server->ops->close(xid, tcon, &fid); diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 85145a763021..ea6ace9c2417 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -129,8 +129,10 @@ static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi, memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); - auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); + auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; + auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; + auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; + auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; cifsi->fscache = fscache_acquire_cookie(tcon->fscache, @@ -166,8 +168,10 @@ void cifs_fscache_release_inode_cookie(struct inode *inode) if (cifsi->fscache) { memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); - auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); + auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; + auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; + auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; + auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); fscache_relinquish_cookie(cifsi->fscache, &auxdata, false); diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index c7e3ac251e16..8c0862e41306 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -31,9 +31,11 @@ * Auxiliary data attached to CIFS inode within the cache */ struct cifs_fscache_inode_auxdata { - struct timespec last_write_time; - struct timespec last_change_time; - u64 eof; + u64 last_write_time_sec; + u64 last_change_time_sec; + u32 last_write_time_nsec; + u32 last_change_time_nsec; + u64 eof; }; /* diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a2cfb33e85c1..d32eaa4b2437 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -95,7 +95,6 @@ static void cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); - struct timespec ts; cifs_dbg(FYI, "%s: revalidating inode %llu\n", __func__, cifs_i->uniqueid); @@ -114,8 +113,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) } /* revalidate if mtime or size have changed */ - ts = timespec64_to_timespec(inode->i_mtime); - if (timespec_equal(&ts, &fattr->cf_mtime) && + if (timespec64_equal(&inode->i_mtime, &fattr->cf_mtime) && cifs_i->server_eof == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); @@ -164,9 +162,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) cifs_revalidate_cache(inode, fattr); spin_lock(&inode->i_lock); - inode->i_atime = timespec_to_timespec64(fattr->cf_atime); - inode->i_mtime = timespec_to_timespec64(fattr->cf_mtime); - inode->i_ctime = timespec_to_timespec64(fattr->cf_ctime); + inode->i_atime = fattr->cf_atime; + inode->i_mtime = fattr->cf_mtime; + inode->i_ctime = fattr->cf_ctime; inode->i_rdev = fattr->cf_rdev; cifs_nlink_fattr_to_inode(inode, fattr); inode->i_uid = fattr->cf_uid; @@ -327,8 +325,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; fattr->cf_uid = cifs_sb->mnt_uid; fattr->cf_gid = cifs_sb->mnt_gid; - ktime_get_real_ts(&fattr->cf_mtime); - fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran); + ktime_get_real_ts64(&fattr->cf_mtime); + fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran); fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; @@ -604,8 +602,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (info->LastAccessTime) fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); else { - ktime_get_real_ts(&fattr->cf_atime); - fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran); + ktime_get_real_ts64(&fattr->cf_atime); + fattr->cf_atime = timespec64_trunc(fattr->cf_atime, sb->s_time_gran); } fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); @@ -1122,17 +1120,19 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (!server->ops->set_file_info) return -ENOSYS; + info_buf.Pad = 0; + if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime))); + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); } else info_buf.LastAccessTime = 0; if (attrs->ia_valid & ATTR_MTIME) { set_time = true; info_buf.LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime))); + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); } else info_buf.LastWriteTime = 0; @@ -1145,7 +1145,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (set_time && (attrs->ia_valid & ATTR_CTIME)) { cifs_dbg(FYI, "CIFS - CTIME changed\n"); info_buf.ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime))); + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); } else info_buf.ChangeTime = 0; @@ -1577,14 +1577,12 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) server = tcon->ses->server; -#ifdef CONFIG_CIFS_SMB311 if ((server->ops->posix_mkdir) && (tcon->posix_extensions)) { rc = server->ops->posix_mkdir(xid, inode, mode, tcon, full_path, cifs_sb); d_drop(direntry); /* for time being always refresh inode info */ goto mkdir_out; } -#endif /* SMB311 */ if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { @@ -2071,8 +2069,8 @@ int cifs_getattr(const struct path *path, struct kstat *stat, /* old CIFS Unix Extensions doesn't return create time */ if (CIFS_I(inode)->createtime) { stat->result_mask |= STATX_BTIME; - stat->btime = timespec_to_timespec64( - cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime))); + stat->btime = + cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)); } stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED); @@ -2278,17 +2276,17 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->gid = INVALID_GID; /* no change */ if (attrs->ia_valid & ATTR_ATIME) - args->atime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime)); + args->atime = cifs_UnixTimeToNT(attrs->ia_atime); else args->atime = NO_CHANGE_64; if (attrs->ia_valid & ATTR_MTIME) - args->mtime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime)); + args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime); else args->mtime = NO_CHANGE_64; if (attrs->ia_valid & ATTR_CTIME) - args->ctime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime)); + args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime); else args->ctime = NO_CHANGE_64; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index de41f96aba49..2148b0f60e5e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -396,7 +396,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int buf_type = CIFS_NO_BUFFER; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_II; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct smb2_file_all_info *pfile_info = NULL; oparms.tcon = tcon; @@ -459,7 +459,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_io_parms io_parms; int create_options = CREATE_NOT_DIR; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; if (backup_cred(cifs_sb)) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 53e8362cbc4a..dacb2c05674c 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -122,9 +122,7 @@ tconInfoAlloc(void) mutex_init(&ret_buf->crfid.fid_mutex); ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL); -#ifdef CONFIG_CIFS_STATS spin_lock_init(&ret_buf->stat_lock); -#endif } return ret_buf; } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index d7ad0dfe4e68..fdd908e4a26b 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -918,10 +918,10 @@ smbCalcSize(void *buf, struct TCP_Server_Info *server) * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) * into Unix UTC (based 1970-01-01, in seconds). */ -struct timespec +struct timespec64 cifs_NTtimeToUnix(__le64 ntutc) { - struct timespec ts; + struct timespec64 ts; /* BB what about the timezone? BB */ /* Subtract the NTFS time offset, then convert to 1s intervals. */ @@ -935,12 +935,12 @@ cifs_NTtimeToUnix(__le64 ntutc) */ if (t < 0) { abs_t = -t; - ts.tv_nsec = (long)(do_div(abs_t, 10000000) * 100); + ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100); ts.tv_nsec = -ts.tv_nsec; ts.tv_sec = -abs_t; } else { abs_t = t; - ts.tv_nsec = (long)do_div(abs_t, 10000000) * 100; + ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100; ts.tv_sec = abs_t; } @@ -949,7 +949,7 @@ cifs_NTtimeToUnix(__le64 ntutc) /* Convert the Unix UTC into NT UTC. */ u64 -cifs_UnixTimeToNT(struct timespec t) +cifs_UnixTimeToNT(struct timespec64 t) { /* Convert to 100ns intervals and then add the NTFS time offset. */ return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET; @@ -959,10 +959,11 @@ static const int total_days_of_prev_months[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; -struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) +struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) { - struct timespec ts; - int sec, min, days, month, year; + struct timespec64 ts; + time64_t sec; + int min, days, month, year; u16 date = le16_to_cpu(le_date); u16 time = le16_to_cpu(le_time); SMB_TIME *st = (SMB_TIME *)&time; @@ -973,7 +974,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) sec = 2 * st->TwoSeconds; min = st->Minutes; if ((sec > 59) || (min > 59)) - cifs_dbg(VFS, "illegal time min %d sec %d\n", min, sec); + cifs_dbg(VFS, "illegal time min %d sec %lld\n", min, sec); sec += (min * 60); sec += 60 * 60 * st->Hours; if (st->Hours > 24) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 646dcd149de1..378151e09e91 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -624,7 +624,6 @@ cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, static void cifs_clear_stats(struct cifs_tcon *tcon) { -#ifdef CONFIG_CIFS_STATS atomic_set(&tcon->stats.cifs_stats.num_writes, 0); atomic_set(&tcon->stats.cifs_stats.num_reads, 0); atomic_set(&tcon->stats.cifs_stats.num_flushes, 0); @@ -646,13 +645,11 @@ cifs_clear_stats(struct cifs_tcon *tcon) atomic_set(&tcon->stats.cifs_stats.num_locks, 0); atomic_set(&tcon->stats.cifs_stats.num_acl_get, 0); atomic_set(&tcon->stats.cifs_stats.num_acl_set, 0); -#endif } static void cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon) { -#ifdef CONFIG_CIFS_STATS seq_printf(m, " Oplocks breaks: %d", atomic_read(&tcon->stats.cifs_stats.num_oplock_brks)); seq_printf(m, "\nReads: %d Bytes: %llu", @@ -684,7 +681,6 @@ cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon) atomic_read(&tcon->stats.cifs_stats.num_ffirst), atomic_read(&tcon->stats.cifs_stats.num_fnext), atomic_read(&tcon->stats.cifs_stats.num_fclose)); -#endif } static void diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index d01ad706d7fc..1eef1791d0c4 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -120,7 +120,9 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, break; } - if (use_cached_root_handle == false) + if (use_cached_root_handle) + close_shroot(&tcon->crfid); + else rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (tmprc) rc = tmprc; @@ -281,7 +283,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, int rc; if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && - (buf->LastWriteTime == 0) && (buf->ChangeTime) && + (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) && (buf->Attributes == 0)) return 0; /* would be a no op, no sense sending this */ diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 3ff7cec2da81..303d4592ebe7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -93,7 +93,6 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; -#ifdef CONFIG_CIFS_SMB311 static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, __u32 non_ctxlen) { @@ -127,7 +126,6 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, /* length of negcontexts including pad from end of sec blob to them */ return (len - nc_offset) + size_of_pad_before_neg_ctxts; } -#endif /* CIFS_SMB311 */ int smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) @@ -222,10 +220,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) clc_len = smb2_calc_size(buf, srvr); -#ifdef CONFIG_CIFS_SMB311 if (shdr->Command == SMB2_NEGOTIATE) clc_len += get_neg_ctxt_len(shdr, len, clc_len); -#endif /* SMB311 */ + if (len != clc_len) { cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n", clc_len, len, mid); @@ -451,15 +448,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) /* Windows doesn't allow paths beginning with \ */ if (from[0] == '\\') start_of_path = from + 1; -#ifdef CONFIG_CIFS_SMB311 + /* SMB311 POSIX extensions paths do not include leading slash */ else if (cifs_sb_master_tlink(cifs_sb) && cifs_sb_master_tcon(cifs_sb)->posix_extensions && (from[0] == '/')) { start_of_path = from + 1; - } -#endif /* 311 */ - else + } else start_of_path = from; to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len, @@ -759,7 +754,6 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) return 0; } -#ifdef CONFIG_CIFS_SMB311 /** * smb311_update_preauth_hash - update @ses hash with the packet data in @iov * @@ -821,4 +815,3 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec) return 0; } -#endif diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ea92a38b2f08..541258447c4c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -444,7 +444,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); - if (rc != 0) { + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, + "server does not support query network interfaces\n"); + goto out; + } else if (rc != 0) { cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); goto out; } @@ -466,21 +470,36 @@ out: return rc; } -void -smb2_cached_lease_break(struct work_struct *work) +static void +smb2_close_cached_fid(struct kref *ref) { - struct cached_fid *cfid = container_of(work, - struct cached_fid, lease_break); - mutex_lock(&cfid->fid_mutex); + struct cached_fid *cfid = container_of(ref, struct cached_fid, + refcount); + if (cfid->is_valid) { cifs_dbg(FYI, "clear cached root file handle\n"); SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid, cfid->fid->volatile_fid); cfid->is_valid = false; } +} + +void close_shroot(struct cached_fid *cfid) +{ + mutex_lock(&cfid->fid_mutex); + kref_put(&cfid->refcount, smb2_close_cached_fid); mutex_unlock(&cfid->fid_mutex); } +void +smb2_cached_lease_break(struct work_struct *work) +{ + struct cached_fid *cfid = container_of(work, + struct cached_fid, lease_break); + + close_shroot(cfid); +} + /* * Open the directory at the root of a share */ @@ -495,6 +514,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) if (tcon->crfid.is_valid) { cifs_dbg(FYI, "found a cached root file handle\n"); memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid)); + kref_get(&tcon->crfid.refcount); mutex_unlock(&tcon->crfid.fid_mutex); return 0; } @@ -511,6 +531,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid)); tcon->crfid.tcon = tcon; tcon->crfid.is_valid = true; + kref_init(&tcon->crfid.refcount); + kref_get(&tcon->crfid.refcount); } mutex_unlock(&tcon->crfid.fid_mutex); return rc; @@ -549,9 +571,14 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_DEVICE_INFORMATION); SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, + FS_VOLUME_INFORMATION); + SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */ if (no_cached_open) SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + else + close_shroot(&tcon->crfid); + return; } @@ -873,13 +900,11 @@ smb2_can_echo(struct TCP_Server_Info *server) static void smb2_clear_stats(struct cifs_tcon *tcon) { -#ifdef CONFIG_CIFS_STATS int i; for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) { atomic_set(&tcon->stats.smb2_stats.smb2_com_sent[i], 0); atomic_set(&tcon->stats.smb2_stats.smb2_com_failed[i], 0); } -#endif } static void @@ -918,67 +943,58 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon) static void smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) { -#ifdef CONFIG_CIFS_STATS atomic_t *sent = tcon->stats.smb2_stats.smb2_com_sent; atomic_t *failed = tcon->stats.smb2_stats.smb2_com_failed; - seq_printf(m, "\nNegotiates: %d sent %d failed", - atomic_read(&sent[SMB2_NEGOTIATE_HE]), - atomic_read(&failed[SMB2_NEGOTIATE_HE])); - seq_printf(m, "\nSessionSetups: %d sent %d failed", - atomic_read(&sent[SMB2_SESSION_SETUP_HE]), - atomic_read(&failed[SMB2_SESSION_SETUP_HE])); - seq_printf(m, "\nLogoffs: %d sent %d failed", - atomic_read(&sent[SMB2_LOGOFF_HE]), - atomic_read(&failed[SMB2_LOGOFF_HE])); - seq_printf(m, "\nTreeConnects: %d sent %d failed", + + /* + * Can't display SMB2_NEGOTIATE, SESSION_SETUP, LOGOFF, CANCEL and ECHO + * totals (requests sent) since those SMBs are per-session not per tcon + */ + seq_printf(m, "\nBytes read: %llu Bytes written: %llu", + (long long)(tcon->bytes_read), + (long long)(tcon->bytes_written)); + seq_printf(m, "\nTreeConnects: %d total %d failed", atomic_read(&sent[SMB2_TREE_CONNECT_HE]), atomic_read(&failed[SMB2_TREE_CONNECT_HE])); - seq_printf(m, "\nTreeDisconnects: %d sent %d failed", + seq_printf(m, "\nTreeDisconnects: %d total %d failed", atomic_read(&sent[SMB2_TREE_DISCONNECT_HE]), atomic_read(&failed[SMB2_TREE_DISCONNECT_HE])); - seq_printf(m, "\nCreates: %d sent %d failed", + seq_printf(m, "\nCreates: %d total %d failed", atomic_read(&sent[SMB2_CREATE_HE]), atomic_read(&failed[SMB2_CREATE_HE])); - seq_printf(m, "\nCloses: %d sent %d failed", + seq_printf(m, "\nCloses: %d total %d failed", atomic_read(&sent[SMB2_CLOSE_HE]), atomic_read(&failed[SMB2_CLOSE_HE])); - seq_printf(m, "\nFlushes: %d sent %d failed", + seq_printf(m, "\nFlushes: %d total %d failed", atomic_read(&sent[SMB2_FLUSH_HE]), atomic_read(&failed[SMB2_FLUSH_HE])); - seq_printf(m, "\nReads: %d sent %d failed", + seq_printf(m, "\nReads: %d total %d failed", atomic_read(&sent[SMB2_READ_HE]), atomic_read(&failed[SMB2_READ_HE])); - seq_printf(m, "\nWrites: %d sent %d failed", + seq_printf(m, "\nWrites: %d total %d failed", atomic_read(&sent[SMB2_WRITE_HE]), atomic_read(&failed[SMB2_WRITE_HE])); - seq_printf(m, "\nLocks: %d sent %d failed", + seq_printf(m, "\nLocks: %d total %d failed", atomic_read(&sent[SMB2_LOCK_HE]), atomic_read(&failed[SMB2_LOCK_HE])); - seq_printf(m, "\nIOCTLs: %d sent %d failed", + seq_printf(m, "\nIOCTLs: %d total %d failed", atomic_read(&sent[SMB2_IOCTL_HE]), atomic_read(&failed[SMB2_IOCTL_HE])); - seq_printf(m, "\nCancels: %d sent %d failed", - atomic_read(&sent[SMB2_CANCEL_HE]), - atomic_read(&failed[SMB2_CANCEL_HE])); - seq_printf(m, "\nEchos: %d sent %d failed", - atomic_read(&sent[SMB2_ECHO_HE]), - atomic_read(&failed[SMB2_ECHO_HE])); - seq_printf(m, "\nQueryDirectories: %d sent %d failed", + seq_printf(m, "\nQueryDirectories: %d total %d failed", atomic_read(&sent[SMB2_QUERY_DIRECTORY_HE]), atomic_read(&failed[SMB2_QUERY_DIRECTORY_HE])); - seq_printf(m, "\nChangeNotifies: %d sent %d failed", + seq_printf(m, "\nChangeNotifies: %d total %d failed", atomic_read(&sent[SMB2_CHANGE_NOTIFY_HE]), atomic_read(&failed[SMB2_CHANGE_NOTIFY_HE])); - seq_printf(m, "\nQueryInfos: %d sent %d failed", + seq_printf(m, "\nQueryInfos: %d total %d failed", atomic_read(&sent[SMB2_QUERY_INFO_HE]), atomic_read(&failed[SMB2_QUERY_INFO_HE])); - seq_printf(m, "\nSetInfos: %d sent %d failed", + seq_printf(m, "\nSetInfos: %d total %d failed", atomic_read(&sent[SMB2_SET_INFO_HE]), atomic_read(&failed[SMB2_SET_INFO_HE])); seq_printf(m, "\nOplockBreaks: %d sent %d failed", atomic_read(&sent[SMB2_OPLOCK_BREAK_HE]), atomic_read(&failed[SMB2_OPLOCK_BREAK_HE])); -#endif } static void @@ -1353,6 +1369,13 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, } +/* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */ +#define GMT_TOKEN_SIZE 50 + +/* + * Input buffer contains (empty) struct smb_snapshot array with size filled in + * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2 + */ static int smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, void __user *ioc_buf) @@ -1382,14 +1405,27 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, kfree(retbuf); return rc; } - if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) { - rc = -ERANGE; - kfree(retbuf); - return rc; - } - if (ret_data_len > snapshot_in.snapshot_array_size) - ret_data_len = snapshot_in.snapshot_array_size; + /* + * Check for min size, ie not large enough to fit even one GMT + * token (snapshot). On the first ioctl some users may pass in + * smaller size (or zero) to simply get the size of the array + * so the user space caller can allocate sufficient memory + * and retry the ioctl again with larger array size sufficient + * to hold all of the snapshot GMT tokens on the second try. + */ + if (snapshot_in.snapshot_array_size < GMT_TOKEN_SIZE) + ret_data_len = sizeof(struct smb_snapshot_array); + + /* + * We return struct SRV_SNAPSHOT_ARRAY, followed by + * the snapshot array (of 50 byte GMT tokens) each + * representing an available previous version of the data + */ + if (ret_data_len > (snapshot_in.snapshot_array_size + + sizeof(struct smb_snapshot_array))) + ret_data_len = snapshot_in.snapshot_array_size + + sizeof(struct smb_snapshot_array); if (copy_to_user(ioc_buf, retbuf, ret_data_len)) rc = -EFAULT; @@ -1487,7 +1523,11 @@ smb2_is_session_expired(char *buf) shdr->Status != STATUS_USER_SESSION_DELETED) return false; + trace_smb3_ses_expired(shdr->TreeId, shdr->SessionId, + le16_to_cpu(shdr->Command), + le64_to_cpu(shdr->MessageId)); cifs_dbg(FYI, "Session expired or deleted\n"); + return true; } @@ -1504,16 +1544,140 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, CIFS_CACHE_READ(cinode) ? 1 : 0); } +static void +smb2_set_related(struct smb_rqst *rqst) +{ + struct smb2_sync_hdr *shdr; + + shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + shdr->Flags |= SMB2_FLAGS_RELATED_OPERATIONS; +} + +char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0}; + +static void +smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + struct smb2_sync_hdr *shdr; + unsigned long len = smb_rqst_len(server, rqst); + + /* SMB headers in a compound are 8 byte aligned. */ + if (len & 7) { + rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding; + rqst->rq_iov[rqst->rq_nvec].iov_len = 8 - (len & 7); + rqst->rq_nvec++; + len = smb_rqst_len(server, rqst); + } + + shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + shdr->NextCommand = cpu_to_le32(len); +} + static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *buf) { + struct smb2_query_info_rsp *rsp; + struct smb2_fs_full_size_info *info = NULL; + struct smb_rqst rqst[3]; + int resp_buftype[3]; + struct kvec rsp_iov[3]; + struct kvec open_iov[5]; /* 4 + potential padding. */ + struct kvec qi_iov[1]; + struct kvec close_iov[1]; + struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *server = ses->server; + __le16 srch_path = 0; /* Null - open root of share */ + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; + int flags = 0; + int rc; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; + + memset(rqst, 0, sizeof(rqst)); + memset(resp_buftype, 0, sizeof(resp_buftype)); + memset(rsp_iov, 0, sizeof(rsp_iov)); + + memset(&open_iov, 0, sizeof(open_iov)); + rqst[0].rq_iov = open_iov; + rqst[0].rq_nvec = 4; + + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &srch_path); + if (rc) + goto qfs_exit; + smb2_set_next_command(server, &rqst[0]); + + memset(&qi_iov, 0, sizeof(qi_iov)); + rqst[1].rq_iov = qi_iov; + rqst[1].rq_nvec = 1; + + rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID, + FS_FULL_SIZE_INFORMATION, + SMB2_O_INFO_FILESYSTEM, 0, + sizeof(struct smb2_fs_full_size_info)); + if (rc) + goto qfs_exit; + smb2_set_next_command(server, &rqst[1]); + smb2_set_related(&rqst[1]); + + memset(&close_iov, 0, sizeof(close_iov)); + rqst[2].rq_iov = close_iov; + rqst[2].rq_nvec = 1; + + rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID); + if (rc) + goto qfs_exit; + smb2_set_related(&rqst[2]); + + rc = compound_send_recv(xid, ses, flags, 3, rqst, + resp_buftype, rsp_iov); + if (rc) + goto qfs_exit; + + rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; + buf->f_type = SMB2_MAGIC_NUMBER; + info = (struct smb2_fs_full_size_info *)( + le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); + rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), + &rsp_iov[1], + sizeof(struct smb2_fs_full_size_info)); + if (!rc) + smb2_copy_fs_info_to_kstatfs(info, buf); + +qfs_exit: + SMB2_open_free(&rqst[0]); + SMB2_query_info_free(&rqst[1]); + SMB2_close_free(&rqst[2]); + free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); + free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); + return rc; +} + +static int +smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ int rc; __le16 srch_path = 0; /* Null - open root of share */ u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct cifs_open_parms oparms; struct cifs_fid fid; + if (!tcon->posix_extensions) + return smb2_queryfs(xid, tcon, buf); + oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; @@ -1524,9 +1688,10 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL); if (rc) return rc; + + rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid, + fid.volatile_fid, buf); buf->f_type = SMB2_MAGIC_NUMBER; - rc = SMB2_QFS_info(xid, tcon, fid.persistent_fid, fid.volatile_fid, - buf); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; } @@ -1700,7 +1865,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, &resp_buftype); if (!rc || !err_iov.iov_base) { rc = -ENOENT; - goto querty_exit; + goto free_path; } err_buf = err_iov.iov_base; @@ -1741,6 +1906,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, querty_exit: free_rsp_buf(resp_buftype, err_buf); + free_path: kfree(utf16_path); return rc; } @@ -2326,35 +2492,51 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); } -/* Assumes: - * rqst->rq_iov[0] is transform header - * rqst->rq_iov[1+] data to be encrypted/decrypted +/* Assumes the first rqst has a transform header as the first iov. + * I.e. + * rqst[0].rq_iov[0] is transform header + * rqst[0].rq_iov[1+] data to be encrypted/decrypted + * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ static struct scatterlist * -init_sg(struct smb_rqst *rqst, u8 *sign) +init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) { - unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1; - unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20; + unsigned int sg_len; struct scatterlist *sg; unsigned int i; unsigned int j; + unsigned int idx = 0; + int skip; + + sg_len = 1; + for (i = 0; i < num_rqst; i++) + sg_len += rqst[i].rq_nvec + rqst[i].rq_npages; sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL); if (!sg) return NULL; sg_init_table(sg, sg_len); - smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 20, assoc_data_len); - for (i = 1; i < rqst->rq_nvec; i++) - smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base, - rqst->rq_iov[i].iov_len); - for (j = 0; i < sg_len - 1; i++, j++) { - unsigned int len, offset; + for (i = 0; i < num_rqst; i++) { + for (j = 0; j < rqst[i].rq_nvec; j++) { + /* + * The first rqst has a transform header where the + * first 20 bytes are not part of the encrypted blob + */ + skip = (i == 0) && (j == 0) ? 20 : 0; + smb2_sg_set_buf(&sg[idx++], + rqst[i].rq_iov[j].iov_base + skip, + rqst[i].rq_iov[j].iov_len - skip); + } - rqst_page_get_length(rqst, j, &len, &offset); - sg_set_page(&sg[i], rqst->rq_pages[j], len, offset); + for (j = 0; j < rqst[i].rq_npages; j++) { + unsigned int len, offset; + + rqst_page_get_length(&rqst[i], j, &len, &offset); + sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset); + } } - smb2_sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE); + smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE); return sg; } @@ -2386,10 +2568,11 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) * untouched. */ static int -crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) +crypt_message(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst, int enc) { struct smb2_transform_hdr *tr_hdr = - (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base; + (struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20; int rc = 0; struct scatterlist *sg; @@ -2440,7 +2623,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) crypt_len += SMB2_SIGNATURE_SIZE; } - sg = init_sg(rqst, sign); + sg = init_sg(num_rqst, rqst, sign); if (!sg) { cifs_dbg(VFS, "%s: Failed to init sg", __func__); rc = -ENOMEM; @@ -2477,103 +2660,98 @@ free_req: return rc; } +void +smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst) +{ + int i, j; + + for (i = 0; i < num_rqst; i++) { + if (rqst[i].rq_pages) { + for (j = rqst[i].rq_npages - 1; j >= 0; j--) + put_page(rqst[i].rq_pages[j]); + kfree(rqst[i].rq_pages); + } + } +} + +/* + * This function will initialize new_rq and encrypt the content. + * The first entry, new_rq[0], only contains a single iov which contains + * a smb2_transform_hdr and is pre-allocated by the caller. + * This function then populates new_rq[1+] with the content from olq_rq[0+]. + * + * The end result is an array of smb_rqst structures where the first structure + * only contains a single iov for the transform header which we then can pass + * to crypt_message(). + * + * new_rq[0].rq_iov[0] : smb2_transform_hdr pre-allocated by the caller + * new_rq[1+].rq_iov[*] == old_rq[0+].rq_iov[*] : SMB2/3 requests + */ static int -smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, - struct smb_rqst *old_rq) +smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *new_rq, struct smb_rqst *old_rq) { - struct kvec *iov; struct page **pages; - struct smb2_transform_hdr *tr_hdr; - unsigned int npages = old_rq->rq_npages; - unsigned int orig_len; - int i; + struct smb2_transform_hdr *tr_hdr = new_rq[0].rq_iov[0].iov_base; + unsigned int npages; + unsigned int orig_len = 0; + int i, j; int rc = -ENOMEM; - pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pages) - return rc; - - new_rq->rq_pages = pages; - new_rq->rq_offset = old_rq->rq_offset; - new_rq->rq_npages = old_rq->rq_npages; - new_rq->rq_pagesz = old_rq->rq_pagesz; - new_rq->rq_tailsz = old_rq->rq_tailsz; - - for (i = 0; i < npages; i++) { - pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); - if (!pages[i]) - goto err_free_pages; - } - - iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec), - GFP_KERNEL); - if (!iov) - goto err_free_pages; + for (i = 1; i < num_rqst; i++) { + npages = old_rq[i - 1].rq_npages; + pages = kmalloc_array(npages, sizeof(struct page *), + GFP_KERNEL); + if (!pages) + goto err_free; + + new_rq[i].rq_pages = pages; + new_rq[i].rq_npages = npages; + new_rq[i].rq_offset = old_rq[i - 1].rq_offset; + new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz; + new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz; + new_rq[i].rq_iov = old_rq[i - 1].rq_iov; + new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec; + + orig_len += smb_rqst_len(server, &old_rq[i - 1]); + + for (j = 0; j < npages; j++) { + pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); + if (!pages[j]) + goto err_free; + } - /* copy all iovs from the old */ - memcpy(&iov[1], &old_rq->rq_iov[0], - sizeof(struct kvec) * old_rq->rq_nvec); + /* copy pages form the old */ + for (j = 0; j < npages; j++) { + char *dst, *src; + unsigned int offset, len; - new_rq->rq_iov = iov; - new_rq->rq_nvec = old_rq->rq_nvec + 1; + rqst_page_get_length(&new_rq[i], j, &len, &offset); - tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL); - if (!tr_hdr) - goto err_free_iov; + dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset; + src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset; - orig_len = smb_rqst_len(server, old_rq); + memcpy(dst, src, len); + kunmap(new_rq[i].rq_pages[j]); + kunmap(old_rq[i - 1].rq_pages[j]); + } + } - /* fill the 2nd iov with a transform header */ + /* fill the 1st iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq); - new_rq->rq_iov[0].iov_base = tr_hdr; - new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr); - - /* copy pages form the old */ - for (i = 0; i < npages; i++) { - char *dst, *src; - unsigned int offset, len; - - rqst_page_get_length(new_rq, i, &len, &offset); - - dst = (char *) kmap(new_rq->rq_pages[i]) + offset; - src = (char *) kmap(old_rq->rq_pages[i]) + offset; - memcpy(dst, src, len); - kunmap(new_rq->rq_pages[i]); - kunmap(old_rq->rq_pages[i]); - } - - rc = crypt_message(server, new_rq, 1); + rc = crypt_message(server, num_rqst, new_rq, 1); cifs_dbg(FYI, "encrypt message returned %d", rc); if (rc) - goto err_free_tr_hdr; + goto err_free; return rc; -err_free_tr_hdr: - kfree(tr_hdr); -err_free_iov: - kfree(iov); -err_free_pages: - for (i = i - 1; i >= 0; i--) - put_page(pages[i]); - kfree(pages); +err_free: + smb3_free_compound_rqst(num_rqst - 1, &new_rq[1]); return rc; } -static void -smb3_free_transform_rq(struct smb_rqst *rqst) -{ - int i = rqst->rq_npages - 1; - - for (; i >= 0; i--) - put_page(rqst->rq_pages[i]); - kfree(rqst->rq_pages); - /* free transform header */ - kfree(rqst->rq_iov[0].iov_base); - kfree(rqst->rq_iov); -} - static int smb3_is_transform_hdr(void *buf) { @@ -2603,7 +2781,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, rqst.rq_pagesz = PAGE_SIZE; rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? : PAGE_SIZE; - rc = crypt_message(server, &rqst, 0); + rc = crypt_message(server, 1, &rqst, 0); cifs_dbg(FYI, "decrypt message returned %d\n", rc); if (rc) @@ -2878,13 +3056,20 @@ discard_data: static int receive_encrypted_standard(struct TCP_Server_Info *server, - struct mid_q_entry **mid) + struct mid_q_entry **mids, char **bufs, + int *num_mids) { - int length; + int ret, length; char *buf = server->smallbuf; + char *tmpbuf; + struct smb2_sync_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; struct mid_q_entry *mid_entry; + int next_is_large; + char *next_buffer = NULL; + + *num_mids = 0; /* switch to large buffer if too big for a small one */ if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) { @@ -2905,24 +3090,61 @@ receive_encrypted_standard(struct TCP_Server_Info *server, if (length) return length; + next_is_large = server->large_buf; + one_more: + shdr = (struct smb2_sync_hdr *)buf; + if (shdr->NextCommand) { + if (next_is_large) { + tmpbuf = server->bigbuf; + next_buffer = (char *)cifs_buf_get(); + } else { + tmpbuf = server->smallbuf; + next_buffer = (char *)cifs_small_buf_get(); + } + memcpy(next_buffer, + tmpbuf + le32_to_cpu(shdr->NextCommand), + pdu_length - le32_to_cpu(shdr->NextCommand)); + } + mid_entry = smb2_find_mid(server, buf); if (mid_entry == NULL) cifs_dbg(FYI, "mid not found\n"); else { cifs_dbg(FYI, "mid found\n"); mid_entry->decrypted = true; + mid_entry->resp_buf_size = server->pdu_size; } - *mid = mid_entry; + if (*num_mids >= MAX_COMPOUND) { + cifs_dbg(VFS, "too many PDUs in compound\n"); + return -1; + } + bufs[*num_mids] = buf; + mids[(*num_mids)++] = mid_entry; if (mid_entry && mid_entry->handle) - return mid_entry->handle(server, mid_entry); + ret = mid_entry->handle(server, mid_entry); + else + ret = cifs_handle_standard(server, mid_entry); - return cifs_handle_standard(server, mid_entry); + if (ret == 0 && shdr->NextCommand) { + pdu_length -= le32_to_cpu(shdr->NextCommand); + server->large_buf = next_is_large; + if (next_is_large) + server->bigbuf = next_buffer; + else + server->smallbuf = next_buffer; + + buf += le32_to_cpu(shdr->NextCommand); + goto one_more; + } + + return ret; } static int -smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid) +smb3_receive_transform(struct TCP_Server_Info *server, + struct mid_q_entry **mids, char **bufs, int *num_mids) { char *buf = server->smallbuf; unsigned int pdu_length = server->pdu_size; @@ -2945,10 +3167,11 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid) return -ECONNABORTED; } + /* TODO: add support for compounds containing READ. */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) - return receive_encrypted_read(server, mid); + return receive_encrypted_read(server, &mids[0]); - return receive_encrypted_standard(server, mid); + return receive_encrypted_standard(server, mids, bufs, num_mids); } int @@ -3250,7 +3473,6 @@ struct smb_version_operations smb30_operations = { .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, .init_transform_rq = smb3_init_transform_rq, - .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, @@ -3267,7 +3489,6 @@ struct smb_version_operations smb30_operations = { .next_header = smb2_next_header, }; -#ifdef CONFIG_CIFS_SMB311 struct smb_version_operations smb311_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -3335,7 +3556,7 @@ struct smb_version_operations smb311_operations = { .is_status_pending = smb2_is_status_pending, .is_session_expired = smb2_is_session_expired, .oplock_response = smb2_oplock_response, - .queryfs = smb2_queryfs, + .queryfs = smb311_queryfs, .mand_lock = smb2_mand_lock, .mand_unlock_range = smb2_unlock_range, .push_mand_locks = smb2_push_mandatory_locks, @@ -3357,7 +3578,6 @@ struct smb_version_operations smb311_operations = { .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, .init_transform_rq = smb3_init_transform_rq, - .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, @@ -3366,9 +3586,13 @@ struct smb_version_operations smb311_operations = { .query_all_EAs = smb2_query_eas, .set_EA = smb2_set_ea, #endif /* CIFS_XATTR */ +#ifdef CONFIG_CIFS_ACL + .get_acl = get_smb2_acl, + .get_acl_by_fid = get_smb2_acl_by_fid, + .set_acl = set_smb2_acl, +#endif /* CIFS_ACL */ .next_header = smb2_next_header, }; -#endif /* CIFS_SMB311 */ struct smb_version_values smb20_values = { .version_string = SMB20_VERSION_STRING, @@ -3496,7 +3720,6 @@ struct smb_version_values smb302_values = { .create_lease_size = sizeof(struct create_lease_v2), }; -#ifdef CONFIG_CIFS_SMB311 struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, @@ -3517,4 +3740,3 @@ struct smb_version_values smb311_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease_v2), }; -#endif /* SMB311 */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3c92678cb45b..2f1938011395 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -80,7 +80,7 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */ }; -static int smb3_encryption_required(const struct cifs_tcon *tcon) +int smb3_encryption_required(const struct cifs_tcon *tcon) { if (!tcon) return 0; @@ -360,17 +360,15 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, total_len); if (tcon != NULL) { -#ifdef CONFIG_CIFS_STATS2 uint16_t com_code = le16_to_cpu(smb2_command); cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]); -#endif cifs_stats_inc(&tcon->num_smbs_sent); } return rc; } -#ifdef CONFIG_CIFS_SMB311 + /* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */ #define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */ @@ -585,13 +583,6 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode) return 0; } -#else -static void assemble_neg_contexts(struct smb2_negotiate_req *req, - unsigned int *total_len) -{ - return; -} -#endif /* SMB311 */ /* * @@ -636,10 +627,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) return rc; req->sync_hdr.SessionId = 0; -#ifdef CONFIG_CIFS_SMB311 + memset(server->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE); memset(ses->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE); -#endif if (strcmp(ses->server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { @@ -741,10 +731,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); -#ifdef CONFIG_CIFS_SMB311 else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n"); -#endif /* SMB311 */ else { cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n", le16_to_cpu(rsp->DialectRevision)); @@ -753,9 +741,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } server->dialect = le16_to_cpu(rsp->DialectRevision); - /* BB: add check that dialect was valid given dialect(s) we asked for */ - -#ifdef CONFIG_CIFS_SMB311 /* * Keep a copy of the hash after negprot. This hash will be * the starting hash value for all sessions made from this @@ -763,7 +748,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) */ memcpy(server->preauth_sha_hash, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); -#endif + /* SMB2 only has an extended negflavor */ server->negflavor = CIFS_NEGFLAVOR_EXTENDED; /* set it to the maximum buffer size value we can send with 1 credit */ @@ -804,7 +789,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) rc = -EIO; } -#ifdef CONFIG_CIFS_SMB311 if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { if (rsp->NegotiateContextCount) rc = smb311_decode_neg_context(rsp, server, @@ -812,7 +796,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) else cifs_dbg(VFS, "Missing expected negotiate contexts\n"); } -#endif /* CONFIG_CIFS_SMB311 */ neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -1373,13 +1356,11 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, sess_data->nls_cp = (struct nls_table *) nls_cp; sess_data->previous_session = ses->Suid; -#ifdef CONFIG_CIFS_SMB311 /* * Initialize the session hash with the server one. */ memcpy(ses->preauth_sha_hash, ses->server->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); -#endif while (sess_data->func) sess_data->func(sess_data); @@ -1875,6 +1856,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, return 0; } +/* See MS-SMB2 2.2.13.2.7 */ +static struct crt_twarp_ctxt * +create_twarp_buf(__u64 timewarp) +{ + struct crt_twarp_ctxt *buf; + + buf = kzalloc(sizeof(struct crt_twarp_ctxt), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct crt_twarp_ctxt, Timestamp)); + buf->ccontext.DataLength = cpu_to_le32(8); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct crt_twarp_ctxt, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + /* SMB2_CREATE_TIMEWARP_TOKEN is "TWrp" */ + buf->Name[0] = 'T'; + buf->Name[1] = 'W'; + buf->Name[2] = 'r'; + buf->Name[3] = 'p'; + buf->Timestamp = cpu_to_le64(timewarp); + return buf; +} + +/* See MS-SMB2 2.2.13.2.7 */ +static int +add_twarp_context(struct kvec *iov, unsigned int *num_iovec, __u64 timewarp) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + iov[num].iov_base = create_twarp_buf(timewarp); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct crt_twarp_ctxt); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = cpu_to_le32( + sizeof(struct smb2_create_req) + + iov[num - 1].iov_len); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct crt_twarp_ctxt)); + *num_iovec = num + 1; + return 0; +} + static int alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, const char *treename, const __le16 *path) @@ -1920,7 +1946,6 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, return 0; } -#ifdef CONFIG_CIFS_SMB311 int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *full_path, @@ -1928,7 +1953,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, { struct smb_rqst rqst; struct smb2_create_req *req; - struct smb2_create_rsp *rsp; + struct smb2_create_rsp *rsp = NULL; struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; struct kvec iov[3]; /* make sure at least one for each open context */ @@ -1943,27 +1968,31 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, char *pc_buf = NULL; int flags = 0; unsigned int total_len; - __le16 *path = cifs_convert_path_to_utf16(full_path, cifs_sb); - - if (!path) - return -ENOMEM; + __le16 *utf16_path = NULL; cifs_dbg(FYI, "mkdir\n"); + /* resource #1: path allocation */ + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + if (ses && (ses->server)) server = ses->server; - else - return -EIO; + else { + rc = -EIO; + goto err_free_path; + } + /* resource #2: request */ rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len); - if (rc) - return rc; + goto err_free_path; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->ImpersonationLevel = IL_IMPERSONATION; req->DesiredAccess = cpu_to_le32(FILE_WRITE_ATTRIBUTES); /* File attributes ignored on open (used in create though) */ @@ -1992,50 +2021,44 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; rc = alloc_path_with_tree_prefix(©_path, ©_size, &name_len, - tcon->treeName, path); - if (rc) { - cifs_small_buf_release(req); - return rc; - } + tcon->treeName, utf16_path); + if (rc) + goto err_free_req; + req->NameLength = cpu_to_le16(name_len * 2); uni_path_len = copy_size; - path = copy_path; + /* free before overwriting resource */ + kfree(utf16_path); + utf16_path = copy_path; } else { - uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; + uni_path_len = (2 * UniStrnlen((wchar_t *)utf16_path, PATH_MAX)) + 2; /* MUST set path len (NameLength) to 0 opening root of share */ req->NameLength = cpu_to_le16(uni_path_len - 2); if (uni_path_len % 8 != 0) { copy_size = roundup(uni_path_len, 8); copy_path = kzalloc(copy_size, GFP_KERNEL); if (!copy_path) { - cifs_small_buf_release(req); - return -ENOMEM; + rc = -ENOMEM; + goto err_free_req; } - memcpy((char *)copy_path, (const char *)path, + memcpy((char *)copy_path, (const char *)utf16_path, uni_path_len); uni_path_len = copy_size; - path = copy_path; + /* free before overwriting resource */ + kfree(utf16_path); + utf16_path = copy_path; } } iov[1].iov_len = uni_path_len; - iov[1].iov_base = path; + iov[1].iov_base = utf16_path; req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; if (tcon->posix_extensions) { - if (n_iov > 2) { - struct create_context *ccontext = - (struct create_context *)iov[n_iov-1].iov_base; - ccontext->Next = - cpu_to_le32(iov[n_iov-1].iov_len); - } - + /* resource #3: posix buf */ rc = add_posix_context(iov, &n_iov, mode); - if (rc) { - cifs_small_buf_release(req); - kfree(copy_path); - return rc; - } + if (rc) + goto err_free_req; pc_buf = iov[n_iov-1].iov_base; } @@ -2044,73 +2067,57 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, rqst.rq_iov = iov; rqst.rq_nvec = n_iov; - rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, - &rsp_iov); - - cifs_small_buf_release(req); - rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; - - if (rc != 0) { + /* resource #4: response buffer */ + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); + if (rc) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); trace_smb3_posix_mkdir_err(xid, tcon->tid, ses->Suid, - CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES, rc); - goto smb311_mkdir_exit; - } else - trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid, - ses->Suid, CREATE_NOT_FILE, - FILE_WRITE_ATTRIBUTES); + CREATE_NOT_FILE, + FILE_WRITE_ATTRIBUTES, rc); + goto err_free_rsp_buf; + } + + rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; + trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid, + ses->Suid, CREATE_NOT_FILE, + FILE_WRITE_ATTRIBUTES); SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId); /* Eventually save off posix specific response info and timestaps */ -smb311_mkdir_exit: - kfree(copy_path); - kfree(pc_buf); +err_free_rsp_buf: free_rsp_buf(resp_buftype, rsp); + kfree(pc_buf); +err_free_req: + cifs_small_buf_release(req); +err_free_path: + kfree(utf16_path); return rc; - } -#endif /* SMB311 */ int -SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, - __u8 *oplock, struct smb2_file_all_info *buf, - struct kvec *err_iov, int *buftype) +SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, + struct cifs_open_parms *oparms, __le16 *path) { - struct smb_rqst rqst; + struct TCP_Server_Info *server = tcon->ses->server; struct smb2_create_req *req; - struct smb2_create_rsp *rsp; - struct TCP_Server_Info *server; - struct cifs_tcon *tcon = oparms->tcon; - struct cifs_ses *ses = tcon->ses; - struct kvec iov[5]; /* make sure at least one for each open context */ - struct kvec rsp_iov = {NULL, 0}; - int resp_buftype; - int uni_path_len; - __le16 *copy_path = NULL; - int copy_size; - int rc = 0; unsigned int n_iov = 2; __u32 file_attributes = 0; - char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL; - int flags = 0; + int copy_size; + int uni_path_len; unsigned int total_len; - - cifs_dbg(FYI, "create/open\n"); - - if (ses && (ses->server)) - server = ses->server; - else - return -EIO; + struct kvec *iov = rqst->rq_iov; + __le16 *copy_path; + int rc; rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len); - if (rc) return rc; - if (smb3_encryption_required(tcon)) - flags |= CIFS_TRANSFORM_REQ; + iov[0].iov_base = (char *)req; + /* -1 since last byte is buf[0] which is sent below (path) */ + iov[0].iov_len = total_len - 1; if (oparms->create_options & CREATE_OPTION_READONLY) file_attributes |= ATTR_READONLY; @@ -2124,11 +2131,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, req->ShareAccess = FILE_SHARE_ALL_LE; req->CreateDisposition = cpu_to_le32(oparms->disposition); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); - - iov[0].iov_base = (char *)req; - /* -1 since last byte is buf[0] which is sent below (path) */ - iov[0].iov_len = total_len - 1; - req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)); /* [MS-SMB2] 2.2.13 NameOffset: @@ -2146,10 +2148,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, rc = alloc_path_with_tree_prefix(©_path, ©_size, &name_len, tcon->treeName, path); - if (rc) { - cifs_small_buf_release(req); + if (rc) return rc; - } req->NameLength = cpu_to_le16(name_len * 2); uni_path_len = copy_size; path = copy_path; @@ -2157,18 +2157,16 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; /* MUST set path len (NameLength) to 0 opening root of share */ req->NameLength = cpu_to_le16(uni_path_len - 2); - if (uni_path_len % 8 != 0) { - copy_size = roundup(uni_path_len, 8); - copy_path = kzalloc(copy_size, GFP_KERNEL); - if (!copy_path) { - cifs_small_buf_release(req); - return -ENOMEM; - } - memcpy((char *)copy_path, (const char *)path, - uni_path_len); - uni_path_len = copy_size; - path = copy_path; - } + copy_size = uni_path_len; + if (copy_size % 8 != 0) + copy_size = roundup(copy_size, 8); + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; } iov[1].iov_len = uni_path_len; @@ -2183,12 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, else { rc = add_lease_context(server, iov, &n_iov, oparms->fid->lease_key, oplock); - if (rc) { - cifs_small_buf_release(req); - kfree(copy_path); + if (rc) return rc; - } - lc_buf = iov[n_iov-1].iov_base; } if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { @@ -2202,16 +2196,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, rc = add_durable_context(iov, &n_iov, oparms, tcon->use_persistent); - if (rc) { - cifs_small_buf_release(req); - kfree(copy_path); - kfree(lc_buf); + if (rc) return rc; - } - dhc_buf = iov[n_iov-1].iov_base; } -#ifdef CONFIG_CIFS_SMB311 if (tcon->posix_extensions) { if (n_iov > 2) { struct create_context *ccontext = @@ -2221,24 +2209,79 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } rc = add_posix_context(iov, &n_iov, oparms->mode); - if (rc) { - cifs_small_buf_release(req); - kfree(copy_path); - kfree(lc_buf); - kfree(dhc_buf); + if (rc) return rc; + } + + if (tcon->snapshot_time) { + cifs_dbg(FYI, "adding snapshot context\n"); + if (n_iov > 2) { + struct create_context *ccontext = + (struct create_context *)iov[n_iov-1].iov_base; + ccontext->Next = + cpu_to_le32(iov[n_iov-1].iov_len); } - pc_buf = iov[n_iov-1].iov_base; + + rc = add_twarp_context(iov, &n_iov, tcon->snapshot_time); + if (rc) + return rc; } -#endif /* SMB311 */ + + + rqst->rq_nvec = n_iov; + return 0; +} + +/* rq_iov[0] is the request and is released by cifs_small_buf_release(). + * All other vectors are freed by kfree(). + */ +void +SMB2_open_free(struct smb_rqst *rqst) +{ + int i; + + cifs_small_buf_release(rqst->rq_iov[0].iov_base); + for (i = 1; i < rqst->rq_nvec; i++) + if (rqst->rq_iov[i].iov_base != smb2_padding) + kfree(rqst->rq_iov[i].iov_base); +} + +int +SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, + __u8 *oplock, struct smb2_file_all_info *buf, + struct kvec *err_iov, int *buftype) +{ + struct smb_rqst rqst; + struct smb2_create_rsp *rsp = NULL; + struct TCP_Server_Info *server; + struct cifs_tcon *tcon = oparms->tcon; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[5]; /* make sure at least one for each open context */ + struct kvec rsp_iov = {NULL, 0}; + int resp_buftype; + int rc = 0; + int flags = 0; + + cifs_dbg(FYI, "create/open\n"); + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; memset(&rqst, 0, sizeof(struct smb_rqst)); + memset(&iov, 0, sizeof(iov)); rqst.rq_iov = iov; - rqst.rq_nvec = n_iov; + rqst.rq_nvec = 5; + + rc = SMB2_open_init(tcon, &rqst, oplock, oparms, path); + if (rc) + goto creat_exit; rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; if (rc != 0) { @@ -2275,10 +2318,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, else *oplock = rsp->OplockLevel; creat_exit: - kfree(copy_path); - kfree(lc_buf); - kfree(dhc_buf); - kfree(pc_buf); + SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); return rc; } @@ -2469,43 +2509,62 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, } int +SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, + u64 persistent_fid, u64 volatile_fid) +{ + struct smb2_close_req *req; + struct kvec *iov = rqst->rq_iov; + unsigned int total_len; + int rc; + + rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len); + if (rc) + return rc; + + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; + + return 0; +} + +void +SMB2_close_free(struct smb_rqst *rqst) +{ + cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ +} + +int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, int flags) { struct smb_rqst rqst; - struct smb2_close_req *req; - struct smb2_close_rsp *rsp; + struct smb2_close_rsp *rsp = NULL; struct cifs_ses *ses = tcon->ses; struct kvec iov[1]; struct kvec rsp_iov; int resp_buftype; int rc = 0; - unsigned int total_len; cifs_dbg(FYI, "Close\n"); if (!ses || !(ses->server)) return -EIO; - rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len); - if (rc) - return rc; - if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->PersistentFileId = persistent_fid; - req->VolatileFileId = volatile_fid; - - iov[0].iov_base = (char *)req; - iov[0].iov_len = total_len; - memset(&rqst, 0, sizeof(struct smb_rqst)); + memset(&iov, 0, sizeof(iov)); rqst.rq_iov = iov; rqst.rq_nvec = 1; + rc = SMB2_close_init(tcon, &rqst, persistent_fid, volatile_fid); + if (rc) + goto close_exit; + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; if (rc != 0) { @@ -2518,6 +2577,7 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, /* BB FIXME - decode close response, update inode for caching */ close_exit: + SMB2_close_free(&rqst); free_rsp_buf(resp_buftype, rsp); return rc; } @@ -2529,9 +2589,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0); } -static int -validate_iov(unsigned int offset, unsigned int buffer_length, - struct kvec *iov, unsigned int min_buf_size) +int +smb2_validate_iov(unsigned int offset, unsigned int buffer_length, + struct kvec *iov, unsigned int min_buf_size) { unsigned int smb_len = iov->iov_len; char *end_of_smb = smb_len + (char *)iov->iov_base; @@ -2575,7 +2635,7 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length, if (!data) return -EINVAL; - rc = validate_iov(offset, buffer_length, iov, minbufsize); + rc = smb2_validate_iov(offset, buffer_length, iov, minbufsize); if (rc) return rc; @@ -2584,36 +2644,22 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length, return 0; } -static int -query_info(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type, - u32 additional_info, size_t output_len, size_t min_len, void **data, - u32 *dlen) +int +SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, + u64 persistent_fid, u64 volatile_fid, + u8 info_class, u8 info_type, u32 additional_info, + size_t output_len) { - struct smb_rqst rqst; struct smb2_query_info_req *req; - struct smb2_query_info_rsp *rsp = NULL; - struct kvec iov[2]; - struct kvec rsp_iov; - int rc = 0; - int resp_buftype; - struct cifs_ses *ses = tcon->ses; - int flags = 0; + struct kvec *iov = rqst->rq_iov; unsigned int total_len; - - cifs_dbg(FYI, "Query Info\n"); - - if (!ses || !(ses->server)) - return -EIO; + int rc; rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req, &total_len); if (rc) return rc; - if (smb3_encryption_required(tcon)) - flags |= CIFS_TRANSFORM_REQ; - req->InfoType = info_type; req->FileInfoClass = info_class; req->PersistentFileId = persistent_fid; @@ -2630,13 +2676,50 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, iov[0].iov_base = (char *)req; /* 1 for Buffer */ iov[0].iov_len = total_len - 1; + return 0; +} + +void +SMB2_query_info_free(struct smb_rqst *rqst) +{ + cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ +} + +static int +query_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type, + u32 additional_info, size_t output_len, size_t min_len, void **data, + u32 *dlen) +{ + struct smb_rqst rqst; + struct smb2_query_info_rsp *rsp = NULL; + struct kvec iov[1]; + struct kvec rsp_iov; + int rc = 0; + int resp_buftype; + struct cifs_ses *ses = tcon->ses; + int flags = 0; + + cifs_dbg(FYI, "Query Info\n"); + + if (!ses || !(ses->server)) + return -EIO; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; memset(&rqst, 0, sizeof(struct smb_rqst)); + memset(&iov, 0, sizeof(iov)); rqst.rq_iov = iov; rqst.rq_nvec = 1; + rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid, + info_class, info_type, additional_info, + output_len); + if (rc) + goto qinf_exit; + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; if (rc) { @@ -2665,6 +2748,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, &rsp_iov, min_len, *data); qinf_exit: + SMB2_query_info_free(&rqst); free_rsp_buf(resp_buftype, rsp); return rc; } @@ -3623,9 +3707,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, goto qdir_exit; } - rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset), - le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, - info_buf_size); + rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, + info_buf_size); if (rc) goto qdir_exit; @@ -3927,9 +4011,9 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -static void -copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, - struct kstatfs *kst) +void +smb2_copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, + struct kstatfs *kst) { kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); @@ -3939,6 +4023,25 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, return; } +static void +copy_posix_fs_info_to_kstatfs(FILE_SYSTEM_POSIX_INFO *response_data, + struct kstatfs *kst) +{ + kst->f_bsize = le32_to_cpu(response_data->BlockSize); + kst->f_blocks = le64_to_cpu(response_data->TotalBlocks); + kst->f_bfree = le64_to_cpu(response_data->BlocksAvail); + if (response_data->UserBlocksAvail == cpu_to_le64(-1)) + kst->f_bavail = kst->f_bfree; + else + kst->f_bavail = le64_to_cpu(response_data->UserBlocksAvail); + if (response_data->TotalFileNodes != cpu_to_le64(-1)) + kst->f_files = le64_to_cpu(response_data->TotalFileNodes); + if (response_data->FreeFileNodes != cpu_to_le64(-1)) + kst->f_ffree = le64_to_cpu(response_data->FreeFileNodes); + + return; +} + static int build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, int outbuf_len, u64 persistent_fid, u64 volatile_fid) @@ -3976,6 +4079,54 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, } int +SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) +{ + struct smb_rqst rqst; + struct smb2_query_info_rsp *rsp = NULL; + struct kvec iov; + struct kvec rsp_iov; + int rc = 0; + int resp_buftype; + struct cifs_ses *ses = tcon->ses; + FILE_SYSTEM_POSIX_INFO *info = NULL; + int flags = 0; + + rc = build_qfs_info_req(&iov, tcon, FS_POSIX_INFORMATION, + sizeof(FILE_SYSTEM_POSIX_INFO), + persistent_fid, volatile_fid); + if (rc) + return rc; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); + cifs_small_buf_release(iov.iov_base); + if (rc) { + cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); + goto posix_qfsinf_exit; + } + rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; + + info = (FILE_SYSTEM_POSIX_INFO *)( + le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); + rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, + sizeof(FILE_SYSTEM_POSIX_INFO)); + if (!rc) + copy_posix_fs_info_to_kstatfs(info, fsdata); + +posix_qfsinf_exit: + free_rsp_buf(resp_buftype, rsp_iov.iov_base); + return rc; +} + +int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) { @@ -4012,11 +4163,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, info = (struct smb2_fs_full_size_info *)( le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); - rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset), - le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, - sizeof(struct smb2_fs_full_size_info)); + rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, + sizeof(struct smb2_fs_full_size_info)); if (!rc) - copy_fs_info_to_kstatfs(info, fsdata); + smb2_copy_fs_info_to_kstatfs(info, fsdata); qfsinf_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); @@ -4046,6 +4197,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, } else if (level == FS_SECTOR_SIZE_INFORMATION) { max_len = sizeof(struct smb3_fs_ss_info); min_len = sizeof(struct smb3_fs_ss_info); + } else if (level == FS_VOLUME_INFORMATION) { + max_len = sizeof(struct smb3_fs_vol_info) + MAX_VOL_LABEL_LEN; + min_len = sizeof(struct smb3_fs_vol_info); } else { cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level); return -EINVAL; @@ -4073,7 +4227,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rsp_len = le32_to_cpu(rsp->OutputBufferLength); offset = le16_to_cpu(rsp->OutputBufferOffset); - rc = validate_iov(offset, rsp_len, &rsp_iov, min_len); + rc = smb2_validate_iov(offset, rsp_len, &rsp_iov, min_len); if (rc) goto qfsattr_exit; @@ -4090,6 +4244,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, tcon->ss_flags = le32_to_cpu(ss_info->Flags); tcon->perf_sector_size = le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf); + } else if (level == FS_VOLUME_INFORMATION) { + struct smb3_fs_vol_info *vol_info = (struct smb3_fs_vol_info *) + (offset + (char *)rsp); + tcon->vol_serial_number = vol_info->VolumeSerialNumber; + tcon->vol_create_time = vol_info->VolumeCreationTime; } qfsattr_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index a671adcc44a6..a2eeae9e0432 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -153,6 +153,8 @@ struct smb2_transform_hdr { * */ +#define COMPOUND_FID 0xFFFFFFFFFFFFFFFFULL + #define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9) struct smb2_err_rsp { @@ -765,6 +767,14 @@ struct create_durable_handle_reconnect_v2 { struct durable_reconnect_context_v2 dcontext; } __packed; +/* See MS-SMB2 2.2.13.2.5 */ +struct crt_twarp_ctxt { + struct create_context ccontext; + __u8 Name[8]; + __le64 Timestamp; + +} __packed; + #define COPY_CHUNK_RES_KEY_SIZE 24 struct resume_key_req { char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; @@ -1223,6 +1233,7 @@ struct smb2_lease_ack { #define FS_DRIVER_PATH_INFORMATION 9 /* Local only */ #define FS_VOLUME_FLAGS_INFORMATION 10 /* Local only */ #define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */ +#define FS_POSIX_INFORMATION 100 /* SMB3.1.1 POSIX. Query */ struct smb2_fs_full_size_info { __le64 TotalAllocationUnits; @@ -1248,6 +1259,17 @@ struct smb3_fs_ss_info { __le32 ByteOffsetForPartitionAlignment; } __packed; +/* volume info struct - see MS-FSCC 2.5.9 */ +#define MAX_VOL_LABEL_LEN 32 +struct smb3_fs_vol_info { + __le64 VolumeCreationTime; + __u32 VolumeSerialNumber; + __le32 VolumeLabelLength; /* includes trailing null */ + __u8 SupportsObjects; /* True if eg like NTFS, supports objects */ + __u8 Reserved; + __u8 VolumeLabel[0]; /* variable len */ +} __packed; + /* partial list of QUERY INFO levels */ #define FILE_DIRECTORY_INFORMATION 1 #define FILE_FULL_DIRECTORY_INFORMATION 2 @@ -1361,4 +1383,6 @@ struct smb2_file_eof_info { /* encoding of request for level 10 */ __le64 EndOfFile; /* new end of file value */ } __packed; /* level 20 Set */ +extern char smb2_padding[7]; + #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 6e6a4f2ec890..b4076577eeb7 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -68,6 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid); +extern void close_shroot(struct cached_fid *cfid); extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src); extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, @@ -132,6 +133,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, struct kvec *err_iov, int *resp_buftype); +extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, + __u8 *oplock, struct cifs_open_parms *oparms, + __le16 *path); +extern void SMB2_open_free(struct smb_rqst *rqst); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, @@ -140,6 +145,9 @@ extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, int flags); +extern int SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, + u64 persistent_file_id, u64 volatile_file_id); +extern void SMB2_close_free(struct smb_rqst *rqst); extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, @@ -149,6 +157,11 @@ extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); +extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, + u64 persistent_fid, u64 volatile_fid, + u8 info_class, u8 info_type, + u32 additional_info, size_t output_len); +extern void SMB2_query_info_free(struct smb_rqst *rqst); extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, void **data, unsigned int *plen); @@ -197,6 +210,9 @@ void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); +extern int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_file_id, u64 volatile_file_id, + struct kstatfs *FSData); extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, int lvl); extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, @@ -213,9 +229,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -#ifdef CONFIG_CIFS_SMB311 +extern int smb3_encryption_required(const struct cifs_tcon *tcon); +extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, + struct kvec *iov, unsigned int min_buf_size); +extern void smb2_copy_fs_info_to_kstatfs( + struct smb2_fs_full_size_info *pfs_inf, + struct kstatfs *kst); extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); extern int smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec); -#endif #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 719d55e63d88..7b351c65ee46 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -70,7 +70,6 @@ err: return rc; } -#ifdef CONFIG_CIFS_SMB311 int smb311_crypto_shash_allocate(struct TCP_Server_Info *server) { @@ -98,7 +97,6 @@ err: cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256); return rc; } -#endif static struct cifs_ses * smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) @@ -173,7 +171,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; - struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash; + struct shash_desc *shash; struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); @@ -187,7 +185,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = smb2_crypto_shash_allocate(server); if (rc) { - cifs_dbg(VFS, "%s: shah256 alloc failed\n", __func__); + cifs_dbg(VFS, "%s: sha256 alloc failed\n", __func__); return rc; } @@ -198,6 +196,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; } + shash = &server->secmech.sdeschmacsha256->shash; rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sha256", __func__); @@ -395,7 +394,6 @@ generate_smb30signingkey(struct cifs_ses *ses) return generate_smb3signingkey(ses, &triplet); } -#ifdef CONFIG_CIFS_SMB311 int generate_smb311signingkey(struct cifs_ses *ses) @@ -423,7 +421,6 @@ generate_smb311signingkey(struct cifs_ses *ses) return generate_smb3signingkey(ses, &triplet); } -#endif /* 311 */ int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index c55ea4e6201b..5fdb9a509a97 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -802,7 +802,7 @@ out1: */ static int smbd_post_send_negotiate_req(struct smbd_connection *info) { - struct ib_send_wr send_wr, *send_wr_fail; + struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; struct smbd_negotiate_req *packet; @@ -854,7 +854,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->has_payload = false; atomic_inc(&info->send_pending); - rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(info->id->qp, &send_wr, NULL); if (!rc) return 0; @@ -1024,7 +1024,7 @@ static void smbd_destroy_header(struct smbd_connection *info, static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request, bool has_payload) { - struct ib_send_wr send_wr, *send_wr_fail; + struct ib_send_wr send_wr; int rc, i; for (i = 0; i < request->num_sge; i++) { @@ -1055,7 +1055,7 @@ static int smbd_post_send(struct smbd_connection *info, atomic_inc(&info->send_pending); } - rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(info->id->qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); if (has_payload) { @@ -1184,7 +1184,7 @@ static int smbd_post_send_data( static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { - struct ib_recv_wr recv_wr, *recv_wr_fail = NULL; + struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( @@ -1203,7 +1203,7 @@ static int smbd_post_recv( recv_wr.sg_list = &response->sge; recv_wr.num_sge = 1; - rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail); + rc = ib_post_recv(info->id->qp, &recv_wr, NULL); if (rc) { ib_dma_unmap_single(info->id->device, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); @@ -1662,9 +1662,16 @@ static struct smbd_connection *_smbd_get_connection( info->max_receive_size = smbd_max_receive_size; info->keep_alive_interval = smbd_keep_alive_interval; - if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) { - log_rdma_event(ERR, "warning: device max_sge = %d too small\n", - info->id->device->attrs.max_sge); + if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_send_sge = %d too small\n", + info->id->device->attrs.max_send_sge); + log_rdma_event(ERR, "Queue Pair creation may fail\n"); + } + if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, + "warning: device max_recv_sge = %d too small\n", + info->id->device->attrs.max_recv_sge); log_rdma_event(ERR, "Queue Pair creation may fail\n"); } @@ -2473,7 +2480,6 @@ struct smbd_mr *smbd_register_mr( int rc, i; enum dma_data_direction dir; struct ib_reg_wr *reg_wr; - struct ib_send_wr *bad_wr; if (num_pages > info->max_frmr_depth) { log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", @@ -2547,7 +2553,7 @@ skip_multiple_pages: * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution * on the next ib_post_send when we actaully send I/O to remote peer */ - rc = ib_post_send(info->id->qp, ®_wr->wr, &bad_wr); + rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) return smbdirect_mr; @@ -2592,7 +2598,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) */ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) { - struct ib_send_wr *wr, *bad_wr; + struct ib_send_wr *wr; struct smbd_connection *info = smbdirect_mr->conn; int rc = 0; @@ -2607,7 +2613,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) wr->send_flags = IB_SEND_SIGNALED; init_completion(&smbdirect_mr->invalidate_done); - rc = ib_post_send(info->id->qp, wr, &bad_wr); + rc = ib_post_send(info->id->qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(info); diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 67e413f6ee4d..d4aed5217a56 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -281,6 +281,44 @@ DEFINE_EVENT(smb3_cmd_done_class, smb3_##name, \ TP_ARGS(tid, sesid, cmd, mid)) DEFINE_SMB3_CMD_DONE_EVENT(cmd_done); +DEFINE_SMB3_CMD_DONE_EVENT(ses_expired); + +DECLARE_EVENT_CLASS(smb3_mid_class, + TP_PROTO(__u16 cmd, + __u64 mid, + __u32 pid, + unsigned long when_sent, + unsigned long when_received), + TP_ARGS(cmd, mid, pid, when_sent, when_received), + TP_STRUCT__entry( + __field(__u16, cmd) + __field(__u64, mid) + __field(__u32, pid) + __field(unsigned long, when_sent) + __field(unsigned long, when_received) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->mid = mid; + __entry->pid = pid; + __entry->when_sent = when_sent; + __entry->when_received = when_received; + ), + TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", + __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent, + __entry->when_received) +) + +#define DEFINE_SMB3_MID_EVENT(name) \ +DEFINE_EVENT(smb3_mid_class, smb3_##name, \ + TP_PROTO(__u16 cmd, \ + __u64 mid, \ + __u32 pid, \ + unsigned long when_sent, \ + unsigned long when_received), \ + TP_ARGS(cmd, mid, pid, when_sent, when_received)) + +DEFINE_SMB3_MID_EVENT(slow_rsp); DECLARE_EVENT_CLASS(smb3_exit_err_class, TP_PROTO(unsigned int xid, @@ -422,6 +460,32 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name, \ DEFINE_SMB3_OPEN_DONE_EVENT(open_done); DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done); +DECLARE_EVENT_CLASS(smb3_reconnect_class, + TP_PROTO(__u64 currmid, + char *hostname), + TP_ARGS(currmid, hostname), + TP_STRUCT__entry( + __field(__u64, currmid) + __field(char *, hostname) + ), + TP_fast_assign( + __entry->currmid = currmid; + __entry->hostname = hostname; + ), + TP_printk("server=%s current_mid=0x%llx", + __entry->hostname, + __entry->currmid) +) + +#define DEFINE_SMB3_RECONNECT_EVENT(name) \ +DEFINE_EVENT(smb3_reconnect_class, smb3_##name, \ + TP_PROTO(__u64 currmid, \ + char *hostname), \ + TP_ARGS(currmid, hostname)) + +DEFINE_SMB3_RECONNECT_EVENT(reconnect); +DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect); + #endif /* _CIFS_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a341ec839c83..78f96fa3d7d9 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -115,8 +115,17 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) now = jiffies; /* commands taking longer than one second are indications that something is wrong, unless it is quite a slow link or server */ - if (time_after(now, midEntry->when_alloc + HZ)) { - if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { + if (time_after(now, midEntry->when_alloc + HZ) && + (midEntry->command != command)) { + /* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */ + if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) && + (le16_to_cpu(midEntry->command) >= 0)) + cifs_stats_inc(&midEntry->server->smb2slowcmd[le16_to_cpu(midEntry->command)]); + + trace_smb3_slow_rsp(le16_to_cpu(midEntry->command), + midEntry->mid, midEntry->pid, + midEntry->when_sent, midEntry->when_received); + if (cifsFYI & CIFS_TIMER) { pr_debug(" CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n", @@ -361,6 +370,8 @@ uncork: * socket so the server throws away the partial SMB */ server->tcpStatus = CifsNeedReconnect; + trace_smb3_partial_send_reconnect(server->CurrentMid, + server->hostname); } smbd_done: if (rc < 0 && rc != -EINTR) @@ -373,26 +384,42 @@ smbd_done: } static int -smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags) +smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst, int flags) { - struct smb_rqst cur_rqst; + struct kvec iov; + struct smb2_transform_hdr tr_hdr; + struct smb_rqst cur_rqst[MAX_COMPOUND]; int rc; if (!(flags & CIFS_TRANSFORM_REQ)) - return __smb_send_rqst(server, 1, rqst); + return __smb_send_rqst(server, num_rqst, rqst); - if (!server->ops->init_transform_rq || - !server->ops->free_transform_rq) { - cifs_dbg(VFS, "Encryption requested but transform callbacks are missed\n"); + if (num_rqst > MAX_COMPOUND - 1) + return -ENOMEM; + + memset(&cur_rqst[0], 0, sizeof(cur_rqst)); + memset(&iov, 0, sizeof(iov)); + memset(&tr_hdr, 0, sizeof(tr_hdr)); + + iov.iov_base = &tr_hdr; + iov.iov_len = sizeof(tr_hdr); + cur_rqst[0].rq_iov = &iov; + cur_rqst[0].rq_nvec = 1; + + if (!server->ops->init_transform_rq) { + cifs_dbg(VFS, "Encryption requested but transform callback " + "is missing\n"); return -EIO; } - rc = server->ops->init_transform_rq(server, &cur_rqst, rqst); + rc = server->ops->init_transform_rq(server, num_rqst + 1, + &cur_rqst[0], rqst); if (rc) return rc; - rc = __smb_send_rqst(server, 1, &cur_rqst); - server->ops->free_transform_rq(&cur_rqst); + rc = __smb_send_rqst(server, num_rqst + 1, &cur_rqst[0]); + smb3_free_compound_rqst(num_rqst, &cur_rqst[1]); return rc; } @@ -606,7 +633,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, */ cifs_save_when_sent(mid); cifs_in_send_inc(server); - rc = smb_send_rqst(server, rqst, flags); + rc = smb_send_rqst(server, 1, rqst, flags); cifs_in_send_dec(server); if (rc < 0) { @@ -746,20 +773,21 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) } int -cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, - struct smb_rqst *rqst, int *resp_buf_type, const int flags, - struct kvec *resp_iov) +compound_send_recv(const unsigned int xid, struct cifs_ses *ses, + const int flags, const int num_rqst, struct smb_rqst *rqst, + int *resp_buf_type, struct kvec *resp_iov) { - int rc = 0; + int i, j, rc = 0; int timeout, optype; - struct mid_q_entry *midQ; + struct mid_q_entry *midQ[MAX_COMPOUND]; unsigned int credits = 1; char *buf; timeout = flags & CIFS_TIMEOUT_MASK; optype = flags & CIFS_OP_MASK; - *resp_buf_type = CIFS_NO_BUFFER; /* no response buf yet */ + for (i = 0; i < num_rqst; i++) + resp_buf_type[i] = CIFS_NO_BUFFER; /* no response buf yet */ if ((ses == NULL) || (ses->server == NULL)) { cifs_dbg(VFS, "Null session\n"); @@ -786,98 +814,117 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, mutex_lock(&ses->server->srv_mutex); - midQ = ses->server->ops->setup_request(ses, rqst); - if (IS_ERR(midQ)) { - mutex_unlock(&ses->server->srv_mutex); - /* Update # of requests on wire to server */ - add_credits(ses->server, 1, optype); - return PTR_ERR(midQ); + for (i = 0; i < num_rqst; i++) { + midQ[i] = ses->server->ops->setup_request(ses, &rqst[i]); + if (IS_ERR(midQ[i])) { + for (j = 0; j < i; j++) + cifs_delete_mid(midQ[j]); + mutex_unlock(&ses->server->srv_mutex); + /* Update # of requests on wire to server */ + add_credits(ses->server, 1, optype); + return PTR_ERR(midQ[i]); + } + + midQ[i]->mid_state = MID_REQUEST_SUBMITTED; } - midQ->mid_state = MID_REQUEST_SUBMITTED; cifs_in_send_inc(ses->server); - rc = smb_send_rqst(ses->server, rqst, flags); + rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); cifs_in_send_dec(ses->server); - cifs_save_when_sent(midQ); + + for (i = 0; i < num_rqst; i++) + cifs_save_when_sent(midQ[i]); if (rc < 0) ses->server->sequence_number -= 2; + mutex_unlock(&ses->server->srv_mutex); - if (rc < 0) - goto out; + for (i = 0; i < num_rqst; i++) { + if (rc < 0) + goto out; -#ifdef CONFIG_CIFS_SMB311 - if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) - smb311_update_preauth_hash(ses, rqst->rq_iov, - rqst->rq_nvec); -#endif + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) + smb311_update_preauth_hash(ses, rqst[i].rq_iov, + rqst[i].rq_nvec); - if (timeout == CIFS_ASYNC_OP) - goto out; + if (timeout == CIFS_ASYNC_OP) + goto out; - rc = wait_for_response(ses->server, midQ); - if (rc != 0) { - cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid); - send_cancel(ses->server, rqst, midQ); - spin_lock(&GlobalMid_Lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED) { - midQ->mid_flags |= MID_WAIT_CANCELLED; - midQ->callback = DeleteMidQEntry; + rc = wait_for_response(ses->server, midQ[i]); + if (rc != 0) { + cifs_dbg(FYI, "Cancelling wait for mid %llu\n", + midQ[i]->mid); + send_cancel(ses->server, &rqst[i], midQ[i]); + spin_lock(&GlobalMid_Lock); + if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { + midQ[i]->mid_flags |= MID_WAIT_CANCELLED; + midQ[i]->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + add_credits(ses->server, 1, optype); + return rc; + } spin_unlock(&GlobalMid_Lock); + } + + rc = cifs_sync_mid_result(midQ[i], ses->server); + if (rc != 0) { add_credits(ses->server, 1, optype); return rc; } - spin_unlock(&GlobalMid_Lock); - } - - rc = cifs_sync_mid_result(midQ, ses->server); - if (rc != 0) { - add_credits(ses->server, 1, optype); - return rc; - } - - if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) { - rc = -EIO; - cifs_dbg(FYI, "Bad MID state?\n"); - goto out; - } - buf = (char *)midQ->resp_buf; - resp_iov->iov_base = buf; - resp_iov->iov_len = midQ->resp_buf_size + - ses->server->vals->header_preamble_size; - if (midQ->large_buf) - *resp_buf_type = CIFS_LARGE_BUFFER; - else - *resp_buf_type = CIFS_SMALL_BUFFER; + if (!midQ[i]->resp_buf || + midQ[i]->mid_state != MID_RESPONSE_RECEIVED) { + rc = -EIO; + cifs_dbg(FYI, "Bad MID state?\n"); + goto out; + } -#ifdef CONFIG_CIFS_SMB311 - if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) { - struct kvec iov = { - .iov_base = resp_iov->iov_base, - .iov_len = resp_iov->iov_len - }; - smb311_update_preauth_hash(ses, &iov, 1); - } -#endif + buf = (char *)midQ[i]->resp_buf; + resp_iov[i].iov_base = buf; + resp_iov[i].iov_len = midQ[i]->resp_buf_size + + ses->server->vals->header_preamble_size; + + if (midQ[i]->large_buf) + resp_buf_type[i] = CIFS_LARGE_BUFFER; + else + resp_buf_type[i] = CIFS_SMALL_BUFFER; + + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) { + struct kvec iov = { + .iov_base = resp_iov[i].iov_base, + .iov_len = resp_iov[i].iov_len + }; + smb311_update_preauth_hash(ses, &iov, 1); + } - credits = ses->server->ops->get_credits(midQ); + credits = ses->server->ops->get_credits(midQ[i]); - rc = ses->server->ops->check_receive(midQ, ses->server, - flags & CIFS_LOG_ERROR); + rc = ses->server->ops->check_receive(midQ[i], ses->server, + flags & CIFS_LOG_ERROR); - /* mark it so buf will not be freed by cifs_delete_mid */ - if ((flags & CIFS_NO_RESP) == 0) - midQ->resp_buf = NULL; + /* mark it so buf will not be freed by cifs_delete_mid */ + if ((flags & CIFS_NO_RESP) == 0) + midQ[i]->resp_buf = NULL; + } out: - cifs_delete_mid(midQ); + for (i = 0; i < num_rqst; i++) + cifs_delete_mid(midQ[i]); add_credits(ses->server, credits, optype); return rc; } int +cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, + struct smb_rqst *rqst, int *resp_buf_type, const int flags, + struct kvec *resp_iov) +{ + return compound_send_recv(xid, ses, flags, 1, rqst, resp_buf_type, + resp_iov); +} + +int SendReceive2(const unsigned int xid, struct cifs_ses *ses, struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, const int flags, struct kvec *resp_iov) diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 316af84674f1..50ddb795aaeb 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -35,6 +35,14 @@ #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" #define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */ #define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */ +/* + * Although these three are just aliases for the above, need to move away from + * confusing users and using the 20+ year old term 'cifs' when it is no longer + * secure, replaced by SMB2 (then even more highly secure SMB3) many years ago + */ +#define SMB3_XATTR_CIFS_ACL "system.smb3_acl" +#define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */ +#define SMB3_XATTR_CREATETIME "smb3.creationtime" /* user.smb3.creationtime */ /* BB need to add server (Samba e.g) support for security and trusted prefix */ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT }; @@ -220,10 +228,12 @@ static int cifs_xattr_get(const struct xattr_handler *handler, switch (handler->flags) { case XATTR_USER: cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name); - if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) { + if ((strcmp(name, CIFS_XATTR_ATTRIB) == 0) || + (strcmp(name, SMB3_XATTR_ATTRIB) == 0)) { rc = cifs_attrib_get(dentry, inode, value, size); break; - } else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) { + } else if ((strcmp(name, CIFS_XATTR_CREATETIME) == 0) || + (strcmp(name, SMB3_XATTR_CREATETIME) == 0)) { rc = cifs_creation_time_get(dentry, inode, value, size); break; } @@ -363,6 +373,19 @@ static const struct xattr_handler cifs_cifs_acl_xattr_handler = { .set = cifs_xattr_set, }; +/* + * Although this is just an alias for the above, need to move away from + * confusing users and using the 20 year old term 'cifs' when it is no + * longer secure and was replaced by SMB2/SMB3 a long time ago, and + * SMB3 and later are highly secure. + */ +static const struct xattr_handler smb3_acl_xattr_handler = { + .name = SMB3_XATTR_CIFS_ACL, + .flags = XATTR_CIFS_ACL, + .get = cifs_xattr_get, + .set = cifs_xattr_set, +}; + static const struct xattr_handler cifs_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = XATTR_ACL_ACCESS, @@ -381,6 +404,7 @@ const struct xattr_handler *cifs_xattr_handlers[] = { &cifs_user_xattr_handler, &cifs_os2_xattr_handler, &cifs_cifs_acl_xattr_handler, + &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */ &cifs_posix_acl_access_xattr_handler, &cifs_posix_acl_default_xattr_handler, NULL diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9907475b4226..a9b00942e87d 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -198,34 +198,6 @@ static int do_video_stillpicture(struct file *file, return err; } -struct compat_video_spu_palette { - int length; - compat_uptr_t palette; -}; - -static int do_video_set_spu_palette(struct file *file, - unsigned int cmd, struct compat_video_spu_palette __user *up) -{ - struct video_spu_palette __user *up_native; - compat_uptr_t palp; - int length, err; - - err = get_user(palp, &up->palette); - err |= get_user(length, &up->length); - if (err) - return -EFAULT; - - up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); - err = put_user(compat_ptr(palp), &up_native->palette); - err |= put_user(length, &up_native->length); - if (err) - return -EFAULT; - - err = do_ioctl(file, cmd, (unsigned long) up_native); - - return err; -} - #ifdef CONFIG_BLOCK typedef struct sg_io_hdr32 { compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ @@ -1206,9 +1178,6 @@ COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER) COMPATIBLE_IOCTL(AUDIO_SET_ID) COMPATIBLE_IOCTL(AUDIO_SET_MIXER) COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE) -COMPATIBLE_IOCTL(AUDIO_SET_EXT_ID) -COMPATIBLE_IOCTL(AUDIO_SET_ATTRIBUTES) -COMPATIBLE_IOCTL(AUDIO_SET_KARAOKE) COMPATIBLE_IOCTL(DMX_START) COMPATIBLE_IOCTL(DMX_STOP) COMPATIBLE_IOCTL(DMX_SET_FILTER) @@ -1233,16 +1202,9 @@ COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD) COMPATIBLE_IOCTL(VIDEO_SLOWMOTION) COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES) COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER) -COMPATIBLE_IOCTL(VIDEO_SET_ID) COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE) COMPATIBLE_IOCTL(VIDEO_SET_FORMAT) -COMPATIBLE_IOCTL(VIDEO_SET_SYSTEM) -COMPATIBLE_IOCTL(VIDEO_SET_HIGHLIGHT) -COMPATIBLE_IOCTL(VIDEO_SET_SPU) -COMPATIBLE_IOCTL(VIDEO_GET_NAVI) -COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES) COMPATIBLE_IOCTL(VIDEO_GET_SIZE) -COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE) /* cec */ COMPATIBLE_IOCTL(CEC_ADAP_G_CAPS) COMPATIBLE_IOCTL(CEC_ADAP_G_LOG_ADDRS) @@ -1347,8 +1309,6 @@ static long do_ioctl_trans(unsigned int cmd, return do_video_get_event(file, cmd, argp); case VIDEO_STILLPICTURE: return do_video_stillpicture(file, cmd, argp); - case VIDEO_SET_SPU_PALETTE: - return do_video_set_spu_palette(file, cmd, argp); } /* diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 577cff24707b..39843fa7e11b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1777,6 +1777,16 @@ void configfs_unregister_group(struct config_group *group) struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + mutex_lock(&subsys->su_mutex); + if (!group->cg_item.ci_parent->ci_group) { + /* + * The parent has already been unlinked and detached + * due to a rmdir. + */ + goto unlink_group; + } + mutex_unlock(&subsys->su_mutex); + inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); @@ -1791,6 +1801,7 @@ void configfs_unregister_group(struct config_group *group) dput(dentry); mutex_lock(&subsys->su_mutex); +unlink_group: unlink_group(group); mutex_unlock(&subsys->su_mutex); } diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 88f266efc09b..99d491cd01f9 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -64,7 +64,6 @@ static void config_item_init(struct config_item *item) */ int config_item_set_name(struct config_item *item, const char *fmt, ...) { - int error = 0; int limit = CONFIGFS_ITEM_NAME_LEN; int need; va_list args; @@ -79,25 +78,11 @@ int config_item_set_name(struct config_item *item, const char *fmt, ...) if (need < limit) name = item->ci_namebuf; else { - /* - * Need more space? Allocate it and try again - */ - limit = need + 1; - name = kmalloc(limit, GFP_KERNEL); - if (!name) { - error = -ENOMEM; - goto Done; - } va_start(args, fmt); - need = vsnprintf(name, limit, fmt, args); + name = kvasprintf(GFP_KERNEL, fmt, args); va_end(args); - - /* Still? Give up. */ - if (need >= limit) { - kfree(name); - error = -EFAULT; - goto Done; - } + if (!name) + return -EFAULT; } /* Free the old name, if necessary. */ @@ -106,8 +91,7 @@ int config_item_set_name(struct config_item *item, const char *fmt, ...) /* Now, set the new name */ item->ci_name = name; - Done: - return error; + return 0; } EXPORT_SYMBOL(config_item_set_name); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 78ffc2699993..a5c54af861f7 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length) /* back up enough to print this bus id with '/' */ length -= cur; - strncpy(buffer + length,config_item_name(p),cur); + memcpy(buffer + length, config_item_name(p), cur); *(buffer + --length) = '/'; } } @@ -566,7 +566,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) if (index >= end) break; - if (!radix_tree_exceptional_entry(pvec_ent)) + if (WARN_ON_ONCE( + !radix_tree_exceptional_entry(pvec_ent))) continue; xa_lock_irq(&mapping->i_pages); @@ -578,6 +579,13 @@ struct page *dax_layout_busy_page(struct address_space *mapping) if (page) break; } + + /* + * We don't expect normal struct page entries to exist in our + * tree, but we keep these pagevec calls so that this code is + * consistent with the common pattern for handling pagevecs + * throughout the kernel. + */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); index++; diff --git a/fs/dcache.c b/fs/dcache.c index 0e8e5de3c48a..2e7e8d85e9b4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -292,7 +292,8 @@ void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry spin_unlock(&dentry->d_lock); name->name = p->name; } else { - memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + memcpy(name->inline_name, dentry->d_iname, + dentry->d_name.len + 1); spin_unlock(&dentry->d_lock); name->name = name->inline_name; } @@ -358,14 +359,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - bool hashed = !d_unhashed(dentry); - if (hashed) - raw_write_seqcount_begin(&dentry->d_seq); + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - if (hashed) - raw_write_seqcount_end(&dentry->d_seq); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -732,16 +730,16 @@ static inline bool fast_dput(struct dentry *dentry) if (dentry->d_lockref.count > 1) { dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); - return 1; + return true; } - return 0; + return false; } /* * If we weren't the last ref, we're done. */ if (ret) - return 1; + return true; /* * Careful, careful. The reference count went down @@ -770,7 +768,7 @@ static inline bool fast_dput(struct dentry *dentry) /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) - return 1; + return true; /* * Not the fast normal case? Get the lock. We've already decremented @@ -787,7 +785,7 @@ static inline bool fast_dput(struct dentry *dentry) */ if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); - return 1; + return true; } /* @@ -796,7 +794,7 @@ static inline bool fast_dput(struct dentry *dentry) * set it to 1. */ dentry->d_lockref.count = 1; - return 0; + return false; } @@ -1892,50 +1890,25 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); -/** - * d_instantiate_no_diralias - instantiate a non-aliased dentry - * @entry: dentry to complete - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. If a directory alias is found, then - * return an error (and drop inode). Together with d_materialise_unique() this - * guarantees that a directory inode may never have more than one alias. - */ -int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) -{ - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); - - security_d_instantiate(entry, inode); - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { - spin_unlock(&inode->i_lock); - iput(inode); - return -EBUSY; - } - __d_instantiate(entry, inode); - spin_unlock(&inode->i_lock); - - return 0; -} -EXPORT_SYMBOL(d_instantiate_no_diralias); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = d_alloc_anon(root_inode->i_sb); - if (res) + if (res) { + res->d_flags |= DCACHE_RCUACCESS; d_instantiate(res, root_inode); - else + } else { iput(root_inode); + } } return res; } @@ -2676,33 +2649,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_exact_alias); -/** - * dentry_update_name_case - update case insensitive dentry with a new name - * @dentry: dentry to be updated - * @name: new name - * - * Update a case insensitive dentry with new case of name. - * - * dentry must have been returned by d_lookup with name @name. Old and new - * name lengths must match (ie. no d_compare which allows mismatched name - * lengths). - * - * Parent inode i_mutex must be held over d_lookup and into this call (to - * keep renames and concurrent inserts, and readdir(2) away). - */ -void dentry_update_name_case(struct dentry *dentry, const struct qstr *name) -{ - BUG_ON(!inode_is_locked(dentry->d_parent->d_inode)); - BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ - - spin_lock(&dentry->d_lock); - write_seqcount_begin(&dentry->d_seq); - memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); - write_seqcount_end(&dentry->d_seq); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(dentry_update_name_case); - static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 71fccccf317e..8c6ab6c95727 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; - uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (err) + goto out; if (efivar_variable_is_removable(var->var.VendorGuid, dentry->d_name.name, namelen)) diff --git a/fs/exec.c b/fs/exec.c index 72e961a62adb..bdd0eacefdf5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -293,6 +293,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; + vma_set_anonymous(vma); if (down_write_killable(&mm->mmap_sem)) { err = -EINTR; diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 1b8b44637e70..5331a15a61f1 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -873,8 +873,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) struct bio *bio; if (per_dev != master_dev) { - bio = bio_clone_kmalloc(master_dev->bio, - GFP_KERNEL); + bio = bio_clone_fast(master_dev->bio, + GFP_KERNEL, NULL); if (unlikely(!bio)) { ORE_DBGMSG( "Failed to allocate BIO size=%u\n", diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 047c327a6b23..28b2609f25c1 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -126,7 +126,6 @@ static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &ext2_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP; return 0; } #else diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 6484199b35d1..5c3d7b7e4975 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -611,8 +611,7 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return ERR_PTR(err); fail: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71635909df3b..7f7ee18fe179 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -86,7 +86,7 @@ void ext2_evict_inode(struct inode * inode) if (want_delete) { sb_start_intwrite(inode->i_sb); /* set dtime */ - EXT2_I(inode)->i_dtime = get_seconds(); + EXT2_I(inode)->i_dtime = ktime_get_real_seconds(); mark_inode_dirty(inode); __ext2_write_inode(inode, inode_needs_sync(inode)); /* truncate to 0 */ diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 152453a91877..0c26dcc5d850 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -192,8 +191,7 @@ out: out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); goto out; } @@ -261,8 +259,7 @@ out: out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); goto out; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 8ff53f8da3bc..73bd58fa13de 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -682,7 +682,8 @@ static int ext2_setup_super (struct super_block * sb, "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) + le32_to_cpu(es->s_checkinterval) <= + ktime_get_real_seconds())) ext2_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -1248,7 +1249,7 @@ void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, spin_lock(&EXT2_SB(sb)->s_lock); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_wtime = cpu_to_le32(get_seconds()); + es->s_wtime = cpu_to_le32(ktime_get_real_seconds()); /* unlock before we do IO */ spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); @@ -1360,7 +1361,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) * the rdonly flag and then mark the partition as valid again. */ es->s_state = cpu_to_le16(sbi->s_mount_state); - es->s_mtime = cpu_to_le32(get_seconds()); + es->s_mtime = cpu_to_le32(ktime_get_real_seconds()); spin_unlock(&sbi->s_lock); err = dquot_suspend(sb, -1); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e68cefe08261..e5d6ee61ff48 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -368,6 +368,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, desc, bh))) { ext4_unlock_group(sb, block_group); @@ -386,6 +388,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -423,9 +426,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, "Cannot get buffer for block bitmap - " - "block_group = %u, block_bitmap = %llu", - block_group, bitmap_blk); + ext4_warning(sb, "Cannot get buffer for block bitmap - " + "block_group = %u, block_bitmap = %llu", + block_group, bitmap_blk); return ERR_PTR(-ENOMEM); } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7c7123f265c2..0f0edd1cd0cd 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -789,17 +789,16 @@ struct move_extent { * affected filesystem before 2242. */ -static inline __le32 ext4_encode_extra_time(struct timespec *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 *time) { - u32 extra = sizeof(time->tv_sec) > 4 ? - ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; + u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK; return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } -static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +static inline void ext4_decode_extra_time(struct timespec64 *time, + __le32 extra) { - if (unlikely(sizeof(time->tv_sec) > 4 && - (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { + if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) { #if 1 /* Handle legacy encoding of pre-1970 dates with epoch @@ -821,9 +820,8 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) do { \ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&ts); \ + ext4_encode_extra_time(&(inode)->xtime); \ } \ } while (0) @@ -840,10 +838,8 @@ do { \ do { \ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ - ext4_decode_extra_time(&ts, \ + ext4_decode_extra_time(&(inode)->xtime, \ raw_inode->xtime ## _extra); \ - (inode)->xtime = timespec_to_timespec64(ts); \ } \ else \ (inode)->xtime.tv_nsec = 0; \ @@ -993,9 +989,9 @@ struct ext4_inode_info { /* * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. + * struct timespec64 i_{a,c,m}time in the generic inode. */ - struct timespec i_crtime; + struct timespec64 i_crtime; /* mballoc */ struct list_head i_prealloc_list; @@ -1299,7 +1295,14 @@ struct ext4_super_block { __le32 s_lpf_ino; /* Location of the lost+found inode */ __le32 s_prj_quota_inum; /* inode for tracking project quota */ __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */ - __le32 s_reserved[98]; /* Padding to the end of the block */ + __u8 s_wtime_hi; + __u8 s_mtime_hi; + __u8 s_mkfs_time_hi; + __u8 s_lastcheck_hi; + __u8 s_first_error_time_hi; + __u8 s_last_error_time_hi; + __u8 s_pad[2]; + __le32 s_reserved[96]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -2456,6 +2459,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); +extern int ext4_break_layouts(struct inode *); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); @@ -3058,7 +3062,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); /* symlink.c */ extern const struct inode_operations ext4_encrypted_symlink_inode_operations; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8ce6fd5b10dd..72a361d5ef74 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4826,6 +4826,13 @@ static long ext4_zero_range(struct file *file, loff_t offset, * released from page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) { + up_write(&EXT4_I(inode)->i_mmap_sem); + goto out_mutex; + } + ret = ext4_update_disksize_before_punch(inode, offset, len); if (ret) { up_write(&EXT4_I(inode)->i_mmap_sem); @@ -5499,6 +5506,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_mmap; + /* * Need to round down offset to be aligned with page size boundary * for page size > block size. @@ -5647,6 +5659,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_mmap; + /* * Need to round down to align start offset to page size boundary * for page size > block size. diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7f8023340eb8..69d65d49837b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -374,7 +374,7 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; } else { vma->vm_ops = &ext4_file_vm_ops; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index fb83750c1a14..2addcb8730e1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -90,6 +90,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8)) { @@ -101,6 +103,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSBADCRC; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -135,9 +138,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, "Cannot read inode bitmap - " - "block_group = %u, inode_bitmap = %llu", - block_group, bitmap_blk); + ext4_warning(sb, "Cannot read inode bitmap - " + "block_group = %u, inode_bitmap = %llu", + block_group, bitmap_blk); return ERR_PTR(-ENOMEM); } if (bitmap_uptodate(bh)) @@ -1083,7 +1086,7 @@ got: /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - ei->i_crtime = timespec64_to_timespec(inode->i_mtime); + ei->i_crtime = inode->i_mtime; memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; @@ -1385,7 +1388,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ext4_itable_unused_count(sb, gdp)), sbi->s_inodes_per_block); - if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || + ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp)) < + EXT4_FIRST_INO(sb)))) { ext4_error(sb, "Something is wrong with group %u: " "used itable blocks: %d; " "itable unused count: %u", diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e55a8bc870bd..3543fe80a3c4 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -682,6 +682,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, goto convert; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out; + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, 0, flags); @@ -710,7 +714,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: - if (handle) + if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; @@ -752,6 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); + mark_inode_dirty(inode); out: return copied; } @@ -898,7 +903,6 @@ retry_journal: goto out; } - page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) { ret = -ENOMEM; @@ -916,6 +920,9 @@ retry_journal: if (ret < 0) goto out_release_page; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *pagep = page; @@ -936,7 +943,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int i_size_changed = 0; int ret; ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -954,10 +960,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos+copied > inode->i_size) { + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); - i_size_changed = 1; - } unlock_page(page); put_page(page); @@ -967,8 +971,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) - mark_inode_dirty(inode); + mark_inode_dirty(inode); return copied; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7d6c10017bdf..d0dd585add6a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -317,7 +317,7 @@ stop_handle: * (Well, we could do this if we need to, but heck - it works) */ ext4_orphan_del(handle, inode); - EXT4_I(inode)->i_dtime = get_seconds(); + EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds(); /* * One subtle ordering requirement: if anything has gone wrong @@ -1389,9 +1389,10 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1419,7 +1420,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) + if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && ext4_can_truncate(inode)) @@ -1493,6 +1494,7 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1500,7 +1502,7 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1531,7 +1533,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); - if (size_changed) { + if (size_changed || inline_data) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -2028,11 +2030,7 @@ static int __ext4_journalled_writepage(struct page *page, } if (inline_data) { - BUFFER_TRACE(inode_bh, "get write access"); - ret = ext4_journal_get_write_access(handle, inode_bh); - - err = ext4_handle_dirty_metadata(handle, inode, inode_bh); - + ret = ext4_mark_inode_dirty(handle, inode); } else { ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -3327,7 +3325,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3342,7 +3341,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, @@ -4193,6 +4192,39 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, return 0; } +static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock) +{ + *did_unlock = true; + up_write(&ei->i_mmap_sem); + schedule(); + down_write(&ei->i_mmap_sem); +} + +int ext4_break_layouts(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct page *page; + bool retry; + int error; + + if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) + return -EINVAL; + + do { + retry = false; + page = dax_layout_busy_page(inode->i_mapping); + if (!page) + return 0; + + error = ___wait_var_event(&page->_refcount, + atomic_read(&page->_refcount) == 1, + TASK_INTERRUPTIBLE, 0, 0, + ext4_wait_dax_page(ei, &retry)); + } while (error == 0 && retry); + + return error; +} + /* * ext4_punch_hole: punches a hole in a file by releasing the blocks * associated with the given offset and length @@ -4266,6 +4298,11 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_dio; + first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; @@ -4946,17 +4983,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } else if (!ext4_has_inline_data(inode)) { - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - (S_ISLNK(inode->i_mode) && - !ext4_inode_is_fast_symlink(inode)))) - /* Validate extent which is part of inode */ + /* validate the block references in the inode */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + (S_ISLNK(inode->i_mode) && + !ext4_inode_is_fast_symlink(inode))) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_check_inode(inode); - } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - (S_ISLNK(inode->i_mode) && - !ext4_inode_is_fast_symlink(inode))) { - /* Validate block references which are part of inode */ - ret = ext4_ind_check_inode(inode); + else + ret = ext4_ind_check_inode(inode); } } if (ret) @@ -5555,6 +5589,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_wait_for_tail_page_commit(inode); } down_write(&EXT4_I(inode)->i_mmap_sem); + + rc = ext4_break_layouts(inode); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + error = rc; + goto err_out; + } + /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f7ab34088162..e29fce2fbf25 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -14,6 +14,7 @@ #include <linux/log2.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/nospec.h> #include <linux/backing-dev.h> #include <trace/events/ext4.h> @@ -2140,7 +2141,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * This should tell if fe_len is exactly power of 2 */ if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) - ac->ac_2order = i - 1; + ac->ac_2order = array_index_nospec(i - 1, + sb->s_blocksize_bits + 2); } /* if stream allocation is enabled, use global goal */ @@ -3799,7 +3801,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ext4_group_t group; ext4_grpblk_t bit; unsigned long long grp_blk_start; - int err = 0; int free = 0; BUG_ON(pa->pa_deleted == 0); @@ -3840,7 +3841,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, } atomic_add(free, &sbi->s_mb_discarded); - return err; + return 0; } static noinline_for_stack int diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..39b07c2d3384 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -147,7 +147,7 @@ static int kmmpd(void *data) mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); /* * Start with the higher mmp_check_interval and reduce it if * the MMP block is being updated on time. @@ -165,7 +165,7 @@ static int kmmpd(void *data) seq = 1; mmp->mmp_seq = cpu_to_le32(seq); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); last_update_time = jiffies; retval = write_mmp_block(sb, bh); @@ -186,11 +186,8 @@ static int kmmpd(void *data) goto exit_thread; } - if (sb_rdonly(sb)) { - ext4_warning(sb, "kmmpd being stopped since filesystem " - "has been remounted as readonly."); - goto exit_thread; - } + if (sb_rdonly(sb)) + break; diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) @@ -244,7 +241,7 @@ static int kmmpd(void *data) * Unmount seems to be clean. */ mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); retval = write_mmp_block(sb, bh); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 8e17efdcbf11..a409ff70d67b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -134,9 +134,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, mapping[0] = inode1->i_mapping; mapping[1] = inode2->i_mapping; } else { - pgoff_t tmp = index1; - index1 = index2; - index2 = tmp; + swap(index1, index2); mapping[0] = inode2->i_mapping; mapping[1] = inode1->i_mapping; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2a4c25c4681d..116ff68c5bd4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1398,6 +1398,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); + ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (!nblocks) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 19b87a8de6ff..f461d75ac049 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -98,7 +98,7 @@ static void mpage_end_io(struct bio *bio) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -259,7 +259,8 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, + is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba2396a7bd04..5863fd22e90b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -312,6 +312,24 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +{ + time64_t now = ktime_get_real_seconds(); + + now = clamp_val(now, 0, (1ull << 40) - 1); + + *lo = cpu_to_le32(lower_32_bits(now)); + *hi = upper_32_bits(now); +} + +static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) +{ + return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); +} +#define ext4_update_tstamp(es, tstamp) \ + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) +#define ext4_get_tstamp(es, tstamp) \ + __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) static void __save_error_info(struct super_block *sb, const char *func, unsigned int line) @@ -322,11 +340,12 @@ static void __save_error_info(struct super_block *sb, const char *func, if (bdev_read_only(sb->s_bdev)) return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - es->s_last_error_time = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_last_error_time); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); es->s_last_error_line = cpu_to_le32(line); if (!es->s_first_error_time) { es->s_first_error_time = es->s_last_error_time; + es->s_first_error_time_hi = es->s_last_error_time_hi; strncpy(es->s_first_error_func, func, sizeof(es->s_first_error_func)); es->s_first_error_line = cpu_to_le32(line); @@ -776,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + int ret; - if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) && - !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) { - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, - &grp->bb_state); + if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, + &grp->bb_state); + if (!ret) + percpu_counter_sub(&sbi->s_freeclusters_counter, + grp->bb_free); } - if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) && - !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - if (gdp) { + if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, + &grp->bb_state); + if (!ret && gdp) { int count; count = ext4_free_inodes_count(sb, gdp); percpu_counter_sub(&sbi->s_freeinodes_counter, count); } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, - &grp->bb_state); } } @@ -2174,8 +2193,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, "warning: maximal mount count reached, " "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) + (ext4_get_tstamp(es, s_lastcheck) + + le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds())) ext4_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -2184,7 +2203,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); - es->s_mtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_mtime); ext4_update_dynamic_rev(sb); if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); @@ -2342,7 +2361,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; - ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0); ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -2875,8 +2894,9 @@ static void print_daily_error_info(struct timer_list *t) ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", le32_to_cpu(es->s_error_count)); if (es->s_first_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", - sb->s_id, le32_to_cpu(es->s_first_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(es, s_first_error_time), (int) sizeof(es->s_first_error_func), es->s_first_error_func, le32_to_cpu(es->s_first_error_line)); @@ -2889,8 +2909,9 @@ static void print_daily_error_info(struct timer_list *t) printk(KERN_CONT "\n"); } if (es->s_last_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", - sb->s_id, le32_to_cpu(es->s_last_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(es, s_last_error_time), (int) sizeof(es->s_last_error_func), es->s_last_error_func, le32_to_cpu(es->s_last_error_line)); @@ -3141,14 +3162,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) if (!gdp) continue; - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) - continue; - if (group != 0) + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) break; - ext4_error(sb, "Inode table for bg 0 marked as " - "needing zeroing"); - if (sb_rdonly(sb)) - return ngroups; } return group; @@ -3514,7 +3529,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb_block = sb_block; if (sb->s_bdev->bd_part) sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev->bd_part, sectors[1]); + part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]); /* Cleanup superblock name */ strreplace(sb->s_id, '/', '!'); @@ -4085,14 +4100,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } + sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; } - sbi->s_gdb_count = db_count; - timer_setup(&sbi->s_err_report, print_daily_error_info, 0); /* Register extent status tree shrinker */ @@ -4820,11 +4834,12 @@ static int ext4_commit_super(struct super_block *sb, int sync) * to complain and force a full file system check. */ if (!(sb->s_flags & SB_RDONLY)) - es->s_wtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_wtime); if (sb->s_bdev->bd_part) es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + ((part_stat_read(sb->s_bdev->bd_part, + sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1)); else es->s_kbytes_written = @@ -5087,6 +5102,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif char *orig_data = kstrdup(data, GFP_KERNEL); + if (data && !orig_data) + return -ENOMEM; + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; @@ -5213,6 +5231,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || @@ -5670,13 +5690,13 @@ static int ext4_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - for (type--; type >= 0; type--) - dquot_quota_off(sb, type); - ext4_warning(sb, "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + return err; } } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f34da0bb8f17..9212a026a1f1 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -25,6 +25,8 @@ typedef enum { attr_reserved_clusters, attr_inode_readahead, attr_trigger_test_error, + attr_first_error_time, + attr_last_error_time, attr_feature, attr_pointer_ui, attr_pointer_atomic, @@ -56,7 +58,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + (part_stat_read(sb->s_bdev->bd_part, + sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -68,7 +71,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + ((part_stat_read(sb->s_bdev->bd_part, + sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); } @@ -182,8 +186,8 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); -EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); -EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); +EXT4_ATTR(first_error_time, 0444, first_error_time); +EXT4_ATTR(last_error_time, 0444, last_error_time); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -249,6 +253,15 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) return NULL; } +static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) +{ + return snprintf(buf, PAGE_SIZE, "%lld", + ((time64_t)hi << 32) + le32_to_cpu(lo)); +} + +#define print_tstamp(buf, es, tstamp) \ + __print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi) + static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -274,8 +287,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", - *((unsigned int *) ptr)); + if (a->attr_ptr == ptr_ext4_super_block_offset) + return snprintf(buf, PAGE_SIZE, "%u\n", + le32_to_cpup(ptr)); + else + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -283,6 +300,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj, atomic_read((atomic_t *) ptr)); case attr_feature: return snprintf(buf, PAGE_SIZE, "supported\n"); + case attr_first_error_time: + return print_tstamp(buf, sbi->s_es, s_first_error_time); + case attr_last_error_time: + return print_tstamp(buf, sbi->s_es, s_last_error_time); } return 0; @@ -308,7 +329,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; - *((unsigned int *) ptr) = t; + if (a->attr_ptr == ptr_ext4_super_block_offset) + *((__le32 *) ptr) = cpu_to_le32(t); + else + *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len); diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 0cb13badf473..bcbe3668c1d4 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -11,6 +11,10 @@ */ static inline void ext4_truncate_failed_write(struct inode *inode) { + /* + * We don't need to call ext4_break_layouts() because the blocks we + * are truncating were never visible to userspace. + */ down_write(&EXT4_I(inode)->i_mmap_sem); truncate_inode_pages(inode->i_mapping, inode->i_size); ext4_truncate(inode); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 723df14f4084..f36fc5d5b257 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -190,6 +190,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EFSCORRUPTED; + if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) + return -EFSCORRUPTED; e = next; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f931d699287..b7c9b58acf3e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1421,6 +1421,11 @@ out: /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. + * + * Note that the aops->readpages() function is ONLY used for read-ahead. If + * this function ever deviates from doing just read-ahead, it should either + * use ->readpage() or do the necessary surgery to decouple ->readpages() + * readom read-ahead. */ static int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4d8b1de83143..6799c3fc44e3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1304,7 +1304,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \ +(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3995e926ba3a..17bcff789c08 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2882,7 +2882,8 @@ try_onemore: /* For write statistics */ if (sb->s_bdev->bd_part) sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]); + (u64)part_stat_read(sb->s_bdev->bd_part, + sectors[STAT_WRITE]); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/fs/file_table.c b/fs/file_table.c index 7ec0b3e5f05d..d6eccd04d703 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { + security_file_free(f); percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -100,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write, * done, you will imbalance int the mount's writer count * and a warning at __fput() time. */ -struct file *get_empty_filp(void) +struct file *alloc_empty_file(int flags, const struct cred *cred) { - const struct cred *cred = current_cred(); static long old_max; struct file *f; int error; @@ -123,11 +123,10 @@ struct file *get_empty_filp(void) if (unlikely(!f)) return ERR_PTR(-ENOMEM); - percpu_counter_inc(&nr_files); f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { - file_free(f); + file_free_rcu(&f->f_u.fu_rcuhead); return ERR_PTR(error); } @@ -136,7 +135,10 @@ struct file *get_empty_filp(void) spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); eventpoll_init_file(f); + f->f_flags = flags; + f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ + percpu_counter_inc(&nr_files); return f; over: @@ -152,15 +154,15 @@ over: * alloc_file - allocate and initialize a 'struct file' * * @path: the (dentry, vfsmount) pair for the new file - * @mode: the mode with which the new file will be opened + * @flags: O_... flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ -struct file *alloc_file(const struct path *path, fmode_t mode, +static struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; - file = get_empty_filp(); + file = alloc_empty_file(flags, current_cred()); if (IS_ERR(file)) return file; @@ -168,19 +170,56 @@ struct file *alloc_file(const struct path *path, fmode_t mode, file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); - if ((mode & FMODE_READ) && + if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) - mode |= FMODE_CAN_READ; - if ((mode & FMODE_WRITE) && + file->f_mode |= FMODE_CAN_READ; + if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) - mode |= FMODE_CAN_WRITE; - file->f_mode = mode; + file->f_mode |= FMODE_CAN_WRITE; + file->f_mode |= FMODE_OPENED; file->f_op = fop; - if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } -EXPORT_SYMBOL(alloc_file); + +struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, + const char *name, int flags, + const struct file_operations *fops) +{ + static const struct dentry_operations anon_ops = { + .d_dname = simple_dname + }; + struct qstr this = QSTR_INIT(name, strlen(name)); + struct path path; + struct file *file; + + path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this); + if (!path.dentry) + return ERR_PTR(-ENOMEM); + if (!mnt->mnt_sb->s_d_op) + d_set_d_op(path.dentry, &anon_ops); + path.mnt = mntget(mnt); + d_instantiate(path.dentry, inode); + file = alloc_file(&path, flags, fops); + if (IS_ERR(file)) { + ihold(inode); + path_put(&path); + } + return file; +} +EXPORT_SYMBOL(alloc_file_pseudo); + +struct file *alloc_file_clone(struct file *base, int flags, + const struct file_operations *fops) +{ + struct file *f = alloc_file(&base->f_path, flags, fops); + if (!IS_ERR(f)) { + path_get(&f->f_path); + f->f_mapping = base->f_mapping; + } + return f; +} /* the real guts of fput() - releasing the last reference to file */ @@ -190,6 +229,9 @@ static void __fput(struct file *file) struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; + if (unlikely(!(file->f_mode & FMODE_OPENED))) + goto out; + might_sleep(); fsnotify_close(file); @@ -207,7 +249,6 @@ static void __fput(struct file *file) } if (file->f_op->release) file->f_op->release(inode, file); - security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { cdev_put(inode->i_cdev); @@ -220,12 +261,10 @@ static void __fput(struct file *file) put_write_access(inode); __mnt_drop_write(mnt); } - file->f_path.dentry = NULL; - file->f_path.mnt = NULL; - file->f_inode = NULL; - file_free(file); dput(dentry); mntput(mnt); +out: + file_free(file); } static LLIST_HEAD(delayed_fput_list); @@ -300,14 +339,6 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); -void put_filp(struct file *file) -{ - if (atomic_long_dec_and_test(&file->f_count)) { - security_file_free(file); - file_free(file); - } -} - void __init files_init(void) { filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index c184c5a356ff..cdcb376ef8df 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -220,6 +220,7 @@ int fscache_add_cache(struct fscache_cache *cache, { struct fscache_cache_tag *tag; + ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index); BUG_ON(!cache->ops); BUG_ON(!ifsdef); @@ -248,7 +249,6 @@ int fscache_add_cache(struct fscache_cache *cache, if (!cache->kobj) goto error; - ifsdef->cookie = &fscache_fsdef_index; ifsdef->cache = cache; cache->fsdef = ifsdef; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 97137d7ec5ee..83bfe04456b6 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -516,6 +516,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, goto error; } + ASSERTCMP(object->cookie, ==, cookie); fscache_stat(&fscache_n_object_alloc); object->debug_id = atomic_inc_return(&fscache_object_debug_id); @@ -571,6 +572,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie, _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); + ASSERTCMP(object->cookie, ==, cookie); + spin_lock(&cookie->lock); /* there may be multiple initial creations of this object, but we only @@ -610,9 +613,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, spin_unlock(&cache->object_list_lock); } - /* attach to the cookie */ - object->cookie = cookie; - fscache_cookie_get(cookie, fscache_cookie_get_attach_object); + /* Attach to the cookie. The object already has a ref on it. */ hlist_add_head(&object->cookie_link, &cookie->backing_objects); fscache_objlist_add(object); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 20e0d0a4dc8c..9edc920f651f 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -327,6 +327,7 @@ void fscache_object_init(struct fscache_object *object, object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; + fscache_cookie_get(cookie, fscache_cookie_get_attach_object); object->parent = NULL; #ifdef CONFIG_FSCACHE_OBJECT_LIST RB_CLEAR_NODE(&object->objlist_link); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e30c5975ea58..8d265790374c 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, + op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op) struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + op->object ? op->object->debug_id : 0, + op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 56231b31f806..d80aab0d5982 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -399,7 +399,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, */ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { int err; struct inode *inode; @@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - err = finish_open(file, entry, generic_file_open, opened); + err = finish_open(file, entry, generic_file_open); if (err) { fuse_sync_release(ff, flags); } else { @@ -489,7 +489,7 @@ out_err: static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { int err; struct fuse_conn *fc = get_fuse_conn(dir); @@ -508,12 +508,12 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, goto no_open; /* Only creates */ - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, file, flags, mode, opened); + err = fuse_create_open(dir, entry, file, flags, mode); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, { struct fuse_entry_out outarg; struct inode *inode; + struct dentry *d; int err; struct fuse_forget_link *forget; @@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, } kfree(forget); - err = d_instantiate_no_diralias(entry, inode); - if (err) - return err; + d_drop(entry); + d = d_splice_alias(inode, entry); + if (IS_ERR(d)) + return PTR_ERR(d); - fuse_change_entry_timeout(entry, &outarg); + if (d) { + fuse_change_entry_timeout(d, &outarg); + dput(d); + } else { + fuse_change_entry_timeout(entry, &outarg); + } fuse_invalidate_attr(dir); return 0; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 776717f1eeea..af5f87a493d9 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -82,14 +82,12 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int error; - int len; + size_t len; char *data; const char *name = gfs2_acl_name(type); if (acl) { - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - if (len == 0) - return 0; + len = posix_acl_xattr_size(acl->a_count); data = kmalloc(len, GFP_NOFS); if (data == NULL) return -ENOMEM; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 35f5ee23566d..31e8270d0b26 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -22,6 +22,7 @@ #include <linux/backing-dev.h> #include <linux/uio.h> #include <trace/events/writeback.h> +#include <linux/sched/signal.h> #include "gfs2.h" #include "incore.h" @@ -36,10 +37,11 @@ #include "super.h" #include "util.h" #include "glops.h" +#include "aops.h" -static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int len) +void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len) { struct buffer_head *head = page_buffers(page); unsigned int bsize = head->b_size; @@ -82,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, return 0; } -static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, bh_result, 0); -} - /** * gfs2_writepage_common - Common bits of writepage * @page: The page to be written @@ -462,7 +458,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +int stuffed_readpage(struct gfs2_inode *ip, struct page *page) { struct buffer_head *dibh; u64 dsize = i_size_read(&ip->i_inode); @@ -512,9 +508,13 @@ static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + int error; - if (gfs2_is_stuffed(ip)) { + if (i_blocksize(page->mapping->host) == PAGE_SIZE && + !page_has_buffers(page)) { + error = iomap_readpage(page, &gfs2_iomap_ops); + } else if (gfs2_is_stuffed(ip)) { error = stuffed_readpage(ip, page); unlock_page(page); } else { @@ -644,139 +644,10 @@ out_uninit: } /** - * gfs2_write_begin - Begin to write to a file - * @file: The file to write to - * @mapping: The mapping in which to write - * @pos: The file offset at which to start writing - * @len: Length of the write - * @flags: Various flags - * @pagep: Pointer to return the page - * @fsdata: Pointer to return fs data (unused by GFS2) - * - * Returns: errno - */ - -static int gfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct gfs2_inode *ip = GFS2_I(mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(mapping->host); - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - unsigned requested = 0; - int alloc_required; - int error = 0; - pgoff_t index = pos >> PAGE_SHIFT; - unsigned from = pos & (PAGE_SIZE - 1); - struct page *page; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - if (&ip->i_inode == sdp->sd_rindex) { - error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, - GL_NOCACHE, &m_ip->i_gh); - if (unlikely(error)) { - gfs2_glock_dq(&ip->i_gh); - goto out_uninit; - } - } - - alloc_required = gfs2_write_alloc_required(ip, pos, len); - - if (alloc_required || gfs2_is_jdata(ip)) - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - - if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; - requested = data_blocks + ind_blocks; - ap.target = requested; - error = gfs2_quota_lock_check(ip, &ap); - if (error) - goto out_unlock; - - error = gfs2_inplace_reserve(ip, &ap); - if (error) - goto out_qunlock; - } - - rblocks = RES_DINODE + ind_blocks; - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - if (ind_blocks || data_blocks) - rblocks += RES_STATFS + RES_QUOTA; - if (&ip->i_inode == sdp->sd_rindex) - rblocks += 2 * RES_STATFS; - if (alloc_required) - rblocks += gfs2_rg_blocks(ip, requested); - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = -ENOMEM; - flags |= AOP_FLAG_NOFS; - page = grab_cache_page_write_begin(mapping, index, flags); - *pagep = page; - if (unlikely(!page)) - goto out_endtrans; - - if (gfs2_is_stuffed(ip)) { - error = 0; - if (pos + len > gfs2_max_stuffed_size(ip)) { - error = gfs2_unstuff_dinode(ip, page); - if (error == 0) - goto prepare_write; - } else if (!PageUptodate(page)) { - error = stuffed_readpage(ip, page); - } - goto out; - } - -prepare_write: - error = __block_write_begin(page, from, len, gfs2_block_map); -out: - if (error == 0) - return 0; - - unlock_page(page); - put_page(page); - - gfs2_trans_end(sdp); - if (alloc_required) { - gfs2_inplace_release(ip); - if (pos + len > ip->i_inode.i_size) - gfs2_trim_blocks(&ip->i_inode); - } - goto out_qunlock; - -out_endtrans: - gfs2_trans_end(sdp); -out_trans_fail: - if (alloc_required) - gfs2_inplace_release(ip); -out_qunlock: - if (alloc_required) - gfs2_quota_unlock(ip); -out_unlock: - if (&ip->i_inode == sdp->sd_rindex) { - gfs2_glock_dq(&m_ip->i_gh); - gfs2_holder_uninit(&m_ip->i_gh); - } - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - -/** * adjust_fs_space - Adjusts the free space available due to gfs2_grow * @inode: the rindex inode */ -static void adjust_fs_space(struct inode *inode) +void adjust_fs_space(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -822,11 +693,11 @@ out: * This copies the data from the page into the inode block after * the inode data structure itself. * - * Returns: errno + * Returns: copied bytes or errno */ -static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page) +int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page) { struct gfs2_inode *ip = GFS2_I(inode); u64 to = pos + copied; @@ -853,84 +724,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, } /** - * gfs2_write_end - * @file: The file to write to - * @mapping: The address space to write to - * @pos: The file position - * @len: The length of the data - * @copied: How much was actually copied by the VFS - * @page: The page that has been written - * @fsdata: The fsdata (unused in GFS2) - * - * The main write_end function for GFS2. We just put our locking around the VFS - * provided functions. - * - * Returns: errno - */ - -static int gfs2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - struct buffer_head *dibh; - int ret; - struct gfs2_trans *tr = current->journal_info; - BUG_ON(!tr); - - BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); - - ret = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(ret)) - goto out; - - if (gfs2_is_stuffed(ip)) { - ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page); - page = NULL; - goto out2; - } - - if (gfs2_is_jdata(ip)) - gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len); - else - gfs2_ordered_add_inode(ip); - - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - page = NULL; - if (tr->tr_num_buf_new) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - else - gfs2_trans_add_meta(ip->i_gl, dibh); - -out2: - if (inode == sdp->sd_rindex) { - adjust_fs_space(inode); - sdp->sd_rindex_uptodate = 0; - } - - brelse(dibh); -out: - if (page) { - unlock_page(page); - put_page(page); - } - gfs2_trans_end(sdp); - gfs2_inplace_release(ip); - if (ip->i_qadata && ip->i_qadata->qa_qd_num) - gfs2_quota_unlock(ip); - if (inode == sdp->sd_rindex) { - gfs2_glock_dq(&m_ip->i_gh); - gfs2_holder_uninit(&m_ip->i_gh); - } - gfs2_glock_dq(&ip->i_gh); - gfs2_holder_uninit(&ip->i_gh); - return ret; -} - -/** * jdata_set_page_dirty - Page dirtying function * @page: The page to dirty * @@ -1023,96 +816,6 @@ out: } /** - * gfs2_ok_for_dio - check that dio is valid on this file - * @ip: The inode - * @offset: The offset at which we are reading or writing - * - * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) - * 1 (to accept the i/o request) - */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -{ - /* - * Should we return an error here? I can't see that O_DIRECT for - * a stuffed file makes any sense. For now we'll silently fall - * back to buffered I/O - */ - if (gfs2_is_stuffed(ip)) - return 0; - - if (offset >= i_size_read(&ip->i_inode)) - return 0; - return 1; -} - - - -static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct address_space *mapping = inode->i_mapping; - struct gfs2_inode *ip = GFS2_I(inode); - loff_t offset = iocb->ki_pos; - struct gfs2_holder gh; - int rv; - - /* - * Deferred lock, even if its a write, since we do no allocation - * on this path. All we need change is atime, and this lock mode - * ensures that other nodes have flushed their buffered read caches - * (i.e. their page cache entries for this inode). We do not, - * unfortunately have the option of only flushing a range like - * the VFS does. - */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - rv = gfs2_glock_nq(&gh); - if (rv) - goto out_uninit; - rv = gfs2_ok_for_dio(ip, offset); - if (rv != 1) - goto out; /* dio not valid, fall back to buffered i/o */ - - /* - * Now since we are holding a deferred (CW) lock at this point, you - * might be wondering why this is ever needed. There is a case however - * where we've granted a deferred local lock against a cached exclusive - * glock. That is ok provided all granted local locks are deferred, but - * it also means that it is possible to encounter pages which are - * cached and possibly also mapped. So here we check for that and sort - * them out ahead of the dio. The glock state machine will take care of - * everything else. - * - * If in fact the cached glock state (gl->gl_state) is deferred (CW) in - * the first place, mapping->nr_pages will always be zero. - */ - if (mapping->nrpages) { - loff_t lstart = offset & ~(PAGE_SIZE - 1); - loff_t len = iov_iter_count(iter); - loff_t end = PAGE_ALIGN(offset + len) - 1; - - rv = 0; - if (len == 0) - goto out; - if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); - rv = filemap_write_and_wait_range(mapping, lstart, end); - if (rv) - goto out; - if (iov_iter_rw(iter) == WRITE) - truncate_inode_pages_range(mapping, lstart, end); - } - - rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - gfs2_get_block_direct, NULL, NULL, 0); -out: - gfs2_glock_dq(&gh); -out_uninit: - gfs2_holder_uninit(&gh); - return rv; -} - -/** * gfs2_releasepage - free the metadata associated with a page * @page: the page that's being released * @gfp_mask: passed from Linux VFS, ignored by us @@ -1187,12 +890,10 @@ static const struct address_space_operations gfs2_writeback_aops = { .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -1203,13 +904,11 @@ static const struct address_space_operations gfs2_ordered_aops = { .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .set_page_dirty = __set_page_dirty_buffers, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, - .direct_IO = gfs2_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, @@ -1220,8 +919,6 @@ static const struct address_space_operations gfs2_jdata_aops = { .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, - .write_begin = gfs2_write_begin, - .write_end = gfs2_write_end, .set_page_dirty = jdata_set_page_dirty, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h new file mode 100644 index 000000000000..fa8e5d0144dd --- /dev/null +++ b/fs/gfs2/aops.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + */ + +#ifndef __AOPS_DOT_H__ +#define __AOPS_DOT_H__ + +#include "incore.h" + +extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); +extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned copied, + struct page *page); +extern void adjust_fs_space(struct inode *inode); +extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int len); + +#endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index ed6699705c13..03128ed1f34e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -28,6 +28,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "aops.h" #include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k @@ -41,6 +42,8 @@ struct metapath { int mp_aheight; /* actual height (lookup height) */ }; +static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); + /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -389,7 +392,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) return mp->mp_aheight - x - 1; } -static inline void release_metapath(struct metapath *mp) +static void release_metapath(struct metapath *mp) { int i; @@ -397,27 +400,23 @@ static inline void release_metapath(struct metapath *mp) if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); + mp->mp_bh[i] = NULL; } } /** * gfs2_extent_length - Returns length of an extent of blocks - * @start: Start of the buffer - * @len: Length of the buffer in bytes - * @ptr: Current position in the buffer - * @limit: Max extent length to return (0 = unlimited) + * @bh: The metadata block + * @ptr: Current position in @bh + * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * - * If the first block is zero (unallocated) it will return the number of - * unallocated blocks in the extent, otherwise it will return the number - * of contiguous blocks in the extent. - * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) { - const __be64 *end = (start + len); + const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; u64 d = be64_to_cpu(*ptr); @@ -426,14 +425,11 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b ptr++; if (ptr >= end) break; - if (limit && --limit == 0) - break; - if (d) - d++; + d++; } while(be64_to_cpu(*ptr) == d); if (ptr >= end) *eob = 1; - return (ptr - first); + return ptr - first; } typedef const __be64 *(*gfs2_metadata_walker)( @@ -609,11 +605,13 @@ enum alloc_state { * ii) Indirect blocks to fill in lower part of the metadata tree * iii) Data blocks * - * The function is in two parts. The first part works out the total - * number of blocks which we need. The second part does the actual - * allocation asking for an extent at a time (if enough contiguous free - * blocks are available, there will only be one request per bmap call) - * and uses the state machine to initialise the blocks in order. + * This function is called after gfs2_iomap_get, which works out the + * total number of blocks which we need via gfs2_alloc_size. + * + * We then do the actual allocation asking for an extent at a time (if + * enough contiguous free blocks are available, there will only be one + * allocation request per call) and uses the state machine to initialise + * the blocks in order. * * Right now, this function will allocate at most one indirect block * worth of data -- with a default block size of 4K, that's slightly @@ -633,39 +631,26 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, struct buffer_head *dibh = mp->mp_bh[0]; u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; - unsigned dblks = 0; - unsigned ptrs_per_blk; + size_t dblks = iomap->length >> inode->i_blkbits; const unsigned end_of_metadata = mp->mp_fheight - 1; int ret; enum alloc_state state; __be64 *ptr; __be64 zero_bn = 0; - size_t maxlen = iomap->length >> inode->i_blkbits; BUG_ON(mp->mp_aheight < 1); BUG_ON(dibh == NULL); + BUG_ON(dblks < 1); gfs2_trans_add_meta(ip->i_gl, dibh); down_write(&ip->i_rw_mutex); if (mp->mp_fheight == mp->mp_aheight) { - struct buffer_head *bh; - int eob; - - /* Bottom indirect block exists, find unalloced extent size */ - ptr = metapointer(end_of_metadata, mp); - bh = mp->mp_bh[end_of_metadata]; - dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, - maxlen, &eob); - BUG_ON(dblks < 1); + /* Bottom indirect block exists */ state = ALLOC_DATA; } else { /* Need to allocate indirect blocks */ - ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs : - sdp->sd_diptrs; - dblks = min(maxlen, (size_t)(ptrs_per_blk - - mp->mp_list[end_of_metadata])); if (mp->mp_fheight == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = mp->mp_fheight - mp->mp_aheight; @@ -750,6 +735,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, } } while (iomap->addr == IOMAP_NULL_ADDR); + iomap->type = IOMAP_MAPPED; iomap->length = (u64)dblks << inode->i_blkbits; ip->i_height = mp->mp_fheight; gfs2_add_inode_blocks(&ip->i_inode, alloced); @@ -759,18 +745,51 @@ out: return ret; } -static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) +#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + +/** + * gfs2_alloc_size - Compute the maximum allocation size + * @inode: The inode + * @mp: The metapath + * @size: Requested size in blocks + * + * Compute the maximum size of the next allocation at @mp. + * + * Returns: size in blocks + */ +static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + const __be64 *first, *ptr, *end; - iomap->addr = (ip->i_no_addr << inode->i_blkbits) + - sizeof(struct gfs2_dinode); - iomap->offset = 0; - iomap->length = i_size_read(inode); - iomap->type = IOMAP_INLINE; -} + /* + * For writes to stuffed files, this function is called twice via + * gfs2_iomap_get, before and after unstuffing. The size we return the + * first time needs to be large enough to get the reservation and + * allocation sizes right. The size we return the second time must + * be exact or else gfs2_iomap_alloc won't do the right thing. + */ -#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { + unsigned int maxsize = mp->mp_fheight > 1 ? + sdp->sd_inptrs : sdp->sd_diptrs; + maxsize -= mp->mp_list[mp->mp_fheight - 1]; + if (size > maxsize) + size = maxsize; + return size; + } + + first = metapointer(ip->i_height - 1, mp); + end = metaend(ip->i_height - 1, mp); + if (end - first > size) + end = first + size; + for (ptr = first; ptr < end; ptr++) { + if (*ptr) + break; + } + return ptr - first; +} /** * gfs2_iomap_get - Map blocks from an inode to disk blocks @@ -789,37 +808,63 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t size = i_size_read(inode); __be64 *ptr; sector_t lblock; sector_t lblock_stop; int ret; int eob; u64 len; - struct buffer_head *bh; + struct buffer_head *dibh = NULL, *bh; u8 height; if (!length) return -EINVAL; + down_read(&ip->i_rw_mutex); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) + goto unlock; + iomap->private = dibh; + if (gfs2_is_stuffed(ip)) { - if (flags & IOMAP_REPORT) { - if (pos >= i_size_read(inode)) - return -ENOENT; - gfs2_stuffed_iomap(inode, iomap); - return 0; + if (flags & IOMAP_WRITE) { + loff_t max_size = gfs2_max_stuffed_size(ip); + + if (pos + length > max_size) + goto unstuff; + iomap->length = max_size; + } else { + if (pos >= size) { + if (flags & IOMAP_REPORT) { + ret = -ENOENT; + goto unlock; + } else { + /* report a hole */ + iomap->offset = pos; + iomap->length = length; + goto do_alloc; + } + } + iomap->length = size; } - BUG_ON(!(flags & IOMAP_WRITE)); + iomap->addr = (ip->i_no_addr << inode->i_blkbits) + + sizeof(struct gfs2_dinode); + iomap->type = IOMAP_INLINE; + iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); + goto out; } + +unstuff: lblock = pos >> inode->i_blkbits; iomap->offset = lblock << inode->i_blkbits; lblock_stop = (pos + length - 1) >> inode->i_blkbits; len = lblock_stop - lblock + 1; + iomap->length = len << inode->i_blkbits; - down_read(&ip->i_rw_mutex); - - ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]); - if (ret) - goto unlock; + get_bh(dibh); + mp->mp_bh[0] = dibh; height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) @@ -840,12 +885,12 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, len, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_MAPPED; - iomap->flags = IOMAP_F_MERGED; + iomap->flags |= IOMAP_F_MERGED; if (eob) iomap->flags |= IOMAP_F_GFS2_BOUNDARY; @@ -853,25 +898,185 @@ out: iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); + if (ret && dibh) + brelse(dibh); return ret; do_alloc: iomap->addr = IOMAP_NULL_ADDR; - iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_HOLE; - iomap->flags = 0; if (flags & IOMAP_REPORT) { - loff_t size = i_size_read(inode); if (pos >= size) ret = -ENOENT; else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else iomap->length = size - pos; + } else if (flags & IOMAP_WRITE) { + u64 alloc_size; + + if (flags & IOMAP_DIRECT) + goto out; /* (see gfs2_file_direct_write) */ + + len = gfs2_alloc_size(inode, mp, len); + alloc_size = len << inode->i_blkbits; + if (alloc_size < iomap->length) + iomap->length = alloc_size; + } else { + if (pos < size && height == ip->i_height) + ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } goto out; } +static int gfs2_write_lock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (error) + goto out_uninit; + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, &m_ip->i_gh); + if (error) + goto out_unlock; + } + return 0; + +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + +static void gfs2_write_unlock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + gfs2_glock_dq_uninit(&m_ip->i_gh); + } + gfs2_glock_dq_uninit(&ip->i_gh); +} + +static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, + unsigned copied, struct page *page, + struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); +} + +static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, + loff_t length, unsigned flags, + struct iomap *iomap) +{ + struct metapath mp = { .mp_aheight = 1, }; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + bool unstuff, alloc_required; + int ret; + + ret = gfs2_write_lock(inode); + if (ret) + return ret; + + unstuff = gfs2_is_stuffed(ip) && + pos + length > gfs2_max_stuffed_size(ip); + + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + if (ret) + goto out_release; + + alloc_required = unstuff || iomap->type == IOMAP_HOLE; + + if (alloc_required || gfs2_is_jdata(ip)) + gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, + &ind_blocks); + + if (alloc_required) { + struct gfs2_alloc_parms ap = { + .target = data_blocks + ind_blocks + }; + + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_release; + + ret = gfs2_inplace_reserve(ip, &ap); + if (ret) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + if (inode == sdp->sd_rindex) + rblocks += 2 * RES_STATFS; + if (alloc_required) + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); + + ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); + if (ret) + goto out_trans_fail; + + if (unstuff) { + ret = gfs2_unstuff_dinode(ip, NULL); + if (ret) + goto out_trans_end; + release_metapath(&mp); + brelse(iomap->private); + iomap->private = NULL; + ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, + flags, iomap, &mp); + if (ret) + goto out_trans_end; + } + + if (iomap->type == IOMAP_HOLE) { + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + if (ret) { + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + punch_hole(ip, iomap->offset, iomap->length); + goto out_qunlock; + } + } + release_metapath(&mp); + if (gfs2_is_jdata(ip)) + iomap->page_done = gfs2_iomap_journaled_page_done; + return 0; + +out_trans_end: + gfs2_trans_end(sdp); +out_trans_fail: + if (alloc_required) + gfs2_inplace_release(ip); +out_qunlock: + if (alloc_required) + gfs2_quota_unlock(ip); +out_release: + if (iomap->private) + brelse(iomap->private); + release_metapath(&mp); + gfs2_write_unlock(inode); + return ret; +} + static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap) { @@ -879,22 +1084,79 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, struct metapath mp = { .mp_aheight = 1, }; int ret; + iomap->flags |= IOMAP_F_BUFFER_HEAD; + trace_gfs2_iomap_start(ip, pos, length, flags); - if (flags & IOMAP_WRITE) { - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); - release_metapath(&mp); + if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); release_metapath(&mp); + /* + * Silently fall back to buffered I/O for stuffed files or if + * we've hot a hole (see gfs2_file_direct_write). + */ + if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && + iomap->type != IOMAP_MAPPED) + ret = -ENOTBLK; } trace_gfs2_iomap_end(ip, iomap, ret); return ret; } +static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_trans *tr = current->journal_info; + struct buffer_head *dibh = iomap->private; + + if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) + goto out; + + if (iomap->type != IOMAP_INLINE) { + gfs2_ordered_add_inode(ip); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + } + + if (inode == sdp->sd_rindex) { + adjust_fs_space(inode); + sdp->sd_rindex_uptodate = 0; + } + + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. */ + loff_t blockmask = i_blocksize(inode) - 1; + loff_t end = (pos + length) & ~blockmask; + + pos = (pos + written + blockmask) & ~blockmask; + if (pos < end) { + truncate_pagecache_range(inode, pos, end - 1); + punch_hole(ip, pos, end - pos); + } + } + + if (ip->i_qadata && ip->i_qadata->qa_qd_num) + gfs2_quota_unlock(ip); + gfs2_write_unlock(inode); + +out: + if (dibh) + brelse(dibh); + return 0; +} + const struct iomap_ops gfs2_iomap_ops = { .iomap_begin = gfs2_iomap_begin, + .iomap_end = gfs2_iomap_end, }; /** @@ -941,12 +1203,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, } else { ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); release_metapath(&mp); - - /* Return unmapped buffer beyond the end of file. */ - if (ret == -ENOENT) { - ret = 0; - goto out; - } } if (ret) goto out; @@ -2060,7 +2316,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; - if (lblock_stop > end_of_file) + if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) return 1; size = (lblock_stop - lblock) << shift; @@ -2154,11 +2410,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) if (error) goto out; } else { - unsigned int start_off, end_off, blocksize; + unsigned int start_off, end_len, blocksize; blocksize = i_blocksize(inode); start_off = offset & (blocksize - 1); - end_off = (offset + length) & (blocksize - 1); + end_len = (offset + length) & (blocksize - 1); if (start_off) { unsigned int len = length; if (length > blocksize - start_off) @@ -2167,11 +2423,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) if (error) goto out; if (start_off + length < blocksize) - end_off = 0; + end_len = 0; } - if (end_off) { + if (end_len) { error = gfs2_block_zero_range(inode, - offset + length - end_off, end_off); + offset + length - end_len, end_len); if (error) goto out; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index d97ad89955d1..e37002560c11 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1011,7 +1011,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) u64 bn, leaf_no; __be64 *lp; u32 index; - int x, moved = 0; + int x; int error; index = name->hash >> (32 - dip->i_depth); @@ -1113,8 +1113,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) if (!prev) prev = dent; - - moved = 1; } else { prev = dent; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7137db7b0119..08369c6cd127 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -26,10 +26,12 @@ #include <linux/dlm.h> #include <linux/dlm_plock.h> #include <linux/delay.h> +#include <linux/backing-dev.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" +#include "aops.h" #include "dir.h" #include "glock.h" #include "glops.h" @@ -387,7 +389,7 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); @@ -688,12 +690,83 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } +static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + size_t count = iov_iter_count(to); + struct gfs2_holder gh; + ssize_t ret; + + if (!count) + return 0; /* skip atime */ + + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); + + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + size_t len = iov_iter_count(from); + loff_t offset = iocb->ki_pos; + struct gfs2_holder gh; + ssize_t ret; + + /* + * Deferred lock, even if its a write, since we do no allocation on + * this path. All we need to change is the atime, and this lock mode + * ensures that other nodes have flushed their buffered read caches + * (i.e. their page cache entries for this inode). We do not, + * unfortunately, have the option of only flushing a range like the + * VFS does. + */ + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + /* Silently fall back to buffered I/O when writing beyond EOF */ + if (offset + len > i_size_read(&ip->i_inode)) + goto out; + + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); + +out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); + return ret; +} + +static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = gfs2_file_direct_read(iocb, to); + if (likely(ret != -ENOTBLK)) + return ret; + iocb->ki_flags &= ~IOCB_DIRECT; + } + return generic_file_read_iter(iocb, to); +} + /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context - * @iov: The data to write - * @nr_segs: Number of @iov segments - * @pos: The file position + * @from: The data to write * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong @@ -705,8 +778,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct gfs2_inode *ip = GFS2_I(file_inode(file)); - int ret; + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); + ssize_t written = 0, ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -723,7 +797,71 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_glock_dq_uninit(&gh); } - return generic_file_write_iter(iocb, from); + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = inode_to_bdi(inode); + + ret = file_remove_privs(file); + if (ret) + goto out2; + + ret = file_update_time(file); + if (ret) + goto out2; + + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; + loff_t pos, endbyte; + ssize_t buffered; + + written = gfs2_file_direct_write(iocb, from); + if (written < 0 || !iov_iter_count(from)) + goto out2; + + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (unlikely(ret < 0)) + goto out2; + buffered = ret; + + /* + * We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT + * semantics. + */ + pos = iocb->ki_pos; + endbyte = pos + buffered - 1; + ret = filemap_write_and_wait_range(mapping, pos, endbyte); + if (!ret) { + iocb->ki_pos += buffered; + written += buffered; + invalidate_mapping_pages(mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + } else { + /* + * We don't know how much we wrote, so just return + * the number of bytes which were direct-written + */ + } + } else { + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + if (likely(ret > 0)) + iocb->ki_pos += ret; + } + +out2: + current->backing_dev_info = NULL; +out: + inode_unlock(inode); + if (likely(ret > 0)) { + /* Handle various SYNC-type writes */ + ret = generic_write_sync(iocb, ret); + } + return written ? written : ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -733,7 +871,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, struct gfs2_inode *ip = GFS2_I(inode); loff_t end = offset + len; struct buffer_head *dibh; - struct iomap iomap = { }; int error; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -749,12 +886,14 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, } while (offset < end) { + struct iomap iomap = { }; + error = gfs2_iomap_get_alloc(inode, offset, end - offset, &iomap); if (error) goto out; offset = iomap.offset + iomap.length; - if (iomap.type != IOMAP_HOLE) + if (!(iomap.flags & IOMAP_F_NEW)) continue; error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits, iomap.length >> inode->i_blkbits, @@ -1125,7 +1264,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, @@ -1155,7 +1294,7 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read_iter = generic_file_read_iter, + .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d2ad817e089f..b96d39c28e17 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -65,6 +65,27 @@ struct gfs2_log_operations { #define GBF_FULL 1 +/** + * Clone bitmaps (bi_clone): + * + * - When a block is freed, we remember the previous state of the block in the + * clone bitmap, and only mark the block as free in the real bitmap. + * + * - When looking for a block to allocate, we check for a free block in the + * clone bitmap, and if no clone bitmap exists, in the real bitmap. + * + * - For allocating a block, we mark it as allocated in the real bitmap, and if + * a clone bitmap exists, also in the clone bitmap. + * + * - At the end of a log_flush, we copy the real bitmap into the clone bitmap + * to make the clone bitmap reflect the current allocation state. + * (Alternatively, we could remove the clone bitmap.) + * + * The clone bitmaps are in-core only, and is never written to disk. + * + * These steps ensure that blocks which have been freed in a transaction cannot + * be reallocated in that same transaction. + */ struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; @@ -295,7 +316,6 @@ struct gfs2_blkreserv { struct rb_node rs_node; /* link to other block reservations */ struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ - u64 rs_inum; /* Inode number for reservation */ }; /* @@ -398,7 +418,6 @@ struct gfs2_inode { struct gfs2_holder i_gh; /* for prepare/commit_write only */ struct gfs2_qadata *i_qadata; /* quota allocation data */ struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */ - struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; struct list_head i_ordered; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index feda55f67050..648f0ca1ad57 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -580,7 +580,7 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct file *file, umode_t mode, dev_t dev, const char *symname, - unsigned int size, int excl, int *opened) + unsigned int size, int excl) { const struct qstr *name = &dentry->d_name; struct posix_acl *default_acl, *acl; @@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = 0; if (file) { if (S_ISREG(inode->i_mode)) - error = finish_open(file, dentry, gfs2_open_common, opened); + error = finish_open(file, dentry, gfs2_open_common); else error = finish_no_open(file, NULL); } @@ -767,8 +767,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); d_instantiate(dentry, inode); if (file) { - *opened |= FILE_CREATED; - error = finish_open(file, dentry, gfs2_open_common, opened); + file->f_mode |= FMODE_CREATED; + error = finish_open(file, dentry, gfs2_open_common); } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); @@ -822,7 +822,7 @@ fail: static int gfs2_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl); } /** @@ -830,14 +830,13 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, * @dir: The directory inode * @dentry: The dentry of the new inode * @file: File to be opened - * @opened: atomic_open flags * * * Returns: errno */ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, - struct file *file, int *opened) + struct file *file) { struct inode *inode; struct dentry *d; @@ -866,7 +865,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, return d; } if (file && S_ISREG(inode->i_mode)) - error = finish_open(file, dentry, gfs2_open_common, opened); + error = finish_open(file, dentry, gfs2_open_common); gfs2_glock_dq_uninit(&gh); if (error) { @@ -879,7 +878,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { - return __gfs2_lookup(dir, dentry, NULL, NULL); + return __gfs2_lookup(dir, dentry, NULL); } /** @@ -1189,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, if (size >= gfs2_max_stuffed_size(GFS2_I(dir))) return -ENAMETOOLONG; - return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); } /** @@ -1204,7 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir)); - return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0); } /** @@ -1219,7 +1218,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0); } /** @@ -1229,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, * @file: The proposed new struct file * @flags: open flags * @mode: File mode - * @opened: Flag to say whether the file has been opened or not * * Returns: error code or 0 for success */ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { struct dentry *d; bool excl = !!(flags & O_EXCL); @@ -1244,13 +1242,13 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!d_in_lookup(dentry)) goto skip_lookup; - d = __gfs2_lookup(dir, dentry, file, opened); + d = __gfs2_lookup(dir, dentry, file); if (IS_ERR(d)) return PTR_ERR(d); if (d != NULL) dentry = d; if (d_really_is_positive(dentry)) { - if (!(*opened & FILE_OPENED)) + if (!(file->f_mode & FMODE_OPENED)) return finish_no_open(file, d); dput(d); return 0; @@ -1262,7 +1260,7 @@ skip_lookup: if (!(flags & O_CREAT)) return -ENOENT; - return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened); + return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl); } /* diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 006c6164f759..ac7caa267ed6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -821,6 +821,13 @@ restart: goto fail; } + /** + * If we're a spectator, we don't want to take the lock in EX because + * we cannot do the first-mount responsibility it implies: recovery. + */ + if (sdp->sd_args.ar_spectator) + goto locks_done; + error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); if (!error) { mounted_mode = DLM_LOCK_EX; @@ -896,9 +903,16 @@ locks_done: if (lvb_gen < mount_gen) { /* wait for mounted nodes to update control_lock lvb to our generation, which might include new recovery bits set */ - fs_info(sdp, "control_mount wait1 block %u start %u mount %u " - "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, - lvb_gen, ls->ls_recover_flags); + if (sdp->sd_args.ar_spectator) { + fs_info(sdp, "Recovery is required. Waiting for a " + "non-spectator to mount.\n"); + msleep_interruptible(1000); + } else { + fs_info(sdp, "control_mount wait1 block %u start %u " + "mount %u lvb %u flags %lx\n", block_gen, + start_gen, mount_gen, lvb_gen, + ls->ls_recover_flags); + } spin_unlock(&ls->ls_recover_spin); goto restart; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0248835625f1..ee20ea42e7b5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -92,7 +92,8 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, - struct gfs2_trans *tr) + struct gfs2_trans *tr, + bool *withdraw) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -107,8 +108,10 @@ __acquires(&sdp->sd_ail_lock) gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { gfs2_io_error_bh(sdp, bh); + *withdraw = true; + } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); continue; } @@ -148,6 +151,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; + bool withdraw = false; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); @@ -156,11 +160,13 @@ restart: list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, tr)) + if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw)) goto restart; } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); + if (withdraw) + gfs2_lm_withdraw(sdp, NULL); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -188,7 +194,8 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) * */ -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + bool *withdraw) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -199,11 +206,12 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_assert(sdp, bd->bd_tr == tr); if (buffer_busy(bh)) continue; - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { gfs2_io_error_bh(sdp, bh); + *withdraw = true; + } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } - } /** @@ -218,10 +226,11 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) struct gfs2_trans *tr, *s; int oldest_tr = 1; int ret; + bool withdraw = false; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { - gfs2_ail1_empty_one(sdp, tr); + gfs2_ail1_empty_one(sdp, tr, &withdraw); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else @@ -230,6 +239,9 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); + if (withdraw) + gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); + return ret; } @@ -689,7 +701,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, hash = ~crc32(~0, lh, LH_V1_SIZE); lh->lh_hash = cpu_to_be32(hash); - tv = current_kernel_time64(); + ktime_get_coarse_real_ts64(&tv); lh->lh_nsec = cpu_to_be32(tv.tv_nsec); lh->lh_sec = cpu_to_be64(tv.tv_sec); addr = gfs2_log_bmap(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 4d6567990baf..f2567f958d00 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (test_set_buffer_pinned(bh)) gfs2_assert_withdraw(sdp, 0); if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); bd = bh->b_private; /* If this buffer is in the AIL and it has already been written * to in-place disk block, remove it from the AIL. diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 52de1036d9f9..be9c0bf697fe 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -293,7 +293,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); brelse(bh); *bhp = NULL; return -EIO; @@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh(sdp, bh); + gfs2_io_error_bh_wd(sdp, bh); return -EIO; } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index d8b622c375ab..0f501f938d1c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -413,12 +413,13 @@ void gfs2_recover_func(struct work_struct *work) ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep; int ro = 0; unsigned int pass; - int error; + int error = 0; int jlocked = 0; t_start = ktime_get(); - if (sdp->sd_args.ar_spectator || - (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { + if (sdp->sd_args.ar_spectator) + goto fail; + if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid); jlocked = 1; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 33abcf29bc05..1ad3256b9cbc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -123,17 +123,26 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, /** * gfs2_testbit - test a bit in the bitmaps * @rbm: The bit to test + * @use_clone: If true, test the clone bitmap, not the official bitmap. + * + * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps, + * not the "real" bitmaps, to avoid allocating recently freed blocks. * * Returns: The two bit block state of the requested bit */ -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) { struct gfs2_bitmap *bi = rbm_bi(rbm); - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; + const u8 *buffer; const u8 *byte; unsigned int bit; + if (use_clone && bi->bi_clone) + buffer = bi->bi_clone; + else + buffer = bi->bi_bh->b_data; + buffer += bi->bi_offset; byte = buffer + (rbm->offset / GFS2_NBBY); bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; @@ -322,7 +331,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le u8 res; for (n = 0; n < n_unaligned; n++) { - res = gfs2_testbit(rbm); + res = gfs2_testbit(rbm, true); if (res != GFS2_BLKST_FREE) return true; (*len)--; @@ -607,8 +616,10 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { + struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", - (unsigned long long)rs->rs_inum, + (unsigned long long)ip->i_no_addr, (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -1051,6 +1062,18 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) /* rd_data0, rd_data and rd_bitbytes already set from rindex */ } +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd); @@ -1073,6 +1096,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) str->rg_crc = cpu_to_be32(crc); memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf); } static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) @@ -1087,25 +1111,6 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) return 1; } -static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) -{ - const struct gfs2_rgrp *str = buf; - - rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); - rgl->rl_flags = str->rg_flags; - rgl->rl_free = str->rg_free; - rgl->rl_dinodes = str->rg_dinodes; - rgl->rl_igeneration = str->rg_igeneration; - rgl->__pad = 0UL; -} - -static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) -{ - struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; - u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; - rgl->rl_unlinked = cpu_to_be32(unlinked); -} - static u32 count_unlinked(struct gfs2_rgrpd *rgd) { struct gfs2_bitmap *bi; @@ -1424,7 +1429,6 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -1488,6 +1492,34 @@ static void rs_insert(struct gfs2_inode *ip) } /** + * rgd_free - return the number of free blocks we can allocate. + * @rgd: the resource group + * + * This function returns the number of free blocks for an rgrp. + * That's the clone-free blocks (blocks that are free, not including those + * still being used for unlinked files that haven't been deleted.) + * + * It also subtracts any blocks reserved by someone else, but does not + * include free blocks that are still part of our current reservation, + * because obviously we can (and will) allocate them. + */ +static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) +{ + u32 tot_reserved, tot_free; + + if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + return 0; + tot_reserved = rgd->rd_reserved - rs->rs_free; + + if (rgd->rd_free_clone < tot_reserved) + tot_reserved = 0; + + tot_free = rgd->rd_free_clone - tot_reserved; + + return tot_free; +} + +/** * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks @@ -1502,7 +1534,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + u32 free_blocks = rgd_free(rgd, rs); int ret; struct inode *inode = &ip->i_inode; @@ -1528,7 +1560,6 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (ret == 0) { rs->rs_rbm = rbm; rs->rs_free = extlen; - rs->rs_inum = ip->i_no_addr; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1686,7 +1717,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if (test_bit(GBF_FULL, &bi->bi_flags) && + if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && + test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; @@ -1983,7 +2015,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; - u32 skip = 0; + u32 free_blocks, skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -1991,8 +2023,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; - } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rbm.rgd = begin = ip->i_rgd; + } else if (rs->rs_rbm.rgd && + rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { + begin = rs->rs_rbm.rgd; } else { check_and_update_goal(ip); rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); @@ -2053,11 +2086,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target || + free_blocks = rgd_free(rs->rs_rbm.rgd, rs); + if (free_blocks >= ap->target || (loops == 2 && ap->min_target && - rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) { - ip->i_rgd = rs->rs_rbm.rgd; - ap->allowed = ip->i_rgd->rd_free_clone; + free_blocks >= ap->min_target)) { + ap->allowed = free_blocks; return 0; } check_rgrp: @@ -2116,26 +2149,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip) } /** - * gfs2_get_block_type - Check a block in a RG is of given type - * @rgd: the resource group holding the block - * @block: the block number - * - * Returns: The block type (GFS2_BLKST_*) - */ - -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) -{ - struct gfs2_rbm rbm = { .rgd = rgd, }; - int ret; - - ret = gfs2_rbm_from_block(&rbm, block); - WARN_ON_ONCE(ret != 0); - - return gfs2_testbit(&rbm); -} - - -/** * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode @@ -2159,7 +2172,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) break; gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); @@ -2335,7 +2348,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2393,7 +2406,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2434,7 +2446,6 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -2471,8 +2482,7 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - update_rgrp_lvb_unlinked(rgd, 1); + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) @@ -2492,8 +2502,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - update_rgrp_lvb_unlinked(rgd, -1); + be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); @@ -2516,6 +2525,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder rgd_gh; + struct gfs2_rbm rbm; int error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); @@ -2526,7 +2536,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) if (error) goto fail; - if (gfs2_get_block_type(rgd, no_addr) != type) + rbm.rgd = rgd; + error = gfs2_rbm_from_block(&rbm, no_addr); + WARN_ON_ONCE(error != 0); + + if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; gfs2_glock_dq_uninit(&rgd_gh); @@ -2558,19 +2572,34 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) return; - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) - rgd = ip->i_rgd; - else + /* + * The resource group last accessed is kept in the last position. + */ + + if (rlist->rl_rgrps) { + rgd = rlist->rl_rgd[rlist->rl_rgrps - 1]; + if (rgrp_contains_block(rgd, block)) + return; rgd = gfs2_blk2rgrpd(sdp, block, 1); + } else { + rgd = ip->i_res.rs_rbm.rgd; + if (!rgd || !rgrp_contains_block(rgd, block)) + rgd = gfs2_blk2rgrpd(sdp, block, 1); + } + if (!rgd) { - fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); + fs_err(sdp, "rlist_add: no rgrp for block %llu\n", + (unsigned long long)block); return; } - ip->i_rgd = rgd; - for (x = 0; x < rlist->rl_rgrps; x++) - if (rlist->rl_rgd[x] == rgd) + for (x = 0; x < rlist->rl_rgrps; x++) { + if (rlist->rl_rgd[x] == rgd) { + swap(rlist->rl_rgd[x], + rlist->rl_rgd[rlist->rl_rgrps - 1]); return; + } + } if (rlist->rl_rgrps == rlist->rl_space) { new_space = rlist->rl_space + 10; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index af0d5b01cf0b..c212893534ed 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1729,7 +1729,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) if (ip) { ip->i_flags = 0; ip->i_gl = NULL; - ip->i_rgd = NULL; memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_rahead = 0; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c191fa58a1df..1787d295834e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -429,11 +429,18 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; - if (sdp->sd_jdesc->jd_jid == jid) + /** + * If we're a spectator, we use journal0, but it's not really ours. + * So we need to wait for its recovery too. If we skip it we'd never + * queue work to the recovery workqueue, and so its completion would + * never clear the DFL_BLOCK_LOCKS flag, so all our locks would + * permanently stop working. + */ + if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator) goto out; rv = -ENOENT; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_jid != jid) + if (jd->jd_jid != jid && !sdp->sd_args.ar_spectator) continue; rv = gfs2_recover_journal(jd, false); break; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index cb10b95efe0f..e0025258107a 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -606,7 +606,8 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; - __entry->inum = rs->rs_inum; + __entry->inum = container_of(rs, struct gfs2_inode, + i_res)->i_no_addr; __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 1e6e7da25a17..ad70087d0597 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -30,9 +30,11 @@ struct gfs2_glock; * block, or all of the blocks in the rg, whichever is smaller */ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - if (requested < ip->i_rgd->rd_length) + struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd; + + if (requested < rgd->rd_length) return requested + 1; - return ip->i_rgd->rd_length; + return rgd->rd_length; } extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 763d659db91b..59c811de0dc7 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -46,14 +46,16 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return 0; - va_start(args, fmt); + if (fmt) { + va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; + vaf.fmt = fmt; + vaf.va = &args; - fs_err(sdp, "%pV", &vaf); + fs_err(sdp, "%pV", &vaf); - va_end(args); + va_end(args); + } if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { fs_err(sdp, "about to withdraw this file system\n"); @@ -246,21 +248,21 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, } /** - * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn + * gfs2_io_error_bh_i - Flag a buffer I/O error + * @withdraw: withdraw the filesystem */ -int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line) +void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, unsigned int line, + bool withdraw) { - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: I/O error\n" - " block = %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, - function, file, line); - return rv; + fs_err(sdp, + "fatal: I/O error\n" + " block = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, + function, file, line); + if (withdraw) + gfs2_lm_withdraw(sdp, NULL); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 3926f95a6eb7..96ac4aba4738 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -136,11 +136,15 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); -int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line); +void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, unsigned int line, + bool withdraw); + +#define gfs2_io_error_bh_wd(sdp, bh) \ +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true); #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false); extern struct kmem_cache *gfs2_glock_cachep; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index f2bce1e0f6fb..38515988aaf7 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -343,60 +343,45 @@ struct ea_list { unsigned int ei_size; }; -static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea) -{ - switch (ea->ea_type) { - case GFS2_EATYPE_USR: - return 5 + ea->ea_name_len + 1; - case GFS2_EATYPE_SYS: - return 7 + ea->ea_name_len + 1; - case GFS2_EATYPE_SECURITY: - return 9 + ea->ea_name_len + 1; - default: - return 0; - } -} - static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, void *private) { struct ea_list *ei = private; struct gfs2_ea_request *er = ei->ei_er; - unsigned int ea_size = gfs2_ea_strlen(ea); + unsigned int ea_size; + char *prefix; + unsigned int l; if (ea->ea_type == GFS2_EATYPE_UNUSED) return 0; - if (er->er_data_len) { - char *prefix = NULL; - unsigned int l = 0; - char c = 0; + switch (ea->ea_type) { + case GFS2_EATYPE_USR: + prefix = "user."; + l = 5; + break; + case GFS2_EATYPE_SYS: + prefix = "system."; + l = 7; + break; + case GFS2_EATYPE_SECURITY: + prefix = "security."; + l = 9; + break; + default: + BUG(); + } + ea_size = l + ea->ea_name_len + 1; + if (er->er_data_len) { if (ei->ei_size + ea_size > er->er_data_len) return -ERANGE; - switch (ea->ea_type) { - case GFS2_EATYPE_USR: - prefix = "user."; - l = 5; - break; - case GFS2_EATYPE_SYS: - prefix = "system."; - l = 7; - break; - case GFS2_EATYPE_SECURITY: - prefix = "security."; - l = 9; - break; - } - - BUG_ON(l == 0); - memcpy(er->er_data + ei->ei_size, prefix, l); memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea), ea->ea_name_len); - memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1); + er->er_data[ei->ei_size + ea_size - 1] = 0; } ei->ei_size += ea_size; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2a16111d312f..a2dfa1b2a89c 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, HFS_I(inode)->rsrc_inode = dir; HFS_I(dir)->rsrc_inode = inode; igrab(dir); - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mark_inode_dirty(inode); dont_mount(dentry); out: diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index cb8374af08a6..33b8423ef0c9 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -19,7 +19,7 @@ #define HOSTFS_ATTR_ATIME_SET 128 #define HOSTFS_ATTR_MTIME_SET 256 -/* These two are unused by hostfs. */ +/* This one is unused by hostfs. */ #define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ #define HOSTFS_ATTR_ATTR_FLAG 1024 diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2597b290c2a5..444c7b170359 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -610,33 +610,21 @@ static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, int err; inode = hostfs_iget(ino->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); + if (IS_ERR(inode)) goto out; - } err = -ENOMEM; name = dentry_name(dentry); - if (name == NULL) - goto out_put; - - err = read_name(inode, name); - - __putname(name); - if (err == -ENOENT) { + if (name) { + err = read_name(inode, name); + __putname(name); + } + if (err) { iput(inode); - inode = NULL; + inode = (err == -ENOENT) ? NULL : ERR_PTR(err); } - else if (err) - goto out_put; - - d_add(dentry, inode); - return NULL; - - out_put: - iput(inode); out: - return ERR_PTR(err); + return d_splice_alias(inode, dentry); } static int hostfs_link(struct dentry *to, struct inode *ino, diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index c83ece7facc5..d85230c84ef2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -244,6 +244,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in result = iget_locked(dir->i_sb, ino); if (!result) { hpfs_error(dir->i_sb, "hpfs_lookup: can't get inode"); + result = ERR_PTR(-ENOMEM); goto bail1; } if (result->i_state & I_NEW) { @@ -266,6 +267,8 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) { hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); + iput(result); + result = ERR_PTR(-EINVAL); goto bail1; } @@ -301,29 +304,17 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in } } +bail1: hpfs_brelse4(&qbh); /* * Made it. */ - end: - end_add: +end: +end_add: hpfs_unlock(dir->i_sb); - d_add(dentry, result); - return NULL; - - /* - * Didn't. - */ - bail1: - - hpfs_brelse4(&qbh); - - /*bail:*/ - - hpfs_unlock(dir->i_sb); - return ERR_PTR(-ENOENT); + return d_splice_alias(result, dentry); } const struct file_operations hpfs_dir_ops = diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 2a153aed4c19..ab2e7cc2ff33 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -334,16 +334,23 @@ long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); * local time (HPFS) to GMT (Unix) */ -static inline time_t local_to_gmt(struct super_block *s, time32_t t) +static inline time64_t local_to_gmt(struct super_block *s, time32_t t) { extern struct timezone sys_tz; return t + sys_tz.tz_minuteswest * 60 + hpfs_sb(s)->sb_timeshift; } -static inline time32_t gmt_to_local(struct super_block *s, time_t t) +static inline time32_t gmt_to_local(struct super_block *s, time64_t t) { extern struct timezone sys_tz; - return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; + t = t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; + + return clamp_t(time64_t, t, 0, U32_MAX); +} + +static inline time32_t local_get_seconds(struct super_block *s) +{ + return gmt_to_local(s, ktime_get_real_seconds()); } /* diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index a3615e4c730d..082b7c76dd0c 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -11,7 +11,7 @@ static void hpfs_update_directory_times(struct inode *dir) { - time_t t = get_seconds(); + time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb)); if (t == dir->i_mtime.tv_sec && t == dir->i_ctime.tv_sec) return; @@ -50,7 +50,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) /*dee.archive = 0;*/ dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) goto bail2; @@ -91,7 +91,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) dnode->root_dnode = 1; dnode->up = cpu_to_le32(fno); de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0); - de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + de->creation_date = de->write_date = de->read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); if (!(mode & 0222)) de->read_only = 1; de->first = de->directory = 1; /*de->hidden = de->system = 0;*/ @@ -151,7 +151,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) @@ -238,7 +238,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) @@ -314,7 +314,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy dee.archive = 1; dee.hidden = name[0] == '.'; dee.fnode = cpu_to_le32(fno); - dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds())); + dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); if (!result) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d508c7844681..346a146c7617 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, bool truncate_op = (lend == LLONG_MAX); memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, current->mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pagevec_init(&pvec); next = start; @@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * as input to create an allocation policy. */ memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pseudo_vma.vm_file = file; @@ -1308,10 +1310,6 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static const struct dentry_operations anon_ops = { - .d_dname = simple_dname -}; - /* * Note that size should be aligned to proper hugepage size in caller side, * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. @@ -1320,19 +1318,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags, int page_size_log) { - struct file *file = ERR_PTR(-ENOMEM); struct inode *inode; - struct path path; - struct super_block *sb; - struct qstr quick_string; + struct vfsmount *mnt; int hstate_idx; + struct file *file; hstate_idx = get_hstate_idx(page_size_log); if (hstate_idx < 0) return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount[hstate_idx]) + mnt = hugetlbfs_vfsmount[hstate_idx]; + if (!mnt) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { @@ -1348,45 +1345,28 @@ struct file *hugetlb_file_setup(const char *name, size_t size, } } - sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb; - quick_string.name = name; - quick_string.len = strlen(quick_string.name); - quick_string.hash = 0; - path.dentry = d_alloc_pseudo(sb, &quick_string); - if (!path.dentry) - goto out_shm_unlock; - - d_set_d_op(path.dentry, &anon_ops); - path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); file = ERR_PTR(-ENOSPC); - inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0); + inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) - goto out_dentry; + goto out; if (creat_flags == HUGETLB_SHMFS_INODE) inode->i_flags |= S_PRIVATE; - file = ERR_PTR(-ENOMEM); - if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) - goto out_inode; - - d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); - file = alloc_file(&path, FMODE_WRITE | FMODE_READ, - &hugetlbfs_file_operations); - if (IS_ERR(file)) - goto out_dentry; /* inode is already attached */ - - return file; + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) + file = ERR_PTR(-ENOMEM); + else + file = alloc_file_pseudo(inode, mnt, name, O_RDWR, + &hugetlbfs_file_operations); + if (!IS_ERR(file)) + return file; -out_inode: iput(inode); -out_dentry: - path_put(&path); -out_shm_unlock: +out: if (*user) { user_shm_unlock(size, *user); *user = NULL; diff --git a/fs/inode.c b/fs/inode.c index 8c86c809ca17..a06de4454232 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -804,6 +804,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -831,6 +835,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode) lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); +void discard_new_inode(struct inode *inode) +{ + lockdep_annotate_inode_mutex_key(inode); + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); + iput(inode); +} +EXPORT_SYMBOL(discard_new_inode); + /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * @@ -1029,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; + bool creating = inode->i_state & I_CREATING; again: spin_lock(&inode_hash_lock); @@ -1039,6 +1061,8 @@ again: * Use the old inode instead of the preallocated one. */ spin_unlock(&inode_hash_lock); + if (IS_ERR(old)) + return NULL; wait_on_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); @@ -1060,6 +1084,8 @@ again: inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); + if (!creating) + inode_sb_list_add(inode); unlock: spin_unlock(&inode_hash_lock); @@ -1094,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, struct inode *inode = ilookup5(sb, hashval, test, data); if (!inode) { - struct inode *new = new_inode(sb); + struct inode *new = alloc_inode(sb); if (new) { + new->i_state = 0; inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) - iput(new); + destroy_inode(new); } } return inode; @@ -1128,6 +1155,8 @@ again: inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1165,6 +1194,8 @@ again: */ spin_unlock(&inode_hash_lock); destroy_inode(inode); + if (IS_ERR(old)) + return NULL; inode = old; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1282,7 +1313,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, inode = find_inode(sb, head, test, data); spin_unlock(&inode_hash_lock); - return inode; + return IS_ERR(inode) ? NULL : inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1338,6 +1369,8 @@ again: spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1421,12 +1454,17 @@ int insert_inode_locked(struct inode *inode) } if (likely(!old)) { spin_lock(&inode->i_lock); - inode->i_state |= I_NEW; + inode->i_state |= I_NEW | I_CREATING; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } + if (unlikely(old->i_state & I_CREATING)) { + spin_unlock(&old->i_lock); + spin_unlock(&inode_hash_lock); + return -EBUSY; + } __iget(old); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); @@ -1443,7 +1481,10 @@ EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode *old = inode_insert5(inode, hashval, test, NULL, data); + struct inode *old; + + inode->i_state |= I_CREATING; + old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { iput(old); diff --git a/fs/internal.h b/fs/internal.h index 5645b4ebf494..50a28fc71300 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -43,6 +43,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) extern void guard_bio_eod(int rw, struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); +int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, + struct page *page); /* * char_dev.c @@ -93,7 +95,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *get_empty_filp(void); +extern struct file *alloc_empty_file(int, const struct cred *); /* * super.c @@ -125,8 +127,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); -extern int open_check_o_direct(struct file *f); -extern int vfs_open(const struct path *, struct file *, const struct cred *); +extern int vfs_open(const struct path *, struct file *); /* * inode.c diff --git a/fs/iomap.c b/fs/iomap.c index 77397b5a96ef..009071e73bc0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Red Hat, Inc. - * Copyright (c) 2016 Christoph Hellwig. + * Copyright (c) 2016-2018 Christoph Hellwig. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,7 +17,9 @@ #include <linux/iomap.h> #include <linux/uaccess.h> #include <linux/gfp.h> +#include <linux/migrate.h> #include <linux/mm.h> +#include <linux/mm_inline.h> #include <linux/swap.h> #include <linux/pagemap.h> #include <linux/pagevec.h> @@ -103,6 +105,467 @@ iomap_sector(struct iomap *iomap, loff_t pos) return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } +static struct iomap_page * +iomap_page_create(struct inode *inode, struct page *page) +{ + struct iomap_page *iop = to_iomap_page(page); + + if (iop || i_blocksize(inode) == PAGE_SIZE) + return iop; + + iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL); + atomic_set(&iop->read_count, 0); + atomic_set(&iop->write_count, 0); + bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); + set_page_private(page, (unsigned long)iop); + SetPagePrivate(page); + return iop; +} + +static void +iomap_page_release(struct page *page) +{ + struct iomap_page *iop = to_iomap_page(page); + + if (!iop) + return; + WARN_ON_ONCE(atomic_read(&iop->read_count)); + WARN_ON_ONCE(atomic_read(&iop->write_count)); + ClearPagePrivate(page); + set_page_private(page, 0); + kfree(iop); +} + +/* + * Calculate the range inside the page that we actually need to read. + */ +static void +iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, + loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp) +{ + unsigned block_bits = inode->i_blkbits; + unsigned block_size = (1 << block_bits); + unsigned poff = offset_in_page(*pos); + unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length); + unsigned first = poff >> block_bits; + unsigned last = (poff + plen - 1) >> block_bits; + unsigned end = offset_in_page(i_size_read(inode)) >> block_bits; + + /* + * If the block size is smaller than the page size we need to check the + * per-block uptodate status and adjust the offset and length if needed + * to avoid reading in already uptodate ranges. + */ + if (iop) { + unsigned int i; + + /* move forward for each leading block marked uptodate */ + for (i = first; i <= last; i++) { + if (!test_bit(i, iop->uptodate)) + break; + *pos += block_size; + poff += block_size; + plen -= block_size; + first++; + } + + /* truncate len if we find any trailing uptodate block(s) */ + for ( ; i <= last; i++) { + if (test_bit(i, iop->uptodate)) { + plen -= (last - i + 1) * block_size; + last = i - 1; + break; + } + } + } + + /* + * If the extent spans the block that contains the i_size we need to + * handle both halves separately so that we properly zero data in the + * page cache for blocks that are entirely outside of i_size. + */ + if (first <= end && last > end) + plen -= (last - end) * block_size; + + *offp = poff; + *lenp = plen; +} + +static void +iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len) +{ + struct iomap_page *iop = to_iomap_page(page); + struct inode *inode = page->mapping->host; + unsigned first = off >> inode->i_blkbits; + unsigned last = (off + len - 1) >> inode->i_blkbits; + unsigned int i; + bool uptodate = true; + + if (iop) { + for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) { + if (i >= first && i <= last) + set_bit(i, iop->uptodate); + else if (!test_bit(i, iop->uptodate)) + uptodate = false; + } + } + + if (uptodate && !PageError(page)) + SetPageUptodate(page); +} + +static void +iomap_read_finish(struct iomap_page *iop, struct page *page) +{ + if (!iop || atomic_dec_and_test(&iop->read_count)) + unlock_page(page); +} + +static void +iomap_read_page_end_io(struct bio_vec *bvec, int error) +{ + struct page *page = bvec->bv_page; + struct iomap_page *iop = to_iomap_page(page); + + if (unlikely(error)) { + ClearPageUptodate(page); + SetPageError(page); + } else { + iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); + } + + iomap_read_finish(iop, page); +} + +static void +iomap_read_inline_data(struct inode *inode, struct page *page, + struct iomap *iomap) +{ + size_t size = i_size_read(inode); + void *addr; + + if (PageUptodate(page)) + return; + + BUG_ON(page->index); + BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); + + addr = kmap_atomic(page); + memcpy(addr, iomap->inline_data, size); + memset(addr + size, 0, PAGE_SIZE - size); + kunmap_atomic(addr); + SetPageUptodate(page); +} + +static void +iomap_read_end_io(struct bio *bio) +{ + int error = blk_status_to_errno(bio->bi_status); + struct bio_vec *bvec; + int i; + + bio_for_each_segment_all(bvec, bio, i) + iomap_read_page_end_io(bvec, error); + bio_put(bio); +} + +struct iomap_readpage_ctx { + struct page *cur_page; + bool cur_page_in_bio; + bool is_readahead; + struct bio *bio; + struct list_head *pages; +}; + +static loff_t +iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + struct iomap *iomap) +{ + struct iomap_readpage_ctx *ctx = data; + struct page *page = ctx->cur_page; + struct iomap_page *iop = iomap_page_create(inode, page); + bool is_contig = false; + loff_t orig_pos = pos; + unsigned poff, plen; + sector_t sector; + + if (iomap->type == IOMAP_INLINE) { + WARN_ON_ONCE(poff); + iomap_read_inline_data(inode, page, iomap); + return PAGE_SIZE; + } + + /* zero post-eof blocks as the page may be mapped */ + iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); + if (plen == 0) + goto done; + + if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) { + zero_user(page, poff, plen); + iomap_set_range_uptodate(page, poff, plen); + goto done; + } + + ctx->cur_page_in_bio = true; + + /* + * Try to merge into a previous segment if we can. + */ + sector = iomap_sector(iomap, pos); + if (ctx->bio && bio_end_sector(ctx->bio) == sector) { + if (__bio_try_merge_page(ctx->bio, page, plen, poff)) + goto done; + is_contig = true; + } + + /* + * If we start a new segment we need to increase the read count, and we + * need to do so before submitting any previous full bio to make sure + * that we don't prematurely unlock the page. + */ + if (iop) + atomic_inc(&iop->read_count); + + if (!ctx->bio || !is_contig || bio_full(ctx->bio)) { + gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; + + if (ctx->bio) + submit_bio(ctx->bio); + + if (ctx->is_readahead) /* same as readahead_gfp_mask */ + gfp |= __GFP_NORETRY | __GFP_NOWARN; + ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs)); + ctx->bio->bi_opf = REQ_OP_READ; + if (ctx->is_readahead) + ctx->bio->bi_opf |= REQ_RAHEAD; + ctx->bio->bi_iter.bi_sector = sector; + bio_set_dev(ctx->bio, iomap->bdev); + ctx->bio->bi_end_io = iomap_read_end_io; + } + + __bio_add_page(ctx->bio, page, plen, poff); +done: + /* + * Move the caller beyond our range so that it keeps making progress. + * For that we have to include any leading non-uptodate ranges, but + * we can skip trailing ones as they will be handled in the next + * iteration. + */ + return pos - orig_pos + plen; +} + +int +iomap_readpage(struct page *page, const struct iomap_ops *ops) +{ + struct iomap_readpage_ctx ctx = { .cur_page = page }; + struct inode *inode = page->mapping->host; + unsigned poff; + loff_t ret; + + for (poff = 0; poff < PAGE_SIZE; poff += ret) { + ret = iomap_apply(inode, page_offset(page) + poff, + PAGE_SIZE - poff, 0, ops, &ctx, + iomap_readpage_actor); + if (ret <= 0) { + WARN_ON_ONCE(ret == 0); + SetPageError(page); + break; + } + } + + if (ctx.bio) { + submit_bio(ctx.bio); + WARN_ON_ONCE(!ctx.cur_page_in_bio); + } else { + WARN_ON_ONCE(ctx.cur_page_in_bio); + unlock_page(page); + } + + /* + * Just like mpage_readpages and block_read_full_page we always + * return 0 and just mark the page as PageError on errors. This + * should be cleaned up all through the stack eventually. + */ + return 0; +} +EXPORT_SYMBOL_GPL(iomap_readpage); + +static struct page * +iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos, + loff_t length, loff_t *done) +{ + while (!list_empty(pages)) { + struct page *page = lru_to_page(pages); + + if (page_offset(page) >= (u64)pos + length) + break; + + list_del(&page->lru); + if (!add_to_page_cache_lru(page, inode->i_mapping, page->index, + GFP_NOFS)) + return page; + + /* + * If we already have a page in the page cache at index we are + * done. Upper layers don't care if it is uptodate after the + * readpages call itself as every page gets checked again once + * actually needed. + */ + *done += PAGE_SIZE; + put_page(page); + } + + return NULL; +} + +static loff_t +iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, + void *data, struct iomap *iomap) +{ + struct iomap_readpage_ctx *ctx = data; + loff_t done, ret; + + for (done = 0; done < length; done += ret) { + if (ctx->cur_page && offset_in_page(pos + done) == 0) { + if (!ctx->cur_page_in_bio) + unlock_page(ctx->cur_page); + put_page(ctx->cur_page); + ctx->cur_page = NULL; + } + if (!ctx->cur_page) { + ctx->cur_page = iomap_next_page(inode, ctx->pages, + pos, length, &done); + if (!ctx->cur_page) + break; + ctx->cur_page_in_bio = false; + } + ret = iomap_readpage_actor(inode, pos + done, length - done, + ctx, iomap); + } + + return done; +} + +int +iomap_readpages(struct address_space *mapping, struct list_head *pages, + unsigned nr_pages, const struct iomap_ops *ops) +{ + struct iomap_readpage_ctx ctx = { + .pages = pages, + .is_readahead = true, + }; + loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); + loff_t last = page_offset(list_entry(pages->next, struct page, lru)); + loff_t length = last - pos + PAGE_SIZE, ret = 0; + + while (length > 0) { + ret = iomap_apply(mapping->host, pos, length, 0, ops, + &ctx, iomap_readpages_actor); + if (ret <= 0) { + WARN_ON_ONCE(ret == 0); + goto done; + } + pos += ret; + length -= ret; + } + ret = 0; +done: + if (ctx.bio) + submit_bio(ctx.bio); + if (ctx.cur_page) { + if (!ctx.cur_page_in_bio) + unlock_page(ctx.cur_page); + put_page(ctx.cur_page); + } + + /* + * Check that we didn't lose a page due to the arcance calling + * conventions.. + */ + WARN_ON_ONCE(!ret && !list_empty(ctx.pages)); + return ret; +} +EXPORT_SYMBOL_GPL(iomap_readpages); + +int +iomap_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count) +{ + struct iomap_page *iop = to_iomap_page(page); + struct inode *inode = page->mapping->host; + unsigned first = from >> inode->i_blkbits; + unsigned last = (from + count - 1) >> inode->i_blkbits; + unsigned i; + + if (iop) { + for (i = first; i <= last; i++) + if (!test_bit(i, iop->uptodate)) + return 0; + return 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); + +int +iomap_releasepage(struct page *page, gfp_t gfp_mask) +{ + /* + * mm accommodates an old ext3 case where clean pages might not have had + * the dirty bit cleared. Thus, it can send actual dirty pages to + * ->releasepage() via shrink_active_list(), skip those here. + */ + if (PageDirty(page) || PageWriteback(page)) + return 0; + iomap_page_release(page); + return 1; +} +EXPORT_SYMBOL_GPL(iomap_releasepage); + +void +iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) +{ + /* + * If we are invalidating the entire page, clear the dirty state from it + * and release it to avoid unnecessary buildup of the LRU. + */ + if (offset == 0 && len == PAGE_SIZE) { + WARN_ON_ONCE(PageWriteback(page)); + cancel_dirty_page(page); + iomap_page_release(page); + } +} +EXPORT_SYMBOL_GPL(iomap_invalidatepage); + +#ifdef CONFIG_MIGRATION +int +iomap_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page, enum migrate_mode mode) +{ + int ret; + + ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + if (ret != MIGRATEPAGE_SUCCESS) + return ret; + + if (page_has_private(page)) { + ClearPagePrivate(page); + set_page_private(newpage, page_private(page)); + set_page_private(page, 0); + SetPagePrivate(newpage); + } + + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +EXPORT_SYMBOL_GPL(iomap_migrate_page); +#endif /* CONFIG_MIGRATION */ + static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { @@ -117,6 +580,61 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) } static int +iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page, + unsigned poff, unsigned plen, unsigned from, unsigned to, + struct iomap *iomap) +{ + struct bio_vec bvec; + struct bio bio; + + if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) { + zero_user_segments(page, poff, from, to, poff + plen); + iomap_set_range_uptodate(page, poff, plen); + return 0; + } + + bio_init(&bio, &bvec, 1); + bio.bi_opf = REQ_OP_READ; + bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); + bio_set_dev(&bio, iomap->bdev); + __bio_add_page(&bio, page, plen, poff); + return submit_bio_wait(&bio); +} + +static int +__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, + struct page *page, struct iomap *iomap) +{ + struct iomap_page *iop = iomap_page_create(inode, page); + loff_t block_size = i_blocksize(inode); + loff_t block_start = pos & ~(block_size - 1); + loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1); + unsigned from = offset_in_page(pos), to = from + len, poff, plen; + int status = 0; + + if (PageUptodate(page)) + return 0; + + do { + iomap_adjust_read_range(inode, iop, &block_start, + block_end - block_start, &poff, &plen); + if (plen == 0) + break; + + if ((from > poff && from < poff + plen) || + (to > poff && to < poff + plen)) { + status = iomap_read_page_sync(inode, block_start, page, + poff, plen, from, to, iomap); + if (status) + break; + } + + } while ((block_start += plen) < block_end); + + return status; +} + +static int iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, struct page **pagep, struct iomap *iomap) { @@ -133,7 +651,12 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, if (!page) return -ENOMEM; - status = __block_write_begin_int(page, pos, len, NULL, iomap); + if (iomap->type == IOMAP_INLINE) + iomap_read_inline_data(inode, page, iomap); + else if (iomap->flags & IOMAP_F_BUFFER_HEAD) + status = __block_write_begin_int(page, pos, len, NULL, iomap); + else + status = __iomap_write_begin(inode, pos, len, page, iomap); if (unlikely(status)) { unlock_page(page); put_page(page); @@ -146,14 +669,93 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, return status; } +int +iomap_set_page_dirty(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + /* + * Lock out page->mem_cgroup migration to keep PageDirty + * synchronized with per-memcg dirty page counters. + */ + lock_page_memcg(page); + newly_dirty = !TestSetPageDirty(page); + if (newly_dirty) + __set_page_dirty(page, mapping, 0); + unlock_page_memcg(page); + + if (newly_dirty) + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return newly_dirty; +} +EXPORT_SYMBOL_GPL(iomap_set_page_dirty); + +static int +__iomap_write_end(struct inode *inode, loff_t pos, unsigned len, + unsigned copied, struct page *page, struct iomap *iomap) +{ + flush_dcache_page(page); + + /* + * The blocks that were entirely written will now be uptodate, so we + * don't have to worry about a readpage reading them and overwriting a + * partial write. However if we have encountered a short write and only + * partially written into a block, it will not be marked uptodate, so a + * readpage might come in and destroy our partial write. + * + * Do the simplest thing, and just treat any short write to a non + * uptodate page as a zero-length write, and force the caller to redo + * the whole thing. + */ + if (unlikely(copied < len && !PageUptodate(page))) { + copied = 0; + } else { + iomap_set_range_uptodate(page, offset_in_page(pos), len); + iomap_set_page_dirty(page); + } + return __generic_write_end(inode, pos, copied, page); +} + +static int +iomap_write_end_inline(struct inode *inode, struct page *page, + struct iomap *iomap, loff_t pos, unsigned copied) +{ + void *addr; + + WARN_ON_ONCE(!PageUptodate(page)); + BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); + + addr = kmap_atomic(page); + memcpy(iomap->inline_data + pos, addr + pos, copied); + kunmap_atomic(addr); + + mark_inode_dirty(inode); + __generic_write_end(inode, pos, copied, page); + return copied; +} + static int iomap_write_end(struct inode *inode, loff_t pos, unsigned len, - unsigned copied, struct page *page) + unsigned copied, struct page *page, struct iomap *iomap) { int ret; - ret = generic_write_end(NULL, inode->i_mapping, pos, len, - copied, page, NULL); + if (iomap->type == IOMAP_INLINE) { + ret = iomap_write_end_inline(inode, page, iomap, pos, copied); + } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) { + ret = generic_write_end(NULL, inode->i_mapping, pos, len, + copied, page, NULL); + } else { + ret = __iomap_write_end(inode, pos, len, copied, page, iomap); + } + + if (iomap->page_done) + iomap->page_done(inode, pos, copied, page, iomap); + if (ret < len) iomap_write_failed(inode, pos, len); return ret; @@ -174,7 +776,7 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ size_t copied; /* Bytes copied from user */ - offset = (pos & (PAGE_SIZE - 1)); + offset = offset_in_page(pos); bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: @@ -208,7 +810,8 @@ again: flush_dcache_page(page); - status = iomap_write_end(inode, pos, bytes, copied, page); + status = iomap_write_end(inode, pos, bytes, copied, page, + iomap); if (unlikely(status < 0)) break; copied = status; @@ -287,7 +890,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long offset; /* Offset into pagecache page */ unsigned long bytes; /* Bytes to write to page */ - offset = (pos & (PAGE_SIZE - 1)); + offset = offset_in_page(pos); bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); @@ -302,7 +905,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, WARN_ON_ONCE(!PageUptodate(page)); - status = iomap_write_end(inode, pos, bytes, bytes, page); + status = iomap_write_end(inode, pos, bytes, bytes, page, iomap); if (unlikely(status <= 0)) { if (WARN_ON_ONCE(status == 0)) return -EIO; @@ -354,7 +957,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, zero_user(page, offset, bytes); mark_page_accessed(page); - return iomap_write_end(inode, pos, bytes, bytes, page); + return iomap_write_end(inode, pos, bytes, bytes, page, iomap); } static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, @@ -379,7 +982,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, do { unsigned offset, bytes; - offset = pos & (PAGE_SIZE - 1); /* Within page */ + offset = offset_in_page(pos); bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) @@ -440,11 +1043,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, struct page *page = data; int ret; - ret = __block_write_begin_int(page, pos, length, NULL, iomap); - if (ret) - return ret; + if (iomap->flags & IOMAP_F_BUFFER_HEAD) { + ret = __block_write_begin_int(page, pos, length, NULL, iomap); + if (ret) + return ret; + block_commit_write(page, 0, length); + } else { + WARN_ON_ONCE(!PageUptodate(page)); + iomap_page_create(inode, page); + } - block_commit_write(page, 0, length); return length; } @@ -467,7 +1075,7 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) /* page is wholly or partially inside EOF */ if (((page->index + 1) << PAGE_SHIFT) > size) - length = size & ~PAGE_MASK; + length = offset_in_page(size); else length = PAGE_SIZE; @@ -630,7 +1238,7 @@ page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff, goto out_unlock_not_found; for (off = 0; off < PAGE_SIZE; off += bsize) { - if ((*lastoff & ~PAGE_MASK) >= off + bsize) + if (offset_in_page(*lastoff) >= off + bsize) continue; if (ops->is_partially_uptodate(page, off, bsize) == seek_data) { unlock_page(page); @@ -811,6 +1419,7 @@ struct iomap_dio { atomic_t ref; unsigned flags; int error; + bool wait_for_completion; union { /* used during submission and for synchronous completion: */ @@ -914,9 +1523,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); if (atomic_dec_and_test(&dio->ref)) { - if (is_sync_kiocb(dio->iocb)) { + if (dio->wait_for_completion) { struct task_struct *waiter = dio->submit.waiter; - WRITE_ONCE(dio->submit.waiter, NULL); wake_up_process(waiter); } else if (dio->flags & IOMAP_DIO_WRITE) { @@ -963,10 +1571,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, } static loff_t -iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, - void *data, struct iomap *iomap) +iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, + struct iomap_dio *dio, struct iomap *iomap) { - struct iomap_dio *dio = data; unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); unsigned int fs_block_size = i_blocksize(inode), pad; unsigned int align = iov_iter_alignment(dio->submit.iter); @@ -980,41 +1587,27 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; - switch (iomap->type) { - case IOMAP_HOLE: - if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE)) - return -EIO; - /*FALLTHRU*/ - case IOMAP_UNWRITTEN: - if (!(dio->flags & IOMAP_DIO_WRITE)) { - length = iov_iter_zero(length, dio->submit.iter); - dio->size += length; - return length; - } + if (iomap->type == IOMAP_UNWRITTEN) { dio->flags |= IOMAP_DIO_UNWRITTEN; need_zeroout = true; - break; - case IOMAP_MAPPED: - if (iomap->flags & IOMAP_F_SHARED) - dio->flags |= IOMAP_DIO_COW; - if (iomap->flags & IOMAP_F_NEW) { - need_zeroout = true; - } else { - /* - * Use a FUA write if we need datasync semantics, this - * is a pure data IO that doesn't require any metadata - * updates and the underlying device supports FUA. This - * allows us to avoid cache flushes on IO completion. - */ - if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) && - (dio->flags & IOMAP_DIO_WRITE_FUA) && - blk_queue_fua(bdev_get_queue(iomap->bdev))) - use_fua = true; - } - break; - default: - WARN_ON_ONCE(1); - return -EIO; + } + + if (iomap->flags & IOMAP_F_SHARED) + dio->flags |= IOMAP_DIO_COW; + + if (iomap->flags & IOMAP_F_NEW) { + need_zeroout = true; + } else { + /* + * Use a FUA write if we need datasync semantics, this + * is a pure data IO that doesn't require any metadata + * updates and the underlying device supports FUA. This + * allows us to avoid cache flushes on IO completion. + */ + if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) && + (dio->flags & IOMAP_DIO_WRITE_FUA) && + blk_queue_fua(bdev_get_queue(iomap->bdev))) + use_fua = true; } /* @@ -1093,6 +1686,66 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, return copied; } +static loff_t +iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio) +{ + length = iov_iter_zero(length, dio->submit.iter); + dio->size += length; + return length; +} + +static loff_t +iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, + struct iomap_dio *dio, struct iomap *iomap) +{ + struct iov_iter *iter = dio->submit.iter; + size_t copied; + + BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data)); + + if (dio->flags & IOMAP_DIO_WRITE) { + loff_t size = inode->i_size; + + if (pos > size) + memset(iomap->inline_data + size, 0, pos - size); + copied = copy_from_iter(iomap->inline_data + pos, length, iter); + if (copied) { + if (pos + copied > size) + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + } + } else { + copied = copy_to_iter(iomap->inline_data + pos, length, iter); + } + dio->size += copied; + return copied; +} + +static loff_t +iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, + void *data, struct iomap *iomap) +{ + struct iomap_dio *dio = data; + + switch (iomap->type) { + case IOMAP_HOLE: + if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE)) + return -EIO; + return iomap_dio_hole_actor(length, dio); + case IOMAP_UNWRITTEN: + if (!(dio->flags & IOMAP_DIO_WRITE)) + return iomap_dio_hole_actor(length, dio); + return iomap_dio_bio_actor(inode, pos, length, dio, iomap); + case IOMAP_MAPPED: + return iomap_dio_bio_actor(inode, pos, length, dio, iomap); + case IOMAP_INLINE: + return iomap_dio_inline_actor(inode, pos, length, dio, iomap); + default: + WARN_ON_ONCE(1); + return -EIO; + } +} + /* * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO * is being issued as AIO or not. This allows us to optimise pure data writes @@ -1131,13 +1784,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->end_io = end_io; dio->error = 0; dio->flags = 0; + dio->wait_for_completion = is_sync_kiocb(iocb); dio->submit.iter = iter; - if (is_sync_kiocb(iocb)) { - dio->submit.waiter = current; - dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; - } + dio->submit.waiter = current; + dio->submit.cookie = BLK_QC_T_NONE; + dio->submit.last_queue = NULL; if (iov_iter_rw(iter) == READ) { if (pos >= dio->i_size) @@ -1187,7 +1839,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; - if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && + if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1202,8 +1854,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomap_dio_actor); if (ret <= 0) { /* magic error code to fall back to buffered I/O */ - if (ret == -ENOTBLK) + if (ret == -ENOTBLK) { + dio->wait_for_completion = true; ret = 0; + } break; } pos += ret; @@ -1224,7 +1878,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->flags &= ~IOMAP_DIO_NEED_SYNC; if (!atomic_dec_and_test(&dio->ref)) { - if (!is_sync_kiocb(iocb)) + if (!dio->wait_for_completion) return -EIOCBQUEUED; for (;;) { @@ -1443,7 +2097,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops) { struct inode *inode = mapping->host; - loff_t pos = bno >> inode->i_blkbits; + loff_t pos = bno << inode->i_blkbits; unsigned blocksize = i_blocksize(inode); if (filemap_write_and_wait(mapping)) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8de0e7723316..150cc030b4d7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -121,7 +121,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - struct timespec64 now = current_kernel_time64(); + struct timespec64 now; *cbh = NULL; @@ -134,6 +134,7 @@ static int journal_submit_commit_record(journal_t *journal, return 1; tmp = (struct commit_header *)bh->b_data; + ktime_get_coarse_real_ts64(&now); tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index b2944f9218f7..f20cff1194bb 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -201,7 +201,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, if (ret) goto fail; - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); + dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); jffs2_free_raw_inode(ri); @@ -227,14 +227,14 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i); struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(d_inode(dentry)); int ret; - uint32_t now = get_seconds(); + uint32_t now = JFFS2_NOW(); ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, dead_f, now); if (dead_f->inocache) set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink); if (!ret) - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); + dir_i->i_mtime = dir_i->i_ctime = ITIME(now); return ret; } /***********************************************************************/ @@ -260,7 +260,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12; if (!type) type = DT_REG; - now = get_seconds(); + now = JFFS2_NOW(); ret = jffs2_do_link(c, dir_f, f->inocache->ino, type, dentry->d_name.name, dentry->d_name.len, now); if (!ret) { @@ -268,7 +268,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink); mutex_unlock(&f->sem); d_instantiate(dentry, d_inode(old_dentry)); - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); + dir_i->i_mtime = dir_i->i_ctime = ITIME(now); ihold(d_inode(old_dentry)); } return ret; @@ -400,7 +400,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char rd->pino = cpu_to_je32(dir_i->i_ino); rd->version = cpu_to_je32(++dir_f->highest_version); rd->ino = cpu_to_je32(inode->i_ino); - rd->mctime = cpu_to_je32(get_seconds()); + rd->mctime = cpu_to_je32(JFFS2_NOW()); rd->nsize = namelen; rd->type = DT_LNK; rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); @@ -418,7 +418,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char goto fail; } - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); + dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); jffs2_free_raw_dirent(rd); @@ -543,7 +543,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode rd->pino = cpu_to_je32(dir_i->i_ino); rd->version = cpu_to_je32(++dir_f->highest_version); rd->ino = cpu_to_je32(inode->i_ino); - rd->mctime = cpu_to_je32(get_seconds()); + rd->mctime = cpu_to_je32(JFFS2_NOW()); rd->nsize = namelen; rd->type = DT_DIR; rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); @@ -561,7 +561,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode goto fail; } - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); + dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); inc_nlink(dir_i); jffs2_free_raw_dirent(rd); @@ -588,7 +588,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry)); struct jffs2_full_dirent *fd; int ret; - uint32_t now = get_seconds(); + uint32_t now = JFFS2_NOW(); for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) @@ -598,7 +598,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, f, now); if (!ret) { - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); + dir_i->i_mtime = dir_i->i_ctime = ITIME(now); clear_nlink(d_inode(dentry)); drop_nlink(dir_i); } @@ -712,7 +712,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode rd->pino = cpu_to_je32(dir_i->i_ino); rd->version = cpu_to_je32(++dir_f->highest_version); rd->ino = cpu_to_je32(inode->i_ino); - rd->mctime = cpu_to_je32(get_seconds()); + rd->mctime = cpu_to_je32(JFFS2_NOW()); rd->nsize = namelen; /* XXX: This is ugly. */ @@ -733,7 +733,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode goto fail; } - dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); + dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); jffs2_free_raw_dirent(rd); @@ -797,7 +797,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12; if (!type) type = DT_REG; - now = get_seconds(); + now = JFFS2_NOW(); ret = jffs2_do_link(c, JFFS2_INODE_INFO(new_dir_i), d_inode(old_dentry)->i_ino, type, new_dentry->d_name.name, new_dentry->d_name.len, now); @@ -853,14 +853,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, * caller won't do it on its own since we are returning an error. */ d_invalidate(new_dentry); - new_dir_i->i_mtime = new_dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); + new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); return ret; } if (d_is_dir(old_dentry)) drop_nlink(old_dir_i); - new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); + new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now); return 0; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 481afd4c2e1a..7d8654a1472e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -175,7 +175,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ri.uid = cpu_to_je16(i_uid_read(inode)); ri.gid = cpu_to_je16(i_gid_read(inode)); ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); - ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds()); + ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW()); ri.offset = cpu_to_je32(inode->i_size); ri.dsize = cpu_to_je32(pageofs - inode->i_size); ri.csize = cpu_to_je32(0); @@ -283,7 +283,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, ri->uid = cpu_to_je16(i_uid_read(inode)); ri->gid = cpu_to_je16(i_gid_read(inode)); ri->isize = cpu_to_je32((uint32_t)inode->i_size); - ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds()); + ri->atime = ri->ctime = ri->mtime = cpu_to_je32(JFFS2_NOW()); /* In 2.4, it was already kmapped by generic_file_write(). Doesn't hurt to do it again. The alternative is ifdefs, which are ugly. */ @@ -308,7 +308,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, inode->i_size = pos + writtenlen; inode->i_blocks = (inode->i_size + 511) >> 9; - inode->i_ctime = inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); + inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime)); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 0ecfb8ea38cd..eab04eca95a3 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -146,9 +146,9 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) return PTR_ERR(new_metadata); } /* It worked. Update the inode */ - inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->atime))); - inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); - inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->mtime))); + inode->i_atime = ITIME(je32_to_cpu(ri->atime)); + inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); + inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); inode->i_mode = jemode_to_cpu(ri->mode); i_uid_write(inode, je16_to_cpu(ri->uid)); i_gid_write(inode, je16_to_cpu(ri->gid)); @@ -280,9 +280,9 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) i_uid_write(inode, je16_to_cpu(latest_node.uid)); i_gid_write(inode, je16_to_cpu(latest_node.gid)); inode->i_size = je32_to_cpu(latest_node.isize); - inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.atime))); - inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.mtime))); - inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.ctime))); + inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); + inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); + inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); set_nlink(inode, f->inocache->pino_nlink); diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index c2fbec19c616..a2dbbb3f4c74 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -31,12 +31,13 @@ struct kvec; #define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f))) #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) -#define ITIME(sec) ((struct timespec){sec, 0}) -#define I_SEC(tv) ((tv).tv_sec) -#define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime.tv_sec) -#define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime.tv_sec) -#define JFFS2_F_I_ATIME(f) (OFNI_EDONI_2SFFJ(f)->i_atime.tv_sec) - +#define JFFS2_CLAMP_TIME(t) ((uint32_t)clamp_t(time64_t, (t), 0, U32_MAX)) +#define ITIME(sec) ((struct timespec64){sec, 0}) +#define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds()) +#define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec) +#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime) +#define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime) +#define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime) #define sleep_on_spinunlock(wq, s) \ do { \ DECLARE_WAITQUEUE(__wait, current); \ diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 395c4c0d0f06..1682a87c00b2 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -115,6 +115,13 @@ struct dinode { dxd_t _dxd; /* 16: */ union { __le32 _rdev; /* 4: */ + /* + * The fast symlink area + * is expected to overflow + * into _inlineea when + * needed (which will clear + * INLINEEA). + */ u8 _fastsymlink[128]; } _u; u8 _inlineea[128]; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f36ef68905a7..93e8c590ff5c 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* release the page */ release_metapage(mp); - /* - * __mark_inode_dirty expects inodes to be hashed. Since we don't - * want special inodes in the fileset inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. - */ - hlist_add_fake(&ip->i_hash); + inode_fake_hash(ip); return (ip); } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1f26d1910409..912a3af2393e 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -43,7 +43,7 @@ struct jfs_inode_info { pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ - time_t otime; /* time created */ + time64_t otime; /* time created */ uint next_index; /* next available directory entry index */ int acltype; /* Type of ACL */ short btorder; /* access order */ @@ -87,6 +87,7 @@ struct jfs_inode_info { struct { unchar _unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ + /* _inline may overflow into _inline_ea when needed */ unchar _inline[128]; /* 128: inline symlink */ /* _inline_ea may overlay the last part of * file._xtroot if maxentry = XTROOTINITSLOT diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 5e9b7bb3aabf..4572b7cf183d 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - rc = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } jfs_inode = JFS_IP(inode); @@ -70,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); - if (rc == -EIO) - make_bad_inode(inode); goto fail_put; } @@ -141,9 +138,10 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); + discard_new_inode(inode); + return ERR_PTR(rc); + fail_put: iput(inode); -fail: return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 56c3fcbfe80e..14528c0ffe63 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b6..09da5cf14e27 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = 0; inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; @@ -967,8 +967,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - offsetof(struct jfs_inode_info, i_inline), - sizeof_field(struct jfs_inode_info, i_inline), + offsetof(struct jfs_inode_info, i_inline), IDATASIZE, init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index d66cc0777303..4ca0b5c18192 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -619,6 +619,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, + kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; @@ -661,8 +662,22 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->mode = mode; kn->flags = flags; + if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) { + struct iattr iattr = { + .ia_valid = ATTR_UID | ATTR_GID, + .ia_uid = uid, + .ia_gid = gid, + }; + + ret = __kernfs_setattr(kn, &iattr); + if (ret < 0) + goto err_out3; + } + return kn; + err_out3: + idr_remove(&root->ino_idr, kn->id.ino); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: @@ -672,11 +687,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, + kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; - kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags); + kn = __kernfs_new_node(kernfs_root(parent), + name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); kn->parent = parent; @@ -946,6 +963,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, root->next_generation = 1; kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) { idr_destroy(&root->ino_idr); @@ -984,6 +1002,8 @@ void kernfs_destroy_root(struct kernfs_root *root) * @parent: parent in which to create a new directory * @name: name of the new directory * @mode: mode of the new directory + * @uid: uid of the new directory + * @gid: gid of the new directory * @priv: opaque data associated with the new directory * @ns: optional namespace tag of the directory * @@ -991,13 +1011,15 @@ void kernfs_destroy_root(struct kernfs_root *root) */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, + kuid_t uid, kgid_t gid, void *priv, const void *ns) { struct kernfs_node *kn; int rc; /* allocate */ - kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR); + kn = kernfs_new_node(parent, name, mode | S_IFDIR, + uid, gid, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); @@ -1028,7 +1050,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, int rc; /* allocate */ - kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 2015d8c45e4a..dbf5bc250bfd 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -965,6 +965,8 @@ const struct file_operations kernfs_file_fops = { * @parent: directory to create the file in * @name: name of the file * @mode: mode of the file + * @uid: uid of the file + * @gid: gid of the file * @size: size of the file * @ops: kernfs operations for the file * @priv: private data for the file @@ -975,7 +977,8 @@ const struct file_operations kernfs_file_fops = { */ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, - umode_t mode, loff_t size, + umode_t mode, kuid_t uid, kgid_t gid, + loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) @@ -986,7 +989,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, flags = KERNFS_FILE; - kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); + kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, + uid, gid, flags); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3d73fe9d56e2..80cebcd94c90 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -63,7 +63,7 @@ out_unlock: return ret; } -static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) +int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { struct kernfs_iattrs *attrs; struct iattr *iattrs; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 0f260dcca177..3d83b114bb08 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -90,6 +90,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr); int kernfs_iop_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); +int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); /* * dir.c @@ -104,6 +105,7 @@ void kernfs_put_active(struct kernfs_node *kn); int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, + kuid_t uid, kgid_t gid, unsigned flags); struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root, unsigned int ino); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 08ccabd7047f..305b220af45d 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -21,6 +21,7 @@ * @target: target node for the symlink to point to * * Returns the created node on success, ERR_PTR() value on error. + * Ownership of the link matches ownership of the target. */ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -28,8 +29,16 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, { struct kernfs_node *kn; int error; + kuid_t uid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID; - kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); + if (target->iattr) { + uid = target->iattr->ia_iattr.ia_uid; + gid = target->iattr->ia_iattr.ia_gid; + } + + kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid, + KERNFS_LINK); if (!kn) return ERR_PTR(-ENOMEM); @@ -88,7 +97,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, int slen = strlen(kn->name); len -= slen; - strncpy(s + len, kn->name, slen); + memcpy(s + len, kn->name, slen); if (len) s[--len] = '/'; diff --git a/fs/locks.c b/fs/locks.c index db7b6917d9c5..bc047a7edc47 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -202,10 +202,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. - * - * In particular, adding an entry to the fl_block list requires that you hold - * both the flc_lock and the blocked_lock_lock (acquired in that order). - * Deleting an entry from the list however only requires the file_lock_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); @@ -990,6 +986,7 @@ out: if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); + trace_flock_lock_inode(inode, request, error); return error; } @@ -2072,6 +2069,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns) return -1; if (IS_REMOTELCK(fl)) return fl->fl_pid; + /* + * If the flock owner process is dead and its pid has been already + * freed, the translation below won't work, but we still want to show + * flock owner pid number in init pidns. + */ + if (ns == &init_pid_ns) + return (pid_t)fl->fl_pid; rcu_read_lock(); pid = find_pid_ns(fl->fl_pid, &init_pid_ns); @@ -2626,12 +2630,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, fl_pid = locks_translate_pid(fl, proc_pidns); /* - * If there isn't a fl_pid don't display who is waiting on - * the lock if we are called from locks_show, or if we are - * called from __show_fd_info - skip lock entirely + * If lock owner is dead (and pid is freed) or not visible in current + * pidns, zero is shown as a pid value. Check lock info from + * init_pid_ns to get saved lock pid value. */ - if (fl_pid == 0) - return; if (fl->fl_file != NULL) inode = locks_inode(fl->fl_file); diff --git a/fs/mpage.c b/fs/mpage.c index b7e7f570733a..c820dc9bebab 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); + page_endio(page, bio_op(bio), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); @@ -133,6 +133,17 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) } while (page_bh != head); } +struct mpage_readpage_args { + struct bio *bio; + struct page *page; + unsigned int nr_pages; + bool is_readahead; + sector_t last_block_in_bio; + struct buffer_head map_bh; + unsigned long first_logical_block; + get_block_t *get_block; +}; + /* * This is the worker routine which does all the work of mapping the disk * blocks and constructs largest possible bios, submits them for IO if the @@ -142,16 +153,14 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) * represent the validity of its disk mapping and to decide when to do the next * get_block() call. */ -static struct bio * -do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, struct buffer_head *map_bh, - unsigned long *first_logical_block, get_block_t get_block, - gfp_t gfp) +static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) { + struct page *page = args->page; struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; + struct buffer_head *map_bh = &args->map_bh; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -161,14 +170,24 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, struct block_device *bdev = NULL; int length; int fully_mapped = 1; + int op_flags; unsigned nblocks; unsigned relative_block; + gfp_t gfp; + + if (args->is_readahead) { + op_flags = REQ_RAHEAD; + gfp = readahead_gfp_mask(page->mapping); + } else { + op_flags = 0; + gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + } if (page_has_buffers(page)) goto confused; block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); - last_block = block_in_file + nr_pages * blocks_per_page; + last_block = block_in_file + args->nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -178,9 +197,10 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, * Map blocks using the result from the previous get_blocks call first. */ nblocks = map_bh->b_size >> blkbits; - if (buffer_mapped(map_bh) && block_in_file > *first_logical_block && - block_in_file < (*first_logical_block + nblocks)) { - unsigned map_offset = block_in_file - *first_logical_block; + if (buffer_mapped(map_bh) && + block_in_file > args->first_logical_block && + block_in_file < (args->first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - args->first_logical_block; unsigned last = nblocks - map_offset; for (relative_block = 0; ; relative_block++) { @@ -208,9 +228,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, if (block_in_file < last_block) { map_bh->b_size = (last_block-block_in_file) << blkbits; - if (get_block(inode, block_in_file, map_bh, 0)) + if (args->get_block(inode, block_in_file, map_bh, 0)) goto confused; - *first_logical_block = block_in_file; + args->first_logical_block = block_in_file; } if (!buffer_mapped(map_bh)) { @@ -273,43 +293,45 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, /* * This page will go to BIO. Do we need to send this BIO off first? */ - if (bio && (*last_block_in_bio != blocks[0] - 1)) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args->bio && (args->last_block_in_bio != blocks[0] - 1)) + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); alloc_new: - if (bio == NULL) { + if (args->bio == NULL) { if (first_hole == blocks_per_page) { if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9), page)) goto out; } - bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - min_t(int, nr_pages, BIO_MAX_PAGES), gfp); - if (bio == NULL) + args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), + min_t(int, args->nr_pages, + BIO_MAX_PAGES), + gfp); + if (args->bio == NULL) goto confused; } length = first_hole << blkbits; - if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (bio_add_page(args->bio, page, length, 0) < length) { + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); goto alloc_new; } - relative_block = block_in_file - *first_logical_block; + relative_block = block_in_file - args->first_logical_block; nblocks = map_bh->b_size >> blkbits; if ((buffer_boundary(map_bh) && relative_block == nblocks) || (first_hole != blocks_per_page)) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); else - *last_block_in_bio = blocks[blocks_per_page - 1]; + args->last_block_in_bio = blocks[blocks_per_page - 1]; out: - return bio; + return args->bio; confused: - if (bio) - bio = mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args->bio) + args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio); if (!PageUptodate(page)) - block_read_full_page(page, get_block); + block_read_full_page(page, args->get_block); else unlock_page(page); goto out; @@ -363,15 +385,12 @@ int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block) { - struct bio *bio = NULL; + struct mpage_readpage_args args = { + .get_block = get_block, + .is_readahead = true, + }; unsigned page_idx; - sector_t last_block_in_bio = 0; - struct buffer_head map_bh; - unsigned long first_logical_block = 0; - gfp_t gfp = readahead_gfp_mask(mapping); - map_bh.b_state = 0; - map_bh.b_size = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = lru_to_page(pages); @@ -379,18 +398,16 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, page->index, - gfp)) { - bio = do_mpage_readpage(bio, page, - nr_pages - page_idx, - &last_block_in_bio, &map_bh, - &first_logical_block, - get_block, gfp); + readahead_gfp_mask(mapping))) { + args.page = page; + args.nr_pages = nr_pages - page_idx; + args.bio = do_mpage_readpage(&args); } put_page(page); } BUG_ON(!list_empty(pages)); - if (bio) - mpage_bio_submit(REQ_OP_READ, 0, bio); + if (args.bio) + mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); return 0; } EXPORT_SYMBOL(mpage_readpages); @@ -400,18 +417,15 @@ EXPORT_SYMBOL(mpage_readpages); */ int mpage_readpage(struct page *page, get_block_t get_block) { - struct bio *bio = NULL; - sector_t last_block_in_bio = 0; - struct buffer_head map_bh; - unsigned long first_logical_block = 0; - gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + struct mpage_readpage_args args = { + .page = page, + .nr_pages = 1, + .get_block = get_block, + }; - map_bh.b_state = 0; - map_bh.b_size = 0; - bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, - &map_bh, &first_logical_block, get_block, gfp); - if (bio) - mpage_bio_submit(REQ_OP_READ, 0, bio); + args.bio = do_mpage_readpage(&args); + if (args.bio) + mpage_bio_submit(REQ_OP_READ, 0, args.bio); return 0; } EXPORT_SYMBOL(mpage_readpage); diff --git a/fs/namei.c b/fs/namei.c index 734cef54fdf8..3cd396277cd3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2028,6 +2028,8 @@ static int link_path_walk(const char *name, struct nameidata *nd) { int err; + if (IS_ERR(name)) + return PTR_ERR(name); while (*name=='/') name++; if (!*name) @@ -2125,12 +2127,15 @@ OK: } } +/* must be paired with terminate_walk() */ static const char *path_init(struct nameidata *nd, unsigned flags) { const char *s = nd->name->name; if (!*s) flags &= ~LOOKUP_RCU; + if (flags & LOOKUP_RCU) + rcu_read_lock(); nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; @@ -2143,7 +2148,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); nd->root_seq = nd->seq; nd->m_seq = read_seqbegin(&mount_lock); @@ -2159,21 +2163,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->m_seq = read_seqbegin(&mount_lock); if (*s == '/') { - if (flags & LOOKUP_RCU) - rcu_read_lock(); set_root(nd); if (likely(!nd_jump_root(nd))) return s; - nd->root.mnt = NULL; - rcu_read_unlock(); return ERR_PTR(-ECHILD); } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; - rcu_read_lock(); - do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; @@ -2195,16 +2193,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags) dentry = f.file->f_path.dentry; - if (*s) { - if (!d_can_lookup(dentry)) { - fdput(f); - return ERR_PTR(-ENOTDIR); - } + if (*s && unlikely(!d_can_lookup(dentry))) { + fdput(f); + return ERR_PTR(-ENOTDIR); } nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - rcu_read_lock(); nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -2272,24 +2267,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path const char *s = path_init(nd, flags); int err; - if (IS_ERR(s)) - return PTR_ERR(s); - - if (unlikely(flags & LOOKUP_DOWN)) { + if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) { err = handle_lookup_down(nd); - if (unlikely(err < 0)) { - terminate_walk(nd); - return err; - } + if (unlikely(err < 0)) + s = ERR_PTR(err); } while (!(err = link_path_walk(s, nd)) && ((err = lookup_last(nd)) > 0)) { s = trailing_symlink(nd); - if (IS_ERR(s)) { - err = PTR_ERR(s); - break; - } } if (!err) err = complete_walk(nd); @@ -2336,10 +2322,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) { const char *s = path_init(nd, flags); - int err; - if (IS_ERR(s)) - return PTR_ERR(s); - err = link_path_walk(s, nd); + int err = link_path_walk(s, nd); if (!err) err = complete_walk(nd); if (!err) { @@ -2666,15 +2649,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path) { const char *s = path_init(nd, flags); int err; - if (IS_ERR(s)) - return PTR_ERR(s); + while (!(err = link_path_walk(s, nd)) && (err = mountpoint_last(nd)) > 0) { s = trailing_symlink(nd); - if (IS_ERR(s)) { - err = PTR_ERR(s); - break; - } } if (!err) { *path = nd->path; @@ -3027,17 +3005,16 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m * Returns 0 if successful. The file will have been created and attached to * @file by the filesystem calling finish_open(). * - * Returns 1 if the file was looked up only or didn't need creating. The - * caller will need to perform the open themselves. @path will have been - * updated to point to the new dentry. This may be negative. + * If the file was looked up only or didn't need creating, FMODE_OPENED won't + * be set. The caller will need to perform the open themselves. @path will + * have been updated to point to the new dentry. This may be negative. * * Returns an error code otherwise. */ static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - int open_flag, umode_t mode, - int *opened) + int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; @@ -3052,39 +3029,38 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, file, - open_to_namei_flags(open_flag), - mode, opened); + open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); if (!error) { - /* - * We didn't have the inode before the open, so check open - * permission here. - */ - int acc_mode = op->acc_mode; - if (*opened & FILE_CREATED) { - WARN_ON(!(open_flag & O_CREAT)); - fsnotify_create(dir, dentry); - acc_mode = 0; - } - error = may_open(&file->f_path, acc_mode, open_flag); - if (WARN_ON(error > 0)) - error = -EINVAL; - } else if (error > 0) { - if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { + if (file->f_mode & FMODE_OPENED) { + /* + * We didn't have the inode before the open, so check open + * permission here. + */ + int acc_mode = op->acc_mode; + if (file->f_mode & FMODE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); + fsnotify_create(dir, dentry); + acc_mode = 0; + } + error = may_open(&file->f_path, acc_mode, open_flag); + if (WARN_ON(error > 0)) + error = -EINVAL; + } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; } else { if (file->f_path.dentry) { dput(dentry); dentry = file->f_path.dentry; } - if (*opened & FILE_CREATED) + if (file->f_mode & FMODE_CREATED) fsnotify_create(dir, dentry); if (unlikely(d_is_negative(dentry))) { error = -ENOENT; } else { path->dentry = dentry; path->mnt = nd->path.mnt; - return 1; + return 0; } } } @@ -3095,25 +3071,22 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, /* * Look up and maybe create and open the last component. * - * Must be called with i_mutex held on parent. - * - * Returns 0 if the file was successfully atomically created (if necessary) and - * opened. In this case the file will be returned attached to @file. - * - * Returns 1 if the file was not completely opened at this time, though lookups - * and creations will have been performed and the dentry returned in @path will - * be positive upon return if O_CREAT was specified. If O_CREAT wasn't - * specified then a negative dentry may be returned. + * Must be called with parent locked (exclusive in O_CREAT case). * - * An error code is returned otherwise. + * Returns 0 on success, that is, if + * the file was successfully atomically created (if necessary) and opened, or + * the file was not completely opened at this time, though lookups and + * creations were performed. + * These case are distinguished by presence of FMODE_OPENED on file->f_mode. + * In the latter case dentry returned in @path might be negative if O_CREAT + * hadn't been specified. * - * FILE_CREATE will be set in @*opened if the dentry was created and will be - * cleared otherwise prior to returning. + * An error code is returned on failure. */ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - bool got_write, int *opened) + bool got_write) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -3126,7 +3099,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (unlikely(IS_DEADDIR(dir_inode))) return -ENOENT; - *opened &= ~FILE_CREATED; + file->f_mode &= ~FMODE_CREATED; dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { @@ -3188,7 +3161,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (dir_inode->i_op->atomic_open) { error = atomic_open(nd, dentry, path, file, op, open_flag, - mode, opened); + mode); if (unlikely(error == -ENOENT) && create_error) error = create_error; return error; @@ -3211,7 +3184,7 @@ no_open: /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { error = -EACCES; @@ -3230,7 +3203,7 @@ no_open: out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; - return 1; + return 0; out_dput: dput(dentry); @@ -3241,8 +3214,7 @@ out_dput: * Handle the last step of open() */ static int do_last(struct nameidata *nd, - struct file *file, const struct open_flags *op, - int *opened) + struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -3308,17 +3280,17 @@ static int do_last(struct nameidata *nd, inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); - error = lookup_open(nd, &path, file, op, got_write, opened); + error = lookup_open(nd, &path, file, op, got_write); if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else inode_unlock_shared(dir->d_inode); - if (error <= 0) { - if (error) - goto out; + if (error) + goto out; - if ((*opened & FILE_CREATED) || + if (file->f_mode & FMODE_OPENED) { + if ((file->f_mode & FMODE_CREATED) || !S_ISREG(file_inode(file)->i_mode)) will_truncate = false; @@ -3326,7 +3298,7 @@ static int do_last(struct nameidata *nd, goto opened; } - if (*opened & FILE_CREATED) { + if (file->f_mode & FMODE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; @@ -3395,20 +3367,15 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto out; - BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - error = vfs_open(&nd->path, file, current_cred()); + BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ + error = vfs_open(&nd->path, file); if (error) goto out; - *opened |= FILE_OPENED; opened: - error = open_check_o_direct(file); - if (!error) - error = ima_file_check(file, op->acc_mode, *opened); + error = ima_file_check(file, op->acc_mode); if (!error && will_truncate) error = handle_truncate(file); out: - if (unlikely(error) && (*opened & FILE_OPENED)) - fput(file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; @@ -3458,7 +3425,7 @@ EXPORT_SYMBOL(vfs_tmpfile); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, - struct file *file, int *opened) + struct file *file) { struct dentry *child; struct path path; @@ -3480,12 +3447,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; file->f_path.mnt = path.mnt; - error = finish_open(file, child, NULL, opened); - if (error) - goto out2; - error = open_check_o_direct(file); - if (error) - fput(file); + error = finish_open(file, child, NULL); out2: mnt_drop_write(path.mnt); out: @@ -3499,7 +3461,7 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) int error = path_lookupat(nd, flags, &path); if (!error) { audit_inode(nd->name, path.dentry, 0); - error = vfs_open(&path, file, current_cred()); + error = vfs_open(&path, file); path_put(&path); } return error; @@ -3508,59 +3470,40 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { - const char *s; struct file *file; - int opened = 0; int error; - file = get_empty_filp(); + file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; - file->f_flags = op->open_flag; - if (unlikely(file->f_flags & __O_TMPFILE)) { - error = do_tmpfile(nd, flags, op, file, &opened); - goto out2; - } - - if (unlikely(file->f_flags & O_PATH)) { + error = do_tmpfile(nd, flags, op, file); + } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); - if (!error) - opened |= FILE_OPENED; - goto out2; - } - - s = path_init(nd, flags); - if (IS_ERR(s)) { - put_filp(file); - return ERR_CAST(s); - } - while (!(error = link_path_walk(s, nd)) && - (error = do_last(nd, file, op, &opened)) > 0) { - nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); - s = trailing_symlink(nd); - if (IS_ERR(s)) { - error = PTR_ERR(s); - break; + } else { + const char *s = path_init(nd, flags); + while (!(error = link_path_walk(s, nd)) && + (error = do_last(nd, file, op)) > 0) { + nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); + s = trailing_symlink(nd); } + terminate_walk(nd); } - terminate_walk(nd); -out2: - if (!(opened & FILE_OPENED)) { - BUG_ON(!error); - put_filp(file); + if (likely(!error)) { + if (likely(file->f_mode & FMODE_OPENED)) + return file; + WARN_ON(1); + error = -EINVAL; } - if (unlikely(error)) { - if (error == -EOPENSTALE) { - if (flags & LOOKUP_RCU) - error = -ECHILD; - else - error = -ESTALE; - } - file = ERR_PTR(error); + fput(file); + if (error == -EOPENSTALE) { + if (flags & LOOKUP_RCU) + error = -ECHILD; + else + error = -ESTALE; } - return file; + return ERR_PTR(error); } struct file *do_filp_open(int dfd, struct filename *pathname, @@ -4712,29 +4655,6 @@ out: return len; } -/* - * A helper for ->readlink(). This should be used *ONLY* for symlinks that - * have ->get_link() not calling nd_jump_link(). Using (or not using) it - * for any given inode is up to filesystem. - */ -static int generic_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - DEFINE_DELAYED_CALL(done); - struct inode *inode = d_inode(dentry); - const char *link = inode->i_link; - int res; - - if (!link) { - link = inode->i_op->get_link(dentry, inode, &done); - if (IS_ERR(link)) - return PTR_ERR(link); - } - res = readlink_copy(buffer, buflen, link); - do_delayed_call(&done); - return res; -} - /** * vfs_readlink - copy symlink body into userspace buffer * @dentry: dentry on which to get symbolic link @@ -4748,6 +4668,9 @@ static int generic_readlink(struct dentry *dentry, char __user *buffer, int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); + DEFINE_DELAYED_CALL(done); + const char *link; + int res; if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) @@ -4761,7 +4684,15 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) spin_unlock(&inode->i_lock); } - return generic_readlink(dentry, buffer, buflen); + link = inode->i_link; + if (!link) { + link = inode->i_op->get_link(dentry, inode, &done); + if (IS_ERR(link)) + return PTR_ERR(link); + } + res = readlink_copy(buffer, buflen, link); + do_delayed_call(&done); + return res; } EXPORT_SYMBOL(vfs_readlink); diff --git a/fs/namespace.c b/fs/namespace.c index 8ddd14806799..bd2f4c68506a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7a9c14426855..d7f158c3efc8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1434,12 +1434,11 @@ static int do_open(struct inode *inode, struct file *filp) static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct file *file, unsigned open_flags, - int *opened) + struct file *file, unsigned open_flags) { int err; - err = finish_open(file, dentry, do_open, opened); + err = finish_open(file, dentry, do_open); if (err) goto out; if (S_ISREG(file->f_path.dentry->d_inode->i_mode)) @@ -1452,7 +1451,7 @@ out: int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, - umode_t mode, int *opened) + umode_t mode) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; @@ -1461,6 +1460,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct inode *inode; unsigned int lookup_flags = 0; bool switched = false; + int created = 0; int err; /* Expect a negative dentry */ @@ -1521,7 +1521,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); - inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created); + if (created) + file->f_mode |= FMODE_CREATED; if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); @@ -1546,7 +1548,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; } - err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); + err = nfs_finish_open(ctx, ctx->dentry, file, open_flags); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); out: @@ -1641,6 +1643,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); struct inode *inode; + struct dentry *d; int error = -EACCES; d_drop(dentry); @@ -1662,10 +1665,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, goto out_error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); - error = PTR_ERR(inode); - if (IS_ERR(inode)) + d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + error = PTR_ERR(d); goto out_error; - d_add(dentry, inode); + } + dput(d); out: dput(parent); return 0; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 137e18abb7e7..51beb6e38c90 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations; /* dir.c */ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, - unsigned, umode_t, int *); + unsigned, umode_t); /* super.c */ extern struct file_system_type nfs4_fs_type; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dd146885da9..b790976d3913 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir, } } if (opened && opendata->file_created) - *opened |= FILE_CREATED; + *opened = 1; if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { *ctx_th = opendata->f_attr.mdsthreshold; @@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) - break; + goto out_restart; } - if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - goto out_done; - } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) - goto out_done; - + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (nfs4_stateid_match(&data->arg.open_stateid, + if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - goto out_done; - } else if (nfs4_stateid_match(&data->arg.lock_stateid, + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - goto out_done; + goto out_restart; } - if (!data->cancelled) - rpc_restart_call_prepare(task); out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) @@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b0555d7d8200..55a099e47ba2 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, goto out_nfserr; } - host_err = ima_file_check(file, may_flags, 0); + host_err = ima_file_check(file, may_flags); if (host_err) { fput(file); goto out_nfserr; diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index e2bea2ac5dfb..a6365e6bc047 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -384,8 +384,9 @@ out_err: static int __init dnotify_init(void) { - dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); - dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC); + dnotify_struct_cache = KMEM_CACHE(dnotify_struct, + SLAB_PANIC|SLAB_ACCOUNT); + dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); if (IS_ERR(dnotify_group)) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f90842efea13..eb4e75175cfb 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/audit.h> +#include <linux/sched/mm.h> #include "fanotify.h" @@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const struct path *path) { - struct fanotify_event_info *event; - gfp_t gfp = GFP_KERNEL; + struct fanotify_event_info *event = NULL; + gfp_t gfp = GFP_KERNEL_ACCOUNT; /* * For queues with unlimited length lost events are not expected and @@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, if (group->max_events == UINT_MAX) gfp |= __GFP_NOFAIL; + /* Whoever is interested in the event, pays for the allocation. */ + memalloc_use_memcg(group->memcg); + if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) - return NULL; + goto out; event = &pevent->fae; pevent->response = 0; goto init; } event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) - return NULL; + goto out; init: __maybe_unused fsnotify_init_event(&event->fse, inode, mask); event->tgid = get_pid(task_tgid(current)); @@ -174,6 +178,8 @@ init: __maybe_unused event->path.mnt = NULL; event->path.dentry = NULL; } +out: + memalloc_unuse_memcg(); return event; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ec4d8c59d0e3..69054886915b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,7 @@ #include <linux/uaccess.h> #include <linux/compat.h> #include <linux/sched/signal.h> +#include <linux/memcontrol.h> #include <asm/ioctls.h> @@ -524,17 +525,16 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, return mask & oldmask; } -static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags) +static int fanotify_remove_mark(struct fsnotify_group *group, + fsnotify_connp_t *connp, __u32 mask, + unsigned int flags) { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, - group); + fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -542,47 +542,33 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); - if (removed & real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + if (removed & fsnotify_conn_mask(fsn_mark->connector)) + fsnotify_recalc_mask(fsn_mark->connector); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); + /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); return 0; } +static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, + struct vfsmount *mnt, __u32 mask, + unsigned int flags) +{ + return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, + mask, flags); +} + static int fanotify_remove_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) { - struct fsnotify_mark *fsn_mark = NULL; - __u32 removed; - int destroy_mark; - - mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - if (!fsn_mark) { - mutex_unlock(&group->mark_mutex); - return -ENOENT; - } - - removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, - &destroy_mark); - if (removed & inode->i_fsnotify_mask) - fsnotify_recalc_mask(inode->i_fsnotify_marks); - if (destroy_mark) - fsnotify_detach_mark(fsn_mark); - mutex_unlock(&group->mark_mutex); - if (destroy_mark) - fsnotify_free_mark(fsn_mark); - - /* matches the fsnotify_find_mark() */ - fsnotify_put_mark(fsn_mark); - - return 0; + return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, + flags); } static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, @@ -615,8 +601,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, - struct inode *inode, - struct vfsmount *mnt) + fsnotify_connp_t *connp, + unsigned int type) { struct fsnotify_mark *mark; int ret; @@ -629,7 +615,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, inode, mnt, 0); + ret = fsnotify_add_mark_locked(mark, connp, type, 0); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -639,39 +625,43 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, } -static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags) +static int fanotify_add_mark(struct fsnotify_group *group, + fsnotify_connp_t *connp, unsigned int type, + __u32 mask, unsigned int flags) { struct fsnotify_mark *fsn_mark; __u32 added; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, - group); + fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, NULL, mnt); + fsn_mark = fanotify_add_new_mark(group, connp, type); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - if (added & ~real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + if (added & ~fsnotify_conn_mask(fsn_mark->connector)) + fsnotify_recalc_mask(fsn_mark->connector); mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; } +static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, + struct vfsmount *mnt, __u32 mask, + unsigned int flags) +{ + return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); +} + static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, unsigned int flags) { - struct fsnotify_mark *fsn_mark; - __u32 added; - pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); /* @@ -684,22 +674,8 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, (atomic_read(&inode->i_writecount) > 0)) return 0; - mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, inode, NULL); - if (IS_ERR(fsn_mark)) { - mutex_unlock(&group->mark_mutex); - return PTR_ERR(fsn_mark); - } - } - added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - if (added & ~inode->i_fsnotify_mask) - fsnotify_recalc_mask(inode->i_fsnotify_marks); - mutex_unlock(&group->mark_mutex); - - fsnotify_put_mark(fsn_mark); - return 0; + return fanotify_add_mark(group, &inode->i_fsnotify_marks, + FSNOTIFY_OBJ_TYPE_INODE, mask, flags); } /* fanotify syscalls */ @@ -756,6 +732,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); + group->memcg = get_mem_cgroup_from_mm(current->mm); oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); if (unlikely(!oevent)) { @@ -957,7 +934,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, + SLAB_PANIC|SLAB_ACCOUNT); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 10aac1942c9f..86fcf5814279 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -15,7 +15,7 @@ #include <linux/exportfs.h> #include "inotify/inotify.h" -#include "../fs/mount.h" +#include "fsnotify.h" #if defined(CONFIG_PROC_FS) @@ -81,7 +81,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->connector->inode); + inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { /* * IN_ALL_EVENTS represents all of the mask bits @@ -117,7 +117,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { - inode = igrab(mark->connector->inode); + inode = igrab(fsnotify_conn_inode(mark->connector)); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -127,7 +127,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_putc(m, '\n'); iput(inode); } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { - struct mount *mnt = real_mount(mark->connector->mnt); + struct mount *mnt = fsnotify_conn_mount(mark->connector); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 34515d2c4ba3..7902653dd577 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,6 +9,18 @@ #include "../mount.h" +static inline struct inode *fsnotify_conn_inode( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct inode, i_fsnotify_marks); +} + +static inline struct mount *fsnotify_conn_mount( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct mount, mnt_fsnotify_marks); +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -19,8 +31,8 @@ extern struct srcu_struct fsnotify_mark_srcu; extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); -/* Destroy all marks connected via given connector */ -extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp); +/* Destroy all marks attached to an object via connector */ +extern void fsnotify_destroy_marks(fsnotify_connp_t *connp); /* run the list of all marks associated with inode and destroy them */ static inline void fsnotify_clear_marks_by_inode(struct inode *inode) { diff --git a/fs/notify/group.c b/fs/notify/group.c index aa5468f23e45..c03b83662876 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -22,6 +22,7 @@ #include <linux/srcu.h> #include <linux/rculist.h> #include <linux/wait.h> +#include <linux/memcontrol.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" @@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) if (group->ops->free_group_priv) group->ops->free_group_priv(group); + mem_cgroup_put(group->memcg); + kfree(group); } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 9ab6dde38a14..f4184b4f3815 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -31,6 +31,7 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> +#include <linux/sched/mm.h> #include "inotify.h" @@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group, i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); - event = kmalloc(alloc_len, GFP_KERNEL); + /* Whoever is interested in the event, pays for the allocation. */ + memalloc_use_memcg(group->memcg); + event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT); + memalloc_unuse_memcg(); + if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1cf5b779d862..ac6978d3208c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -38,6 +38,7 @@ #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/wait.h> +#include <linux/memcontrol.h> #include "inotify.h" #include "../fdinfo.h" @@ -510,6 +511,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, __u32 old_mask, new_mask; __u32 mask; int add = (arg & IN_MASK_ADD); + int create = (arg & IN_MASK_CREATE); int ret; mask = inotify_arg_to_mask(arg); @@ -517,6 +519,8 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; + else if (create) + return -EEXIST; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); @@ -636,6 +640,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) oevent->name_len = 0; group->max_events = max_events; + group->memcg = get_mem_cgroup_from_mm(current->mm); spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); @@ -718,6 +723,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (unlikely(!f.file)) return -EBADF; + /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) + return -EINVAL; + /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) { ret = -EINVAL; @@ -806,9 +815,10 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); - inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); + inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, + SLAB_PANIC|SLAB_ACCOUNT); inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 61f4c5fa34c7..05506d60131c 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -109,6 +109,23 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) refcount_inc(&mark->refcnt); } +static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) +{ + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) + return &fsnotify_conn_inode(conn)->i_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) + return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; + return NULL; +} + +__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) +{ + if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) + return 0; + + return *fsnotify_conn_mask_p(conn); +} + static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; @@ -119,15 +136,15 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) new_mask |= mark->mask; } - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) - conn->inode->i_fsnotify_mask = new_mask; - else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) - real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; + if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) + return; + + *fsnotify_conn_mask_p(conn) = new_mask; } /* * Calculate mask of events for a list of marks. The caller must make sure - * connector and connector->inode cannot disappear under us. Callers achieve + * connector and connector->obj cannot disappear under us. Callers achieve * this by holding a mark->lock or mark->group->mark_mutex for a mark on this * list. */ @@ -140,7 +157,8 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) __fsnotify_recalc_mask(conn); spin_unlock(&conn->lock); if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) - __fsnotify_update_child_dentry_flags(conn->inode); + __fsnotify_update_child_dentry_flags( + fsnotify_conn_inode(conn)); } /* Free all connectors queued for freeing once SRCU period ends */ @@ -166,20 +184,20 @@ static struct inode *fsnotify_detach_connector_from_object( { struct inode *inode = NULL; + if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) + return NULL; + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { - inode = conn->inode; - rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; - conn->inode = NULL; - conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { - rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, - NULL); - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; - conn->mnt = NULL; - conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } + rcu_assign_pointer(*(conn->obj), NULL); + conn->obj = NULL; + conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + return inode; } @@ -436,11 +454,10 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } -static int fsnotify_attach_connector_to_object( - struct fsnotify_mark_connector __rcu **connp, - struct inode *inode, - struct vfsmount *mnt) +static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, + unsigned int type) { + struct inode *inode = NULL; struct fsnotify_mark_connector *conn; conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); @@ -448,13 +465,10 @@ static int fsnotify_attach_connector_to_object( return -ENOMEM; spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); - if (inode) { - conn->type = FSNOTIFY_OBJ_TYPE_INODE; - conn->inode = igrab(inode); - } else { - conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; - conn->mnt = mnt; - } + conn->type = type; + conn->obj = connp; + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) + inode = igrab(fsnotify_conn_inode(conn)); /* * cmpxchg() provides the barrier so that readers of *connp can see * only initialized structure @@ -476,7 +490,7 @@ static int fsnotify_attach_connector_to_object( * they are sure list cannot go away under them. */ static struct fsnotify_mark_connector *fsnotify_grab_connector( - struct fsnotify_mark_connector __rcu **connp) + fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; int idx; @@ -503,27 +517,22 @@ out: * priority, highest number first, and then by the group's location in memory. */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - struct inode *inode, struct vfsmount *mnt, + fsnotify_connp_t *connp, unsigned int type, int allow_dups) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; - struct fsnotify_mark_connector __rcu **connp; int cmp; int err = 0; - if (WARN_ON(!inode && !mnt)) + if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; - if (inode) - connp = &inode->i_fsnotify_marks; - else - connp = &real_mount(mnt)->mnt_fsnotify_marks; restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, inode, mnt); + err = fsnotify_attach_connector_to_object(connp, type); if (err) return err; goto restart; @@ -569,14 +578,13 @@ out_err: * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, - struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, + fsnotify_connp_t *connp, unsigned int type, + int allow_dups) { struct fsnotify_group *group = mark->group; int ret = 0; - BUG_ON(inode && mnt); - BUG_ON(!inode && !mnt); BUG_ON(!mutex_is_locked(&group->mark_mutex)); /* @@ -593,7 +601,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); if (ret) goto err; @@ -613,14 +621,14 @@ err: return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, - struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, + unsigned int type, int allow_dups) { int ret; struct fsnotify_group *group = mark->group; mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups); + ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); mutex_unlock(&group->mark_mutex); return ret; } @@ -629,9 +637,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark( - struct fsnotify_mark_connector __rcu **connp, - struct fsnotify_group *group) +struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, + struct fsnotify_group *group) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; @@ -697,8 +704,8 @@ clear: } } -/* Destroy all marks attached to inode / vfsmount */ -void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) +/* Destroy all marks attached to an object via connector */ +void fsnotify_destroy_marks(fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark, *old_mark = NULL; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 3a2e509c77c5..8946130c87ad 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -93,13 +93,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; - local_irq_save(flags); kaddr = kmap_atomic(page); memset(kaddr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); flush_dcache_page(page); kunmap_atomic(kaddr); - local_irq_restore(flags); } } else { clear_buffer_uptodate(bh); @@ -146,13 +144,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - local_irq_save(flags); kaddr = kmap_atomic(page); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); kunmap_atomic(kaddr); - local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); @@ -926,7 +922,7 @@ static int ntfs_write_mst_block(struct page *page, ntfs_volume *vol = ni->vol; u8 *kaddr; unsigned int rec_size = ni->itype.index.block_size; - ntfs_inode *locked_nis[PAGE_SIZE / rec_size]; + ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE]; struct buffer_head *bh, *head, *tbh, *rec_start_bh; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; runlist_element *rl; @@ -935,6 +931,9 @@ static int ntfs_write_mst_block(struct page *page, bool sync, is_mft, page_is_dirty, rec_is_dirty; unsigned char bh_size_bits; + if (WARN_ON(rec_size < NTFS_BLOCK_SIZE)) + return -EINVAL; + ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " "0x%lx.", vi->i_ino, ni->type, page->index); BUG_ON(!NInoNonResident(ni)); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index fbd0090d7d0c..df7c32b5fac7 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -128,6 +128,7 @@ static inline void handle_bounds_compressed_page(struct page *page, /** * ntfs_decompress - decompress a compression block into an array of pages * @dest_pages: destination array of pages + * @completed_pages: scratch space to track completed pages * @dest_index: current index into @dest_pages (IN/OUT) * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT) * @dest_max_index: maximum index into @dest_pages (IN) @@ -162,10 +163,10 @@ static inline void handle_bounds_compressed_page(struct page *page, * Note to hackers: This function may not sleep until it has finished accessing * the compression block @cb_start as it is a per-CPU buffer. */ -static int ntfs_decompress(struct page *dest_pages[], int *dest_index, - int *dest_ofs, const int dest_max_index, const int dest_max_ofs, - const int xpage, char *xpage_done, u8 *const cb_start, - const u32 cb_size, const loff_t i_size, +static int ntfs_decompress(struct page *dest_pages[], int completed_pages[], + int *dest_index, int *dest_ofs, const int dest_max_index, + const int dest_max_ofs, const int xpage, char *xpage_done, + u8 *const cb_start, const u32 cb_size, const loff_t i_size, const s64 initialized_size) { /* @@ -190,9 +191,6 @@ static int ntfs_decompress(struct page *dest_pages[], int *dest_index, /* Variables for tag and token parsing. */ u8 tag; /* Current tag. */ int token; /* Loop counter for the eight tokens in tag. */ - - /* Need this because we can't sleep, so need two stages. */ - int completed_pages[dest_max_index - *dest_index + 1]; int nr_completed_pages = 0; /* Default error code. */ @@ -516,6 +514,7 @@ int ntfs_read_compressed_block(struct page *page) unsigned int cb_clusters, cb_max_ofs; int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; struct page **pages; + int *completed_pages; unsigned char xpage_done = 0; ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " @@ -528,14 +527,16 @@ int ntfs_read_compressed_block(struct page *page) BUG_ON(ni->name_len); pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS); + completed_pages = kmalloc_array(nr_pages + 1, sizeof(int), GFP_NOFS); /* Allocate memory to store the buffer heads we need. */ bhs_size = cb_size / block_size * sizeof(struct buffer_head *); bhs = kmalloc(bhs_size, GFP_NOFS); - if (unlikely(!pages || !bhs)) { + if (unlikely(!pages || !bhs || !completed_pages)) { kfree(bhs); kfree(pages); + kfree(completed_pages); unlock_page(page); ntfs_error(vol->sb, "Failed to allocate internal buffers."); return -ENOMEM; @@ -562,6 +563,7 @@ int ntfs_read_compressed_block(struct page *page) if (xpage >= max_page) { kfree(bhs); kfree(pages); + kfree(completed_pages); zero_user(page, 0, PAGE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); @@ -854,10 +856,10 @@ lock_retry_remap: unsigned int prev_cur_page = cur_page; ntfs_debug("Found compressed compression block."); - err = ntfs_decompress(pages, &cur_page, &cur_ofs, - cb_max_page, cb_max_ofs, xpage, &xpage_done, - cb_pos, cb_size - (cb_pos - cb), i_size, - initialized_size); + err = ntfs_decompress(pages, completed_pages, &cur_page, + &cur_ofs, cb_max_page, cb_max_ofs, xpage, + &xpage_done, cb_pos, cb_size - (cb_pos - cb), + i_size, initialized_size); /* * We can sleep from now on, lock already dropped by * ntfs_decompress(). @@ -912,6 +914,7 @@ lock_retry_remap: /* We no longer need the list of pages. */ kfree(pages); + kfree(completed_pages); /* If we have completed the requested page, we return success. */ if (likely(xpage_done)) @@ -956,5 +959,6 @@ err_out: } } kfree(pages); + kfree(completed_pages); return -EIO; } diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index decaf75d1cd5..bd3221cbdd95 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -667,18 +667,18 @@ static int ntfs_read_locked_inode(struct inode *vi) * mtime is the last change of the data within the file. Not changed * when only metadata is changed, e.g. a rename doesn't affect mtime. */ - vi->i_mtime = timespec_to_timespec64(ntfs2utc(si->last_data_change_time)); + vi->i_mtime = ntfs2utc(si->last_data_change_time); /* * ctime is the last change of the metadata of the file. This obviously * always changes, when mtime is changed. ctime can be changed on its * own, mtime is then not changed, e.g. when a file is renamed. */ - vi->i_ctime = timespec_to_timespec64(ntfs2utc(si->last_mft_change_time)); + vi->i_ctime = ntfs2utc(si->last_mft_change_time); /* * Last access to the data within the file. Not changed during a rename * for example but changed whenever the file is written to. */ - vi->i_atime = timespec_to_timespec64(ntfs2utc(si->last_access_time)); + vi->i_atime = ntfs2utc(si->last_access_time); /* Find the attribute list attribute if present. */ ntfs_attr_reinit_search_ctx(ctx); @@ -2997,7 +2997,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si = (STANDARD_INFORMATION*)((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset)); /* Update the access times if they have changed. */ - nt = utc2ntfs(timespec64_to_timespec(vi->i_mtime)); + nt = utc2ntfs(vi->i_mtime); if (si->last_data_change_time != nt) { ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3006,7 +3006,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_data_change_time = nt; modified = true; } - nt = utc2ntfs(timespec64_to_timespec(vi->i_ctime)); + nt = utc2ntfs(vi->i_ctime); if (si->last_mft_change_time != nt) { ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3015,7 +3015,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_mft_change_time = nt; modified = true; } - nt = utc2ntfs(timespec64_to_timespec(vi->i_atime)); + nt = utc2ntfs(vi->i_atime); if (si->last_access_time != nt) { ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 32c523cf5a2d..fb14d17666c8 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -35,6 +35,8 @@ #include "mft.h" #include "ntfs.h" +#define MAX_BHS (PAGE_SIZE / NTFS_BLOCK_SIZE) + /** * map_mft_record_page - map the page in which a specific mft record resides * @ni: ntfs inode whose mft record page to map @@ -469,7 +471,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, struct page *page; unsigned int blocksize = vol->sb->s_blocksize; int max_bhs = vol->mft_record_size / blocksize; - struct buffer_head *bhs[max_bhs]; + struct buffer_head *bhs[MAX_BHS]; struct buffer_head *bh, *head; u8 *kmirr; runlist_element *rl; @@ -479,6 +481,8 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, ntfs_debug("Entering for inode 0x%lx.", mft_no); BUG_ON(!max_bhs); + if (WARN_ON(max_bhs > MAX_BHS)) + return -EINVAL; if (unlikely(!vol->mftmirr_ino)) { /* This could happen during umount... */ err = ntfs_sync_mft_mirror_umount(vol, mft_no, m); @@ -674,7 +678,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) unsigned int blocksize = vol->sb->s_blocksize; unsigned char blocksize_bits = vol->sb->s_blocksize_bits; int max_bhs = vol->mft_record_size / blocksize; - struct buffer_head *bhs[max_bhs]; + struct buffer_head *bhs[MAX_BHS]; struct buffer_head *bh, *head; runlist_element *rl; unsigned int block_start, block_end, m_start, m_end; @@ -684,6 +688,10 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) BUG_ON(NInoAttr(ni)); BUG_ON(!max_bhs); BUG_ON(!PageLocked(page)); + if (WARN_ON(max_bhs > MAX_BHS)) { + err = -EINVAL; + goto err_out; + } /* * If the ntfs_inode is clean no need to do anything. If it is dirty, * mark it as clean now so that it can be redirtied later on if needed. diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h index 01233989d5d1..24cd719f1fd2 100644 --- a/fs/ntfs/time.h +++ b/fs/ntfs/time.h @@ -36,16 +36,16 @@ * Convert the Linux UTC time @ts to its corresponding NTFS time and return * that in little endian format. * - * Linux stores time in a struct timespec consisting of a time_t (long at - * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second - * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of - * 1-nano-second intervals since the value of tv_sec. + * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec + * and a long tv_nsec where tv_sec is the number of 1-second intervals since + * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second + * intervals since the value of tv_sec. * * NTFS uses Microsoft's standard time format which is stored in a s64 and is * measured as the number of 100-nano-second intervals since 1st January 1601, * 00:00:00 UTC. */ -static inline sle64 utc2ntfs(const struct timespec ts) +static inline sle64 utc2ntfs(const struct timespec64 ts) { /* * Convert the seconds to 100ns intervals, add the nano-seconds @@ -63,7 +63,10 @@ static inline sle64 utc2ntfs(const struct timespec ts) */ static inline sle64 get_current_ntfs_time(void) { - return utc2ntfs(current_kernel_time()); + struct timespec64 ts; + + ktime_get_coarse_real_ts64(&ts); + return utc2ntfs(ts); } /** @@ -73,18 +76,18 @@ static inline sle64 get_current_ntfs_time(void) * Convert the little endian NTFS time @time to its corresponding Linux UTC * time and return that in cpu format. * - * Linux stores time in a struct timespec consisting of a time_t (long at - * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second - * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of - * 1-nano-second intervals since the value of tv_sec. + * Linux stores time in a struct timespec64 consisting of a time64_t tv_sec + * and a long tv_nsec where tv_sec is the number of 1-second intervals since + * 1st January 1970, 00:00:00 UTC and tv_nsec is the number of 1-nano-second + * intervals since the value of tv_sec. * * NTFS uses Microsoft's standard time format which is stored in a s64 and is * measured as the number of 100 nano-second intervals since 1st January 1601, * 00:00:00 UTC. */ -static inline struct timespec ntfs2utc(const sle64 time) +static inline struct timespec64 ntfs2utc(const sle64 time) { - struct timespec ts; + struct timespec64 ts; /* Subtract the NTFS time offset. */ u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0f157bbd3e0f..a342f008e42f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -932,13 +932,11 @@ static int ocfs2_validate_extent_block(struct super_block *sb, goto bail; } - if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) rc = ocfs2_error(sb, "Extent block #%llu has an invalid h_fs_generation of #%u\n", (unsigned long long)bh->b_blocknr, le32_to_cpu(eb->h_fs_generation)); - goto bail; - } bail: return rc; } @@ -1481,19 +1479,17 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et, while(le16_to_cpu(el->l_tree_depth) > 1) { if (le16_to_cpu(el->l_next_free_rec) == 0) { - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has empty extent list (next_free_rec == 0)\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); - status = -EIO; + status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has empty extent list (next_free_rec == 0)\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); goto bail; } i = le16_to_cpu(el->l_next_free_rec) - 1; blkno = le64_to_cpu(el->l_recs[i].e_blkno); if (!blkno) { - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has extent list where extent # %d has no physical block start\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i); - status = -EIO; + status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has extent list where extent # %d has no physical block start\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i); goto bail; } @@ -1598,10 +1594,8 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, * the new data. */ ret = ocfs2_add_branch(handle, et, bh, last_eb_bh, meta_ac); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - goto out; - } out: if (final_depth) @@ -3214,11 +3208,10 @@ rightmost_no_delete: goto rightmost_no_delete; if (le16_to_cpu(el->l_next_free_rec) == 0) { - ret = -EIO; - ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), - "Owner %llu has empty extent block at %llu\n", - (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), - (unsigned long long)le64_to_cpu(eb->h_blkno)); + ret = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has empty extent block at %llu\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), + (unsigned long long)le64_to_cpu(eb->h_blkno)); goto out; } @@ -4411,12 +4404,11 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, le16_to_cpu(new_el->l_count)) { bh = path_leaf_bh(left_path); eb = (struct ocfs2_extent_block *)bh->b_data; - ocfs2_error(sb, - "Extent block #%llu has an invalid l_next_free_rec of %d. It should have matched the l_count of %d\n", - (unsigned long long)le64_to_cpu(eb->h_blkno), - le16_to_cpu(new_el->l_next_free_rec), - le16_to_cpu(new_el->l_count)); - status = -EINVAL; + status = ocfs2_error(sb, + "Extent block #%llu has an invalid l_next_free_rec of %d. It should have matched the l_count of %d\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec), + le16_to_cpu(new_el->l_count)); goto free_left_path; } rec = &new_el->l_recs[ @@ -4466,11 +4458,10 @@ static int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { bh = path_leaf_bh(right_path); eb = (struct ocfs2_extent_block *)bh->b_data; - ocfs2_error(sb, - "Extent block #%llu has an invalid l_next_free_rec of %d\n", - (unsigned long long)le64_to_cpu(eb->h_blkno), - le16_to_cpu(new_el->l_next_free_rec)); - status = -EINVAL; + status = ocfs2_error(sb, + "Extent block #%llu has an invalid l_next_free_rec of %d\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec)); goto free_right_path; } rec = &new_el->l_recs[1]; @@ -5523,10 +5514,8 @@ static int ocfs2_truncate_rec(handle_t *handle, ocfs2_journal_dirty(handle, path_leaf_bh(path)); ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); - if (ret) { + if (ret) mlog_errno(ret); - goto out; - } out: ocfs2_free_path(left_path); @@ -5659,10 +5648,8 @@ int ocfs2_remove_extent(handle_t *handle, ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, cpos, len); - if (ret) { + if (ret) mlog_errno(ret); - goto out; - } } out: @@ -5707,7 +5694,6 @@ static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode, if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); - goto out; } } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ea8c551bcd7e..9b2ed62dd638 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -127,13 +127,13 @@ enum o2hb_heartbeat_modes { O2HB_HEARTBEAT_NUM_MODES, }; -char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { - "local", /* O2HB_HEARTBEAT_LOCAL */ - "global", /* O2HB_HEARTBEAT_GLOBAL */ +static const char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { + "local", /* O2HB_HEARTBEAT_LOCAL */ + "global", /* O2HB_HEARTBEAT_GLOBAL */ }; unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; -unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; +static unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; /* * o2hb_dependent_users tracks the number of registered callbacks that depend @@ -141,7 +141,7 @@ unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; * However only o2dlm depends on the heartbeat. It does not want the heartbeat * to stop while a dlm domain is still active. */ -unsigned int o2hb_dependent_users; +static unsigned int o2hb_dependent_users; /* * In global heartbeat mode, all regions are pinned if there are one or more @@ -2486,7 +2486,7 @@ unlock: return ret; } -void o2hb_region_dec_user(const char *region_uuid) +static void o2hb_region_dec_user(const char *region_uuid) { spin_lock(&o2hb_live_lock); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index da64c3a20eeb..0e4166cc23a0 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -35,9 +35,9 @@ * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; -char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { - "reset", /* O2NM_FENCE_RESET */ - "panic", /* O2NM_FENCE_PANIC */ +static const char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = { + "reset", /* O2NM_FENCE_RESET */ + "panic", /* O2NM_FENCE_PANIC */ }; static inline void o2nm_lock_subsystem(void); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1296f78ae966..7d9eea7d4a87 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -872,8 +872,6 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, "for type %u key %08x\n", msg_type, key); } write_unlock(&o2net_handler_lock); - if (ret) - goto out; out: if (ret) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0ff424c6d17c..8e712b614e6e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,7 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ -struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ddc3e9470c87..79279240fb6e 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -637,10 +637,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, handle = NULL; status = ocfs2_commit_truncate(osb, inode, fe_bh); - if (status < 0) { + if (status < 0) mlog_errno(status); - goto out; - } } out: @@ -1499,7 +1497,6 @@ static int ocfs2_filecheck_validate_inode_block(struct super_block *sb, (unsigned long long)bh->b_blocknr, le32_to_cpu(di->i_fs_generation)); rc = -OCFS2_FILECHECK_ERR_GENERATION; - goto bail; } bail: diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index fe0d1f9571bb..7642b6712c39 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -663,11 +663,10 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, #ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { - ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", - (unsigned long long)le64_to_cpu(alloc->i_blkno), - le32_to_cpu(alloc->id1.bitmap1.i_used), - ocfs2_local_alloc_count_bits(alloc)); - status = -EIO; + status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", + (unsigned long long)le64_to_cpu(alloc->i_blkno), + le32_to_cpu(alloc->id1.bitmap1.i_used), + ocfs2_local_alloc_count_bits(alloc)); goto bail; } #endif diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 16c42ed0dca8..b1a8b046f4c2 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -137,14 +137,13 @@ static int ocfs2_read_quota_block(struct inode *inode, u64 v_block, int rc = 0; struct buffer_head *tmp = *bh; - if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { - ocfs2_error(inode->i_sb, - "Quota file %llu is probably corrupted! Requested to read block %Lu but file has size only %Lu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)v_block, - (unsigned long long)i_size_read(inode)); - return -EIO; - } + if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) + return ocfs2_error(inode->i_sb, + "Quota file %llu is probably corrupted! Requested to read block %Lu but file has size only %Lu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_block, + (unsigned long long)i_size_read(inode)); + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, ocfs2_validate_quota_block); if (rc) diff --git a/fs/open.c b/fs/open.c index d0e955b558ad..d98e19239bb7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -724,27 +724,13 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } -int open_check_o_direct(struct file *f) -{ - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) - return -EINVAL; - } - return 0; -} - static int do_dentry_open(struct file *f, struct inode *inode, - int (*open)(struct inode *, struct file *), - const struct cred *cred) + int (*open)(struct inode *, struct file *)) { static const struct file_operations empty_fops = {}; int error; - f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | - FMODE_PREAD | FMODE_PWRITE; - path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; @@ -753,7 +739,7 @@ static int do_dentry_open(struct file *f, f->f_wb_err = filemap_sample_wb_err(f->f_mapping); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH; + f->f_mode = FMODE_PATH | FMODE_OPENED; f->f_op = &empty_fops; return 0; } @@ -780,7 +766,7 @@ static int do_dentry_open(struct file *f, goto cleanup_all; } - error = security_file_open(f, cred); + error = security_file_open(f); if (error) goto cleanup_all; @@ -788,6 +774,8 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* normally all 3 are set; ->open() can clear them if needed */ + f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; if (!open) open = f->f_op->open; if (open) { @@ -795,6 +783,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; } + f->f_mode |= FMODE_OPENED; if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && @@ -809,9 +798,16 @@ static int do_dentry_open(struct file *f, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) + return -EINVAL; + } return 0; cleanup_all: + if (WARN_ON_ONCE(error > 0)) + error = -EINVAL; fops_put(f->f_op); if (f->f_mode & FMODE_WRITER) { put_write_access(inode); @@ -847,19 +843,12 @@ cleanup_file: * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened) + int (*open)(struct inode *, struct file *)) { - int error; - BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ + BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, d_backing_inode(dentry), open, - current_cred()); - if (!error) - *opened |= FILE_OPENED; - - return error; + return do_dentry_open(file, d_backing_inode(dentry), open); } EXPORT_SYMBOL(finish_open); @@ -874,13 +863,13 @@ EXPORT_SYMBOL(finish_open); * NB: unlike finish_open() this function does consume the dentry reference and * the caller need not dput() it. * - * Returns "1" which must be the return value of ->atomic_open() after having + * Returns "0" which must be the return value of ->atomic_open() after having * called this function. */ int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; - return 1; + return 0; } EXPORT_SYMBOL(finish_no_open); @@ -896,8 +885,7 @@ EXPORT_SYMBOL(file_path); * @file: newly allocated file with f_flag initialized * @cred: credentials to use */ -int vfs_open(const struct path *path, struct file *file, - const struct cred *cred) +int vfs_open(const struct path *path, struct file *file) { struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0); @@ -905,7 +893,7 @@ int vfs_open(const struct path *path, struct file *file, return PTR_ERR(dentry); file->f_path = *path; - return do_dentry_open(file, d_backing_inode(dentry), NULL, cred); + return do_dentry_open(file, d_backing_inode(dentry), NULL); } struct file *dentry_open(const struct path *path, int flags, @@ -919,19 +907,11 @@ struct file *dentry_open(const struct path *path, int flags, /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); - f = get_empty_filp(); + f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { - f->f_flags = flags; - error = vfs_open(path, f, cred); - if (!error) { - /* from now on we need fput() to dispose of f */ - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); + error = vfs_open(path, f); + if (error) { + fput(f); f = ERR_PTR(error); } } @@ -1063,26 +1043,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(file_open_root); -struct file *filp_clone_open(struct file *oldfile) -{ - struct file *file; - int retval; - - file = get_empty_filp(); - if (IS_ERR(file)) - return file; - - file->f_flags = oldfile->f_flags; - retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred); - if (retval) { - put_filp(file); - return ERR_PTR(retval); - } - - return file; -} -EXPORT_SYMBOL(filp_clone_open); - long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index db0b52187cbc..a5a2fe76568f 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -528,18 +528,19 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar return ret; } -static int orangefs_fault(struct vm_fault *vmf) +static vm_fault_t orangefs_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; - int rc; - rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, + int ret; + + ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, STATX_SIZE); - if (rc == -ESTALE) - rc = -EIO; - if (rc) { - gossip_err("%s: orangefs_inode_getattr failed, " - "rc:%d:.\n", __func__, rc); - return rc; + if (ret == -ESTALE) + ret = -EIO; + if (ret) { + gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", + __func__, ret); + return VM_FAULT_SIGBUS; } return filemap_fault(vmf); } diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 6e4d2af8f5bc..31932879b716 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -251,7 +251,6 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, { int ret = -ENOENT; struct inode *inode = path->dentry->d_inode; - struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_getattr: called on %pd\n", @@ -262,8 +261,6 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, generic_fillattr(inode, stat); /* override block size reported to stat */ - orangefs_inode = ORANGEFS_I(inode); - if (request_mask & STATX_SIZE) stat->result_mask = STATX_BASIC_STATS; else diff --git a/fs/pipe.c b/fs/pipe.c index 39d6f431da83..bdc5d3c0977d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -741,54 +741,33 @@ fail_inode: int create_pipe_files(struct file **res, int flags) { - int err; struct inode *inode = get_pipe_inode(); struct file *f; - struct path path; if (!inode) return -ENFILE; - err = -ENOMEM; - path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name); - if (!path.dentry) - goto err_inode; - path.mnt = mntget(pipe_mnt); - - d_instantiate(path.dentry, inode); - - f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); + f = alloc_file_pseudo(inode, pipe_mnt, "", + O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), + &pipefifo_fops); if (IS_ERR(f)) { - err = PTR_ERR(f); - goto err_dentry; + free_pipe_info(inode->i_pipe); + iput(inode); + return PTR_ERR(f); } - f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; - res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); + res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), + &pipefifo_fops); if (IS_ERR(res[0])) { - err = PTR_ERR(res[0]); - goto err_file; + put_pipe_info(inode, inode->i_pipe); + fput(f); + return PTR_ERR(res[0]); } - - path_get(&path); res[0]->private_data = inode->i_pipe; - res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); res[1] = f; return 0; - -err_file: - put_filp(f); -err_dentry: - free_pipe_info(inode->i_pipe); - path_put(&path); - return err; - -err_inode: - free_pipe_info(inode->i_pipe); - iput(inode); - return err; } static int __do_pipe_flags(int *fd, struct file **files, int flags) diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 09c19ef91526..503086f7f7c1 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -50,12 +50,19 @@ config PSTORE_842_COMPRESS help This option enables 842 compression algorithm support. +config PSTORE_ZSTD_COMPRESS + bool "zstd compression" + depends on PSTORE + select CRYPTO_ZSTD + help + This option enables zstd compression algorithm support. + config PSTORE_COMPRESS def_bool y depends on PSTORE depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS || \ PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS || \ - PSTORE_842_COMPRESS + PSTORE_842_COMPRESS || PSTORE_ZSTD_COMPRESS choice prompt "Default pstore compression algorithm" @@ -65,8 +72,8 @@ choice This change be changed at boot with "pstore.compress=..." on the kernel command line. - Currently, pstore has support for 5 compression algorithms: - deflate, lzo, lz4, lz4hc and 842. + Currently, pstore has support for 6 compression algorithms: + deflate, lzo, lz4, lz4hc, 842 and zstd. The default compression algorithm is deflate. @@ -85,6 +92,9 @@ choice config PSTORE_842_COMPRESS_DEFAULT bool "842" if PSTORE_842_COMPRESS + config PSTORE_ZSTD_COMPRESS_DEFAULT + bool "zstd" if PSTORE_ZSTD_COMPRESS + endchoice config PSTORE_COMPRESS_DEFAULT @@ -95,6 +105,7 @@ config PSTORE_COMPRESS_DEFAULT default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT default "842" if PSTORE_842_COMPRESS_DEFAULT + default "zstd" if PSTORE_ZSTD_COMPRESS_DEFAULT config PSTORE_CONSOLE bool "Log kernel console messages" diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index c238ab8ba31d..15e99d5a681d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -34,6 +34,9 @@ #if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS) #include <linux/lz4.h> #endif +#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS) +#include <linux/zstd.h> +#endif #include <linux/crypto.h> #include <linux/string.h> #include <linux/timer.h> @@ -192,6 +195,13 @@ static int zbufsize_842(size_t size) } #endif +#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS) +static int zbufsize_zstd(size_t size) +{ + return ZSTD_compressBound(size); +} +#endif + static const struct pstore_zbackend *zbackend __ro_after_init; static const struct pstore_zbackend zbackends[] = { @@ -225,6 +235,12 @@ static const struct pstore_zbackend zbackends[] = { .name = "842", }, #endif +#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS) + { + .zbufsize = zbufsize_zstd, + .name = "zstd", + }, +#endif { } }; diff --git a/fs/seq_file.c b/fs/seq_file.c index 4cc090b50cc5..1dea7a8a5255 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -90,23 +90,22 @@ EXPORT_SYMBOL(seq_open); static int traverse(struct seq_file *m, loff_t offset) { - loff_t pos = 0, index; + loff_t pos = 0; int error = 0; void *p; m->version = 0; - index = 0; + m->index = 0; m->count = m->from = 0; - if (!offset) { - m->index = index; + if (!offset) return 0; - } + if (!m->buf) { m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) return -ENOMEM; } - p = m->op->start(m, &index); + p = m->op->start(m, &m->index); while (p) { error = PTR_ERR(p); if (IS_ERR(p)) @@ -123,20 +122,15 @@ static int traverse(struct seq_file *m, loff_t offset) if (pos + m->count > offset) { m->from = offset - pos; m->count -= m->from; - m->index = index; break; } pos += m->count; m->count = 0; - if (pos == offset) { - index++; - m->index = index; + p = m->op->next(m, p, &m->index); + if (pos == offset) break; - } - p = m->op->next(m, p, &index); } m->op->stop(m, p); - m->index = index; return error; Eoverflow: @@ -160,7 +154,6 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *m = file->private_data; size_t copied = 0; - loff_t pos; size_t n; void *p; int err = 0; @@ -223,16 +216,12 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) size -= n; buf += n; copied += n; - if (!m->count) { - m->from = 0; - m->index++; - } if (!size) goto Done; } /* we need at least one record in buffer */ - pos = m->index; - p = m->op->start(m, &pos); + m->from = 0; + p = m->op->start(m, &m->index); while (1) { err = PTR_ERR(p); if (!p || IS_ERR(p)) @@ -243,8 +232,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (unlikely(err)) m->count = 0; if (unlikely(!m->count)) { - p = m->op->next(m, p, &pos); - m->index = pos; + p = m->op->next(m, p, &m->index); continue; } if (m->count < m->size) @@ -256,29 +244,33 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (!m->buf) goto Enomem; m->version = 0; - pos = m->index; - p = m->op->start(m, &pos); + p = m->op->start(m, &m->index); } m->op->stop(m, p); m->count = 0; goto Done; Fill: /* they want more? let's try to get some more */ - while (m->count < size) { + while (1) { size_t offs = m->count; - loff_t next = pos; - p = m->op->next(m, p, &next); + loff_t pos = m->index; + + p = m->op->next(m, p, &m->index); + if (pos == m->index) + /* Buggy ->next function */ + m->index++; if (!p || IS_ERR(p)) { err = PTR_ERR(p); break; } + if (m->count >= size) + break; err = m->op->show(m, p); if (seq_has_overflowed(m) || err) { m->count = offs; if (likely(err <= 0)) break; } - pos = next; } m->op->stop(m, p); n = min(m->count, size); @@ -287,11 +279,7 @@ Fill: goto Efault; copied += n; m->count -= n; - if (m->count) - m->from = n; - else - pos++; - m->index = pos; + m->from = n; Done: if (!copied) copied = err; diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2751476e6b6e..f098b9f1c396 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 23813c078cc9..0839efa720b3 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + if (unlikely(length < 0)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 13d80947bf9e..f1c1430ae721 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n, } for (i = 0; i < blocks; i++) { - int size = le32_to_cpu(blist[i]); + int size = squashfs_block_size(blist[i]); + if (size < 0) { + err = size; + goto failure; + } block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); } n -= blocks; @@ -367,7 +371,24 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) sizeof(size)); if (res < 0) return res; - return le32_to_cpu(size); + return squashfs_block_size(size); +} + +void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +{ + int copied; + void *pageaddr; + + pageaddr = kmap_atomic(page); + copied = squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + copied, 0, PAGE_SIZE - copied); + kunmap_atomic(pageaddr); + + flush_dcache_page(page); + if (copied == avail) + SetPageUptodate(page); + else + SetPageError(page); } /* Copy data into page cache */ @@ -376,7 +397,6 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - void *pageaddr; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; @@ -402,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(push_page); - SetPageUptodate(push_page); + squashfs_fill_page(push_page, buffer, offset, avail); skip_page: unlock_page(push_page); if (i != page->index) @@ -416,10 +431,9 @@ skip_page: } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) +static int squashfs_readpage_fragment(struct page *page, int expected) { struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -430,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, i_size_read(inode) & - (msblk->block_size - 1), + squashfs_copy_cache(page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +static int squashfs_readpage_sparse(struct page *page, int expected) { - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - - squashfs_copy_cache(page, NULL, bytes, 0); + squashfs_copy_cache(page, NULL, expected, 0); return 0; } @@ -456,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page) struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; + int expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; int res; void *pageaddr; @@ -474,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page) goto error_out; if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize); + res = squashfs_readpage_block(page, block, bsize, expected); } else - res = squashfs_readpage_fragment(page); + res = squashfs_readpage_fragment(page, expected); if (!res) return 0; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index f2310d2a2019..a9ba8d96776a 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -20,7 +20,7 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) { struct inode *i = page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, @@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize) ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, buffer->length, 0); + squashfs_copy_cache(page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index cb485d8e0e91..80db1b86a27c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -21,10 +21,11 @@ #include "page_actor.h" static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); + int pages, struct page **page, int bytes); /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) { struct inode *inode = target_page->mapping->host; @@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * using an intermediate buffer. */ res = squashfs_read_cache(target_page, block, bsize, pages, - page); + page, expected); if (res < 0) goto mark_errored; @@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (res < 0) goto mark_errored; + if (res != expected) { + res = -EIO; + goto mark_errored; + } + /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { @@ -138,13 +144,12 @@ out: static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) + int pages, struct page **page, int bytes) { struct inode *i = target_page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; + int res = buffer->error, n, offset = 0; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, @@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, if (page[n] == NULL) continue; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); + squashfs_fill_page(page[n], buffer, offset, avail); unlock_page(page[n]); if (page[n] != target_page) put_page(page[n]); diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0ed6edbc5c71..0681feab4a84 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if (fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); @@ -61,9 +66,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); - size = le32_to_cpu(fragment_entry.size); - - return size; + return squashfs_block_size(fragment_entry.size); } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..f89f8a74c6ce 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ +void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +extern int squashfs_readpage_block(struct page *, u64, int, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 24d12fd14177..4e6853f084d0 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -129,6 +129,12 @@ #define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) +static inline int squashfs_block_size(__le32 raw) +{ + u32 size = le32_to_cpu(raw); + return (size >> 25) ? -EIO : size; +} + /* * Inode number ops. Inodes consist of a compressed block number, and an * uncompressed offset within that block diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..ef69c31947bf 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 8a73b97217c8..40e657386fa5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -272,7 +273,7 @@ allocate_id_index_table: sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; diff --git a/fs/statfs.c b/fs/statfs.c index 5b2a24f0f263..f0216629621d 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) +int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf) { struct kstatfs tmp; int error; @@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s return error; } -COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + return kcompat_sys_statfs64(pathname, sz, buf); +} + +int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf) { struct kstatfs tmp; int error; @@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co return error; } +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + return kcompat_sys_fstatfs64(fd, sz, buf); +} + /* * This is a copy of sys_ustat, just dealing with a structure layout. * Given how simple this syscall is that apporach is more maintainable diff --git a/fs/super.c b/fs/super.c index 50728d9c1a05..7429588d6b49 100644 --- a/fs/super.c +++ b/fs/super.c @@ -144,6 +144,9 @@ static unsigned long super_cache_count(struct shrinker *shrink, total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc); + if (!total_objects) + return SHRINK_EMPTY; + total_objects = vfs_pressure_ratio(total_objects); return total_objects; } @@ -244,10 +247,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, INIT_LIST_HEAD(&s->s_inodes_wb); spin_lock_init(&s->s_inode_wblist_lock); - if (list_lru_init_memcg(&s->s_dentry_lru)) - goto fail; - if (list_lru_init_memcg(&s->s_inode_lru)) - goto fail; s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); @@ -265,6 +264,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; if (prealloc_shrinker(&s->s_shrink)) goto fail; + if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink)) + goto fail; + if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) + goto fail; return s; fail: diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 58eba92a0e41..feeae8081c22 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; + kuid_t uid; + kgid_t gid; BUG_ON(!kobj); @@ -51,8 +53,11 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) if (!parent) return -ENOENT; + kobject_get_ownership(kobj, &uid, &gid); + kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns); + S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, + kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, kobject_name(kobj)); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 5c13f29bfcdb..0a7252aecfa5 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t mode, const void *ns) + umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { struct lock_class_key *key = NULL; const struct kernfs_ops *ops; @@ -302,8 +302,9 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif - kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops, - (void *)attr, ns, key); + + kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, + size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); @@ -312,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, return 0; } -int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, - bool is_bin) -{ - return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL); -} - /** * sysfs_create_file_ns - create an attribute file for an object with custom ns * @kobj: object we're creating for @@ -327,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { + kuid_t uid; + kgid_t gid; + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, + uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); @@ -358,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error; if (group) { @@ -370,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj, if (!parent) return -ENOENT; - error = sysfs_add_file(parent, attr, false); + kobject_get_ownership(kobj, &uid, &gid); + error = sysfs_add_file_mode_ns(parent, attr, false, + attr->mode, uid, gid, NULL); kernfs_put(parent); return error; @@ -406,6 +410,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** + * sysfs_break_active_protection - break "active" protection + * @kobj: The kernel object @attr is associated with. + * @attr: The attribute to break the "active" protection for. + * + * With sysfs, just like kernfs, deletion of an attribute is postponed until + * all active .show() and .store() callbacks have finished unless this function + * is called. Hence this function is useful in methods that implement self + * deletion. + */ +struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, + const struct attribute *attr) +{ + struct kernfs_node *kn; + + kobject_get(kobj); + kn = kernfs_find_and_get(kobj->sd, attr->name); + if (kn) + kernfs_break_active_protection(kn); + return kn; +} +EXPORT_SYMBOL_GPL(sysfs_break_active_protection); + +/** + * sysfs_unbreak_active_protection - restore "active" protection + * @kn: Pointer returned by sysfs_break_active_protection(). + * + * Undo the effects of sysfs_break_active_protection(). Since this function + * calls kernfs_put() on the kernfs node that corresponds to the 'attr' + * argument passed to sysfs_break_active_protection() that attribute may have + * been removed between the sysfs_break_active_protection() and + * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after + * this function has returned. + */ +void sysfs_unbreak_active_protection(struct kernfs_node *kn) +{ + struct kobject *kobj = kn->parent->priv; + + kernfs_unbreak_active_protection(kn); + kernfs_put(kn); + kobject_put(kobj); +} +EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); + +/** * sysfs_remove_file_ns - remove an object attribute with a custom ns tag * @kobj: object we're acting for * @attr: attribute descriptor @@ -486,9 +534,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { + kuid_t uid; + kgid_t gid; + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, &attr->attr, true); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true, + attr->attr.mode, uid, gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 4802ec0e1e3a..1eb2d6307663 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent, } static int create_files(struct kernfs_node *parent, struct kobject *kobj, + kuid_t uid, kgid_t gid, const struct attribute_group *grp, int update) { struct attribute *const *attr; @@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, - mode, NULL); + mode, uid, gid, NULL); if (unlikely(error)) break; } @@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - mode, NULL); + mode, + uid, gid, NULL); if (error) break; } @@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update, const struct attribute_group *grp) { struct kernfs_node *kn; + kuid_t uid; + kgid_t gid; int error; BUG_ON(!kobj || (!update && !kobj->sd)); @@ -118,23 +122,38 @@ static int internal_create_group(struct kobject *kobj, int update, kobj->name, grp->name ?: ""); return -EINVAL; } + kobject_get_ownership(kobj, &uid, &gid); if (grp->name) { - kn = kernfs_create_dir(kobj->sd, grp->name, - S_IRWXU | S_IRUGO | S_IXUGO, kobj); - if (IS_ERR(kn)) { - if (PTR_ERR(kn) == -EEXIST) - sysfs_warn_dup(kobj->sd, grp->name); - return PTR_ERR(kn); + if (update) { + kn = kernfs_find_and_get(kobj->sd, grp->name); + if (!kn) { + pr_warn("Can't update unknown attr grp name: %s/%s\n", + kobj->name, grp->name); + return -EINVAL; + } + } else { + kn = kernfs_create_dir_ns(kobj->sd, grp->name, + S_IRWXU | S_IRUGO | S_IXUGO, + uid, gid, kobj, NULL); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(kobj->sd, grp->name); + return PTR_ERR(kn); + } } } else kn = kobj->sd; kernfs_get(kn); - error = create_files(kn, kobj, grp, update); + error = create_files(kn, kobj, uid, gid, grp, update); if (error) { if (grp->name) kernfs_remove(kn); } kernfs_put(kn); + + if (grp->name && update) + kernfs_put(kn); + return error; } @@ -199,7 +218,8 @@ EXPORT_SYMBOL_GPL(sysfs_create_groups); * of the attribute files being created already exist. Furthermore, * if the visibility of the files has changed through the is_visible() * callback, it will update the permissions and add or remove the - * relevant files. + * relevant files. Changing a group's name (subdirectory name under + * kobj's directory in sysfs) is not allowed. * * The primary use for this function is to call it after making a change * that affects group visibility. @@ -281,6 +301,8 @@ int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error = 0; struct attribute *const *attr; int i; @@ -289,8 +311,11 @@ int sysfs_merge_group(struct kobject *kobj, if (!parent) return -ENOENT; + kobject_get_ownership(kobj, &uid, &gid); + for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) - error = sysfs_add_file(parent, *attr, false); + error = sysfs_add_file_mode_ns(parent, *attr, false, + (*attr)->mode, uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d098e015fcc9..0050cc0c0236 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); /* * file.c */ -int sysfs_add_file(struct kernfs_node *parent, - const struct attribute *attr, bool is_bin); int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t amode, const void *ns); + umode_t amode, kuid_t uid, kgid_t gid, + const void *ns); /* * symlink.c diff --git a/fs/timerfd.c b/fs/timerfd.c index cdad49da3ff7..d69ad801eb80 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -66,7 +66,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx) spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); } @@ -107,7 +107,7 @@ void timerfd_clock_was_set(void) if (ctx->moffs != moffs) { ctx->moffs = KTIME_MAX; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } @@ -345,7 +345,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg spin_lock_irq(&ctx->wqh.lock); if (!timerfd_canceled(ctx)) { ctx->ticks = ticks; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); } else ret = -ECANCELED; spin_unlock_irq(&ctx->wqh.lock); @@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t) } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, - const struct itimerspec __user *, utmr, - struct itimerspec __user *, otmr) + const struct __kernel_itimerspec __user *, utmr, + struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 new, old; int ret; @@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return ret; } -SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) +SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); @@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0; } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct compat_itimerspec __user *, utmr, struct compat_itimerspec __user *, otmr) diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 56569023783b..f8e5872f7cc2 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -125,7 +125,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) else iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - iinfo->i_crtime = timespec64_to_timespec(inode->i_mtime); + iinfo->i_crtime = inode->i_mtime; if (unlikely(insert_inode_locked(inode) < 0)) { make_bad_inode(inode); iput(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 9915a58fbabd..5df554a9f9c9 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1270,7 +1270,6 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) struct udf_inode_info *iinfo = UDF_I(inode); struct udf_sb_info *sbi = UDF_SB(inode->i_sb); struct kernel_lb_addr *iloc = &iinfo->i_location; - struct timespec ts; unsigned int link_count; unsigned int indirections = 0; int bs = inode->i_sb->s_blocksize; @@ -1443,12 +1442,9 @@ reread: inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - udf_disk_stamp_to_time(&ts, fe->accessTime); - inode->i_atime = timespec_to_timespec64(ts); - udf_disk_stamp_to_time(&ts, fe->modificationTime); - inode->i_mtime = timespec_to_timespec64(ts); - udf_disk_stamp_to_time(&ts, fe->attrTime); - inode->i_ctime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime); + udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime); + udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime); iinfo->i_unique = le64_to_cpu(fe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); @@ -1458,13 +1454,10 @@ reread: inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - udf_disk_stamp_to_time(&ts, efe->accessTime); - inode->i_atime = timespec_to_timespec64(ts); - udf_disk_stamp_to_time(&ts, efe->modificationTime); - inode->i_mtime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime); + udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime); udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime); - udf_disk_stamp_to_time(&ts, efe->attrTime); - inode->i_ctime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime); iinfo->i_unique = le64_to_cpu(efe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); @@ -1601,7 +1594,7 @@ static int udf_sync_inode(struct inode *inode) return udf_update_inode(inode, 1); } -static void udf_adjust_time(struct udf_inode_info *iinfo, struct timespec time) +static void udf_adjust_time(struct udf_inode_info *iinfo, struct timespec64 time) { if (iinfo->i_crtime.tv_sec > time.tv_sec || (iinfo->i_crtime.tv_sec == time.tv_sec && @@ -1714,12 +1707,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) inode->i_sb->s_blocksize - sizeof(struct fileEntry)); fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); - udf_time_to_disk_stamp(&fe->accessTime, - timespec64_to_timespec(inode->i_atime)); - udf_time_to_disk_stamp(&fe->modificationTime, - timespec64_to_timespec(inode->i_mtime)); - udf_time_to_disk_stamp(&fe->attrTime, - timespec64_to_timespec(inode->i_ctime)); + udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); + udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); + udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); memset(&(fe->impIdent), 0, sizeof(struct regid)); strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; @@ -1738,17 +1728,14 @@ static int udf_update_inode(struct inode *inode, int do_sync) efe->objectSize = cpu_to_le64(inode->i_size); efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); - udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_atime)); - udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_mtime)); - udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_ctime)); + udf_adjust_time(iinfo, inode->i_atime); + udf_adjust_time(iinfo, inode->i_mtime); + udf_adjust_time(iinfo, inode->i_ctime); - udf_time_to_disk_stamp(&efe->accessTime, - timespec64_to_timespec(inode->i_atime)); - udf_time_to_disk_stamp(&efe->modificationTime, - timespec64_to_timespec(inode->i_mtime)); + udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime); + udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime); udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); - udf_time_to_disk_stamp(&efe->attrTime, - timespec64_to_timespec(inode->i_ctime)); + udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); memset(&(efe->impIdent), 0, sizeof(efe->impIdent)); strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 06f37ddd2997..58cc2414992b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -602,8 +602,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (unlikely(!fi)) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -694,8 +693,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } set_nlink(inode, 2); @@ -713,8 +711,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!fi) { clear_nlink(inode); mark_inode_dirty(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -1041,8 +1038,7 @@ out: out_no_entry: up_write(&iinfo->i_data_sem); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 0c504c8031d3..3040dc2a32f6 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1980,7 +1980,7 @@ static void udf_open_lvid(struct super_block *sb) struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; struct logicalVolIntegrityDescImpUse *lvidiu; - struct timespec ts; + struct timespec64 ts; if (!bh) return; @@ -1992,7 +1992,7 @@ static void udf_open_lvid(struct super_block *sb) mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - ktime_get_real_ts(&ts); + ktime_get_real_ts64(&ts); udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts); if (le32_to_cpu(lvid->integrityType) == LVID_INTEGRITY_TYPE_CLOSE) lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); @@ -2017,7 +2017,7 @@ static void udf_close_lvid(struct super_block *sb) struct buffer_head *bh = sbi->s_lvid_bh; struct logicalVolIntegrityDesc *lvid; struct logicalVolIntegrityDescImpUse *lvidiu; - struct timespec ts; + struct timespec64 ts; if (!bh) return; @@ -2029,7 +2029,7 @@ static void udf_close_lvid(struct super_block *sb) mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; - ktime_get_real_ts(&ts); + ktime_get_real_ts64(&ts); udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts); if (UDF_MAX_WRITE_VERSION > le16_to_cpu(lvidiu->maxUDFWriteRev)) lvidiu->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index 630426ffb775..2ef0e212f08a 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -28,7 +28,7 @@ struct udf_ext_cache { */ struct udf_inode_info { - struct timespec i_crtime; + struct timespec64 i_crtime; /* Physical address of inode */ struct kernel_lb_addr i_location; __u64 i_unique; diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 9dd3e1b9619e..9424d7cab790 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -138,7 +138,7 @@ struct udf_sb_info { rwlock_t s_cred_lock; /* Root Info */ - struct timespec s_record_time; + struct timespec64 s_record_time; /* Fileset Info */ __u16 s_serial_number; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 84c47dde4d26..ee246769dee4 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -258,8 +258,8 @@ extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); /* udftime.c */ -extern void udf_disk_stamp_to_time(struct timespec *dest, +extern void udf_disk_stamp_to_time(struct timespec64 *dest, struct timestamp src); -extern void udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src); +extern void udf_time_to_disk_stamp(struct timestamp *dest, struct timespec64 src); #endif /* __UDF_DECL_H */ diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 67b33ac5d41b..fce4ad976c8c 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -41,7 +41,7 @@ #include <linux/time.h> void -udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src) +udf_disk_stamp_to_time(struct timespec64 *dest, struct timestamp src) { u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); u16 year = le16_to_cpu(src.year); @@ -70,9 +70,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src) } void -udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts) +udf_time_to_disk_stamp(struct timestamp *dest, struct timespec64 ts) { - long seconds; + time64_t seconds; int16_t offset; struct tm tm; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index e727ee07dbe4..075d3d9114c8 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -547,7 +547,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, /* * Block can be extended */ - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); for (i = newcount; i < (uspi->s_fpb - fragoff); i++) if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) break; @@ -639,7 +639,7 @@ cg_found: if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_alloc_fragments", "internal error, bad magic number on cg %u", cgno); - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); if (count == uspi->s_fpb) { result = ufs_alloccg_block (inode, ucpi, goal, err); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index e1ef0f0a1353..969fd60436d3 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -89,7 +89,7 @@ void ufs_free_inode (struct inode * inode) if (!ufs_cg_chkmagic(sb, ucg)) ufs_panic (sb, "ufs_free_fragments", "internal error, bad cg magic number"); - ucg->cg_time = cpu_to_fs32(sb, get_seconds()); + ucg->cg_time = ufs_get_seconds(sb); is_directory = S_ISDIR(inode->i_mode); @@ -343,8 +343,7 @@ cg_found: fail_remove_inode: mutex_unlock(&sbi->s_lock); clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d5f43ba76c59..9ef40f100415 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); return err; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 488088141451..a4e07e910f1b 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -698,7 +698,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) usb1 = ubh_get_usb_first(uspi); usb3 = ubh_get_usb_third(uspi); - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + usb1->fs_time = ufs_get_seconds(sb); if ((flags & UFS_ST_MASK) == UFS_ST_SUN || (flags & UFS_ST_MASK) == UFS_ST_SUNOS || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) @@ -1342,7 +1342,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ if (*mount_flags & SB_RDONLY) { ufs_put_super_internal(sb); - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + usb1->fs_time = ufs_get_seconds(sb); if ((flags & UFS_ST_MASK) == UFS_ST_SUN || (flags & UFS_ST_MASK) == UFS_ST_SUNOS || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 1907be6d5808..1fd3011ea623 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -590,3 +590,17 @@ static inline int ufs_is_data_ptr_zero(struct ufs_sb_private_info *uspi, else return *(__fs32 *)p == 0; } + +static inline __fs32 ufs_get_seconds(struct super_block *sbp) +{ + time64_t now = ktime_get_real_seconds(); + + /* Signed 32-bit interpretation wraps around in 2038, which + * happens in ufs1 inode stamps but not ufs2 using 64-bits + * stamps. For superblock and blockgroup, let's assume + * unsigned 32-bit stamps, which are good until y2106. + * Wrap around rather than clamp here to make the dirty + * file system detection work in the superblock stamp. + */ + return cpu_to_fs32(sbp, lower_32_bits(now)); +} diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 594d192b2331..15c265d450bf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); @@ -1847,17 +1849,14 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) { struct userfaultfd_ctx *ctx = f->private_data; wait_queue_entry_t *wq; - struct userfaultfd_wait_queue *uwq; unsigned long pending = 0, total = 0; spin_lock(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { - uwq = container_of(wq, struct userfaultfd_wait_queue, wq); pending++; total++; } list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { - uwq = container_of(wq, struct userfaultfd_wait_queue, wq); total++; } spin_unlock(&ctx->fault_pending_wqh.lock); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 2f3f75a7f180..7f96bdadc372 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -158,6 +158,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ + bitmap.o \ repair.o \ ) endif diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index fecd187fcf2c..e701ebc36c06 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -248,7 +248,8 @@ __xfs_ag_resv_init( /* Create a per-AG block reservation. */ int xfs_ag_resv_init( - struct xfs_perag *pag) + struct xfs_perag *pag, + struct xfs_trans *tp) { struct xfs_mount *mp = pag->pag_mount; xfs_agnumber_t agno = pag->pag_agno; @@ -260,11 +261,11 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used); if (error) goto out; - error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); + error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used); if (error) goto out; @@ -282,7 +283,7 @@ xfs_ag_resv_init( mp->m_inotbt_nores = true; - error = xfs_refcountbt_calc_reserves(mp, agno, &ask, + error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used); if (error) goto out; @@ -298,7 +299,7 @@ xfs_ag_resv_init( if (pag->pag_rmapbt_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used); if (error) goto out; @@ -309,7 +310,7 @@ xfs_ag_resv_init( #ifdef DEBUG /* need to read in the AGF for the ASSERT below to work */ - error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); + error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index 4619b554ee90..c0352edc8e41 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -7,7 +7,7 @@ #define __XFS_AG_RESV_H__ int xfs_ag_resv_free(struct xfs_perag *pag); -int xfs_ag_resv_init(struct xfs_perag *pag); +int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp); bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type); xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag, @@ -28,7 +28,7 @@ xfs_ag_resv_rmapbt_alloc( struct xfs_mount *mp, xfs_agnumber_t agno) { - struct xfs_alloc_arg args = {0}; + struct xfs_alloc_arg args = { NULL }; struct xfs_perag *pag; args.len = 1; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index eef466260d43..e1c0c0d2f1b0 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -223,12 +223,13 @@ xfs_alloc_get_rec( error = xfs_btree_get_rec(cur, &rec, stat); if (error || !(*stat)) return error; - if (rec->alloc.ar_blockcount == 0) - goto out_bad_rec; *bno = be32_to_cpu(rec->alloc.ar_startblock); *len = be32_to_cpu(rec->alloc.ar_blockcount); + if (*len == 0) + goto out_bad_rec; + /* check for valid extent range, including overflow */ if (!xfs_verify_agbno(mp, agno, *bno)) goto out_bad_rec; @@ -2197,12 +2198,12 @@ xfs_agfl_reset( */ STATIC void xfs_defer_agfl_block( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_fsblock_t agbno, struct xfs_owner_info *oinfo) { + struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *new; /* new element */ ASSERT(xfs_bmap_free_item_zone != NULL); @@ -2215,7 +2216,7 @@ xfs_defer_agfl_block( trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); - xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); } /* @@ -2322,16 +2323,8 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; - /* defer agfl frees if dfops is provided */ - if (tp->t_agfl_dfops) { - xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno, - bno, &targs.oinfo); - } else { - error = xfs_free_agfl_block(tp, args->agno, bno, agbp, - &targs.oinfo); - if (error) - goto out_agbp_relse; - } + /* defer agfl frees */ + xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); } targs.tp = tp; @@ -2754,9 +2747,6 @@ xfs_alloc_read_agf( pag->pagf_levels[XFS_BTNUM_RMAPi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); - spin_lock_init(&pag->pagb_lock); - pag->pagb_count = 0; - pag->pagb_tree = RB_ROOT; pag->pagf_init = 1; pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); } @@ -2783,16 +2773,16 @@ xfs_alloc_read_agf( */ int /* error */ xfs_alloc_vextent( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args) /* allocation argument structure */ { - xfs_agblock_t agsize; /* allocation group size */ - int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_mount_t *mp; /* mount structure pointer */ - xfs_agnumber_t sagno; /* starting allocation group number */ - xfs_alloctype_t type; /* input allocation type */ - int bump_rotor = 0; - xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ + xfs_agblock_t agsize; /* allocation group size */ + int error; + int flags; /* XFS_ALLOC_FLAG_... locking flags */ + struct xfs_mount *mp; /* mount structure pointer */ + xfs_agnumber_t sagno; /* starting allocation group number */ + xfs_alloctype_t type; /* input allocation type */ + int bump_rotor = 0; + xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; type = args->otype = args->type; @@ -2913,7 +2903,7 @@ xfs_alloc_vextent( * locking of AGF, which might cause deadlock. */ if (++(args->agno) == mp->m_sb.sb_agcount) { - if (args->firstblock != NULLFSBLOCK) + if (args->tp->t_firstblock != NULLFSBLOCK) args->agno = sagno; else args->agno = 0; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index e716c993ac4c..00cd5ec4cb6b 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -74,7 +74,6 @@ typedef struct xfs_alloc_arg { int datatype; /* mask defining data type treatment */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ - xfs_fsblock_t firstblock; /* io first block allocated */ struct xfs_owner_info oinfo; /* owner of blocks being allocated */ enum xfs_ag_resv_type resv; /* block reservation to use */ } xfs_alloc_arg_t; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 99590f61d624..1e671d4eb6fa 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -202,9 +202,7 @@ xfs_attr_set( struct xfs_mount *mp = dp->i_mount; struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; - struct xfs_defer_ops dfops; struct xfs_trans_res tres; - xfs_fsblock_t firstblock; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, local; @@ -219,8 +217,6 @@ xfs_attr_set( args.value = value; args.valuelen = valuelen; - args.firstblock = &firstblock; - args.dfops = &dfops; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); @@ -315,21 +311,18 @@ xfs_attr_set( * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ - xfs_defer_init(args.dfops, args.firstblock); error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) - goto out_defer_cancel; + goto out; /* * Prevent the leaf buffer from being unlocked so that a * concurrent AIL push cannot grab the half-baked leaf * buffer and run into problems with the write verifier. */ xfs_trans_bhold(args.trans, leaf_bp); - xfs_defer_bjoin(args.dfops, leaf_bp); - xfs_defer_ijoin(args.dfops, dp); - error = xfs_defer_finish(&args.trans, args.dfops); + error = xfs_defer_finish(&args.trans); if (error) - goto out_defer_cancel; + goto out; /* * Commit the leaf transformation. We'll need another (linked) @@ -369,8 +362,6 @@ xfs_attr_set( return error; -out_defer_cancel: - xfs_defer_cancel(&dfops); out: if (leaf_bp) xfs_trans_brelse(args.trans, leaf_bp); @@ -392,8 +383,6 @@ xfs_attr_remove( { struct xfs_mount *mp = dp->i_mount; struct xfs_da_args args; - struct xfs_defer_ops dfops; - xfs_fsblock_t firstblock; int error; XFS_STATS_INC(mp, xs_attr_remove); @@ -405,9 +394,6 @@ xfs_attr_remove( if (error) return error; - args.firstblock = &firstblock; - args.dfops = &dfops; - /* * we have no control over the attribute names that userspace passes us * to remove, so we have to allow the name lookup prior to attribute @@ -536,11 +522,12 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * if bmap_one_block() says there is only one block (ie: no remote blks). */ STATIC int -xfs_attr_leaf_addname(xfs_da_args_t *args) +xfs_attr_leaf_addname( + struct xfs_da_args *args) { - xfs_inode_t *dp; - struct xfs_buf *bp; - int retval, error, forkoff; + struct xfs_inode *dp; + struct xfs_buf *bp; + int retval, error, forkoff; trace_xfs_attr_leaf_addname(args); @@ -598,14 +585,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Commit that transaction so that the node_addname() call * can manage its own transactions. */ - xfs_defer_init(args->dfops, args->firstblock); error = xfs_attr3_leaf_to_node(args); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + return error; /* * Commit the current trans (including the inode) and start @@ -687,15 +672,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * If the result is small enough, shrink it all into the inode. */ if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + return error; } /* @@ -711,7 +694,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) } return error; out_defer_cancel: - xfs_defer_cancel(args->dfops); + xfs_defer_cancel(args->trans); return error; } @@ -722,11 +705,12 @@ out_defer_cancel: * if bmap_one_block() says there is only one block (ie: no remote blks). */ STATIC int -xfs_attr_leaf_removename(xfs_da_args_t *args) +xfs_attr_leaf_removename( + struct xfs_da_args *args) { - xfs_inode_t *dp; - struct xfs_buf *bp; - int error, forkoff; + struct xfs_inode *dp; + struct xfs_buf *bp; + int error, forkoff; trace_xfs_attr_leaf_removename(args); @@ -751,19 +735,17 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) * If the result is small enough, shrink it all into the inode. */ if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + return error; } return 0; out_defer_cancel: - xfs_defer_cancel(args->dfops); + xfs_defer_cancel(args->trans); return error; } @@ -814,13 +796,14 @@ xfs_attr_leaf_get(xfs_da_args_t *args) * add a whole extra layer of confusion on top of that. */ STATIC int -xfs_attr_node_addname(xfs_da_args_t *args) +xfs_attr_node_addname( + struct xfs_da_args *args) { - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - xfs_inode_t *dp; - xfs_mount_t *mp; - int retval, error; + struct xfs_da_state *state; + struct xfs_da_state_blk *blk; + struct xfs_inode *dp; + struct xfs_mount *mp; + int retval, error; trace_xfs_attr_node_addname(args); @@ -879,14 +862,12 @@ restart: */ xfs_da_state_free(state); state = NULL; - xfs_defer_init(args->dfops, args->firstblock); error = xfs_attr3_leaf_to_node(args); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + goto out; /* * Commit the node conversion and start the next @@ -905,14 +886,12 @@ restart: * in the index/blkno/rmtblkno/rmtblkcnt fields and * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. */ - xfs_defer_init(args->dfops, args->firstblock); error = xfs_da3_split(state); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + goto out; } else { /* * Addition succeeded, update Btree hashvals. @@ -1003,14 +982,12 @@ restart: * Check to see if the tree needs to be collapsed. */ if (retval && (state->path.active > 1)) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_da3_join(state); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + goto out; } /* @@ -1037,7 +1014,7 @@ out: return error; return retval; out_defer_cancel: - xfs_defer_cancel(args->dfops); + xfs_defer_cancel(args->trans); goto out; } @@ -1049,13 +1026,14 @@ out_defer_cancel: * the root node (a special case of an intermediate node). */ STATIC int -xfs_attr_node_removename(xfs_da_args_t *args) +xfs_attr_node_removename( + struct xfs_da_args *args) { - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - xfs_inode_t *dp; - struct xfs_buf *bp; - int retval, error, forkoff; + struct xfs_da_state *state; + struct xfs_da_state_blk *blk; + struct xfs_inode *dp; + struct xfs_buf *bp; + int retval, error, forkoff; trace_xfs_attr_node_removename(args); @@ -1127,14 +1105,12 @@ xfs_attr_node_removename(xfs_da_args_t *args) * Check to see if the tree needs to be collapsed. */ if (retval && (state->path.active > 1)) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_da3_join(state); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + goto out; /* * Commit the Btree join operation and start a new trans. */ @@ -1159,15 +1135,13 @@ xfs_attr_node_removename(xfs_da_args_t *args) goto out; if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + goto out; } else xfs_trans_brelse(args->trans, bp); } @@ -1177,7 +1151,7 @@ out: xfs_da_state_free(state); return error; out_defer_cancel: - xfs_defer_cancel(args->dfops); + xfs_defer_cancel(args->trans); goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 76e90046731c..6fc5425b1474 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -242,8 +242,9 @@ xfs_attr3_leaf_verify( struct xfs_attr3_icleaf_hdr ichdr; struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_perag *pag = bp->b_pag; struct xfs_attr_leaf_entry *entries; + uint16_t end; + int i; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -268,7 +269,7 @@ xfs_attr3_leaf_verify( * because we may have transitioned an empty shortform attr to a leaf * if the attr didn't fit in shortform. */ - if (pag && pag->pagf_init && ichdr.count == 0) + if (!xfs_log_in_recovery(mp) && ichdr.count == 0) return __this_address; /* @@ -289,6 +290,26 @@ xfs_attr3_leaf_verify( /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ + /* + * Quickly check the freemap information. Attribute data has to be + * aligned to 4-byte boundaries, and likewise for the free space. + */ + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (ichdr.freemap[i].base > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.freemap[i].base & 0x3) + return __this_address; + if (ichdr.freemap[i].size > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.freemap[i].size & 0x3) + return __this_address; + end = ichdr.freemap[i].base + ichdr.freemap[i].size; + if (end < ichdr.freemap[i].base) + return __this_address; + if (end > mp->m_attr_geo->blksize) + return __this_address; + } + return NULL; } @@ -506,7 +527,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args) { xfs_attr_sf_hdr_t *hdr; xfs_inode_t *dp; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; trace_xfs_attr_sf_create(args); @@ -541,7 +562,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) int i, offset, size; xfs_mount_t *mp; xfs_inode_t *dp; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; trace_xfs_attr_sf_add(args); @@ -682,7 +703,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) xfs_attr_shortform_t *sf; xfs_attr_sf_entry_t *sfe; int i; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; trace_xfs_attr_sf_lookup(args); @@ -747,18 +768,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) */ int xfs_attr_shortform_to_leaf( - struct xfs_da_args *args, - struct xfs_buf **leaf_bp) + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { - xfs_inode_t *dp; - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - xfs_da_args_t nargs; - char *tmpbuffer; - int error, i, size; - xfs_dablk_t blkno; - struct xfs_buf *bp; - xfs_ifork_t *ifp; + struct xfs_inode *dp; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + struct xfs_da_args nargs; + char *tmpbuffer; + int error, i, size; + xfs_dablk_t blkno; + struct xfs_buf *bp; + struct xfs_ifork *ifp; trace_xfs_attr_sf_to_leaf(args); @@ -802,8 +823,6 @@ xfs_attr_shortform_to_leaf( memset((char *)&nargs, 0, sizeof(nargs)); nargs.dp = dp; nargs.geo = args->geo; - nargs.firstblock = args->firstblock; - nargs.dfops = args->dfops; nargs.total = args->total; nargs.whichfork = XFS_ATTR_FORK; nargs.trans = args->trans; @@ -1006,8 +1025,6 @@ xfs_attr3_leaf_to_shortform( memset((char *)&nargs, 0, sizeof(nargs)); nargs.geo = args->geo; nargs.dp = dp; - nargs.firstblock = args->firstblock; - nargs.dfops = args->dfops; nargs.total = args->total; nargs.whichfork = XFS_ATTR_FORK; nargs.trans = args->trans; @@ -1570,17 +1587,10 @@ xfs_attr3_leaf_rebalance( */ swap = 0; if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) { - struct xfs_da_state_blk *tmp_blk; - struct xfs_attr3_icleaf_hdr tmp_ichdr; - - tmp_blk = blk1; - blk1 = blk2; - blk2 = tmp_blk; + swap(blk1, blk2); - /* struct copies to swap them rather than reconverting */ - tmp_ichdr = ichdr1; - ichdr1 = ichdr2; - ichdr2 = tmp_ichdr; + /* swap structures rather than reconverting them */ + swap(ichdr1, ichdr2); leaf1 = blk1->bp->b_addr; leaf2 = blk2->bp->b_addr; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index bf2e0371149b..af094063e402 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -480,17 +480,15 @@ xfs_attr_rmtval_set( * extent and then crash then the block may not contain the * correct metadata after log recovery occurs. */ - xfs_defer_init(args->dfops, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, - args->total, &map, &nmap, args->dfops); + blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, + &nmap); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + return error; ASSERT(nmap == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && @@ -522,7 +520,6 @@ xfs_attr_rmtval_set( ASSERT(blkcnt > 0); - xfs_defer_init(args->dfops, args->firstblock); nmap = 1; error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, blkcnt, &map, &nmap, @@ -557,8 +554,7 @@ xfs_attr_rmtval_set( ASSERT(valuelen == 0); return 0; out_defer_cancel: - xfs_defer_cancel(args->dfops); - args->trans = NULL; + xfs_defer_cancel(args->trans); return error; } @@ -626,16 +622,13 @@ xfs_attr_rmtval_remove( blkcnt = args->rmtblkcnt; done = 0; while (!done) { - xfs_defer_init(args->dfops, args->firstblock); error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, - XFS_BMAPI_ATTRFORK, 1, args->firstblock, - args->dfops, &done); + XFS_BMAPI_ATTRFORK, 1, &done); if (error) goto out_defer_cancel; - xfs_defer_ijoin(args->dfops, args->dp); - error = xfs_defer_finish(&args->trans, args->dfops); + error = xfs_defer_finish(&args->trans); if (error) - goto out_defer_cancel; + return error; /* * Close out trans and start the next one in the chain. @@ -646,7 +639,6 @@ xfs_attr_rmtval_remove( } return 0; out_defer_cancel: - xfs_defer_cancel(args->dfops); - args->trans = NULL; + xfs_defer_cancel(args->trans); return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7205268b30bc..2760314fdf7f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -326,7 +326,7 @@ xfs_bmap_check_leaf_extents( xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ - xfs_ifork_t *ifp; /* fork structure */ + struct xfs_ifork *ifp; /* fork structure */ int level; /* btree level, for checking */ xfs_mount_t *mp; /* file system mount structure */ __be64 *pp; /* pointer to block address */ @@ -533,8 +533,7 @@ xfs_bmap_validate_ret( */ void __xfs_bmap_add_free( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_owner_info *oinfo, @@ -542,8 +541,9 @@ __xfs_bmap_add_free( { struct xfs_extent_free_item *new; /* new element */ #ifdef DEBUG - xfs_agnumber_t agno; - xfs_agblock_t agbno; + struct xfs_mount *mp = tp->t_mountp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; ASSERT(bno != NULLFSBLOCK); ASSERT(len > 0); @@ -566,9 +566,10 @@ __xfs_bmap_add_free( else xfs_rmap_skip_owner_update(&new->xefi_oinfo); new->xefi_skip_discard = skip_discard; - trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0, - XFS_FSB_TO_AGBNO(mp, bno), len); - xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); } /* @@ -594,7 +595,7 @@ xfs_bmap_btree_to_extents( xfs_fsblock_t cbno; /* child block number */ xfs_buf_t *cbp; /* child block's buffer */ int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork data */ + struct xfs_ifork *ifp; /* inode fork data */ xfs_mount_t *mp; /* mount point structure */ __be64 *pp; /* ptr to block address */ struct xfs_btree_block *rblock;/* root btree block */ @@ -624,7 +625,7 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo); + xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo); ip->i_d.di_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -644,25 +645,23 @@ xfs_bmap_btree_to_extents( */ STATIC int /* error */ xfs_bmap_extents_to_btree( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first-block-allocated */ - struct xfs_defer_ops *dfops, /* blocks freed in xaction */ - xfs_btree_cur_t **curp, /* cursor returned to caller */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode pointer */ + struct xfs_btree_cur **curp, /* cursor returned to caller */ int wasdel, /* converting a delayed alloc */ int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { struct xfs_btree_block *ablock; /* allocated (child) bt block */ - xfs_buf_t *abp; /* buffer for ablock */ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_bmbt_rec_t *arp; /* child record pointer */ + struct xfs_buf *abp; /* buffer for ablock */ + struct xfs_alloc_arg args; /* allocation arguments */ + struct xfs_bmbt_rec *arp; /* child record pointer */ struct xfs_btree_block *block; /* btree root block */ - xfs_btree_cur_t *cur; /* bmap btree cursor */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ int error; /* error return value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_key_t *kp; /* root block key pointer */ - xfs_mount_t *mp; /* mount structure */ + struct xfs_ifork *ifp; /* inode fork pointer */ + struct xfs_bmbt_key *kp; /* root block key pointer */ + struct xfs_mount *mp; /* mount structure */ xfs_bmbt_ptr_t *pp; /* root block address pointer */ struct xfs_iext_cursor icur; struct xfs_bmbt_irec rec; @@ -690,8 +689,6 @@ xfs_bmap_extents_to_btree( * Need a cursor. Can't allocate until bb_level is filled in. */ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; /* * Convert to a btree with two levels, one record in root. @@ -701,45 +698,43 @@ xfs_bmap_extents_to_btree( args.tp = tp; args.mp = mp; xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); - args.firstblock = *firstblock; - if (*firstblock == NULLFSBLOCK) { + if (tp->t_firstblock == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); - } else if (dfops->dop_low) { + } else if (tp->t_flags & XFS_TRANS_LOWMODE) { args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = *firstblock; + args.fsbno = tp->t_firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = *firstblock; + args.fsbno = tp->t_firstblock; } args.minlen = args.maxlen = args.prod = 1; args.wasdel = wasdel; *logflagsp = 0; if ((error = xfs_alloc_vextent(&args))) { - xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; + goto err1; } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return -ENOSPC; + error = -ENOSPC; + goto err1; } /* * Allocation can't fail, the space was reserved. */ - ASSERT(*firstblock == NULLFSBLOCK || - args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); - *firstblock = cur->bc_private.b.firstblock = args.fsbno; + ASSERT(tp->t_firstblock == NULLFSBLOCK || + args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock)); + tp->t_firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); + if (!abp) { + error = -ENOSPC; + goto err2; + } /* * Fill in the child block. */ @@ -779,6 +774,15 @@ xfs_bmap_extents_to_btree( *curp = cur; *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); return 0; + +err2: + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); +err1: + xfs_iroot_realloc(ip, -1, whichfork); + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + + return error; } /* @@ -812,7 +816,6 @@ STATIC int /* error */ xfs_bmap_local_to_extents( xfs_trans_t *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated in xaction */ xfs_extlen_t total, /* total blocks needed by transaction */ int *logflagsp, /* inode logging flags */ int whichfork, @@ -823,7 +826,7 @@ xfs_bmap_local_to_extents( { int error = 0; int flags; /* logging flags returned */ - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ xfs_alloc_arg_t args; /* allocation arguments */ xfs_buf_t *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; @@ -850,16 +853,15 @@ xfs_bmap_local_to_extents( args.tp = tp; args.mp = ip->i_mount; xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); - args.firstblock = *firstblock; /* * Allocate a block. We know we need only one, since the * file currently fits in an inode. */ - if (*firstblock == NULLFSBLOCK) { + if (tp->t_firstblock == NULLFSBLOCK) { args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); args.type = XFS_ALLOCTYPE_START_BNO; } else { - args.fsbno = *firstblock; + args.fsbno = tp->t_firstblock; args.type = XFS_ALLOCTYPE_NEAR_BNO; } args.total = total; @@ -871,7 +873,7 @@ xfs_bmap_local_to_extents( /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); - *firstblock = args.fsbno; + tp->t_firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); /* @@ -917,8 +919,6 @@ STATIC int /* error */ xfs_bmap_add_attrfork_btree( xfs_trans_t *tp, /* transaction pointer */ xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - struct xfs_defer_ops *dfops, /* blocks to free at commit */ int *flags) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ @@ -931,8 +931,6 @@ xfs_bmap_add_attrfork_btree( *flags |= XFS_ILOG_DBROOT; else { cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); - cur->bc_private.b.dfops = dfops; - cur->bc_private.b.firstblock = *firstblock; error = xfs_bmbt_lookup_first(cur, &stat); if (error) goto error0; @@ -944,7 +942,6 @@ xfs_bmap_add_attrfork_btree( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return -ENOSPC; } - *firstblock = cur->bc_private.b.firstblock; cur->bc_private.b.allocated = 0; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); } @@ -959,10 +956,8 @@ error0: */ STATIC int /* error */ xfs_bmap_add_attrfork_extents( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - struct xfs_defer_ops *dfops, /* blocks to free at commit */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { xfs_btree_cur_t *cur; /* bmap btree cursor */ @@ -971,12 +966,11 @@ xfs_bmap_add_attrfork_extents( if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) return 0; cur = NULL; - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0, - flags, XFS_DATA_FORK); + error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags, + XFS_DATA_FORK); if (cur) { cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); } return error; } @@ -994,13 +988,11 @@ xfs_bmap_add_attrfork_extents( */ STATIC int /* error */ xfs_bmap_add_attrfork_local( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fsblock_t *firstblock, /* first block allocated */ - struct xfs_defer_ops *dfops, /* blocks to free at commit */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_da_args_t dargs; /* args for dir/attr code */ + struct xfs_da_args dargs; /* args for dir/attr code */ if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; @@ -1009,8 +1001,6 @@ xfs_bmap_add_attrfork_local( memset(&dargs, 0, sizeof(dargs)); dargs.geo = ip->i_mount->m_dir_geo; dargs.dp = ip; - dargs.firstblock = firstblock; - dargs.dfops = dfops; dargs.total = dargs.geo->fsbcount; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; @@ -1018,8 +1008,8 @@ xfs_bmap_add_attrfork_local( } if (S_ISLNK(VFS_I(ip)->i_mode)) - return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, - flags, XFS_DATA_FORK, + return xfs_bmap_local_to_extents(tp, ip, 1, flags, + XFS_DATA_FORK, xfs_symlink_local_to_remote); /* should only be called for types that support local format data */ @@ -1037,8 +1027,6 @@ xfs_bmap_add_attrfork( int size, /* space new attribute needs */ int rsvd) /* xact may use reserved blks */ { - xfs_fsblock_t firstblock; /* 1st block/ag allocated */ - struct xfs_defer_ops dfops; /* freed extent records */ xfs_mount_t *mp; /* mount structure */ xfs_trans_t *tp; /* transaction pointer */ int blks; /* space reservation */ @@ -1104,19 +1092,15 @@ xfs_bmap_add_attrfork( ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); ip->i_afp->if_flags = XFS_IFEXTENTS; logflags = 0; - xfs_defer_init(&dfops, &firstblock); switch (ip->i_d.di_format) { case XFS_DINODE_FMT_LOCAL: - error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops, - &logflags); + error = xfs_bmap_add_attrfork_local(tp, ip, &logflags); break; case XFS_DINODE_FMT_EXTENTS: - error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, - &dfops, &logflags); + error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags); break; case XFS_DINODE_FMT_BTREE: - error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops, - &logflags); + error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags); break; default: error = 0; @@ -1125,7 +1109,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto bmap_cancel; + goto trans_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { bool log_sb = false; @@ -1144,15 +1128,10 @@ xfs_bmap_add_attrfork( xfs_log_sb(tp); } - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto bmap_cancel; error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; -bmap_cancel: - xfs_defer_cancel(&dfops); trans_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1501,7 +1480,7 @@ xfs_bmap_one_block( xfs_inode_t *ip, /* incore inode */ int whichfork) /* data or attr fork */ { - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ int rval; /* return value */ xfs_bmbt_irec_t s; /* internal version of extent */ struct xfs_iext_cursor icur; @@ -1539,7 +1518,7 @@ xfs_bmap_add_extent_delay_real( struct xfs_bmbt_irec *new = &bma->got; int error; /* error return value */ int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ @@ -1809,7 +1788,6 @@ xfs_bmap_add_extent_delay_real( if (xfs_bmap_needs_btree(bma->ip, whichfork)) { error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, 1, &tmp_rval, whichfork); rval |= tmp_rval; if (error) @@ -1887,8 +1865,7 @@ xfs_bmap_add_extent_delay_real( if (xfs_bmap_needs_btree(bma->ip, whichfork)) { error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, 1, - &tmp_rval, whichfork); + &bma->cur, 1, &tmp_rval, whichfork); rval |= tmp_rval; if (error) goto done; @@ -1968,8 +1945,7 @@ xfs_bmap_add_extent_delay_real( if (xfs_bmap_needs_btree(bma->ip, whichfork)) { error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, - 1, &tmp_rval, whichfork); + &bma->cur, 1, &tmp_rval, whichfork); rval |= tmp_rval; if (error) goto done; @@ -1994,8 +1970,7 @@ xfs_bmap_add_extent_delay_real( /* add reverse mapping unless caller opted out */ if (!(bma->flags & XFS_BMAPI_NORMAP)) { - error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, - whichfork, new); + error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); if (error) goto done; } @@ -2006,8 +1981,8 @@ xfs_bmap_add_extent_delay_real( ASSERT(bma->cur == NULL); error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, - da_old > 0, &tmp_logflags, whichfork); + &bma->cur, da_old > 0, &tmp_logflags, + whichfork); bma->logflags |= tmp_logflags; if (error) goto done; @@ -2046,14 +2021,12 @@ xfs_bmap_add_extent_unwritten_real( struct xfs_iext_cursor *icur, xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - struct xfs_defer_ops *dfops, /* list of extents to be freed */ int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ int error; /* error return value */ int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ @@ -2479,7 +2452,7 @@ xfs_bmap_add_extent_unwritten_real( } /* update reverse mappings */ - error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); + error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); if (error) goto done; @@ -2488,8 +2461,8 @@ xfs_bmap_add_extent_unwritten_real( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, - 0, &tmp_logflags, whichfork); + error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, + &tmp_logflags, whichfork); *logflagsp |= tmp_logflags; if (error) goto done; @@ -2520,7 +2493,7 @@ xfs_bmap_add_extent_hole_delay( struct xfs_iext_cursor *icur, xfs_bmbt_irec_t *new) /* new data to add to file extents */ { - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ xfs_filblks_t oldlen=0; /* old indirect size */ @@ -2660,8 +2633,6 @@ xfs_bmap_add_extent_hole_real( struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, - xfs_fsblock_t *first, - struct xfs_defer_ops *dfops, int *logflagsp, int flags) { @@ -2842,7 +2813,7 @@ xfs_bmap_add_extent_hole_real( /* add reverse mapping unless caller opted out */ if (!(flags & XFS_BMAPI_NORMAP)) { - error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new); + error = xfs_rmap_map_extent(tp, ip, whichfork, new); if (error) goto done; } @@ -2852,8 +2823,8 @@ xfs_bmap_add_extent_hole_real( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp, - 0, &tmp_logflags, whichfork); + error = xfs_bmap_extents_to_btree(tp, ip, curp, 0, + &tmp_logflags, whichfork); *logflagsp |= tmp_logflags; cur = *curp; if (error) @@ -3071,10 +3042,11 @@ xfs_bmap_adjacent( XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) mp = ap->ip->i_mount; - nullfb = *ap->firstblock == NULLFSBLOCK; + nullfb = ap->tp->t_firstblock == NULLFSBLOCK; rt = XFS_IS_REALTIME_INODE(ap->ip) && xfs_alloc_is_userdata(ap->datatype); - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, + ap->tp->t_firstblock); /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. @@ -3432,8 +3404,9 @@ xfs_bmap_btalloc( } - nullfb = *ap->firstblock == NULLFSBLOCK; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); + nullfb = ap->tp->t_firstblock == NULLFSBLOCK; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, + ap->tp->t_firstblock); if (nullfb) { if (xfs_alloc_is_userdata(ap->datatype) && xfs_inode_is_filestream(ap->ip)) { @@ -3444,7 +3417,7 @@ xfs_bmap_btalloc( ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); } } else - ap->blkno = *ap->firstblock; + ap->blkno = ap->tp->t_firstblock; xfs_bmap_adjacent(ap); @@ -3455,7 +3428,7 @@ xfs_bmap_btalloc( if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) ; else - ap->blkno = *ap->firstblock; + ap->blkno = ap->tp->t_firstblock; /* * Normal allocation, done through xfs_alloc_vextent. */ @@ -3468,7 +3441,6 @@ xfs_bmap_btalloc( /* Trim the allocation back to the maximum an AG can fit. */ args.maxlen = min(ap->length, mp->m_ag_max_usable); - args.firstblock = *ap->firstblock; blen = 0; if (nullfb) { /* @@ -3483,7 +3455,7 @@ xfs_bmap_btalloc( error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); if (error) return error; - } else if (ap->dfops->dop_low) { + } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { if (xfs_inode_is_filestream(ap->ip)) args.type = XFS_ALLOCTYPE_FIRST_AG; else @@ -3518,7 +3490,7 @@ xfs_bmap_btalloc( * is >= the stripe unit and the allocation offset is * at the end of file. */ - if (!ap->dfops->dop_low && ap->aeof) { + if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) { if (!ap->offset) { args.alignment = stripe_align; atype = args.type; @@ -3610,20 +3582,20 @@ xfs_bmap_btalloc( args.total = ap->minlen; if ((error = xfs_alloc_vextent(&args))) return error; - ap->dfops->dop_low = true; + ap->tp->t_flags |= XFS_TRANS_LOWMODE; } if (args.fsbno != NULLFSBLOCK) { /* * check the allocation happened at the same or higher AG than * the first block that was allocated. */ - ASSERT(*ap->firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= + ASSERT(ap->tp->t_firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <= XFS_FSB_TO_AGNO(mp, args.fsbno)); ap->blkno = args.fsbno; - if (*ap->firstblock == NULLFSBLOCK) - *ap->firstblock = args.fsbno; + if (ap->tp->t_firstblock == NULLFSBLOCK) + ap->tp->t_firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; /* @@ -3788,8 +3760,7 @@ xfs_bmapi_update_map( mval[-1].br_startblock != HOLESTARTBLOCK && mval->br_startblock == mval[-1].br_startblock + mval[-1].br_blockcount && - ((flags & XFS_BMAPI_IGSTATE) || - mval[-1].br_state == mval->br_state)) { + mval[-1].br_state == mval->br_state) { ASSERT(mval->br_startoff == mval[-1].br_startoff + mval[-1].br_blockcount); mval[-1].br_blockcount += mval->br_blockcount; @@ -3834,7 +3805,7 @@ xfs_bmapi_read( ASSERT(*nmap >= 1); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| - XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK))); + XFS_BMAPI_COWFORK))); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( @@ -4079,15 +4050,10 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->cur) - bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) return 0; - if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { + if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); - bma->cur->bc_private.b.firstblock = *bma->firstblock; - bma->cur->bc_private.b.dfops = bma->dfops; - } /* * Bump the number of extents we've allocated * in this call. @@ -4122,8 +4088,7 @@ xfs_bmapi_allocate( else error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, whichfork, &bma->icur, &bma->cur, &bma->got, - bma->firstblock, bma->dfops, &bma->logflags, - bma->flags); + &bma->logflags, bma->flags); bma->logflags |= tmp_logflags; if (error) @@ -4174,8 +4139,6 @@ xfs_bmapi_convert_unwritten( if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, bma->ip, whichfork); - bma->cur->bc_private.b.firstblock = *bma->firstblock; - bma->cur->bc_private.b.dfops = bma->dfops; } mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; @@ -4192,8 +4155,7 @@ xfs_bmapi_convert_unwritten( } error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, - &bma->icur, &bma->cur, mval, bma->firstblock, - bma->dfops, &tmp_logflags); + &bma->icur, &bma->cur, mval, &tmp_logflags); /* * Log the inode core unconditionally in the unwritten extent conversion * path because the conversion might not have done so (e.g., if the @@ -4231,12 +4193,6 @@ xfs_bmapi_convert_unwritten( * extent state if necessary. Details behaviour is controlled by the flags * parameter. Only allocates blocks from a single allocation group, to avoid * locking problems. - * - * The returned value in "firstblock" from the first call in a transaction - * must be remembered and presented to subsequent calls in "firstblock". - * An upper bound for the number of blocks to be allocated is supplied to - * the first call in "total"; if no allocation group has that many free - * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). */ int xfs_bmapi_write( @@ -4245,12 +4201,9 @@ xfs_bmapi_write( xfs_fileoff_t bno, /* starting file offs. mapped */ xfs_filblks_t len, /* length to map in file */ int flags, /* XFS_BMAPI_... */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ xfs_extlen_t total, /* total blocks needed */ struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - struct xfs_defer_ops *dfops) /* i/o: list extents to free */ + int *nmap) /* i/o: mval size/count */ { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; @@ -4279,7 +4232,6 @@ xfs_bmapi_write( ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); - ASSERT(!(flags & XFS_BMAPI_IGSTATE)); ASSERT(tp != NULL || (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); @@ -4315,7 +4267,7 @@ xfs_bmapi_write( XFS_STATS_INC(mp, xs_blk_mapw); - if (*firstblock == NULLFSBLOCK) { + if (!tp || tp->t_firstblock == NULLFSBLOCK) { if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; else @@ -4342,8 +4294,6 @@ xfs_bmapi_write( bma.ip = ip; bma.total = total; bma.datatype = 0; - bma.dfops = dfops; - bma.firstblock = firstblock; while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; @@ -4419,7 +4369,7 @@ xfs_bmapi_write( * the refcount btree for orphan recovery. */ if (whichfork == XFS_COW_FORK) { - error = xfs_refcount_alloc_cow_extent(mp, dfops, + error = xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length); if (error) goto error0; @@ -4493,15 +4443,7 @@ error0: xfs_trans_log_inode(tp, ip, bma.logflags); if (bma.cur) { - if (!error) { - ASSERT(*firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *firstblock) <= - XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock)); - *firstblock = bma.cur->bc_private.b.firstblock; - } - xfs_btree_del_cursor(bma.cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bma.cur, error); } if (!error) xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, @@ -4516,13 +4458,11 @@ xfs_bmapi_remap( xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, - struct xfs_defer_ops *dfops, int flags) { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; struct xfs_btree_cur *cur = NULL; - xfs_fsblock_t firstblock = NULLFSBLOCK; struct xfs_bmbt_irec got; struct xfs_iext_cursor icur; int whichfork = xfs_bmapi_whichfork(flags); @@ -4565,8 +4505,6 @@ xfs_bmapi_remap( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = firstblock; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; } @@ -4579,7 +4517,7 @@ xfs_bmapi_remap( got.br_state = XFS_EXT_NORM; error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur, - &cur, &got, &firstblock, dfops, &logflags, flags); + &cur, &got, &logflags, flags); if (error) goto error0; @@ -4599,10 +4537,8 @@ error0: if (logflags) xfs_trans_log_inode(tp, ip, logflags); - if (cur) { - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - } + if (cur) + xfs_btree_del_cursor(cur, error); return error; } @@ -4898,7 +4834,6 @@ xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ struct xfs_iext_cursor *icur, - struct xfs_defer_ops *dfops, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ @@ -4913,7 +4848,7 @@ xfs_bmap_del_extent_real( struct xfs_bmbt_irec got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_ifork *ifp; /* inode fork pointer */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t nblks; /* quota/sb block count */ xfs_bmbt_irec_t new; /* new record to be inserted */ @@ -5104,7 +5039,7 @@ xfs_bmap_del_extent_real( } /* remove reverse mapping */ - error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); + error = xfs_rmap_unmap_extent(tp, ip, whichfork, del); if (error) goto done; @@ -5113,11 +5048,11 @@ xfs_bmap_del_extent_real( */ if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { - error = xfs_refcount_decrease_extent(mp, dfops, del); + error = xfs_refcount_decrease_extent(tp, del); if (error) goto done; } else { - __xfs_bmap_add_free(mp, dfops, del->br_startblock, + __xfs_bmap_add_free(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); @@ -5148,26 +5083,23 @@ done: */ int /* error */ __xfs_bunmapi( - xfs_trans_t *tp, /* transaction pointer */ + struct xfs_trans *tp, /* transaction pointer */ struct xfs_inode *ip, /* incore inode */ xfs_fileoff_t start, /* first file offset deleted */ xfs_filblks_t *rlen, /* i/o: amount remaining */ int flags, /* misc flags */ - xfs_extnum_t nexts, /* number of extents max */ - xfs_fsblock_t *firstblock, /* first allocated block - controls a.g. for allocs */ - struct xfs_defer_ops *dfops) /* i/o: deferred updates */ + xfs_extnum_t nexts) /* number of extents max */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ - xfs_bmbt_irec_t del; /* extent being deleted */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ + struct xfs_bmbt_irec del; /* extent being deleted */ int error; /* error return value */ xfs_extnum_t extno; /* extent number in list */ - xfs_bmbt_irec_t got; /* current extent record */ - xfs_ifork_t *ifp; /* inode fork pointer */ + struct xfs_bmbt_irec got; /* current extent record */ + struct xfs_ifork *ifp; /* inode fork pointer */ int isrt; /* freeing in rt area */ int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ - xfs_mount_t *mp; /* mount structure */ + struct xfs_mount *mp; /* mount structure */ int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ @@ -5230,8 +5162,6 @@ __xfs_bunmapi( if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; } else cur = NULL; @@ -5347,7 +5277,7 @@ __xfs_bunmapi( del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, whichfork, &icur, &cur, &del, - firstblock, dfops, &logflags); + &logflags); if (error) goto error0; goto nodelete; @@ -5404,8 +5334,7 @@ __xfs_bunmapi( prev.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, whichfork, &icur, &cur, - &prev, firstblock, dfops, - &logflags); + &prev, &logflags); if (error) goto error0; goto nodelete; @@ -5414,8 +5343,7 @@ __xfs_bunmapi( del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, whichfork, &icur, &cur, - &del, firstblock, dfops, - &logflags); + &del, &logflags); if (error) goto error0; goto nodelete; @@ -5427,8 +5355,8 @@ delete: error = xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del); } else { - error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops, - cur, &del, &tmp_logflags, whichfork, + error = xfs_bmap_del_extent_real(ip, tp, &icur, cur, + &del, &tmp_logflags, whichfork, flags); logflags |= tmp_logflags; } @@ -5462,8 +5390,8 @@ nodelete: */ if (xfs_bmap_needs_btree(ip, whichfork)) { ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, - &cur, 0, &tmp_logflags, whichfork); + error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, + &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; @@ -5501,12 +5429,9 @@ error0: if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (cur) { - if (!error) { - *firstblock = cur->bc_private.b.firstblock; + if (!error) cur->bc_private.b.allocated = 0; - } - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); } return error; } @@ -5520,14 +5445,11 @@ xfs_bunmapi( xfs_filblks_t len, int flags, xfs_extnum_t nexts, - xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops, int *done) { int error; - error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock, - dfops); + error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts); *done = (len == 0); return error; } @@ -5570,6 +5492,7 @@ xfs_bmse_can_merge( */ STATIC int xfs_bmse_merge( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, xfs_fileoff_t shift, /* shift fsb */ @@ -5577,8 +5500,7 @@ xfs_bmse_merge( struct xfs_bmbt_irec *got, /* extent to shift */ struct xfs_bmbt_irec *left, /* preceding extent */ struct xfs_btree_cur *cur, - int *logflags, /* output */ - struct xfs_defer_ops *dfops) + int *logflags) /* output */ { struct xfs_bmbt_irec new; xfs_filblks_t blockcount; @@ -5634,23 +5556,23 @@ done: &new); /* update reverse mapping. rmap functions merge the rmaps for us */ - error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); + error = xfs_rmap_unmap_extent(tp, ip, whichfork, got); if (error) return error; memcpy(&new, got, sizeof(new)); new.br_startoff = left->br_startoff + left->br_blockcount; - return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); + return xfs_rmap_map_extent(tp, ip, whichfork, &new); } static int xfs_bmap_shift_update_extent( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_bmbt_irec *got, struct xfs_btree_cur *cur, int *logflags, - struct xfs_defer_ops *dfops, xfs_fileoff_t startoff) { struct xfs_mount *mp = ip->i_mount; @@ -5678,10 +5600,10 @@ xfs_bmap_shift_update_extent( got); /* update reverse mapping */ - error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev); + error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev); if (error) return error; - return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got); + return xfs_rmap_map_extent(tp, ip, whichfork, got); } int @@ -5690,9 +5612,7 @@ xfs_bmap_collapse_extents( struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - bool *done, - xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops) + bool *done) { int whichfork = XFS_DATA_FORK; struct xfs_mount *mp = ip->i_mount; @@ -5725,8 +5645,6 @@ xfs_bmap_collapse_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; } @@ -5745,9 +5663,9 @@ xfs_bmap_collapse_extents( } if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) { - error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb, - &icur, &got, &prev, cur, &logflags, - dfops); + error = xfs_bmse_merge(tp, ip, whichfork, + offset_shift_fsb, &icur, &got, &prev, + cur, &logflags); if (error) goto del_cursor; goto done; @@ -5759,8 +5677,8 @@ xfs_bmap_collapse_extents( } } - error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur, - &logflags, dfops, new_startoff); + error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got, + cur, &logflags, new_startoff); if (error) goto del_cursor; @@ -5773,8 +5691,7 @@ done: *next_fsb = got.br_startoff; del_cursor: if (cur) - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); if (logflags) xfs_trans_log_inode(tp, ip, logflags); return error; @@ -5813,9 +5730,7 @@ xfs_bmap_insert_extents( xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, - xfs_fileoff_t stop_fsb, - xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops) + xfs_fileoff_t stop_fsb) { int whichfork = XFS_DATA_FORK; struct xfs_mount *mp = ip->i_mount; @@ -5848,8 +5763,6 @@ xfs_bmap_insert_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstblock; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; } @@ -5891,8 +5804,8 @@ xfs_bmap_insert_extents( WARN_ON_ONCE(1); } - error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur, - &logflags, dfops, new_startoff); + error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got, + cur, &logflags, new_startoff); if (error) goto del_cursor; @@ -5905,8 +5818,7 @@ xfs_bmap_insert_extents( *next_fsb = got.br_startoff; del_cursor: if (cur) - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); if (logflags) xfs_trans_log_inode(tp, ip, logflags); return error; @@ -5922,9 +5834,7 @@ STATIC int xfs_bmap_split_extent_at( struct xfs_trans *tp, struct xfs_inode *ip, - xfs_fileoff_t split_fsb, - xfs_fsblock_t *firstfsb, - struct xfs_defer_ops *dfops) + xfs_fileoff_t split_fsb) { int whichfork = XFS_DATA_FORK; struct xfs_btree_cur *cur = NULL; @@ -5973,8 +5883,6 @@ xfs_bmap_split_extent_at( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.firstblock = *firstfsb; - cur->bc_private.b.dfops = dfops; cur->bc_private.b.flags = 0; error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) @@ -6018,16 +5926,15 @@ xfs_bmap_split_extent_at( int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); - error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops, - &cur, 0, &tmp_logflags, whichfork); + error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, + &tmp_logflags, whichfork); logflags |= tmp_logflags; } del_cursor: if (cur) { cur->bc_private.b.allocated = 0; - xfs_btree_del_cursor(cur, - error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); } if (logflags) @@ -6042,8 +5949,6 @@ xfs_bmap_split_extent( { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - struct xfs_defer_ops dfops; - xfs_fsblock_t firstfsb; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, @@ -6054,21 +5959,13 @@ xfs_bmap_split_extent( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_defer_init(&dfops, &firstfsb); - - error = xfs_bmap_split_extent_at(tp, ip, split_fsb, - &firstfsb, &dfops); - if (error) - goto out; - - error = xfs_defer_finish(&tp, &dfops); + error = xfs_bmap_split_extent_at(tp, ip, split_fsb); if (error) goto out; return xfs_trans_commit(tp); out: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } @@ -6085,20 +5982,18 @@ xfs_bmap_is_update_needed( /* Record a bmap intent. */ static int __xfs_bmap_add( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, enum xfs_bmap_intent_type type, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *bmap) { - int error; struct xfs_bmap_intent *bi; - trace_xfs_bmap_defer(mp, - XFS_FSB_TO_AGNO(mp, bmap->br_startblock), + trace_xfs_bmap_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), type, - XFS_FSB_TO_AGBNO(mp, bmap->br_startblock), + XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), ip->i_ino, whichfork, bmap->br_startoff, bmap->br_blockcount, @@ -6111,44 +6006,34 @@ __xfs_bmap_add( bi->bi_whichfork = whichfork; bi->bi_bmap = *bmap; - error = xfs_defer_ijoin(dfops, bi->bi_owner); - if (error) { - kmem_free(bi); - return error; - } - - xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); return 0; } /* Map an extent into a file. */ int xfs_bmap_map_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *PREV) { if (!xfs_bmap_is_update_needed(PREV)) return 0; - return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip, - XFS_DATA_FORK, PREV); + return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); } /* Unmap an extent out of a file. */ int xfs_bmap_unmap_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *PREV) { if (!xfs_bmap_is_update_needed(PREV)) return 0; - return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip, - XFS_DATA_FORK, PREV); + return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); } /* @@ -6158,7 +6043,6 @@ xfs_bmap_unmap_extent( int xfs_bmap_finish_one( struct xfs_trans *tp, - struct xfs_defer_ops *dfops, struct xfs_inode *ip, enum xfs_bmap_intent_type type, int whichfork, @@ -6167,17 +6051,9 @@ xfs_bmap_finish_one( xfs_filblks_t *blockcount, xfs_exntst_t state) { - xfs_fsblock_t firstfsb; int error = 0; - /* - * firstfsb is tied to the transaction lifetime and is used to - * ensure correct AG locking order and schedule work item - * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us - * to only making one bmap call per transaction, so it should - * be safe to have it as a local variable here. - */ - firstfsb = NULLFSBLOCK; + ASSERT(tp->t_firstblock == NULLFSBLOCK); trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, @@ -6194,12 +6070,12 @@ xfs_bmap_finish_one( switch (type) { case XFS_BMAP_MAP: error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, - startblock, dfops, 0); + startblock, 0); *blockcount = 0; break; case XFS_BMAP_UNMAP: error = __xfs_bunmapi(tp, ip, startoff, blockcount, - XFS_BMAPI_REMAP, 1, &firstfsb, dfops); + XFS_BMAPI_REMAP, 1); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 9b49ddf99c41..b6e9b639e731 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -19,8 +19,6 @@ extern kmem_zone_t *xfs_bmap_free_item_zone; * Argument structure for xfs_bmap_alloc. */ struct xfs_bmalloca { - xfs_fsblock_t *firstblock; /* i/o first block allocated */ - struct xfs_defer_ops *dfops; /* bmap freelist */ struct xfs_trans *tp; /* transaction pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct xfs_bmbt_irec prev; /* extent before the new one */ @@ -68,8 +66,6 @@ struct xfs_extent_free_item #define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */ #define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */ #define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */ -#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */ - /* combine contig. space */ #define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */ /* * unwritten extent conversion - this needs write cache flushing and no additional @@ -116,7 +112,6 @@ struct xfs_extent_free_item { XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ - { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ @@ -189,9 +184,9 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); -void __xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - xfs_fsblock_t bno, xfs_filblks_t len, - struct xfs_owner_info *oinfo, bool skip_discard); +void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, struct xfs_owner_info *oinfo, + bool skip_discard); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); @@ -205,17 +200,13 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, int *nmap, int flags); int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, - xfs_fsblock_t *firstblock, xfs_extlen_t total, - struct xfs_bmbt_irec *mval, int *nmap, - struct xfs_defer_ops *dfops); + xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap); int __xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags, - xfs_extnum_t nexts, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops); + xfs_extnum_t nexts); int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, - xfs_extnum_t nexts, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops, int *done); + xfs_extnum_t nexts, int *done); int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); @@ -225,14 +216,12 @@ void xfs_bmap_del_extent_cow(struct xfs_inode *ip, uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - bool *done, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops); + bool *done); int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, xfs_fileoff_t shift); int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, - struct xfs_defer_ops *dfops); + bool *done, xfs_fileoff_t stop_fsb); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, @@ -241,13 +230,12 @@ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, static inline void xfs_bmap_add_free( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_owner_info *oinfo) { - __xfs_bmap_add_free(mp, dfops, bno, len, oinfo, false); + __xfs_bmap_add_free(tp, bno, len, oinfo, false); } enum xfs_bmap_intent_type { @@ -263,14 +251,14 @@ struct xfs_bmap_intent { struct xfs_bmbt_irec bi_bmap; }; -int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops, - struct xfs_inode *ip, enum xfs_bmap_intent_type type, - int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, +int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, + enum xfs_bmap_intent_type type, int whichfork, + xfs_fileoff_t startoff, xfs_fsblock_t startblock, xfs_filblks_t *blockcount, xfs_exntst_t state); -int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - struct xfs_inode *ip, struct xfs_bmbt_irec *imap); -int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - struct xfs_inode *ip, struct xfs_bmbt_irec *imap); +int xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, + struct xfs_bmbt_irec *imap); +int xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, + struct xfs_bmbt_irec *imap); static inline int xfs_bmap_fork_to_state(int whichfork) { @@ -289,6 +277,6 @@ xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, - struct xfs_defer_ops *dfops, int flags); + int flags); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index e1a2d9ceb615..cdb74d2e2a43 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -175,8 +175,6 @@ xfs_bmbt_dup_cursor( * Copy the firstblock, dfops, and flags values, * since init cursor doesn't get them. */ - new->bc_private.b.firstblock = cur->bc_private.b.firstblock; - new->bc_private.b.dfops = cur->bc_private.b.dfops; new->bc_private.b.flags = cur->bc_private.b.flags; return new; @@ -187,12 +185,11 @@ xfs_bmbt_update_cursor( struct xfs_btree_cur *src, struct xfs_btree_cur *dst) { - ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || + ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) || (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); - ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops); dst->bc_private.b.allocated += src->bc_private.b.allocated; - dst->bc_private.b.firstblock = src->bc_private.b.firstblock; + dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock; src->bc_private.b.allocated = 0; } @@ -210,8 +207,7 @@ xfs_bmbt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.fsbno = cur->bc_private.b.firstblock; - args.firstblock = args.fsbno; + args.fsbno = cur->bc_tp->t_firstblock; xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino, cur->bc_private.b.whichfork); @@ -230,7 +226,7 @@ xfs_bmbt_alloc_block( * block allocation here and corrupt the filesystem. */ args.minleft = args.tp->t_blk_res; - } else if (cur->bc_private.b.dfops->dop_low) { + } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) { args.type = XFS_ALLOCTYPE_START_BNO; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -257,7 +253,7 @@ xfs_bmbt_alloc_block( error = xfs_alloc_vextent(&args); if (error) goto error0; - cur->bc_private.b.dfops->dop_low = true; + cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE; } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { *stat = 0; @@ -265,7 +261,7 @@ xfs_bmbt_alloc_block( } ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; + cur->bc_tp->t_firstblock = args.fsbno; cur->bc_private.b.allocated++; cur->bc_private.b.ip->i_d.di_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); @@ -293,7 +289,7 @@ xfs_bmbt_free_block( struct xfs_owner_info oinfo; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork); - xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo); + xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -564,8 +560,6 @@ xfs_bmbt_init_cursor( cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_private.b.ip = ip; - cur->bc_private.b.firstblock = NULLFSBLOCK; - cur->bc_private.b.dfops = NULL; cur->bc_private.b.allocated = 0; cur->bc_private.b.flags = 0; cur->bc_private.b.whichfork = whichfork; @@ -645,7 +639,7 @@ xfs_bmbt_change_owner( cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); return error; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 0a4fdf7f11a7..e3b3e9dce5da 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -7,7 +7,6 @@ #define __XFS_BTREE_H__ struct xfs_buf; -struct xfs_defer_ops; struct xfs_inode; struct xfs_mount; struct xfs_trans; @@ -209,14 +208,11 @@ typedef struct xfs_btree_cur union { struct { /* needed for BNO, CNT, INO */ struct xfs_buf *agbp; /* agf/agi buffer pointer */ - struct xfs_defer_ops *dfops; /* deferred updates */ xfs_agnumber_t agno; /* ag number */ union xfs_btree_cur_private priv; } a; struct { /* needed for BMAP */ struct xfs_inode *ip; /* pointer to our inode */ - struct xfs_defer_ops *dfops; /* deferred updates */ - xfs_fsblock_t firstblock; /* 1st blk allocated */ int allocated; /* count of alloced */ short forksize; /* fork's inode space */ char whichfork; /* data or attr fork */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 8a301402bbc4..376bee94b5dd 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1481,6 +1481,7 @@ xfs_da3_node_lookup_int( int error; int retval; unsigned int expected_level = 0; + uint16_t magic; struct xfs_inode *dp = state->args->dp; args = state->args; @@ -1505,25 +1506,27 @@ xfs_da3_node_lookup_int( return error; } curr = blk->bp->b_addr; - blk->magic = be16_to_cpu(curr->magic); + magic = be16_to_cpu(curr->magic); - if (blk->magic == XFS_ATTR_LEAF_MAGIC || - blk->magic == XFS_ATTR3_LEAF_MAGIC) { + if (magic == XFS_ATTR_LEAF_MAGIC || + magic == XFS_ATTR3_LEAF_MAGIC) { blk->magic = XFS_ATTR_LEAF_MAGIC; blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; } - if (blk->magic == XFS_DIR2_LEAFN_MAGIC || - blk->magic == XFS_DIR3_LEAFN_MAGIC) { + if (magic == XFS_DIR2_LEAFN_MAGIC || + magic == XFS_DIR3_LEAFN_MAGIC) { blk->magic = XFS_DIR2_LEAFN_MAGIC; blk->hashval = xfs_dir2_leaf_lasthash(args->dp, blk->bp, NULL); break; } - blk->magic = XFS_DA_NODE_MAGIC; + if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) + return -EFSCORRUPTED; + blk->magic = XFS_DA_NODE_MAGIC; /* * Search an intermediate node for a match. @@ -2059,11 +2062,9 @@ xfs_da_grow_inode_int( * Try mapping it in one filesystem block. */ nmap = 1; - ASSERT(args->firstblock != NULL); error = xfs_bmapi_write(tp, dp, *bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, - args->firstblock, args->total, &map, &nmap, - args->dfops); + args->total, &map, &nmap); if (error) return error; @@ -2085,8 +2086,7 @@ xfs_da_grow_inode_int( c = (int)(*bno + count - b); error = xfs_bmapi_write(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - args->firstblock, args->total, - &mapp[mapi], &nmap, args->dfops); + args->total, &mapp[mapi], &nmap); if (error) goto out_free_map; if (nmap < 1) @@ -2375,13 +2375,13 @@ done: */ int xfs_da_shrink_inode( - xfs_da_args_t *args, - xfs_dablk_t dead_blkno, - struct xfs_buf *dead_buf) + struct xfs_da_args *args, + xfs_dablk_t dead_blkno, + struct xfs_buf *dead_buf) { - xfs_inode_t *dp; - int done, error, w, count; - xfs_trans_t *tp; + struct xfs_inode *dp; + int done, error, w, count; + struct xfs_trans *tp; trace_xfs_da_shrink_inode(args); @@ -2395,8 +2395,7 @@ xfs_da_shrink_inode( * the last block to the place we want to kill. */ error = xfs_bunmapi(tp, dp, dead_blkno, count, - xfs_bmapi_aflag(w), 0, args->firstblock, - args->dfops, &done); + xfs_bmapi_aflag(w), 0, &done); if (error == -ENOSPC) { if (w != XFS_DATA_FORK) break; diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 28260073ae71..84dd865b6c3d 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -7,7 +7,6 @@ #ifndef __XFS_DA_BTREE_H__ #define __XFS_DA_BTREE_H__ -struct xfs_defer_ops; struct xfs_inode; struct xfs_trans; struct zone; @@ -57,8 +56,6 @@ typedef struct xfs_da_args { xfs_dahash_t hashval; /* hash value of name */ xfs_ino_t inumber; /* input/output inode number */ struct xfs_inode *dp; /* directory inode to manipulate */ - xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ - struct xfs_defer_ops *dfops; /* ptr to freelist for bmap_finish */ struct xfs_trans *trans; /* current trans (changes over time) */ xfs_extlen_t total; /* total blocks needed, for 1st bmap */ int whichfork; /* data or attribute fork */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index c3e5bffda4f5..e792b167150a 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -14,6 +14,9 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_trace.h" /* @@ -177,146 +180,157 @@ static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; * the pending list. */ STATIC void -xfs_defer_intake_work( - struct xfs_trans *tp, - struct xfs_defer_ops *dop) +xfs_defer_create_intents( + struct xfs_trans *tp) { struct list_head *li; struct xfs_defer_pending *dfp; - list_for_each_entry(dfp, &dop->dop_intake, dfp_list) { + list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { dfp->dfp_intent = dfp->dfp_type->create_intent(tp, dfp->dfp_count); - trace_xfs_defer_intake_work(tp->t_mountp, dfp); + trace_xfs_defer_create_intent(tp->t_mountp, dfp); list_sort(tp->t_mountp, &dfp->dfp_work, dfp->dfp_type->diff_items); list_for_each(li, &dfp->dfp_work) dfp->dfp_type->log_item(tp, dfp->dfp_intent, li); } - - list_splice_tail_init(&dop->dop_intake, &dop->dop_pending); } /* Abort all the intents that were committed. */ STATIC void xfs_defer_trans_abort( struct xfs_trans *tp, - struct xfs_defer_ops *dop, - int error) + struct list_head *dop_pending) { struct xfs_defer_pending *dfp; - trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_); + trace_xfs_defer_trans_abort(tp, _RET_IP_); /* Abort intent items that don't have a done item. */ - list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { + list_for_each_entry(dfp, dop_pending, dfp_list) { trace_xfs_defer_pending_abort(tp->t_mountp, dfp); if (dfp->dfp_intent && !dfp->dfp_done) { dfp->dfp_type->abort_intent(dfp->dfp_intent); dfp->dfp_intent = NULL; } } - - /* Shut down FS. */ - xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ? - SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR); } /* Roll a transaction so we can do some deferred op processing. */ STATIC int xfs_defer_trans_roll( - struct xfs_trans **tp, - struct xfs_defer_ops *dop) + struct xfs_trans **tpp) { + struct xfs_trans *tp = *tpp; + struct xfs_buf_log_item *bli; + struct xfs_inode_log_item *ili; + struct xfs_log_item *lip; + struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; + struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; + int bpcount = 0, ipcount = 0; int i; int error; - /* Log all the joined inodes. */ - for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) - xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); - - /* Hold the (previously bjoin'd) buffer locked across the roll. */ - for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) - xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + list_for_each_entry(lip, &tp->t_items, li_trans) { + switch (lip->li_type) { + case XFS_LI_BUF: + bli = container_of(lip, struct xfs_buf_log_item, + bli_item); + if (bli->bli_flags & XFS_BLI_HOLD) { + if (bpcount >= XFS_DEFER_OPS_NR_BUFS) { + ASSERT(0); + return -EFSCORRUPTED; + } + xfs_trans_dirty_buf(tp, bli->bli_buf); + bplist[bpcount++] = bli->bli_buf; + } + break; + case XFS_LI_INODE: + ili = container_of(lip, struct xfs_inode_log_item, + ili_item); + if (ili->ili_lock_flags == 0) { + if (ipcount >= XFS_DEFER_OPS_NR_INODES) { + ASSERT(0); + return -EFSCORRUPTED; + } + xfs_trans_log_inode(tp, ili->ili_inode, + XFS_ILOG_CORE); + iplist[ipcount++] = ili->ili_inode; + } + break; + default: + break; + } + } - trace_xfs_defer_trans_roll((*tp)->t_mountp, dop, _RET_IP_); + trace_xfs_defer_trans_roll(tp, _RET_IP_); /* Roll the transaction. */ - error = xfs_trans_roll(tp); + error = xfs_trans_roll(tpp); + tp = *tpp; if (error) { - trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error); - xfs_defer_trans_abort(*tp, dop, error); + trace_xfs_defer_trans_roll_error(tp, error); return error; } - dop->dop_committed = true; /* Rejoin the joined inodes. */ - for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) - xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + for (i = 0; i < ipcount; i++) + xfs_trans_ijoin(tp, iplist[i], 0); /* Rejoin the buffers and dirty them so the log moves forward. */ - for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { - xfs_trans_bjoin(*tp, dop->dop_bufs[i]); - xfs_trans_bhold(*tp, dop->dop_bufs[i]); + for (i = 0; i < bpcount; i++) { + xfs_trans_bjoin(tp, bplist[i]); + xfs_trans_bhold(tp, bplist[i]); } return error; } -/* Do we have any work items to finish? */ -bool -xfs_defer_has_unfinished_work( - struct xfs_defer_ops *dop) -{ - return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake); -} - /* - * Add this inode to the deferred op. Each joined inode is relogged - * each time we roll the transaction. + * Reset an already used dfops after finish. */ -int -xfs_defer_ijoin( - struct xfs_defer_ops *dop, - struct xfs_inode *ip) +static void +xfs_defer_reset( + struct xfs_trans *tp) { - int i; - - for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) { - if (dop->dop_inodes[i] == ip) - return 0; - else if (dop->dop_inodes[i] == NULL) { - dop->dop_inodes[i] = ip; - return 0; - } - } + ASSERT(list_empty(&tp->t_dfops)); - ASSERT(0); - return -EFSCORRUPTED; + /* + * Low mode state transfers across transaction rolls to mirror dfops + * lifetime. Clear it now that dfops is reset. + */ + tp->t_flags &= ~XFS_TRANS_LOWMODE; } /* - * Add this buffer to the deferred op. Each joined buffer is relogged - * each time we roll the transaction. + * Free up any items left in the list. */ -int -xfs_defer_bjoin( - struct xfs_defer_ops *dop, - struct xfs_buf *bp) +static void +xfs_defer_cancel_list( + struct xfs_mount *mp, + struct list_head *dop_list) { - int i; + struct xfs_defer_pending *dfp; + struct xfs_defer_pending *pli; + struct list_head *pwi; + struct list_head *n; - for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { - if (dop->dop_bufs[i] == bp) - return 0; - else if (dop->dop_bufs[i] == NULL) { - dop->dop_bufs[i] = bp; - return 0; + /* + * Free the pending items. Caller should already have arranged + * for the intent items to be released. + */ + list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) { + trace_xfs_defer_cancel_list(mp, dfp); + list_del(&dfp->dfp_list); + list_for_each_safe(pwi, n, &dfp->dfp_work) { + list_del(pwi); + dfp->dfp_count--; + dfp->dfp_type->cancel_item(pwi); } + ASSERT(dfp->dfp_count == 0); + kmem_free(dfp); } - - ASSERT(0); - return -EFSCORRUPTED; } /* @@ -328,9 +342,8 @@ xfs_defer_bjoin( * If an inode is provided, relog it to the new transaction. */ int -xfs_defer_finish( - struct xfs_trans **tp, - struct xfs_defer_ops *dop) +xfs_defer_finish_noroll( + struct xfs_trans **tp) { struct xfs_defer_pending *dfp; struct list_head *li; @@ -338,35 +351,28 @@ xfs_defer_finish( void *state; int error = 0; void (*cleanup_fn)(struct xfs_trans *, void *, int); - struct xfs_defer_ops *orig_dop; + LIST_HEAD(dop_pending); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); - trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_); - - /* - * Attach dfops to the transaction during deferred ops processing. This - * explicitly causes calls into the allocator to defer AGFL block frees. - * Note that this code can go away once all dfops users attach to the - * associated tp. - */ - ASSERT(!(*tp)->t_agfl_dfops || ((*tp)->t_agfl_dfops == dop)); - orig_dop = (*tp)->t_agfl_dfops; - (*tp)->t_agfl_dfops = dop; + trace_xfs_defer_finish(*tp, _RET_IP_); /* Until we run out of pending work to finish... */ - while (xfs_defer_has_unfinished_work(dop)) { - /* Log intents for work items sitting in the intake. */ - xfs_defer_intake_work(*tp, dop); - - /* Roll the transaction. */ - error = xfs_defer_trans_roll(tp, dop); + while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) { + /* log intents and pull in intake items */ + xfs_defer_create_intents(*tp); + list_splice_tail_init(&(*tp)->t_dfops, &dop_pending); + + /* + * Roll the transaction. + */ + error = xfs_defer_trans_roll(tp); if (error) goto out; /* Log an intent-done item for the first pending item. */ - dfp = list_first_entry(&dop->dop_pending, - struct xfs_defer_pending, dfp_list); + dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, + dfp_list); trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, dfp->dfp_count); @@ -377,7 +383,7 @@ xfs_defer_finish( list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; - error = dfp->dfp_type->finish_item(*tp, dop, li, + error = dfp->dfp_type->finish_item(*tp, li, dfp->dfp_done, &state); if (error == -EAGAIN) { /* @@ -396,7 +402,6 @@ xfs_defer_finish( */ if (cleanup_fn) cleanup_fn(*tp, state, error); - xfs_defer_trans_abort(*tp, dop, error); goto out; } } @@ -425,72 +430,72 @@ xfs_defer_finish( } out: - (*tp)->t_agfl_dfops = orig_dop; - if (error) - trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error); - else - trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_); - return error; + if (error) { + xfs_defer_trans_abort(*tp, &dop_pending); + xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); + trace_xfs_defer_finish_error(*tp, error); + xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending); + xfs_defer_cancel(*tp); + return error; + } + + trace_xfs_defer_finish_done(*tp, _RET_IP_); + return 0; } -/* - * Free up any items left in the list. - */ -void -xfs_defer_cancel( - struct xfs_defer_ops *dop) +int +xfs_defer_finish( + struct xfs_trans **tp) { - struct xfs_defer_pending *dfp; - struct xfs_defer_pending *pli; - struct list_head *pwi; - struct list_head *n; - - trace_xfs_defer_cancel(NULL, dop, _RET_IP_); + int error; /* - * Free the pending items. Caller should already have arranged - * for the intent items to be released. + * Finish and roll the transaction once more to avoid returning to the + * caller with a dirty transaction. */ - list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) { - trace_xfs_defer_intake_cancel(NULL, dfp); - list_del(&dfp->dfp_list); - list_for_each_safe(pwi, n, &dfp->dfp_work) { - list_del(pwi); - dfp->dfp_count--; - dfp->dfp_type->cancel_item(pwi); - } - ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); - } - list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) { - trace_xfs_defer_pending_cancel(NULL, dfp); - list_del(&dfp->dfp_list); - list_for_each_safe(pwi, n, &dfp->dfp_work) { - list_del(pwi); - dfp->dfp_count--; - dfp->dfp_type->cancel_item(pwi); + error = xfs_defer_finish_noroll(tp); + if (error) + return error; + if ((*tp)->t_flags & XFS_TRANS_DIRTY) { + error = xfs_defer_trans_roll(tp); + if (error) { + xfs_force_shutdown((*tp)->t_mountp, + SHUTDOWN_CORRUPT_INCORE); + return error; } - ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); } + xfs_defer_reset(*tp); + return 0; +} + +void +xfs_defer_cancel( + struct xfs_trans *tp) +{ + struct xfs_mount *mp = tp->t_mountp; + + trace_xfs_defer_cancel(tp, _RET_IP_); + xfs_defer_cancel_list(mp, &tp->t_dfops); } /* Add an item for later deferred processing. */ void xfs_defer_add( - struct xfs_defer_ops *dop, + struct xfs_trans *tp, enum xfs_defer_ops_type type, struct list_head *li) { struct xfs_defer_pending *dfp = NULL; + ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + /* * Add the item to a pending item at the end of the intake list. * If the last pending item has the same type, reuse it. Else, * create a new pending item at the end of the intake list. */ - if (!list_empty(&dop->dop_intake)) { - dfp = list_last_entry(&dop->dop_intake, + if (!list_empty(&tp->t_dfops)) { + dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, dfp_list); if (dfp->dfp_type->type != type || (dfp->dfp_type->max_items && @@ -505,7 +510,7 @@ xfs_defer_add( dfp->dfp_done = NULL; dfp->dfp_count = 0; INIT_LIST_HEAD(&dfp->dfp_work); - list_add_tail(&dfp->dfp_list, &dop->dop_intake); + list_add_tail(&dfp->dfp_list, &tp->t_dfops); } list_add_tail(li, &dfp->dfp_work); @@ -520,15 +525,25 @@ xfs_defer_init_op_type( defer_op_types[type->type] = type; } -/* Initialize a deferred operation. */ +/* + * Move deferred ops from one transaction to another and reset the source to + * initial state. This is primarily used to carry state forward across + * transaction rolls with pending dfops. + */ void -xfs_defer_init( - struct xfs_defer_ops *dop, - xfs_fsblock_t *fbp) +xfs_defer_move( + struct xfs_trans *dtp, + struct xfs_trans *stp) { - memset(dop, 0, sizeof(struct xfs_defer_ops)); - *fbp = NULLFSBLOCK; - INIT_LIST_HEAD(&dop->dop_intake); - INIT_LIST_HEAD(&dop->dop_pending); - trace_xfs_defer_init(NULL, dop, _RET_IP_); + list_splice_init(&stp->t_dfops, &dtp->t_dfops); + + /* + * Low free space mode was historically controlled by a dfops field. + * This meant that low mode state potentially carried across multiple + * transaction rolls. Transfer low mode on a dfops move to preserve + * that behavior. + */ + dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE); + + xfs_defer_reset(stp); } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index a02b2b748b6d..2584a5b95b0d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -24,17 +24,6 @@ struct xfs_defer_pending { /* * Header for deferred operation list. - * - * dop_low is used by the allocator to activate the lowspace algorithm - - * when free space is running low the extent allocator may choose to - * allocate an extent from an AG without leaving sufficient space for - * a btree split when inserting the new extent. In this case the allocator - * will enable the lowspace algorithm which is supposed to allow further - * allocations (such as btree splits and newroots) to allocate from - * sequential AGs. In order to avoid locking AGs out of order the lowspace - * algorithm will start searching for free space from AG 0. If the correct - * transaction reservations have been made then this algorithm will eventually - * find all the space it needs. */ enum xfs_defer_ops_type { XFS_DEFER_OPS_TYPE_BMAP, @@ -45,28 +34,12 @@ enum xfs_defer_ops_type { XFS_DEFER_OPS_TYPE_MAX, }; -#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ -#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ - -struct xfs_defer_ops { - bool dop_committed; /* did any trans commit? */ - bool dop_low; /* alloc in low mode */ - struct list_head dop_intake; /* unlogged pending work */ - struct list_head dop_pending; /* logged pending work */ - - /* relog these with each roll */ - struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; - struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; -}; - -void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, +void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type, struct list_head *h); -int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop); -void xfs_defer_cancel(struct xfs_defer_ops *dop); -void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); -bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); -int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); -int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); +int xfs_defer_finish_noroll(struct xfs_trans **tp); +int xfs_defer_finish(struct xfs_trans **tp); +void xfs_defer_cancel(struct xfs_trans *); +void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp); /* Description of a deferred type. */ struct xfs_defer_op_type { @@ -74,8 +47,8 @@ struct xfs_defer_op_type { unsigned int max_items; void (*abort_intent)(void *); void *(*create_done)(struct xfs_trans *, void *, unsigned int); - int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *, - struct list_head *, void *, void **); + int (*finish_item)(struct xfs_trans *, struct list_head *, void *, + void **); void (*finish_cleanup)(struct xfs_trans *, void *, int); void (*cancel_item)(struct list_head *); int (*diff_items)(void *, struct list_head *, struct list_head *); diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 59169aff30fe..229152cd1a24 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -239,12 +239,10 @@ xfs_dir_init( */ int xfs_dir_createname( - xfs_trans_t *tp, - xfs_inode_t *dp, + struct xfs_trans *tp, + struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t inum, /* new entry inode number */ - xfs_fsblock_t *first, /* bmap's firstblock */ - struct xfs_defer_ops *dfops, /* bmap's freeblock list */ xfs_extlen_t total) /* bmap's total block count */ { struct xfs_da_args *args; @@ -252,6 +250,7 @@ xfs_dir_createname( int v; /* type-checking value */ ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); + if (inum) { rval = xfs_dir_ino_validate(tp->t_mountp, inum); if (rval) @@ -270,8 +269,6 @@ xfs_dir_createname( args->hashval = dp->i_mount->m_dirnameops->hashname(name); args->inumber = inum; args->dp = dp; - args->firstblock = first; - args->dfops = dfops; args->total = total; args->whichfork = XFS_DATA_FORK; args->trans = tp; @@ -416,17 +413,15 @@ out_free: */ int xfs_dir_removename( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, - xfs_ino_t ino, - xfs_fsblock_t *first, /* bmap's firstblock */ - struct xfs_defer_ops *dfops, /* bmap's freeblock list */ - xfs_extlen_t total) /* bmap's total block count */ + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_name *name, + xfs_ino_t ino, + xfs_extlen_t total) /* bmap's total block count */ { - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); XFS_STATS_INC(dp->i_mount, xs_dir_remove); @@ -442,8 +437,6 @@ xfs_dir_removename( args->hashval = dp->i_mount->m_dirnameops->hashname(name); args->inumber = ino; args->dp = dp; - args->firstblock = first; - args->dfops = dfops; args->total = total; args->whichfork = XFS_DATA_FORK; args->trans = tp; @@ -478,17 +471,15 @@ out_free: */ int xfs_dir_replace( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, /* name of entry to replace */ - xfs_ino_t inum, /* new inode number */ - xfs_fsblock_t *first, /* bmap's firstblock */ - struct xfs_defer_ops *dfops, /* bmap's freeblock list */ - xfs_extlen_t total) /* bmap's total block count */ + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_name *name, /* name of entry to replace */ + xfs_ino_t inum, /* new inode number */ + xfs_extlen_t total) /* bmap's total block count */ { - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); @@ -507,8 +498,6 @@ xfs_dir_replace( args->hashval = dp->i_mount->m_dirnameops->hashname(name); args->inumber = inum; args->dp = dp; - args->firstblock = first; - args->dfops = dfops; args->total = total; args->whichfork = XFS_DATA_FORK; args->trans = tp; @@ -547,7 +536,7 @@ xfs_dir_canenter( xfs_inode_t *dp, struct xfs_name *name) /* name of entry to add */ { - return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0); + return xfs_dir_createname(tp, dp, name, 0, 0); } /* @@ -645,17 +634,17 @@ xfs_dir2_isleaf( */ int xfs_dir2_shrink_inode( - xfs_da_args_t *args, - xfs_dir2_db_t db, - struct xfs_buf *bp) + struct xfs_da_args *args, + xfs_dir2_db_t db, + struct xfs_buf *bp) { - xfs_fileoff_t bno; /* directory file offset */ - xfs_dablk_t da; /* directory file offset */ - int done; /* bunmap is finished */ - xfs_inode_t *dp; - int error; - xfs_mount_t *mp; - xfs_trans_t *tp; + xfs_fileoff_t bno; /* directory file offset */ + xfs_dablk_t da; /* directory file offset */ + int done; /* bunmap is finished */ + struct xfs_inode *dp; + int error; + struct xfs_mount *mp; + struct xfs_trans *tp; trace_xfs_dir2_shrink_inode(args, db); @@ -665,8 +654,7 @@ xfs_dir2_shrink_inode( da = xfs_dir2_db_to_da(args->geo, db); /* Unmap the fsblock(s). */ - error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, - args->firstblock, args->dfops, &done); + error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, &done); if (error) { /* * ENOSPC actually can happen if we're in a removename with no diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index ed385316c7dc..c3e3f6b813d8 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -9,7 +9,6 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" -struct xfs_defer_ops; struct xfs_da_args; struct xfs_inode; struct xfs_mount; @@ -118,19 +117,16 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_inode *pdp); extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t inum, - xfs_fsblock_t *first, - struct xfs_defer_ops *dfops, xfs_extlen_t tot); + xfs_extlen_t tot); extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t *inum, struct xfs_name *ci_name); extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino, - xfs_fsblock_t *first, - struct xfs_defer_ops *dfops, xfs_extlen_t tot); + xfs_extlen_t tot); extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t inum, - xfs_fsblock_t *first, - struct xfs_defer_ops *dfops, xfs_extlen_t tot); + xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name); diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 2daf874969ab..f1bb3434f51c 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1012,7 +1012,7 @@ xfs_dir2_leafn_rebalance( int oldstale; /* old count of stale leaves */ #endif int oldsum; /* old total leaf count */ - int swap; /* swapped leaf blocks */ + int swap_blocks; /* swapped leaf blocks */ struct xfs_dir2_leaf_entry *ents1; struct xfs_dir2_leaf_entry *ents2; struct xfs_dir3_icleaf_hdr hdr1; @@ -1023,13 +1023,10 @@ xfs_dir2_leafn_rebalance( /* * If the block order is wrong, swap the arguments. */ - if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { - xfs_da_state_blk_t *tmp; /* temp for block swap */ + swap_blocks = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp); + if (swap_blocks) + swap(blk1, blk2); - tmp = blk1; - blk1 = blk2; - blk2 = tmp; - } leaf1 = blk1->bp->b_addr; leaf2 = blk2->bp->b_addr; dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); @@ -1093,11 +1090,11 @@ xfs_dir2_leafn_rebalance( * Mark whether we're inserting into the old or new leaf. */ if (hdr1.count < hdr2.count) - state->inleaf = swap; + state->inleaf = swap_blocks; else if (hdr1.count > hdr2.count) - state->inleaf = !swap; + state->inleaf = !swap_blocks; else - state->inleaf = swap ^ (blk1->index <= hdr1.count); + state->inleaf = swap_blocks ^ (blk1->index <= hdr1.count); /* * Adjust the expected index for insertion. */ diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index b9974e7a8e6e..66077a105cbb 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -53,7 +53,8 @@ #define XFS_ERRTAG_LOG_ITEM_PIN 30 #define XFS_ERRTAG_BUF_LRU_REF 31 #define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32 -#define XFS_ERRTAG_MAX 33 +#define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33 +#define XFS_ERRTAG_MAX 34 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -91,5 +92,6 @@ #define XFS_RANDOM_LOG_ITEM_PIN 1 #define XFS_RANDOM_BUF_LRU_REF 2 #define XFS_RANDOM_FORCE_SCRUB_REPAIR 1 +#define XFS_RANDOM_FORCE_SUMMARY_RECALC 1 #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0d968e8143aa..a8f6db735d5d 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1838,23 +1838,24 @@ out_error: */ STATIC void xfs_difree_inode_chunk( - struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *rec, - struct xfs_defer_ops *dfops) + struct xfs_inobt_rec_incore *rec) { - xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino); - int startidx, endidx; - int nextbit; - xfs_agblock_t agbno; - int contigblk; - struct xfs_owner_info oinfo; + struct xfs_mount *mp = tp->t_mountp; + xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, + rec->ir_startino); + int startidx, endidx; + int nextbit; + xfs_agblock_t agbno; + int contigblk; + struct xfs_owner_info oinfo; DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno), + xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), mp->m_ialloc_blks, &oinfo); return; } @@ -1898,7 +1899,7 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno), + xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, &oinfo); /* reset range to current bit and carry on... */ @@ -1915,7 +1916,6 @@ xfs_difree_inobt( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agino_t agino, - struct xfs_defer_ops *dfops, struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { @@ -2003,7 +2003,7 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(mp, agno, &rec, dfops); + xfs_difree_inode_chunk(tp, agno, &rec); } else { xic->deleted = false; @@ -2148,7 +2148,6 @@ int xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ - struct xfs_defer_ops *dfops, /* extents to free */ struct xfs_icluster *xic) /* cluster info if deleted */ { /* REFERENCED */ @@ -2200,7 +2199,7 @@ xfs_difree( /* * Fix up the inode allocation btree. */ - error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec); + error = xfs_difree_inobt(mp, tp, agbp, agino, xic, &rec); if (error) goto error0; @@ -2260,7 +2259,7 @@ xfs_imap_lookup( } xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); if (error) return error; @@ -2539,7 +2538,7 @@ xfs_agi_verify( return __this_address; for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { - if (agi->agi_unlinked[i] == NULLAGINO) + if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO)) continue; if (!xfs_verify_ino(mp, be32_to_cpu(agi->agi_unlinked[i]))) return __this_address; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 90b09c5f163b..e936b7cc9389 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -82,7 +82,6 @@ int /* error */ xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ - struct xfs_defer_ops *dfops, /* extents to free */ struct xfs_icluster *ifree); /* cluster info if deleted */ /* diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index a5237afec5ab..86c50208a143 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -552,6 +552,7 @@ xfs_inobt_max_size( static int xfs_inobt_count_blocks( struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) @@ -560,14 +561,14 @@ xfs_inobt_count_blocks( struct xfs_btree_cur *cur; int error; - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) return error; - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); error = xfs_btree_count_blocks(cur, tree_blocks); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_btree_del_cursor(cur, error); + xfs_trans_brelse(tp, agbp); return error; } @@ -578,6 +579,7 @@ xfs_inobt_count_blocks( int xfs_finobt_calc_reserves( struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used) @@ -588,7 +590,7 @@ xfs_finobt_calc_reserves( if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); + error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bf8f0c405e7d..ebdd0c6b8766 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -60,8 +60,8 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ -int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_extlen_t *ask, xfs_extlen_t *used); +int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index b80c63faace2..771dd072015d 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -14,6 +14,7 @@ #include "xfs_inode_fork.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" +#include "xfs_bmap.h" #include "xfs_trace.h" /* @@ -612,6 +613,19 @@ xfs_iext_realloc_root( cur->leaf = new; } +/* + * Increment the sequence counter if we are on a COW fork. This allows + * the writeback code to skip looking for a COW extent if the COW fork + * hasn't changed. We use WRITE_ONCE here to ensure the update to the + * sequence counter is seen before the modifications to the extent + * tree itself take effect. + */ +static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp, int state) +{ + if (state & BMAP_COWFORK) + WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1); +} + void xfs_iext_insert( struct xfs_inode *ip, @@ -624,6 +638,8 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; + xfs_iext_inc_seq(ifp, state); + if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -864,6 +880,8 @@ xfs_iext_remove( ASSERT(ifp->if_u1.if_root != NULL); ASSERT(xfs_iext_valid(ifp, cur)); + xfs_iext_inc_seq(ifp, state); + nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; for (i = cur->pos; i < nr_entries; i++) leaf->recs[i] = leaf->recs[i + 1]; @@ -970,6 +988,8 @@ xfs_iext_update_extent( { struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + xfs_iext_inc_seq(ifp, state); + if (cur->pos == 0) { struct xfs_bmbt_irec old; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 33dc34655ac3..30d1d60f1d46 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -731,7 +731,8 @@ xfs_inode_validate_extsize( if ((hint_flag || inherit_flag) && extsize == 0) return __this_address; - if (!(hint_flag || inherit_flag) && extsize != 0) + /* free inodes get flags set to zero but extsize remains */ + if (mode && !(hint_flag || inherit_flag) && extsize != 0) return __this_address; if (extsize_bytes % blocksize_bytes) @@ -777,7 +778,8 @@ xfs_inode_validate_cowextsize( if (hint_flag && cowextsize == 0) return __this_address; - if (!hint_flag && cowextsize != 0) + /* free inodes get flags set to zero but cowextsize remains */ + if (mode && !hint_flag && cowextsize != 0) return __this_address; if (hint_flag && rt_flag) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 183ec0cb8921..f9acf1d436f6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -158,7 +158,6 @@ xfs_init_local_fork( } ifp->if_bytes = size; - ifp->if_real_bytes = real_size; ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); ifp->if_flags |= XFS_IFINLINE; } @@ -226,7 +225,6 @@ xfs_iformat_extents( return -EFSCORRUPTED; } - ifp->if_real_bytes = 0; ifp->if_bytes = 0; ifp->if_u1.if_root = NULL; ifp->if_height = 0; @@ -271,7 +269,7 @@ xfs_iformat_btree( { struct xfs_mount *mp = ip->i_mount; xfs_bmdr_block_t *dfp; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; /* REFERENCED */ int nrecs; int size; @@ -317,7 +315,6 @@ xfs_iformat_btree( ifp->if_flags &= ~XFS_IFEXTENTS; ifp->if_flags |= XFS_IFBROOT; - ifp->if_real_bytes = 0; ifp->if_bytes = 0; ifp->if_u1.if_root = NULL; ifp->if_height = 0; @@ -350,7 +347,7 @@ xfs_iroot_realloc( { struct xfs_mount *mp = ip->i_mount; int cur_max; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; struct xfs_btree_block *new_broot; int new_max; size_t new_size; @@ -471,55 +468,34 @@ xfs_iroot_realloc( */ void xfs_idata_realloc( - xfs_inode_t *ip, - int byte_diff, - int whichfork) + struct xfs_inode *ip, + int byte_diff, + int whichfork) { - xfs_ifork_t *ifp; - int new_size; - int real_size; - - if (byte_diff == 0) { - return; - } + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int new_size = (int)ifp->if_bytes + byte_diff; - ifp = XFS_IFORK_PTR(ip, whichfork); - new_size = (int)ifp->if_bytes + byte_diff; ASSERT(new_size >= 0); + ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork)); + + if (byte_diff == 0) + return; if (new_size == 0) { kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = NULL; - real_size = 0; - } else { - /* - * Stuck with malloc/realloc. - * For inline data, the underlying buffer must be - * a multiple of 4 bytes in size so that it can be - * logged and stay on word boundaries. We enforce - * that here. - */ - real_size = roundup(new_size, 4); - if (ifp->if_u1.if_data == NULL) { - ASSERT(ifp->if_real_bytes == 0); - ifp->if_u1.if_data = kmem_alloc(real_size, - KM_SLEEP | KM_NOFS); - } else { - /* - * Only do the realloc if the underlying size - * is really changing. - */ - if (ifp->if_real_bytes != real_size) { - ifp->if_u1.if_data = - kmem_realloc(ifp->if_u1.if_data, - real_size, - KM_SLEEP | KM_NOFS); - } - } + ifp->if_bytes = 0; + return; } - ifp->if_real_bytes = real_size; + + /* + * For inline data, the underlying buffer must be a multiple of 4 bytes + * in size so that it can be logged and stay on word boundaries. + * We enforce that here. + */ + ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data, + roundup(new_size, 4), KM_SLEEP | KM_NOFS); ifp->if_bytes = new_size; - ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); } void @@ -527,7 +503,7 @@ xfs_idestroy_fork( xfs_inode_t *ip, int whichfork) { - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; ifp = XFS_IFORK_PTR(ip, whichfork); if (ifp->if_broot != NULL) { @@ -543,17 +519,13 @@ xfs_idestroy_fork( */ if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { if (ifp->if_u1.if_data != NULL) { - ASSERT(ifp->if_real_bytes != 0); kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = NULL; - ifp->if_real_bytes = 0; } } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { xfs_iext_destroy(ifp); } - ASSERT(ifp->if_real_bytes == 0); - if (whichfork == XFS_ATTR_FORK) { kmem_zone_free(xfs_ifork_zone, ip->i_afp); ip->i_afp = NULL; @@ -620,7 +592,7 @@ xfs_iflush_fork( int whichfork) { char *cp; - xfs_ifork_t *ifp; + struct xfs_ifork *ifp; xfs_mount_t *mp; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 781b1603df5e..60361d2d74a1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -12,9 +12,9 @@ struct xfs_dinode; /* * File incore extent information, present for each of data & attr forks. */ -typedef struct xfs_ifork { +struct xfs_ifork { int if_bytes; /* bytes in if_u1 */ - int if_real_bytes; /* bytes allocated in if_u1 */ + unsigned int if_seq; /* cow fork mod counter */ struct xfs_btree_block *if_broot; /* file's incore btree root */ short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ @@ -23,7 +23,7 @@ typedef struct xfs_ifork { void *if_root; /* extent tree root */ char *if_data; /* inline file data */ } if_u1; -} xfs_ifork_t; +}; /* * Per-fork incore inode flags. diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 79bb79853c9f..e5f97c69b320 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -77,6 +77,19 @@ static inline uint xlog_get_cycle(char *ptr) #define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ +/* + * Log item for unmount records. + * + * The unmount record used to have a string "Unmount filesystem--" in the + * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE). + * We just write the magic number now; see xfs_log_unmount_write. + */ +struct xfs_unmount_log_format { + uint16_t magic; /* XLOG_UNMOUNT_TYPE */ + uint16_t pad1; + uint32_t pad2; /* may as well make it 64 bits */ +}; + /* Region types for iovec's i_type */ #define XLOG_REG_TYPE_BFORMAT 1 #define XLOG_REG_TYPE_BCHUNK 2 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 9dda6fd0bb13..542aa1475b5f 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -34,11 +34,9 @@ enum xfs_refc_adjust_op { }; STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, - xfs_agblock_t agbno, xfs_extlen_t aglen, - struct xfs_defer_ops *dfops); + xfs_agblock_t agbno, xfs_extlen_t aglen); STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, - xfs_agblock_t agbno, xfs_extlen_t aglen, - struct xfs_defer_ops *dfops); + xfs_agblock_t agbno, xfs_extlen_t aglen); /* * Look up the first record less than or equal to [bno, len] in the btree @@ -870,7 +868,6 @@ xfs_refcount_adjust_extents( xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adj, - struct xfs_defer_ops *dfops, struct xfs_owner_info *oinfo) { struct xfs_refcount_irec ext, tmp; @@ -925,8 +922,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, tmp.rc_startblock); - xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, - tmp.rc_blockcount, oinfo); + xfs_bmap_add_free(cur->bc_tp, fsbno, + tmp.rc_blockcount, oinfo); } (*agbno) += tmp.rc_blockcount; @@ -968,8 +965,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, ext.rc_startblock); - xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, - ext.rc_blockcount, oinfo); + xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, + oinfo); } skip: @@ -998,7 +995,6 @@ xfs_refcount_adjust( xfs_agblock_t *new_agbno, xfs_extlen_t *new_aglen, enum xfs_refc_adjust_op adj, - struct xfs_defer_ops *dfops, struct xfs_owner_info *oinfo) { bool shape_changed; @@ -1043,7 +1039,7 @@ xfs_refcount_adjust( /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, - adj, dfops, oinfo); + adj, oinfo); if (error) goto out_error; @@ -1067,7 +1063,7 @@ xfs_refcount_finish_one_cleanup( if (rcur == NULL) return; agbp = rcur->bc_private.a.agbp; - xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); } @@ -1082,7 +1078,6 @@ xfs_refcount_finish_one_cleanup( int xfs_refcount_finish_one( struct xfs_trans *tp, - struct xfs_defer_ops *dfops, enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, xfs_extlen_t blockcount, @@ -1132,7 +1127,7 @@ xfs_refcount_finish_one( if (!agbp) return -EFSCORRUPTED; - rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops); + rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); if (!rcur) { error = -ENOMEM; goto out_cur; @@ -1145,23 +1140,23 @@ xfs_refcount_finish_one( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL); + new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL); + new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; *new_len = 0; - error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops); + error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); break; case XFS_REFCOUNT_FREE_COW: *new_fsb = startblock + blockcount; *new_len = 0; - error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops); + error = __xfs_refcount_cow_free(rcur, bno, blockcount); break; default: ASSERT(0); @@ -1183,16 +1178,16 @@ out_cur: */ static int __xfs_refcount_add( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, xfs_extlen_t blockcount) { struct xfs_refcount_intent *ri; - trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), - type, XFS_FSB_TO_AGBNO(mp, startblock), + trace_xfs_refcount_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, startblock), + type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), blockcount); ri = kmem_alloc(sizeof(struct xfs_refcount_intent), @@ -1202,7 +1197,7 @@ __xfs_refcount_add( ri->ri_startblock = startblock; ri->ri_blockcount = blockcount; - xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); return 0; } @@ -1211,14 +1206,13 @@ __xfs_refcount_add( */ int xfs_refcount_increase_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, + return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, PREV->br_blockcount); } @@ -1227,14 +1221,13 @@ xfs_refcount_increase_extent( */ int xfs_refcount_decrease_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_bmbt_irec *PREV) { - if (!xfs_sb_version_hasreflink(&mp->m_sb)) + if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, + return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, PREV->br_blockcount); } @@ -1522,8 +1515,7 @@ STATIC int __xfs_refcount_cow_alloc( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, - xfs_extlen_t aglen, - struct xfs_defer_ops *dfops) + xfs_extlen_t aglen) { trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); @@ -1540,8 +1532,7 @@ STATIC int __xfs_refcount_cow_free( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, - xfs_extlen_t aglen, - struct xfs_defer_ops *dfops) + xfs_extlen_t aglen) { trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); @@ -1554,47 +1545,45 @@ __xfs_refcount_cow_free( /* Record a CoW staging extent in the refcount btree. */ int xfs_refcount_alloc_cow_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len) { + struct xfs_mount *mp = tp->t_mountp; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, - fsb, len); + error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); if (error) return error; /* Add rmap entry */ - return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ int xfs_refcount_free_cow_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_fsblock_t fsb, xfs_extlen_t len) { + struct xfs_mount *mp = tp->t_mountp; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; /* Remove rmap entry */ - error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); if (error) return error; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, - fsb, len); + return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); } struct xfs_refcount_recovery { @@ -1635,7 +1624,6 @@ xfs_refcount_recover_cow_leftovers( struct list_head debris; union xfs_btree_irec low; union xfs_btree_irec high; - struct xfs_defer_ops dfops; xfs_fsblock_t fsb; xfs_agblock_t agbno; int error; @@ -1666,7 +1654,7 @@ xfs_refcount_recover_cow_leftovers( error = -ENOMEM; goto out_trans; } - cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); /* Find all the leftover CoW staging extents. */ memset(&low, 0, sizeof(low)); @@ -1675,11 +1663,11 @@ xfs_refcount_recover_cow_leftovers( high.rc.rc_startblock = -1U; error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); - if (error) - goto out_cursor; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); xfs_trans_cancel(tp); + if (error) + goto out_free; /* Now iterate the list to free the leftovers */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1691,21 +1679,15 @@ xfs_refcount_recover_cow_leftovers( trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); /* Free the orphan record */ - xfs_defer_init(&dfops, &fsb); agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; fsb = XFS_AGB_TO_FSB(mp, agno, agbno); - error = xfs_refcount_free_cow_extent(mp, &dfops, fsb, + error = xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); if (error) - goto out_defer; + goto out_trans; /* Free the block. */ - xfs_bmap_add_free(mp, &dfops, fsb, - rr->rr_rrec.rc_blockcount, NULL); - - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_defer; + xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); error = xfs_trans_commit(tp); if (error) @@ -1716,8 +1698,6 @@ xfs_refcount_recover_cow_leftovers( } return error; -out_defer: - xfs_defer_cancel(&dfops); out_trans: xfs_trans_cancel(tp); out_free: @@ -1727,11 +1707,6 @@ out_free: kmem_free(rr); } return error; - -out_cursor: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_trans_brelse(tp, agbp); - goto out_trans; } /* Is there a record covering a given extent? */ diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 5fef74412727..1d9c518575e7 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -29,29 +29,26 @@ struct xfs_refcount_intent { xfs_extlen_t ri_blockcount; }; -extern int xfs_refcount_increase_extent(struct xfs_mount *mp, - struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec); -extern int xfs_refcount_decrease_extent(struct xfs_mount *mp, - struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec); +extern int xfs_refcount_increase_extent(struct xfs_trans *tp, + struct xfs_bmbt_irec *irec); +extern int xfs_refcount_decrease_extent(struct xfs_trans *tp, + struct xfs_bmbt_irec *irec); extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); extern int xfs_refcount_finish_one(struct xfs_trans *tp, - struct xfs_defer_ops *dfops, enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, xfs_extlen_t *new_len, - struct xfs_btree_cur **pcur); + enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, + xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_end_of_shared); -extern int xfs_refcount_alloc_cow_extent(struct xfs_mount *mp, - struct xfs_defer_ops *dfops, xfs_fsblock_t fsb, - xfs_extlen_t len); -extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp, - struct xfs_defer_ops *dfops, xfs_fsblock_t fsb, - xfs_extlen_t len); +extern int xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, + xfs_fsblock_t fsb, xfs_extlen_t len); +extern int xfs_refcount_free_cow_extent(struct xfs_trans *tp, + xfs_fsblock_t fsb, xfs_extlen_t len); extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, xfs_agnumber_t agno); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index b71937982c5b..1aaa01c97517 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -27,8 +27,7 @@ xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, - cur->bc_private.a.dfops); + cur->bc_private.a.agbp, cur->bc_private.a.agno); } STATIC void @@ -71,7 +70,6 @@ xfs_refcountbt_alloc_block( args.type = XFS_ALLOCTYPE_NEAR_BNO; args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, xfs_refc_block(args.mp)); - args.firstblock = args.fsbno; xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_REFC); args.minlen = args.maxlen = args.prod = 1; args.resv = XFS_AG_RESV_METADATA; @@ -323,8 +321,7 @@ xfs_refcountbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, - struct xfs_defer_ops *dfops) + xfs_agnumber_t agno) { struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; @@ -344,7 +341,6 @@ xfs_refcountbt_init_cursor( cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; - cur->bc_private.a.dfops = dfops; cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; cur->bc_private.a.priv.refc.nr_ops = 0; @@ -408,6 +404,7 @@ xfs_refcountbt_max_size( int xfs_refcountbt_calc_reserves( struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used) @@ -422,14 +419,14 @@ xfs_refcountbt_calc_reserves( return 0; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index d2852b6e1fa8..ba416f71c824 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -44,8 +44,8 @@ struct xfs_mount; ((index) - 1) * sizeof(xfs_refcount_ptr_t))) extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno, - struct xfs_defer_ops *dfops); + struct xfs_trans *tp, struct xfs_buf *agbp, + xfs_agnumber_t agno); extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); @@ -55,6 +55,7 @@ extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, - xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, + xfs_extlen_t *used); #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index d4460b0d2d81..245af452840e 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -670,14 +670,8 @@ xfs_rmap_free( cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); - if (error) - goto out_error; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return 0; - -out_error: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_btree_del_cursor(cur, error); return error; } @@ -753,19 +747,19 @@ xfs_rmap_map( &have_lt); if (error) goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); - - error = xfs_rmap_get_rec(cur, <rec, &have_lt); - if (error) - goto out_error; - XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); - trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, ltrec.rm_startblock, - ltrec.rm_blockcount, ltrec.rm_owner, - ltrec.rm_offset, ltrec.rm_flags); + if (have_lt) { + error = xfs_rmap_get_rec(cur, <rec, &have_lt); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); + trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, + cur->bc_private.a.agno, ltrec.rm_startblock, + ltrec.rm_blockcount, ltrec.rm_owner, + ltrec.rm_offset, ltrec.rm_flags); - if (!xfs_rmap_is_mergeable(<rec, owner, flags)) - have_lt = 0; + if (!xfs_rmap_is_mergeable(<rec, owner, flags)) + have_lt = 0; + } XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 0 || @@ -912,14 +906,8 @@ xfs_rmap_alloc( cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); error = xfs_rmap_map(cur, bno, len, false, oinfo); - if (error) - goto out_error; - - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - return 0; -out_error: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_btree_del_cursor(cur, error); return error; } @@ -2156,7 +2144,7 @@ xfs_rmap_finish_one_cleanup( if (rcur == NULL) return; agbp = rcur->bc_private.a.agbp; - xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); } @@ -2289,18 +2277,18 @@ xfs_rmap_update_is_needed( */ static int __xfs_rmap_add( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, enum xfs_rmap_intent_type type, uint64_t owner, int whichfork, struct xfs_bmbt_irec *bmap) { - struct xfs_rmap_intent *ri; + struct xfs_rmap_intent *ri; - trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock), + trace_xfs_rmap_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), type, - XFS_FSB_TO_AGBNO(mp, bmap->br_startblock), + XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), owner, whichfork, bmap->br_startoff, bmap->br_blockcount, @@ -2313,23 +2301,22 @@ __xfs_rmap_add( ri->ri_whichfork = whichfork; ri->ri_bmap = *bmap; - xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); return 0; } /* Map an extent into a file. */ int xfs_rmap_map_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_rmap_update_is_needed(mp, whichfork)) + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? + return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, whichfork, PREV); } @@ -2337,25 +2324,29 @@ xfs_rmap_map_extent( /* Unmap an extent out of a file. */ int xfs_rmap_unmap_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_rmap_update_is_needed(mp, whichfork)) + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? + return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, whichfork, PREV); } -/* Convert a data fork extent from unwritten to real or vice versa. */ +/* + * Convert a data fork extent from unwritten to real or vice versa. + * + * Note that tp can be NULL here as no transaction is used for COW fork + * unwritten conversion. + */ int xfs_rmap_convert_extent( struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *PREV) @@ -2363,7 +2354,7 @@ xfs_rmap_convert_extent( if (!xfs_rmap_update_is_needed(mp, whichfork)) return 0; - return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ? + return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, whichfork, PREV); } @@ -2371,8 +2362,7 @@ xfs_rmap_convert_extent( /* Schedule the creation of an rmap for non-file data. */ int xfs_rmap_alloc_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, @@ -2380,23 +2370,21 @@ xfs_rmap_alloc_extent( { struct xfs_bmbt_irec bmap; - if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK)) + if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) return 0; - bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno); + bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; - return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner, - XFS_DATA_FORK, &bmap); + return __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap); } /* Schedule the deletion of an rmap for non-file data. */ int xfs_rmap_free_extent( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, @@ -2404,16 +2392,15 @@ xfs_rmap_free_extent( { struct xfs_bmbt_irec bmap; - if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK)) + if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK)) return 0; - bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno); + bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno); bmap.br_blockcount = len; bmap.br_startoff = 0; bmap.br_state = XFS_EXT_NORM; - return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner, - XFS_DATA_FORK, &bmap); + return __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); } /* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 9f19454768b2..157dc722ad35 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -185,21 +185,17 @@ struct xfs_rmap_intent { }; /* functions for updating the rmapbt based on bmbt map/unmap operations */ -int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, +int xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *imap); +int xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *imap); +int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *imap); -int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - struct xfs_inode *ip, int whichfork, - struct xfs_bmbt_irec *imap); -int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - struct xfs_inode *ip, int whichfork, - struct xfs_bmbt_irec *imap); -int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - uint64_t owner); -int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, - uint64_t owner); +int xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno, + xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); +int xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, + xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 221a88ea60bb..f79cf040d745 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -554,6 +554,7 @@ xfs_rmapbt_max_size( int xfs_rmapbt_calc_reserves( struct xfs_mount *mp, + struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used) @@ -567,14 +568,14 @@ xfs_rmapbt_calc_reserves( if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); /* Reserve 1% of the AG or enough for 1 block per record. */ *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 50198b6c3bb2..820d668b063d 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -51,7 +51,7 @@ extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, xfs_agblock_t agblocks); -extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, +extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 350119eeaecb..081f46e30556 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -96,80 +96,146 @@ xfs_perag_put( trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); } -/* - * Check the validity of the SB found. - */ +/* Check all the superblock fields we care about when reading one in. */ STATIC int -xfs_mount_validate_sb( - xfs_mount_t *mp, - xfs_sb_t *sbp, - bool check_inprogress, - bool check_version) +xfs_validate_sb_read( + struct xfs_mount *mp, + struct xfs_sb *sbp) { - uint32_t agcount = 0; - uint32_t rem; - - if (sbp->sb_magicnum != XFS_SB_MAGIC) { - xfs_warn(mp, "bad magic number"); - return -EWRONGFS; - } - - - if (!xfs_sb_good_version(sbp)) { - xfs_warn(mp, "bad version"); - return -EWRONGFS; - } + if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + return 0; /* - * Version 5 superblock feature mask validation. Reject combinations the - * kernel cannot support up front before checking anything else. For - * write validation, we don't need to check feature masks. + * Version 5 superblock feature mask validation. Reject combinations + * the kernel cannot support up front before checking anything else. */ - if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { - if (xfs_sb_has_compat_feature(sbp, - XFS_SB_FEAT_COMPAT_UNKNOWN)) { - xfs_warn(mp, + if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { + xfs_warn(mp, "Superblock has unknown compatible features (0x%x) enabled.", - (sbp->sb_features_compat & - XFS_SB_FEAT_COMPAT_UNKNOWN)); - xfs_warn(mp, + (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); + xfs_warn(mp, "Using a more recent kernel is recommended."); - } + } - if (xfs_sb_has_ro_compat_feature(sbp, - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { - xfs_alert(mp, + if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + xfs_alert(mp, "Superblock has unknown read-only compatible features (0x%x) enabled.", - (sbp->sb_features_ro_compat & - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, + (sbp->sb_features_ro_compat & + XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write."); - xfs_warn(mp, + xfs_warn(mp, "Filesystem can only be safely mounted read only."); - return -EINVAL; - } - } - if (xfs_sb_has_incompat_feature(sbp, - XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { - xfs_warn(mp, -"Superblock has unknown incompatible features (0x%x) enabled.", - (sbp->sb_features_incompat & - XFS_SB_FEAT_INCOMPAT_UNKNOWN)); - xfs_warn(mp, -"Filesystem can not be safely mounted by this kernel."); return -EINVAL; } - } else if (xfs_sb_version_hascrc(sbp)) { - /* - * We can't read verify the sb LSN because the read verifier is - * called before the log is allocated and processed. We know the - * log is set up before write verifier (!check_version) calls, - * so just check it here. - */ - if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) - return -EFSCORRUPTED; + } + if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { + xfs_warn(mp, +"Superblock has unknown incompatible features (0x%x) enabled.", + (sbp->sb_features_incompat & + XFS_SB_FEAT_INCOMPAT_UNKNOWN)); + xfs_warn(mp, +"Filesystem cannot be safely mounted by this kernel."); + return -EINVAL; + } + + return 0; +} + +/* Check all the superblock fields we care about when writing one out. */ +STATIC int +xfs_validate_sb_write( + struct xfs_mount *mp, + struct xfs_buf *bp, + struct xfs_sb *sbp) +{ + /* + * Carry out additional sb summary counter sanity checks when we write + * the superblock. We skip this in the read validator because there + * could be newer superblocks in the log and if the values are garbage + * even after replay we'll recalculate them at the end of log mount. + * + * mkfs has traditionally written zeroed counters to inprogress and + * secondary superblocks, so allow this usage to continue because + * we never read counters from such superblocks. + */ + if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && + (sbp->sb_fdblocks > sbp->sb_dblocks || + !xfs_verify_icount(mp, sbp->sb_icount) || + sbp->sb_ifree > sbp->sb_icount)) { + xfs_warn(mp, "SB summary counter sanity check failed"); + return -EFSCORRUPTED; + } + + if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + return 0; + + /* + * Version 5 superblock feature mask validation. Reject combinations + * the kernel cannot support since we checked for unsupported bits in + * the read verifier, which means that memory is corrupt. + */ + if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { + xfs_warn(mp, +"Corruption detected in superblock compatible features (0x%x)!", + (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); + return -EFSCORRUPTED; + } + + if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + xfs_alert(mp, +"Corruption detected in superblock read-only compatible features (0x%x)!", + (sbp->sb_features_ro_compat & + XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); + return -EFSCORRUPTED; + } + if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { + xfs_warn(mp, +"Corruption detected in superblock incompatible features (0x%x)!", + (sbp->sb_features_incompat & + XFS_SB_FEAT_INCOMPAT_UNKNOWN)); + return -EFSCORRUPTED; + } + if (xfs_sb_has_incompat_log_feature(sbp, + XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { + xfs_warn(mp, +"Corruption detected in superblock incompatible log features (0x%x)!", + (sbp->sb_features_log_incompat & + XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); + return -EFSCORRUPTED; + } + + /* + * We can't read verify the sb LSN because the read verifier is called + * before the log is allocated and processed. We know the log is set up + * before write verifier calls, so check it here. + */ + if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) + return -EFSCORRUPTED; + + return 0; +} + +/* Check the validity of the SB. */ +STATIC int +xfs_validate_sb_common( + struct xfs_mount *mp, + struct xfs_buf *bp, + struct xfs_sb *sbp) +{ + uint32_t agcount = 0; + uint32_t rem; + + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + xfs_warn(mp, "bad magic number"); + return -EWRONGFS; + } + + if (!xfs_sb_good_version(sbp)) { + xfs_warn(mp, "bad version"); + return -EWRONGFS; } if (xfs_sb_version_has_pquotino(sbp)) { @@ -321,7 +387,12 @@ xfs_mount_validate_sb( return -EFBIG; } - if (check_inprogress && sbp->sb_inprogress) { + /* + * Don't touch the filesystem if a user tool thinks it owns the primary + * superblock. mkfs doesn't clear the flag from secondary supers, so + * we don't check them at all. + */ + if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) { xfs_warn(mp, "Offline file system operation in progress!"); return -EFSCORRUPTED; } @@ -596,29 +667,6 @@ xfs_sb_to_disk( } } -static int -xfs_sb_verify( - struct xfs_buf *bp, - bool check_version) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_sb sb; - - /* - * Use call variant which doesn't convert quota flags from disk - * format, because xfs_mount_validate_sb checks the on-disk flags. - */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); - - /* - * Only check the in progress field for the primary superblock as - * mkfs.xfs doesn't clear it from secondary superblocks. - */ - return xfs_mount_validate_sb(mp, &sb, - bp->b_maps[0].bm_bn == XFS_SB_DADDR, - check_version); -} - /* * If the superblock has the CRC feature bit set or the CRC field is non-null, * check that the CRC is valid. We check the CRC field is non-null because a @@ -633,11 +681,12 @@ xfs_sb_verify( */ static void xfs_sb_read_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - int error; + struct xfs_sb sb; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + int error; /* * open code the version check to avoid needing to convert the entire @@ -657,7 +706,16 @@ xfs_sb_read_verify( } } } - error = xfs_sb_verify(bp, true); + + /* + * Check all the superblock fields. Don't byteswap the xquota flags + * because _verify_common checks the on-disk values. + */ + __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + error = xfs_validate_sb_common(mp, bp, &sb); + if (error) + goto out_error; + error = xfs_validate_sb_read(mp, &sb); out_error: if (error == -EFSCORRUPTED || error == -EFSBADCRC) @@ -691,15 +749,22 @@ static void xfs_sb_write_verify( struct xfs_buf *bp) { + struct xfs_sb sb; struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_log_item; int error; - error = xfs_sb_verify(bp, false); - if (error) { - xfs_verifier_error(bp, error, __this_address); - return; - } + /* + * Check all the superblock fields. Don't byteswap the xquota flags + * because _verify_common checks the on-disk values. + */ + __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + error = xfs_validate_sb_common(mp, bp, &sb); + if (error) + goto out_error; + error = xfs_validate_sb_write(mp, bp, &sb); + if (error) + goto out_error; if (!xfs_sb_version_hascrc(&mp->m_sb)) return; @@ -708,6 +773,10 @@ xfs_sb_write_verify( XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); + return; + +out_error: + xfs_verifier_error(bp, error, __this_address); } const struct xfs_buf_ops xfs_sb_buf_ops = { @@ -804,6 +873,7 @@ xfs_initialize_perag_data( uint64_t bfree = 0; uint64_t bfreelst = 0; uint64_t btree = 0; + uint64_t fdblocks; int error; for (index = 0; index < agcount; index++) { @@ -827,17 +897,31 @@ xfs_initialize_perag_data( btree += pag->pagf_btreeblks; xfs_perag_put(pag); } + fdblocks = bfree + bfreelst + btree; + + /* + * If the new summary counts are obviously incorrect, fail the + * mount operation because that implies the AGFs are also corrupt. + * Clear BAD_SUMMARY so that we don't unmount with a dirty log, which + * will prevent xfs_repair from fixing anything. + */ + if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { + xfs_alert(mp, "AGF corruption. Please run xfs_repair."); + error = -EFSCORRUPTED; + goto out; + } /* Overwrite incore superblock counters with just-read data */ spin_lock(&mp->m_sb_lock); sbp->sb_ifree = ifree; sbp->sb_icount = ialloc; - sbp->sb_fdblocks = bfree + bfreelst + btree; + sbp->sb_fdblocks = fdblocks; spin_unlock(&mp->m_sb_lock); xfs_reinit_percpu_counters(mp); - - return 0; +out: + mp->m_flags &= ~XFS_MOUNT_BAD_SUMMARY; + return error; } /* diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 22089f1c880a..1c5debe748f0 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -64,6 +64,18 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ #define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */ +/* + * LOWMODE is used by the allocator to activate the lowspace algorithm - when + * free space is running low the extent allocator may choose to allocate an + * extent from an AG without leaving sufficient space for a btree split when + * inserting the new extent. In this case the allocator will enable the + * lowspace algorithm which is supposed to allow further allocations (such as + * btree splits and newroots) to allocate from sequential AGs. In order to + * avoid locking AGs out of order the lowspace algorithm will start searching + * for free space from AG 0. If the correct transaction reservations have been + * made then this algorithm will eventually find all the space it needs. + */ +#define XFS_TRANS_LOWMODE 0x100 /* allocate in low space mode */ /* * Field values for xfs_trans_mod_sb. diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 2e2a243cef2e..33a5ca346baf 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -171,3 +171,37 @@ xfs_verify_rtbno( { return rtbno < mp->m_sb.sb_rblocks; } + +/* Calculate the range of valid icount values. */ +static void +xfs_icount_range( + struct xfs_mount *mp, + unsigned long long *min, + unsigned long long *max) +{ + unsigned long long nr_inos = 0; + xfs_agnumber_t agno; + + /* root, rtbitmap, rtsum all live in the first chunk */ + *min = XFS_INODES_PER_CHUNK; + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + xfs_agino_t first, last; + + xfs_agino_range(mp, agno, &first, &last); + nr_inos += last - first + 1; + } + *max = nr_inos; +} + +/* Sanity-checking of inode counts. */ +bool +xfs_verify_icount( + struct xfs_mount *mp, + unsigned long long icount) +{ + unsigned long long min, max; + + xfs_icount_range(mp, &min, &max); + return icount >= min && icount <= max; +} diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 4055d62f690c..b9e6c89284c3 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -165,5 +165,6 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 9bb0745f1ad2..3068a9382feb 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -28,30 +28,30 @@ /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_superblock_xref( - struct xfs_scrub_context *sc, - struct xfs_buf *bp) +xchk_superblock_xref( + struct xfs_scrub *sc, + struct xfs_buf *bp) { - struct xfs_owner_info oinfo; - struct xfs_mount *mp = sc->mp; - xfs_agnumber_t agno = sc->sm->sm_agno; - xfs_agblock_t agbno; - int error; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sm->sm_agno; + xfs_agblock_t agbno; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; agbno = XFS_SB_BLOCK(mp); - error = xfs_scrub_ag_init(sc, agno, &sc->sa); - if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + error = xchk_ag_init(sc, agno, &sc->sa); + if (!xchk_xref_process_error(sc, agno, agbno, &error)) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_xref_is_not_inode_chunk(sc, agbno, 1); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xchk_xref_is_owned_by(sc, agbno, 1, &oinfo); + xchk_xref_is_not_shared(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } @@ -65,17 +65,17 @@ xfs_scrub_superblock_xref( * sb 0 is ok and we can use its information to check everything else. */ int -xfs_scrub_superblock( - struct xfs_scrub_context *sc) +xchk_superblock( + struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp; - struct xfs_dsb *sb; - xfs_agnumber_t agno; - uint32_t v2_ok; - __be32 features_mask; - int error; - __be16 vernum_mask; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + struct xfs_dsb *sb; + xfs_agnumber_t agno; + uint32_t v2_ok; + __be32 features_mask; + int error; + __be16 vernum_mask; agno = sc->sm->sm_agno; if (agno == 0) @@ -98,7 +98,7 @@ xfs_scrub_superblock( default: break; } - if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) + if (!xchk_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) return error; sb = XFS_BUF_TO_SBP(bp); @@ -110,46 +110,46 @@ xfs_scrub_superblock( * checked. */ if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check sb_versionnum bits that are set at mkfs time. */ vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS | @@ -163,7 +163,7 @@ xfs_scrub_superblock( XFS_SB_VERSION_DIRV2BIT); if ((sb->sb_versionnum & vernum_mask) != (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check sb_versionnum bits that can be set after mkfs time. */ vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT | @@ -171,40 +171,40 @@ xfs_scrub_superblock( XFS_SB_VERSION_QUOTABIT); if ((sb->sb_versionnum & vernum_mask) != (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname))) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_blocklog != mp->m_sb.sb_blocklog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_sectlog != mp->m_sb.sb_sectlog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_inodelog != mp->m_sb.sb_inodelog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_inopblog != mp->m_sb.sb_inopblog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_agblklog != mp->m_sb.sb_agblklog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_rextslog != mp->m_sb.sb_rextslog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); /* * Skip the summary counters since we track them in memory anyway. @@ -212,10 +212,10 @@ xfs_scrub_superblock( */ if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); /* * Skip the quota flags since repair will force quotacheck. @@ -223,46 +223,46 @@ xfs_scrub_superblock( */ if (sb->sb_flags != mp->m_sb.sb_flags) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Do we see any invalid bits in sb_features2? */ if (!xfs_sb_version_hasmorebits(&mp->m_sb)) { if (sb->sb_features2 != 0) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); } else { v2_ok = XFS_SB_VERSION2_OKBITS; if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5) v2_ok |= XFS_SB_VERSION2_CRCBIT; if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok))) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_features2 != sb->sb_bad_features2) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); } /* Check sb_features2 flags that are set at mkfs time. */ @@ -272,26 +272,26 @@ xfs_scrub_superblock( XFS_SB_VERSION2_FTYPE); if ((sb->sb_features2 & features_mask) != (cpu_to_be32(mp->m_sb.sb_features2) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check sb_features2 flags that can be set after mkfs time. */ features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT); if ((sb->sb_features2 & features_mask) != (cpu_to_be32(mp->m_sb.sb_features2) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (!xfs_sb_version_hascrc(&mp->m_sb)) { /* all v5 fields must be zero */ if (memchr_inv(&sb->sb_features_compat, 0, sizeof(struct xfs_dsb) - offsetof(struct xfs_dsb, sb_features_compat))) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); } else { /* Check compat flags; all are set at mkfs time. */ features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN); if ((sb->sb_features_compat & features_mask) != (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check ro compat flags; all are set at mkfs time. */ features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN | @@ -301,7 +301,7 @@ xfs_scrub_superblock( if ((sb->sb_features_ro_compat & features_mask) != (cpu_to_be32(mp->m_sb.sb_features_ro_compat) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check incompat flags; all are set at mkfs time. */ features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN | @@ -311,22 +311,22 @@ xfs_scrub_superblock( if ((sb->sb_features_incompat & features_mask) != (cpu_to_be32(mp->m_sb.sb_features_incompat) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Check log incompat flags; all are set at mkfs time. */ features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN); if ((sb->sb_features_log_incompat & features_mask) != (cpu_to_be32(mp->m_sb.sb_features_log_incompat) & features_mask)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); /* Don't care about sb_crc */ if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino)) - xfs_scrub_block_set_preen(sc, bp); + xchk_block_set_preen(sc, bp); /* Don't care about sb_lsn */ } @@ -334,15 +334,15 @@ xfs_scrub_superblock( if (xfs_sb_version_hasmetauuid(&mp->m_sb)) { /* The metadata UUID must be the same for all supers */ if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid)) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); } /* Everything else must be zero. */ if (memchr_inv(sb + 1, 0, BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); - xfs_scrub_superblock_xref(sc, bp); + xchk_superblock_xref(sc, bp); return error; } @@ -351,7 +351,7 @@ xfs_scrub_superblock( /* Tally freespace record lengths. */ STATIC int -xfs_scrub_agf_record_bno_lengths( +xchk_agf_record_bno_lengths( struct xfs_btree_cur *cur, struct xfs_alloc_rec_incore *rec, void *priv) @@ -364,75 +364,75 @@ xfs_scrub_agf_record_bno_lengths( /* Check agf_freeblks */ static inline void -xfs_scrub_agf_xref_freeblks( - struct xfs_scrub_context *sc) +xchk_agf_xref_freeblks( + struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); - xfs_extlen_t blocks = 0; - int error; + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_extlen_t blocks = 0; + int error; if (!sc->sa.bno_cur) return; error = xfs_alloc_query_all(sc->sa.bno_cur, - xfs_scrub_agf_record_bno_lengths, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + xchk_agf_record_bno_lengths, &blocks); + if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur)) return; if (blocks != be32_to_cpu(agf->agf_freeblks)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); } /* Cross reference the AGF with the cntbt (freespace by length btree) */ static inline void -xfs_scrub_agf_xref_cntbt( - struct xfs_scrub_context *sc) +xchk_agf_xref_cntbt( + struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); - xfs_agblock_t agbno; - xfs_extlen_t blocks; - int have; - int error; + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t agbno; + xfs_extlen_t blocks; + int have; + int error; if (!sc->sa.cnt_cur) return; /* Any freespace at all? */ error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur)) return; if (!have) { if (agf->agf_freeblks != be32_to_cpu(0)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); return; } /* Check agf_longest */ error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur)) return; if (!have || blocks != be32_to_cpu(agf->agf_longest)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); } /* Check the btree block counts in the AGF against the btrees. */ STATIC void -xfs_scrub_agf_xref_btreeblks( - struct xfs_scrub_context *sc) +xchk_agf_xref_btreeblks( + struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); - struct xfs_mount *mp = sc->mp; - xfs_agblock_t blocks; - xfs_agblock_t btreeblks; - int error; + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t blocks; + xfs_agblock_t btreeblks; + int error; /* Check agf_rmap_blocks; set up for agf_btreeblks check */ if (sc->sa.rmap_cur) { error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; btreeblks = blocks - 1; if (blocks != be32_to_cpu(agf->agf_rmap_blocks)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); } else { btreeblks = 0; } @@ -447,136 +447,136 @@ xfs_scrub_agf_xref_btreeblks( /* Check agf_btreeblks */ error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur)) return; btreeblks += blocks - 1; error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur)) return; btreeblks += blocks - 1; if (btreeblks != be32_to_cpu(agf->agf_btreeblks)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); } /* Check agf_refcount_blocks against tree size */ static inline void -xfs_scrub_agf_xref_refcblks( - struct xfs_scrub_context *sc) +xchk_agf_xref_refcblks( + struct xfs_scrub *sc) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); - xfs_agblock_t blocks; - int error; + struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); + xfs_agblock_t blocks; + int error; if (!sc->sa.refc_cur) return; error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (blocks != be32_to_cpu(agf->agf_refcount_blocks)) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_agf_xref( - struct xfs_scrub_context *sc) +xchk_agf_xref( + struct xfs_scrub *sc) { - struct xfs_owner_info oinfo; - struct xfs_mount *mp = sc->mp; - xfs_agblock_t agbno; - int error; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; agbno = XFS_AGF_BLOCK(mp); - error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + error = xchk_ag_btcur_init(sc, &sc->sa); if (error) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_agf_xref_freeblks(sc); - xfs_scrub_agf_xref_cntbt(sc); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_agf_xref_freeblks(sc); + xchk_agf_xref_cntbt(sc); + xchk_xref_is_not_inode_chunk(sc, agbno, 1); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); - xfs_scrub_agf_xref_btreeblks(sc); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); - xfs_scrub_agf_xref_refcblks(sc); + xchk_xref_is_owned_by(sc, agbno, 1, &oinfo); + xchk_agf_xref_btreeblks(sc); + xchk_xref_is_not_shared(sc, agbno, 1); + xchk_agf_xref_refcblks(sc); /* scrub teardown will take care of sc->sa for us */ } /* Scrub the AGF. */ int -xfs_scrub_agf( - struct xfs_scrub_context *sc) +xchk_agf( + struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; - struct xfs_agf *agf; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_agblock_t eoag; - xfs_agblock_t agfl_first; - xfs_agblock_t agfl_last; - xfs_agblock_t agfl_count; - xfs_agblock_t fl_count; - int level; - int error = 0; + struct xfs_mount *mp = sc->mp; + struct xfs_agf *agf; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_agblock_t eoag; + xfs_agblock_t agfl_first; + xfs_agblock_t agfl_last; + xfs_agblock_t agfl_count; + xfs_agblock_t fl_count; + int level; + int error = 0; agno = sc->sa.agno = sc->sm->sm_agno; - error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp, &sc->sa.agf_bp, &sc->sa.agfl_bp); - if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) + if (!xchk_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) goto out; - xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp); + xchk_buffer_recheck(sc, sc->sa.agf_bp); agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); /* Check the AG length */ eoag = be32_to_cpu(agf->agf_length); if (eoag != xfs_ag_block_count(mp, agno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); /* Check the AGF btree roots and levels */ agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); } if (xfs_sb_version_hasreflink(&mp->m_sb)) { agbno = be32_to_cpu(agf->agf_refcount_root); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_refcount_level); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); } /* Check the AGFL counters */ @@ -588,57 +588,57 @@ xfs_scrub_agf( else fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1; if (agfl_count != 0 && fl_count != agfl_count) - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); - xfs_scrub_agf_xref(sc); + xchk_agf_xref(sc); out: return error; } /* AGFL */ -struct xfs_scrub_agfl_info { - struct xfs_owner_info oinfo; - unsigned int sz_entries; - unsigned int nr_entries; - xfs_agblock_t *entries; - struct xfs_scrub_context *sc; +struct xchk_agfl_info { + struct xfs_owner_info oinfo; + unsigned int sz_entries; + unsigned int nr_entries; + xfs_agblock_t *entries; + struct xfs_scrub *sc; }; /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_agfl_block_xref( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - struct xfs_owner_info *oinfo) +xchk_agfl_block_xref( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + struct xfs_owner_info *oinfo) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_xref_is_not_inode_chunk(sc, agbno, 1); + xchk_xref_is_owned_by(sc, agbno, 1, oinfo); + xchk_xref_is_not_shared(sc, agbno, 1); } /* Scrub an AGFL block. */ STATIC int -xfs_scrub_agfl_block( - struct xfs_mount *mp, - xfs_agblock_t agbno, - void *priv) +xchk_agfl_block( + struct xfs_mount *mp, + xfs_agblock_t agbno, + void *priv) { - struct xfs_scrub_agfl_info *sai = priv; - struct xfs_scrub_context *sc = sai->sc; - xfs_agnumber_t agno = sc->sa.agno; + struct xchk_agfl_info *sai = priv; + struct xfs_scrub *sc = sai->sc; + xfs_agnumber_t agno = sc->sa.agno; if (xfs_verify_agbno(mp, agno, agbno) && sai->nr_entries < sai->sz_entries) sai->entries[sai->nr_entries++] = agbno; else - xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); + xchk_block_set_corrupt(sc, sc->sa.agfl_bp); - xfs_scrub_agfl_block_xref(sc, agbno, priv); + xchk_agfl_block_xref(sc, agbno, priv); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return XFS_BTREE_QUERY_RANGE_ABORT; @@ -647,7 +647,7 @@ xfs_scrub_agfl_block( } static int -xfs_scrub_agblock_cmp( +xchk_agblock_cmp( const void *pa, const void *pb) { @@ -659,28 +659,28 @@ xfs_scrub_agblock_cmp( /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_agfl_xref( - struct xfs_scrub_context *sc) +xchk_agfl_xref( + struct xfs_scrub *sc) { - struct xfs_owner_info oinfo; - struct xfs_mount *mp = sc->mp; - xfs_agblock_t agbno; - int error; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; agbno = XFS_AGFL_BLOCK(mp); - error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + error = xchk_ag_btcur_init(sc, &sc->sa); if (error) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_xref_is_not_inode_chunk(sc, agbno, 1); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xchk_xref_is_owned_by(sc, agbno, 1, &oinfo); + xchk_xref_is_not_shared(sc, agbno, 1); /* * Scrub teardown will take care of sc->sa for us. Leave sc->sa @@ -690,26 +690,26 @@ xfs_scrub_agfl_xref( /* Scrub the AGFL. */ int -xfs_scrub_agfl( - struct xfs_scrub_context *sc) +xchk_agfl( + struct xfs_scrub *sc) { - struct xfs_scrub_agfl_info sai; - struct xfs_agf *agf; - xfs_agnumber_t agno; - unsigned int agflcount; - unsigned int i; - int error; + struct xchk_agfl_info sai; + struct xfs_agf *agf; + xfs_agnumber_t agno; + unsigned int agflcount; + unsigned int i; + int error; agno = sc->sa.agno = sc->sm->sm_agno; - error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp, &sc->sa.agf_bp, &sc->sa.agfl_bp); - if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) + if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error)) goto out; if (!sc->sa.agf_bp) return -EFSCORRUPTED; - xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp); + xchk_buffer_recheck(sc, sc->sa.agfl_bp); - xfs_scrub_agfl_xref(sc); + xchk_agfl_xref(sc); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -718,7 +718,7 @@ xfs_scrub_agfl( agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); agflcount = be32_to_cpu(agf->agf_flcount); if (agflcount > xfs_agfl_size(sc->mp)) { - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } memset(&sai, 0, sizeof(sai)); @@ -734,7 +734,7 @@ xfs_scrub_agfl( /* Check the blocks in the AGFL. */ xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG); error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), - sc->sa.agfl_bp, xfs_scrub_agfl_block, &sai); + sc->sa.agfl_bp, xchk_agfl_block, &sai); if (error == XFS_BTREE_QUERY_RANGE_ABORT) { error = 0; goto out_free; @@ -743,16 +743,16 @@ xfs_scrub_agfl( goto out_free; if (agflcount != sai.nr_entries) { - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); goto out_free; } /* Sort entries, check for duplicates. */ sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]), - xfs_scrub_agblock_cmp, NULL); + xchk_agblock_cmp, NULL); for (i = 1; i < sai.nr_entries; i++) { if (sai.entries[i] == sai.entries[i - 1]) { - xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); + xchk_block_set_corrupt(sc, sc->sa.agf_bp); break; } } @@ -767,103 +767,103 @@ out: /* Check agi_count/agi_freecount */ static inline void -xfs_scrub_agi_xref_icounts( - struct xfs_scrub_context *sc) +xchk_agi_xref_icounts( + struct xfs_scrub *sc) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); - xfs_agino_t icount; - xfs_agino_t freecount; - int error; + struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); + xfs_agino_t icount; + xfs_agino_t freecount; + int error; if (!sc->sa.ino_cur) return; error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur)) return; if (be32_to_cpu(agi->agi_count) != icount || be32_to_cpu(agi->agi_freecount) != freecount) - xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_agi_xref( - struct xfs_scrub_context *sc) +xchk_agi_xref( + struct xfs_scrub *sc) { - struct xfs_owner_info oinfo; - struct xfs_mount *mp = sc->mp; - xfs_agblock_t agbno; - int error; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + xfs_agblock_t agbno; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; agbno = XFS_AGI_BLOCK(mp); - error = xfs_scrub_ag_btcur_init(sc, &sc->sa); + error = xchk_ag_btcur_init(sc, &sc->sa); if (error) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1); - xfs_scrub_agi_xref_icounts(sc); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_xref_is_not_inode_chunk(sc, agbno, 1); + xchk_agi_xref_icounts(sc); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); + xchk_xref_is_owned_by(sc, agbno, 1, &oinfo); + xchk_xref_is_not_shared(sc, agbno, 1); /* scrub teardown will take care of sc->sa for us */ } /* Scrub the AGI. */ int -xfs_scrub_agi( - struct xfs_scrub_context *sc) +xchk_agi( + struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; - struct xfs_agi *agi; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_agblock_t eoag; - xfs_agino_t agino; - xfs_agino_t first_agino; - xfs_agino_t last_agino; - xfs_agino_t icount; - int i; - int level; - int error = 0; + struct xfs_mount *mp = sc->mp; + struct xfs_agi *agi; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_agblock_t eoag; + xfs_agino_t agino; + xfs_agino_t first_agino; + xfs_agino_t last_agino; + xfs_agino_t icount; + int i; + int level; + int error = 0; agno = sc->sa.agno = sc->sm->sm_agno; - error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp, + error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp, &sc->sa.agf_bp, &sc->sa.agfl_bp); - if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) + if (!xchk_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) goto out; - xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp); + xchk_buffer_recheck(sc, sc->sa.agi_bp); agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); /* Check the AG length */ eoag = be32_to_cpu(agi->agi_length); if (eoag != xfs_ag_block_count(mp, agno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Check btree roots and levels */ agbno = be32_to_cpu(agi->agi_root); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_level); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); if (xfs_sb_version_hasfinobt(&mp->m_sb)) { agbno = be32_to_cpu(agi->agi_free_root); if (!xfs_verify_agbno(mp, agno, agbno)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_free_level); if (level <= 0 || level > XFS_BTREE_MAXLEVELS) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); } /* Check inode counters */ @@ -871,16 +871,16 @@ xfs_scrub_agi( icount = be32_to_cpu(agi->agi_count); if (icount > last_agino - first_agino + 1 || icount < be32_to_cpu(agi->agi_freecount)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Check inode pointers */ agino = be32_to_cpu(agi->agi_newino); if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); agino = be32_to_cpu(agi->agi_dirino); if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Check unlinked inode buckets */ for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { @@ -888,13 +888,13 @@ xfs_scrub_agi( if (agino == NULLAGINO) continue; if (!xfs_verify_agino(mp, agno, agino)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); } if (agi->agi_pad32 != cpu_to_be32(0)) - xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); + xchk_block_set_corrupt(sc, sc->sa.agi_bp); - xfs_scrub_agi_xref(sc); + xchk_agi_xref(sc); out: return error; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 117eedac53df..f7568a4b5fe5 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -17,24 +17,31 @@ #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_alloc.h" +#include "xfs_alloc_btree.h" #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" #include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/bitmap.h" /* Superblock */ /* Repair the superblock. */ int -xfs_repair_superblock( - struct xfs_scrub_context *sc) +xrep_superblock( + struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp; - xfs_agnumber_t agno; - int error; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + xfs_agnumber_t agno; + int error; /* Don't try to repair AG 0's sb; let xfs_repair deal with it. */ agno = sc->sm->sm_agno; @@ -54,3 +61,873 @@ xfs_repair_superblock( xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1); return error; } + +/* AGF */ + +struct xrep_agf_allocbt { + struct xfs_scrub *sc; + xfs_agblock_t freeblks; + xfs_agblock_t longest; +}; + +/* Record free space shape information. */ +STATIC int +xrep_agf_walk_allocbt( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + struct xrep_agf_allocbt *raa = priv; + int error = 0; + + if (xchk_should_terminate(raa->sc, &error)) + return error; + + raa->freeblks += rec->ar_blockcount; + if (rec->ar_blockcount > raa->longest) + raa->longest = rec->ar_blockcount; + return error; +} + +/* Does this AGFL block look sane? */ +STATIC int +xrep_agf_check_agfl_block( + struct xfs_mount *mp, + xfs_agblock_t agbno, + void *priv) +{ + struct xfs_scrub *sc = priv; + + if (!xfs_verify_agbno(mp, sc->sa.agno, agbno)) + return -EFSCORRUPTED; + return 0; +} + +/* + * Offset within the xrep_find_ag_btree array for each btree type. Avoid the + * XFS_BTNUM_ names here to avoid creating a sparse array. + */ +enum { + XREP_AGF_BNOBT = 0, + XREP_AGF_CNTBT, + XREP_AGF_RMAPBT, + XREP_AGF_REFCOUNTBT, + XREP_AGF_END, + XREP_AGF_MAX +}; + +/* Check a btree root candidate. */ +static inline bool +xrep_check_btree_root( + struct xfs_scrub *sc, + struct xrep_find_ag_btree *fab) +{ + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sm->sm_agno; + + return xfs_verify_agbno(mp, agno, fab->root) && + fab->height <= XFS_BTREE_MAXLEVELS; +} + +/* + * Given the btree roots described by *fab, find the roots, check them for + * sanity, and pass the root data back out via *fab. + * + * This is /also/ a chicken and egg problem because we have to use the rmapbt + * (rooted in the AGF) to find the btrees rooted in the AGF. We also have no + * idea if the btrees make any sense. If we hit obvious corruptions in those + * btrees we'll bail out. + */ +STATIC int +xrep_agf_find_btrees( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp, + struct xrep_find_ag_btree *fab, + struct xfs_buf *agfl_bp) +{ + struct xfs_agf *old_agf = XFS_BUF_TO_AGF(agf_bp); + int error; + + /* Go find the root data. */ + error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp); + if (error) + return error; + + /* We must find the bnobt, cntbt, and rmapbt roots. */ + if (!xrep_check_btree_root(sc, &fab[XREP_AGF_BNOBT]) || + !xrep_check_btree_root(sc, &fab[XREP_AGF_CNTBT]) || + !xrep_check_btree_root(sc, &fab[XREP_AGF_RMAPBT])) + return -EFSCORRUPTED; + + /* + * We relied on the rmapbt to reconstruct the AGF. If we get a + * different root then something's seriously wrong. + */ + if (fab[XREP_AGF_RMAPBT].root != + be32_to_cpu(old_agf->agf_roots[XFS_BTNUM_RMAPi])) + return -EFSCORRUPTED; + + /* We must find the refcountbt root if that feature is enabled. */ + if (xfs_sb_version_hasreflink(&sc->mp->m_sb) && + !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT])) + return -EFSCORRUPTED; + + return 0; +} + +/* + * Reinitialize the AGF header, making an in-core copy of the old contents so + * that we know which in-core state needs to be reinitialized. + */ +STATIC void +xrep_agf_init_header( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp, + struct xfs_agf *old_agf) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + + memcpy(old_agf, agf, sizeof(*old_agf)); + memset(agf, 0, BBTOB(agf_bp->b_length)); + agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); + agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); + agf->agf_seqno = cpu_to_be32(sc->sa.agno); + agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno)); + agf->agf_flfirst = old_agf->agf_flfirst; + agf->agf_fllast = old_agf->agf_fllast; + agf->agf_flcount = old_agf->agf_flcount; + if (xfs_sb_version_hascrc(&mp->m_sb)) + uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); + + /* Mark the incore AGF data stale until we're done fixing things. */ + ASSERT(sc->sa.pag->pagf_init); + sc->sa.pag->pagf_init = 0; +} + +/* Set btree root information in an AGF. */ +STATIC void +xrep_agf_set_roots( + struct xfs_scrub *sc, + struct xfs_agf *agf, + struct xrep_find_ag_btree *fab) +{ + agf->agf_roots[XFS_BTNUM_BNOi] = + cpu_to_be32(fab[XREP_AGF_BNOBT].root); + agf->agf_levels[XFS_BTNUM_BNOi] = + cpu_to_be32(fab[XREP_AGF_BNOBT].height); + + agf->agf_roots[XFS_BTNUM_CNTi] = + cpu_to_be32(fab[XREP_AGF_CNTBT].root); + agf->agf_levels[XFS_BTNUM_CNTi] = + cpu_to_be32(fab[XREP_AGF_CNTBT].height); + + agf->agf_roots[XFS_BTNUM_RMAPi] = + cpu_to_be32(fab[XREP_AGF_RMAPBT].root); + agf->agf_levels[XFS_BTNUM_RMAPi] = + cpu_to_be32(fab[XREP_AGF_RMAPBT].height); + + if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { + agf->agf_refcount_root = + cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root); + agf->agf_refcount_level = + cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].height); + } +} + +/* Update all AGF fields which derive from btree contents. */ +STATIC int +xrep_agf_calc_from_btrees( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp) +{ + struct xrep_agf_allocbt raa = { .sc = sc }; + struct xfs_btree_cur *cur = NULL; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + struct xfs_mount *mp = sc->mp; + xfs_agblock_t btreeblks; + xfs_agblock_t blocks; + int error; + + /* Update the AGF counters from the bnobt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_BNO); + error = xfs_alloc_query_all(cur, xrep_agf_walk_allocbt, &raa); + if (error) + goto err; + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + btreeblks = blocks - 1; + agf->agf_freeblks = cpu_to_be32(raa.freeblks); + agf->agf_longest = cpu_to_be32(raa.longest); + + /* Update the AGF counters from the cntbt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_CNT); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + btreeblks += blocks - 1; + + /* Update the AGF counters from the rmapbt. */ + cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + agf->agf_rmap_blocks = cpu_to_be32(blocks); + btreeblks += blocks - 1; + + agf->agf_btreeblks = cpu_to_be32(btreeblks); + + /* Update the AGF counters from the refcountbt. */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp, + sc->sa.agno); + error = xfs_btree_count_blocks(cur, &blocks); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + agf->agf_refcount_blocks = cpu_to_be32(blocks); + } + + return 0; +err: + xfs_btree_del_cursor(cur, error); + return error; +} + +/* Commit the new AGF and reinitialize the incore state. */ +STATIC int +xrep_agf_commit_new( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp) +{ + struct xfs_perag *pag; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + + /* Trigger fdblocks recalculation */ + xfs_force_summary_recalc(sc->mp); + + /* Write this to disk. */ + xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF); + xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1); + + /* Now reinitialize the in-core counters we changed. */ + pag = sc->sa.pag; + pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); + pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); + pag->pagf_longest = be32_to_cpu(agf->agf_longest); + pag->pagf_levels[XFS_BTNUM_BNOi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); + pag->pagf_levels[XFS_BTNUM_CNTi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); + pag->pagf_levels[XFS_BTNUM_RMAPi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); + pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); + pag->pagf_init = 1; + + return 0; +} + +/* Repair the AGF. v5 filesystems only. */ +int +xrep_agf( + struct xfs_scrub *sc) +{ + struct xrep_find_ag_btree fab[XREP_AGF_MAX] = { + [XREP_AGF_BNOBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_allocbt_buf_ops, + .magic = XFS_ABTB_CRC_MAGIC, + }, + [XREP_AGF_CNTBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_allocbt_buf_ops, + .magic = XFS_ABTC_CRC_MAGIC, + }, + [XREP_AGF_RMAPBT] = { + .rmap_owner = XFS_RMAP_OWN_AG, + .buf_ops = &xfs_rmapbt_buf_ops, + .magic = XFS_RMAP_CRC_MAGIC, + }, + [XREP_AGF_REFCOUNTBT] = { + .rmap_owner = XFS_RMAP_OWN_REFC, + .buf_ops = &xfs_refcountbt_buf_ops, + .magic = XFS_REFC_CRC_MAGIC, + }, + [XREP_AGF_END] = { + .buf_ops = NULL, + }, + }; + struct xfs_agf old_agf; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + struct xfs_agf *agf; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return -EOPNOTSUPP; + + xchk_perag_get(sc->mp, &sc->sa); + /* + * Make sure we have the AGF buffer, as scrub might have decided it + * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED. + */ + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL); + if (error) + return error; + agf_bp->b_ops = &xfs_agf_buf_ops; + agf = XFS_BUF_TO_AGF(agf_bp); + + /* + * Load the AGFL so that we can screen out OWN_AG blocks that are on + * the AGFL now; these blocks might have once been part of the + * bno/cnt/rmap btrees but are not now. This is a chicken and egg + * problem: the AGF is corrupt, so we have to trust the AGFL contents + * because we can't do any serious cross-referencing with any of the + * btrees rooted in the AGF. If the AGFL contents are obviously bad + * then we'll bail out. + */ + error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp); + if (error) + return error; + + /* + * Spot-check the AGFL blocks; if they're obviously corrupt then + * there's nothing we can do but bail out. + */ + error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp, + xrep_agf_check_agfl_block, sc); + if (error) + return error; + + /* + * Find the AGF btree roots. This is also a chicken-and-egg situation; + * see the function for more details. + */ + error = xrep_agf_find_btrees(sc, agf_bp, fab, agfl_bp); + if (error) + return error; + + /* Start rewriting the header and implant the btrees we found. */ + xrep_agf_init_header(sc, agf_bp, &old_agf); + xrep_agf_set_roots(sc, agf, fab); + error = xrep_agf_calc_from_btrees(sc, agf_bp); + if (error) + goto out_revert; + + /* Commit the changes and reinitialize incore state. */ + return xrep_agf_commit_new(sc, agf_bp); + +out_revert: + /* Mark the incore AGF state stale and revert the AGF. */ + sc->sa.pag->pagf_init = 0; + memcpy(agf, &old_agf, sizeof(old_agf)); + return error; +} + +/* AGFL */ + +struct xrep_agfl { + /* Bitmap of other OWN_AG metadata blocks. */ + struct xfs_bitmap agmetablocks; + + /* Bitmap of free space. */ + struct xfs_bitmap *freesp; + + struct xfs_scrub *sc; +}; + +/* Record all OWN_AG (free space btree) information from the rmap data. */ +STATIC int +xrep_agfl_walk_rmap( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xrep_agfl *ra = priv; + xfs_fsblock_t fsb; + int error = 0; + + if (xchk_should_terminate(ra->sc, &error)) + return error; + + /* Record all the OWN_AG blocks. */ + if (rec->rm_owner == XFS_RMAP_OWN_AG) { + fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + rec->rm_startblock); + error = xfs_bitmap_set(ra->freesp, fsb, rec->rm_blockcount); + if (error) + return error; + } + + return xfs_bitmap_set_btcur_path(&ra->agmetablocks, cur); +} + +/* + * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce + * which blocks belong to the AGFL. + * + * Compute the set of old AGFL blocks by subtracting from the list of OWN_AG + * blocks the list of blocks owned by all other OWN_AG metadata (bnobt, cntbt, + * rmapbt). These are the old AGFL blocks, so return that list and the number + * of blocks we're actually going to put back on the AGFL. + */ +STATIC int +xrep_agfl_collect_blocks( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp, + struct xfs_bitmap *agfl_extents, + xfs_agblock_t *flcount) +{ + struct xrep_agfl ra; + struct xfs_mount *mp = sc->mp; + struct xfs_btree_cur *cur; + struct xfs_bitmap_range *br; + struct xfs_bitmap_range *n; + int error; + + ra.sc = sc; + ra.freesp = agfl_extents; + xfs_bitmap_init(&ra.agmetablocks); + + /* Find all space used by the free space btrees & rmapbt. */ + cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); + error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + + /* Find all blocks currently being used by the bnobt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_BNO); + error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + + /* Find all blocks currently being used by the cntbt. */ + cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno, + XFS_BTNUM_CNT); + error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur); + if (error) + goto err; + + xfs_btree_del_cursor(cur, error); + + /* + * Drop the freesp meta blocks that are in use by btrees. + * The remaining blocks /should/ be AGFL blocks. + */ + error = xfs_bitmap_disunion(agfl_extents, &ra.agmetablocks); + xfs_bitmap_destroy(&ra.agmetablocks); + if (error) + return error; + + /* + * Calculate the new AGFL size. If we found more blocks than fit in + * the AGFL we'll free them later. + */ + *flcount = 0; + for_each_xfs_bitmap_extent(br, n, agfl_extents) { + *flcount += br->len; + if (*flcount > xfs_agfl_size(mp)) + break; + } + if (*flcount > xfs_agfl_size(mp)) + *flcount = xfs_agfl_size(mp); + return 0; + +err: + xfs_bitmap_destroy(&ra.agmetablocks); + xfs_btree_del_cursor(cur, error); + return error; +} + +/* Update the AGF and reset the in-core state. */ +STATIC void +xrep_agfl_update_agf( + struct xfs_scrub *sc, + struct xfs_buf *agf_bp, + xfs_agblock_t flcount) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agf_bp); + + ASSERT(flcount <= xfs_agfl_size(sc->mp)); + + /* Trigger fdblocks recalculation */ + xfs_force_summary_recalc(sc->mp); + + /* Update the AGF counters. */ + if (sc->sa.pag->pagf_init) + sc->sa.pag->pagf_flcount = flcount; + agf->agf_flfirst = cpu_to_be32(0); + agf->agf_flcount = cpu_to_be32(flcount); + agf->agf_fllast = cpu_to_be32(flcount - 1); + + xfs_alloc_log_agf(sc->tp, agf_bp, + XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); +} + +/* Write out a totally new AGFL. */ +STATIC void +xrep_agfl_init_header( + struct xfs_scrub *sc, + struct xfs_buf *agfl_bp, + struct xfs_bitmap *agfl_extents, + xfs_agblock_t flcount) +{ + struct xfs_mount *mp = sc->mp; + __be32 *agfl_bno; + struct xfs_bitmap_range *br; + struct xfs_bitmap_range *n; + struct xfs_agfl *agfl; + xfs_agblock_t agbno; + unsigned int fl_off; + + ASSERT(flcount <= xfs_agfl_size(mp)); + + /* + * Start rewriting the header by setting the bno[] array to + * NULLAGBLOCK, then setting AGFL header fields. + */ + agfl = XFS_BUF_TO_AGFL(agfl_bp); + memset(agfl, 0xFF, BBTOB(agfl_bp->b_length)); + agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); + agfl->agfl_seqno = cpu_to_be32(sc->sa.agno); + uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); + + /* + * Fill the AGFL with the remaining blocks. If agfl_extents has more + * blocks than fit in the AGFL, they will be freed in a subsequent + * step. + */ + fl_off = 0; + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp); + for_each_xfs_bitmap_extent(br, n, agfl_extents) { + agbno = XFS_FSB_TO_AGBNO(mp, br->start); + + trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len); + + while (br->len > 0 && fl_off < flcount) { + agfl_bno[fl_off] = cpu_to_be32(agbno); + fl_off++; + agbno++; + + /* + * We've now used br->start by putting it in the AGFL, + * so bump br so that we don't reap the block later. + */ + br->start++; + br->len--; + } + + if (br->len) + break; + list_del(&br->list); + kmem_free(br); + } + + /* Write new AGFL to disk. */ + xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF); + xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1); +} + +/* Repair the AGFL. */ +int +xrep_agfl( + struct xfs_scrub *sc) +{ + struct xfs_owner_info oinfo; + struct xfs_bitmap agfl_extents; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + xfs_agblock_t flcount; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return -EOPNOTSUPP; + + xchk_perag_get(sc->mp, &sc->sa); + xfs_bitmap_init(&agfl_extents); + + /* + * Read the AGF so that we can query the rmapbt. We hope that there's + * nothing wrong with the AGF, but all the AG header repair functions + * have this chicken-and-egg problem. + */ + error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); + if (error) + return error; + if (!agf_bp) + return -ENOMEM; + + /* + * Make sure we have the AGFL buffer, as scrub might have decided it + * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED. + */ + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL); + if (error) + return error; + agfl_bp->b_ops = &xfs_agfl_buf_ops; + + /* Gather all the extents we're going to put on the new AGFL. */ + error = xrep_agfl_collect_blocks(sc, agf_bp, &agfl_extents, &flcount); + if (error) + goto err; + + /* + * Update AGF and AGFL. We reset the global free block counter when + * we adjust the AGF flcount (which can fail) so avoid updating any + * buffers until we know that part works. + */ + xrep_agfl_update_agf(sc, agf_bp, flcount); + xrep_agfl_init_header(sc, agfl_bp, &agfl_extents, flcount); + + /* + * Ok, the AGFL should be ready to go now. Roll the transaction to + * make the new AGFL permanent before we start using it to return + * freespace overflow to the freespace btrees. + */ + sc->sa.agf_bp = agf_bp; + sc->sa.agfl_bp = agfl_bp; + error = xrep_roll_ag_trans(sc); + if (error) + goto err; + + /* Dump any AGFL overflow. */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + return xrep_reap_extents(sc, &agfl_extents, &oinfo, XFS_AG_RESV_AGFL); +err: + xfs_bitmap_destroy(&agfl_extents); + return error; +} + +/* AGI */ + +/* + * Offset within the xrep_find_ag_btree array for each btree type. Avoid the + * XFS_BTNUM_ names here to avoid creating a sparse array. + */ +enum { + XREP_AGI_INOBT = 0, + XREP_AGI_FINOBT, + XREP_AGI_END, + XREP_AGI_MAX +}; + +/* + * Given the inode btree roots described by *fab, find the roots, check them + * for sanity, and pass the root data back out via *fab. + */ +STATIC int +xrep_agi_find_btrees( + struct xfs_scrub *sc, + struct xrep_find_ag_btree *fab) +{ + struct xfs_buf *agf_bp; + struct xfs_mount *mp = sc->mp; + int error; + + /* Read the AGF. */ + error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp); + if (error) + return error; + if (!agf_bp) + return -ENOMEM; + + /* Find the btree roots. */ + error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL); + if (error) + return error; + + /* We must find the inobt root. */ + if (!xrep_check_btree_root(sc, &fab[XREP_AGI_INOBT])) + return -EFSCORRUPTED; + + /* We must find the finobt root if that feature is enabled. */ + if (xfs_sb_version_hasfinobt(&mp->m_sb) && + !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT])) + return -EFSCORRUPTED; + + return 0; +} + +/* + * Reinitialize the AGI header, making an in-core copy of the old contents so + * that we know which in-core state needs to be reinitialized. + */ +STATIC void +xrep_agi_init_header( + struct xfs_scrub *sc, + struct xfs_buf *agi_bp, + struct xfs_agi *old_agi) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + struct xfs_mount *mp = sc->mp; + + memcpy(old_agi, agi, sizeof(*old_agi)); + memset(agi, 0, BBTOB(agi_bp->b_length)); + agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); + agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); + agi->agi_seqno = cpu_to_be32(sc->sa.agno); + agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno)); + agi->agi_newino = cpu_to_be32(NULLAGINO); + agi->agi_dirino = cpu_to_be32(NULLAGINO); + if (xfs_sb_version_hascrc(&mp->m_sb)) + uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); + + /* We don't know how to fix the unlinked list yet. */ + memcpy(&agi->agi_unlinked, &old_agi->agi_unlinked, + sizeof(agi->agi_unlinked)); + + /* Mark the incore AGF data stale until we're done fixing things. */ + ASSERT(sc->sa.pag->pagi_init); + sc->sa.pag->pagi_init = 0; +} + +/* Set btree root information in an AGI. */ +STATIC void +xrep_agi_set_roots( + struct xfs_scrub *sc, + struct xfs_agi *agi, + struct xrep_find_ag_btree *fab) +{ + agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root); + agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height); + + if (xfs_sb_version_hasfinobt(&sc->mp->m_sb)) { + agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root); + agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height); + } +} + +/* Update the AGI counters. */ +STATIC int +xrep_agi_calc_from_btrees( + struct xfs_scrub *sc, + struct xfs_buf *agi_bp) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + struct xfs_mount *mp = sc->mp; + xfs_agino_t count; + xfs_agino_t freecount; + int error; + + cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno, + XFS_BTNUM_INO); + error = xfs_ialloc_count_inodes(cur, &count, &freecount); + if (error) + goto err; + xfs_btree_del_cursor(cur, error); + + agi->agi_count = cpu_to_be32(count); + agi->agi_freecount = cpu_to_be32(freecount); + return 0; +err: + xfs_btree_del_cursor(cur, error); + return error; +} + +/* Trigger reinitialization of the in-core data. */ +STATIC int +xrep_agi_commit_new( + struct xfs_scrub *sc, + struct xfs_buf *agi_bp) +{ + struct xfs_perag *pag; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agi_bp); + + /* Trigger inode count recalculation */ + xfs_force_summary_recalc(sc->mp); + + /* Write this to disk. */ + xfs_trans_buf_set_type(sc->tp, agi_bp, XFS_BLFT_AGI_BUF); + xfs_trans_log_buf(sc->tp, agi_bp, 0, BBTOB(agi_bp->b_length) - 1); + + /* Now reinitialize the in-core counters if necessary. */ + pag = sc->sa.pag; + pag->pagi_count = be32_to_cpu(agi->agi_count); + pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); + pag->pagi_init = 1; + + return 0; +} + +/* Repair the AGI. */ +int +xrep_agi( + struct xfs_scrub *sc) +{ + struct xrep_find_ag_btree fab[XREP_AGI_MAX] = { + [XREP_AGI_INOBT] = { + .rmap_owner = XFS_RMAP_OWN_INOBT, + .buf_ops = &xfs_inobt_buf_ops, + .magic = XFS_IBT_CRC_MAGIC, + }, + [XREP_AGI_FINOBT] = { + .rmap_owner = XFS_RMAP_OWN_INOBT, + .buf_ops = &xfs_inobt_buf_ops, + .magic = XFS_FIBT_CRC_MAGIC, + }, + [XREP_AGI_END] = { + .buf_ops = NULL + }, + }; + struct xfs_agi old_agi; + struct xfs_mount *mp = sc->mp; + struct xfs_buf *agi_bp; + struct xfs_agi *agi; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return -EOPNOTSUPP; + + xchk_perag_get(sc->mp, &sc->sa); + /* + * Make sure we have the AGI buffer, as scrub might have decided it + * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED. + */ + error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL); + if (error) + return error; + agi_bp->b_ops = &xfs_agi_buf_ops; + agi = XFS_BUF_TO_AGI(agi_bp); + + /* Find the AGI btree roots. */ + error = xrep_agi_find_btrees(sc, fab); + if (error) + return error; + + /* Start rewriting the header and implant the btrees we found. */ + xrep_agi_init_header(sc, agi_bp, &old_agi); + xrep_agi_set_roots(sc, agi, fab); + error = xrep_agi_calc_from_btrees(sc, agi_bp); + if (error) + goto out_revert; + + /* Reinitialize in-core state. */ + return xrep_agi_commit_new(sc, agi_bp); + +out_revert: + /* Mark the incore AGI state stale and revert the AGI. */ + sc->sa.pag->pagi_init = 0; + memcpy(agi, &old_agi, sizeof(old_agi)); + return error; +} diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 50e4f7fa06f0..036b5c7021eb 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -28,11 +28,11 @@ * Set us up to scrub free space btrees. */ int -xfs_scrub_setup_ag_allocbt( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_ag_allocbt( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_ag_btree(sc, ip, false); + return xchk_setup_ag_btree(sc, ip, false); } /* Free space btree scrubber. */ @@ -41,71 +41,71 @@ xfs_scrub_setup_ag_allocbt( * bnobt/cntbt record, respectively. */ STATIC void -xfs_scrub_allocbt_xref_other( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_allocbt_xref_other( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { - struct xfs_btree_cur **pcur; - xfs_agblock_t fbno; - xfs_extlen_t flen; - int has_otherrec; - int error; + struct xfs_btree_cur **pcur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int has_otherrec; + int error; if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) pcur = &sc->sa.cnt_cur; else pcur = &sc->sa.bno_cur; - if (!*pcur || xfs_scrub_skip_xref(sc->sm)) + if (!*pcur || xchk_skip_xref(sc->sm)) return; error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec); - if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + if (!xchk_should_check_xref(sc, &error, pcur)) return; if (!has_otherrec) { - xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + xchk_btree_xref_set_corrupt(sc, *pcur, 0); return; } error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec); - if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + if (!xchk_should_check_xref(sc, &error, pcur)) return; if (!has_otherrec) { - xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + xchk_btree_xref_set_corrupt(sc, *pcur, 0); return; } if (fbno != agbno || flen != len) - xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + xchk_btree_xref_set_corrupt(sc, *pcur, 0); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_allocbt_xref( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_allocbt_xref( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_allocbt_xref_other(sc, agbno, len); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); - xfs_scrub_xref_has_no_owner(sc, agbno, len); - xfs_scrub_xref_is_not_shared(sc, agbno, len); + xchk_allocbt_xref_other(sc, agbno, len); + xchk_xref_is_not_inode_chunk(sc, agbno, len); + xchk_xref_has_no_owner(sc, agbno, len); + xchk_xref_is_not_shared(sc, agbno, len); } /* Scrub a bnobt/cntbt record. */ STATIC int -xfs_scrub_allocbt_rec( - struct xfs_scrub_btree *bs, - union xfs_btree_rec *rec) +xchk_allocbt_rec( + struct xchk_btree *bs, + union xfs_btree_rec *rec) { - struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; - xfs_agblock_t bno; - xfs_extlen_t len; - int error = 0; + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agblock_t bno; + xfs_extlen_t len; + int error = 0; bno = be32_to_cpu(rec->alloc.ar_startblock); len = be32_to_cpu(rec->alloc.ar_blockcount); @@ -113,57 +113,57 @@ xfs_scrub_allocbt_rec( if (bno + len <= bno || !xfs_verify_agbno(mp, agno, bno) || !xfs_verify_agbno(mp, agno, bno + len - 1)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xfs_scrub_allocbt_xref(bs->sc, bno, len); + xchk_allocbt_xref(bs->sc, bno, len); return error; } /* Scrub the freespace btrees for some AG. */ STATIC int -xfs_scrub_allocbt( - struct xfs_scrub_context *sc, - xfs_btnum_t which) +xchk_allocbt( + struct xfs_scrub *sc, + xfs_btnum_t which) { - struct xfs_owner_info oinfo; - struct xfs_btree_cur *cur; + struct xfs_owner_info oinfo; + struct xfs_btree_cur *cur; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur; - return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL); + return xchk_btree(sc, cur, xchk_allocbt_rec, &oinfo, NULL); } int -xfs_scrub_bnobt( - struct xfs_scrub_context *sc) +xchk_bnobt( + struct xfs_scrub *sc) { - return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO); + return xchk_allocbt(sc, XFS_BTNUM_BNO); } int -xfs_scrub_cntbt( - struct xfs_scrub_context *sc) +xchk_cntbt( + struct xfs_scrub *sc) { - return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); + return xchk_allocbt(sc, XFS_BTNUM_CNT); } /* xref check that the extent is not free */ void -xfs_scrub_xref_is_used_space( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_xref_is_used_space( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { - bool is_freesp; - int error; + bool is_freesp; + int error; - if (!sc->sa.bno_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.bno_cur || xchk_skip_xref(sc->sm)) return; error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur)) return; if (is_freesp) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0); } diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index de51cf8a8516..81d5e90547a1 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -32,11 +32,11 @@ /* Set us up to scrub an inode's extended attributes. */ int -xfs_scrub_setup_xattr( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_xattr( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - size_t sz; + size_t sz; /* * Allocate the buffer without the inode lock held. We need enough @@ -50,14 +50,14 @@ xfs_scrub_setup_xattr( if (!sc->buf) return -ENOMEM; - return xfs_scrub_setup_inode_contents(sc, ip, 0); + return xchk_setup_inode_contents(sc, ip, 0); } /* Extended Attributes */ -struct xfs_scrub_xattr { +struct xchk_xattr { struct xfs_attr_list_context context; - struct xfs_scrub_context *sc; + struct xfs_scrub *sc; }; /* @@ -69,22 +69,22 @@ struct xfs_scrub_xattr { * or if we get more or less data than we expected. */ static void -xfs_scrub_xattr_listent( +xchk_xattr_listent( struct xfs_attr_list_context *context, int flags, unsigned char *name, int namelen, int valuelen) { - struct xfs_scrub_xattr *sx; + struct xchk_xattr *sx; struct xfs_da_args args = { NULL }; int error = 0; - sx = container_of(context, struct xfs_scrub_xattr, context); + sx = container_of(context, struct xchk_xattr, context); if (flags & XFS_ATTR_INCOMPLETE) { /* Incomplete attr key, just mark the inode for preening. */ - xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino); + xchk_ino_set_preen(sx->sc, context->dp->i_ino); return; } @@ -106,11 +106,11 @@ xfs_scrub_xattr_listent( error = xfs_attr_get_ilocked(context->dp, &args); if (error == -EEXIST) error = 0; - if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, + if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, &error)) goto fail_xref; if (args.valuelen != valuelen) - xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, + xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); fail_xref: if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) @@ -126,14 +126,14 @@ fail_xref: * the smallest address */ STATIC bool -xfs_scrub_xattr_set_map( - struct xfs_scrub_context *sc, - unsigned long *map, - unsigned int start, - unsigned int len) +xchk_xattr_set_map( + struct xfs_scrub *sc, + unsigned long *map, + unsigned int start, + unsigned int len) { - unsigned int mapsize = sc->mp->m_attr_geo->blksize; - bool ret = true; + unsigned int mapsize = sc->mp->m_attr_geo->blksize; + bool ret = true; if (start >= mapsize) return false; @@ -154,8 +154,8 @@ xfs_scrub_xattr_set_map( * attr freemap has problems or points to used space. */ STATIC bool -xfs_scrub_xattr_check_freemap( - struct xfs_scrub_context *sc, +xchk_xattr_check_freemap( + struct xfs_scrub *sc, unsigned long *map, struct xfs_attr3_icleaf_hdr *leafhdr) { @@ -168,7 +168,7 @@ xfs_scrub_xattr_check_freemap( freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); bitmap_zero(freemap, mapsize); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (!xfs_scrub_xattr_set_map(sc, freemap, + if (!xchk_xattr_set_map(sc, freemap, leafhdr->freemap[i].base, leafhdr->freemap[i].size)) return false; @@ -184,8 +184,8 @@ xfs_scrub_xattr_check_freemap( * Returns the number of bytes used for the name/value data. */ STATIC void -xfs_scrub_xattr_entry( - struct xfs_scrub_da_btree *ds, +xchk_xattr_entry( + struct xchk_da_btree *ds, int level, char *buf_end, struct xfs_attr_leafblock *leaf, @@ -204,17 +204,17 @@ xfs_scrub_xattr_entry( unsigned int namesize; if (ent->pad2 != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); /* Hash values in order? */ if (be32_to_cpu(ent->hashval) < *last_hashval) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); *last_hashval = be32_to_cpu(ent->hashval); nameidx = be16_to_cpu(ent->nameidx); if (nameidx < leafhdr->firstused || nameidx >= mp->m_attr_geo->blksize) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); return; } @@ -225,27 +225,27 @@ xfs_scrub_xattr_entry( be16_to_cpu(lentry->valuelen)); name_end = (char *)lentry + namesize; if (lentry->namelen == 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); } else { rentry = xfs_attr3_leaf_name_remote(leaf, idx); namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); name_end = (char *)rentry + namesize; if (rentry->namelen == 0 || rentry->valueblk == 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); } if (name_end > buf_end) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); - if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize)) - xfs_scrub_da_set_corrupt(ds, level); + if (!xchk_xattr_set_map(ds->sc, usedmap, nameidx, namesize)) + xchk_da_set_corrupt(ds, level); if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) *usedbytes += namesize; } /* Scrub an attribute leaf. */ STATIC int -xfs_scrub_xattr_block( - struct xfs_scrub_da_btree *ds, +xchk_xattr_block( + struct xchk_da_btree *ds, int level) { struct xfs_attr3_icleaf_hdr leafhdr; @@ -275,10 +275,10 @@ xfs_scrub_xattr_block( if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 || leaf->hdr.info.hdr.pad != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); } else { if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); } /* Check the leaf header */ @@ -286,44 +286,44 @@ xfs_scrub_xattr_block( hdrsize = xfs_attr3_leaf_hdr_size(leaf); if (leafhdr.usedbytes > mp->m_attr_geo->blksize) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); if (leafhdr.firstused > mp->m_attr_geo->blksize) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); if (leafhdr.firstused < hdrsize) - xfs_scrub_da_set_corrupt(ds, level); - if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize)) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); + if (!xchk_xattr_set_map(ds->sc, usedmap, 0, hdrsize)) + xchk_da_set_corrupt(ds, level); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; entries = xfs_attr3_leaf_entryp(leaf); if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { /* Mark the leaf entry itself. */ off = (char *)ent - (char *)leaf; - if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off, + if (!xchk_xattr_set_map(ds->sc, usedmap, off, sizeof(xfs_attr_leaf_entry_t))) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out; } /* Check the entry and nameval. */ - xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr, + xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, usedmap, ent, i, &usedbytes, &last_hashval); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; } - if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr)) - xfs_scrub_da_set_corrupt(ds, level); + if (!xchk_xattr_check_freemap(ds->sc, usedmap, &leafhdr)) + xchk_da_set_corrupt(ds, level); if (leafhdr.usedbytes != usedbytes) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); out: return 0; @@ -331,8 +331,8 @@ out: /* Scrub a attribute btree record. */ STATIC int -xfs_scrub_xattr_rec( - struct xfs_scrub_da_btree *ds, +xchk_xattr_rec( + struct xchk_da_btree *ds, int level, void *rec) { @@ -352,14 +352,14 @@ xfs_scrub_xattr_rec( blk = &ds->state->path.blk[level]; /* Check the whole block, if necessary. */ - error = xfs_scrub_xattr_block(ds, level); + error = xchk_xattr_block(ds, level); if (error) goto out; if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; /* Check the hash of the entry. */ - error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval); + error = xchk_da_btree_hash(ds, level, &ent->hashval); if (error) goto out; @@ -368,7 +368,7 @@ xfs_scrub_xattr_rec( hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr); nameidx = be16_to_cpu(ent->nameidx); if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out; } @@ -377,12 +377,12 @@ xfs_scrub_xattr_rec( badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE | XFS_ATTR_INCOMPLETE); if ((ent->flags & badflags) != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); if (ent->flags & XFS_ATTR_LOCAL) { lentry = (struct xfs_attr_leaf_name_local *) (((char *)bp->b_addr) + nameidx); if (lentry->namelen <= 0) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out; } calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen); @@ -390,13 +390,13 @@ xfs_scrub_xattr_rec( rentry = (struct xfs_attr_leaf_name_remote *) (((char *)bp->b_addr) + nameidx); if (rentry->namelen <= 0) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out; } calc_hash = xfs_da_hashname(rentry->name, rentry->namelen); } if (calc_hash != hash) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); out: return error; @@ -404,10 +404,10 @@ out: /* Scrub the extended attribute metadata. */ int -xfs_scrub_xattr( - struct xfs_scrub_context *sc) +xchk_xattr( + struct xfs_scrub *sc) { - struct xfs_scrub_xattr sx; + struct xchk_xattr sx; struct attrlist_cursor_kern cursor = { 0 }; xfs_dablk_t last_checked = -1U; int error = 0; @@ -417,7 +417,7 @@ xfs_scrub_xattr( memset(&sx, 0, sizeof(sx)); /* Check attribute tree structure */ - error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec, + error = xchk_da_btree(sc, XFS_ATTR_FORK, xchk_xattr_rec, &last_checked); if (error) goto out; @@ -429,7 +429,7 @@ xfs_scrub_xattr( sx.context.dp = sc->ip; sx.context.cursor = &cursor; sx.context.resynch = 1; - sx.context.put_listent = xfs_scrub_xattr_listent; + sx.context.put_listent = xchk_xattr_listent; sx.context.tp = sc->tp; sx.context.flags = ATTR_INCOMPLETE; sx.sc = sc; @@ -438,7 +438,7 @@ xfs_scrub_xattr( * Look up every xattr in this file by name. * * Use the backend implementation of xfs_attr_list to call - * xfs_scrub_xattr_listent on every attribute key in this inode. + * xchk_xattr_listent on every attribute key in this inode. * In other words, we use the same iterator/callback mechanism * that listattr uses to scrub extended attributes, though in our * _listent function, we check the value of the attribute. @@ -451,7 +451,7 @@ xfs_scrub_xattr( * locking order. */ error = xfs_attr_list_int_ilocked(&sx.context); - if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) + if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) goto out; out: return error; diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c new file mode 100644 index 000000000000..fdadc9e1dc49 --- /dev/null +++ b/fs/xfs/scrub/bitmap.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/bitmap.h" + +/* + * Set a range of this bitmap. Caller must ensure the range is not set. + * + * This is the logical equivalent of bitmap |= mask(start, len). + */ +int +xfs_bitmap_set( + struct xfs_bitmap *bitmap, + uint64_t start, + uint64_t len) +{ + struct xfs_bitmap_range *bmr; + + bmr = kmem_alloc(sizeof(struct xfs_bitmap_range), KM_MAYFAIL); + if (!bmr) + return -ENOMEM; + + INIT_LIST_HEAD(&bmr->list); + bmr->start = start; + bmr->len = len; + list_add_tail(&bmr->list, &bitmap->list); + + return 0; +} + +/* Free everything related to this bitmap. */ +void +xfs_bitmap_destroy( + struct xfs_bitmap *bitmap) +{ + struct xfs_bitmap_range *bmr; + struct xfs_bitmap_range *n; + + for_each_xfs_bitmap_extent(bmr, n, bitmap) { + list_del(&bmr->list); + kmem_free(bmr); + } +} + +/* Set up a per-AG block bitmap. */ +void +xfs_bitmap_init( + struct xfs_bitmap *bitmap) +{ + INIT_LIST_HEAD(&bitmap->list); +} + +/* Compare two btree extents. */ +static int +xfs_bitmap_range_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_bitmap_range *ap; + struct xfs_bitmap_range *bp; + + ap = container_of(a, struct xfs_bitmap_range, list); + bp = container_of(b, struct xfs_bitmap_range, list); + + if (ap->start > bp->start) + return 1; + if (ap->start < bp->start) + return -1; + return 0; +} + +/* + * Remove all the blocks mentioned in @sub from the extents in @bitmap. + * + * The intent is that callers will iterate the rmapbt for all of its records + * for a given owner to generate @bitmap; and iterate all the blocks of the + * metadata structures that are not being rebuilt and have the same rmapbt + * owner to generate @sub. This routine subtracts all the extents + * mentioned in sub from all the extents linked in @bitmap, which leaves + * @bitmap as the list of blocks that are not accounted for, which we assume + * are the dead blocks of the old metadata structure. The blocks mentioned in + * @bitmap can be reaped. + * + * This is the logical equivalent of bitmap &= ~sub. + */ +#define LEFT_ALIGNED (1 << 0) +#define RIGHT_ALIGNED (1 << 1) +int +xfs_bitmap_disunion( + struct xfs_bitmap *bitmap, + struct xfs_bitmap *sub) +{ + struct list_head *lp; + struct xfs_bitmap_range *br; + struct xfs_bitmap_range *new_br; + struct xfs_bitmap_range *sub_br; + uint64_t sub_start; + uint64_t sub_len; + int state; + int error = 0; + + if (list_empty(&bitmap->list) || list_empty(&sub->list)) + return 0; + ASSERT(!list_empty(&sub->list)); + + list_sort(NULL, &bitmap->list, xfs_bitmap_range_cmp); + list_sort(NULL, &sub->list, xfs_bitmap_range_cmp); + + /* + * Now that we've sorted both lists, we iterate bitmap once, rolling + * forward through sub and/or bitmap as necessary until we find an + * overlap or reach the end of either list. We do not reset lp to the + * head of bitmap nor do we reset sub_br to the head of sub. The + * list traversal is similar to merge sort, but we're deleting + * instead. In this manner we avoid O(n^2) operations. + */ + sub_br = list_first_entry(&sub->list, struct xfs_bitmap_range, + list); + lp = bitmap->list.next; + while (lp != &bitmap->list) { + br = list_entry(lp, struct xfs_bitmap_range, list); + + /* + * Advance sub_br and/or br until we find a pair that + * intersect or we run out of extents. + */ + while (sub_br->start + sub_br->len <= br->start) { + if (list_is_last(&sub_br->list, &sub->list)) + goto out; + sub_br = list_next_entry(sub_br, list); + } + if (sub_br->start >= br->start + br->len) { + lp = lp->next; + continue; + } + + /* trim sub_br to fit the extent we have */ + sub_start = sub_br->start; + sub_len = sub_br->len; + if (sub_br->start < br->start) { + sub_len -= br->start - sub_br->start; + sub_start = br->start; + } + if (sub_len > br->len) + sub_len = br->len; + + state = 0; + if (sub_start == br->start) + state |= LEFT_ALIGNED; + if (sub_start + sub_len == br->start + br->len) + state |= RIGHT_ALIGNED; + switch (state) { + case LEFT_ALIGNED: + /* Coincides with only the left. */ + br->start += sub_len; + br->len -= sub_len; + break; + case RIGHT_ALIGNED: + /* Coincides with only the right. */ + br->len -= sub_len; + lp = lp->next; + break; + case LEFT_ALIGNED | RIGHT_ALIGNED: + /* Total overlap, just delete ex. */ + lp = lp->next; + list_del(&br->list); + kmem_free(br); + break; + case 0: + /* + * Deleting from the middle: add the new right extent + * and then shrink the left extent. + */ + new_br = kmem_alloc(sizeof(struct xfs_bitmap_range), + KM_MAYFAIL); + if (!new_br) { + error = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&new_br->list); + new_br->start = sub_start + sub_len; + new_br->len = br->start + br->len - new_br->start; + list_add(&new_br->list, &br->list); + br->len = sub_start - br->start; + lp = lp->next; + break; + default: + ASSERT(0); + break; + } + } + +out: + return error; +} +#undef LEFT_ALIGNED +#undef RIGHT_ALIGNED + +/* + * Record all btree blocks seen while iterating all records of a btree. + * + * We know that the btree query_all function starts at the left edge and walks + * towards the right edge of the tree. Therefore, we know that we can walk up + * the btree cursor towards the root; if the pointer for a given level points + * to the first record/key in that block, we haven't seen this block before; + * and therefore we need to remember that we saw this block in the btree. + * + * So if our btree is: + * + * 4 + * / | \ + * 1 2 3 + * + * Pretend for this example that each leaf block has 100 btree records. For + * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record + * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record + * block 4. The list is [1, 4]. + * + * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the + * loop. The list remains [1, 4]. + * + * For the 101st btree record, we've moved onto leaf block 2. Now + * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that + * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. + * + * For the 102nd record, bc_ptrs[0] == 2, so we continue. + * + * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so + * we add 3 to the list. Now it is [1, 4, 2, 3]. + * + * For the 300th record we just exit, with the list being [1, 4, 2, 3]. + */ + +/* + * Record all the buffers pointed to by the btree cursor. Callers already + * engaged in a btree walk should call this function to capture the list of + * blocks going from the leaf towards the root. + */ +int +xfs_bitmap_set_btcur_path( + struct xfs_bitmap *bitmap, + struct xfs_btree_cur *cur) +{ + struct xfs_buf *bp; + xfs_fsblock_t fsb; + int i; + int error; + + for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { + xfs_btree_get_block(cur, i, &bp); + if (!bp) + continue; + fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + error = xfs_bitmap_set(bitmap, fsb, 1); + if (error) + return error; + } + + return 0; +} + +/* Collect a btree's block in the bitmap. */ +STATIC int +xfs_bitmap_collect_btblock( + struct xfs_btree_cur *cur, + int level, + void *priv) +{ + struct xfs_bitmap *bitmap = priv; + struct xfs_buf *bp; + xfs_fsblock_t fsbno; + + xfs_btree_get_block(cur, level, &bp); + if (!bp) + return 0; + + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); + return xfs_bitmap_set(bitmap, fsbno, 1); +} + +/* Walk the btree and mark the bitmap wherever a btree block is found. */ +int +xfs_bitmap_set_btblocks( + struct xfs_bitmap *bitmap, + struct xfs_btree_cur *cur) +{ + return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, bitmap); +} diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h new file mode 100644 index 000000000000..ae8ecbce6fa6 --- /dev/null +++ b/fs/xfs/scrub/bitmap.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_SCRUB_BITMAP_H__ +#define __XFS_SCRUB_BITMAP_H__ + +struct xfs_bitmap_range { + struct list_head list; + uint64_t start; + uint64_t len; +}; + +struct xfs_bitmap { + struct list_head list; +}; + +void xfs_bitmap_init(struct xfs_bitmap *bitmap); +void xfs_bitmap_destroy(struct xfs_bitmap *bitmap); + +#define for_each_xfs_bitmap_extent(bex, n, bitmap) \ + list_for_each_entry_safe((bex), (n), &(bitmap)->list, list) + +#define for_each_xfs_bitmap_block(b, bex, n, bitmap) \ + list_for_each_entry_safe((bex), (n), &(bitmap)->list, list) \ + for ((b) = bex->start; (b) < bex->start + bex->len; (b)++) + +int xfs_bitmap_set(struct xfs_bitmap *bitmap, uint64_t start, uint64_t len); +int xfs_bitmap_disunion(struct xfs_bitmap *bitmap, struct xfs_bitmap *sub); +int xfs_bitmap_set_btcur_path(struct xfs_bitmap *bitmap, + struct xfs_btree_cur *cur); +int xfs_bitmap_set_btblocks(struct xfs_bitmap *bitmap, + struct xfs_btree_cur *cur); + +#endif /* __XFS_SCRUB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 3d08589f5c60..e1d11f3223e3 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -33,13 +33,13 @@ /* Set us up with an inode's bmap. */ int -xfs_scrub_setup_inode_bmap( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_inode_bmap( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - int error; + int error; - error = xfs_scrub_get_inode(sc, ip); + error = xchk_get_inode(sc, ip); if (error) goto out; @@ -60,7 +60,7 @@ xfs_scrub_setup_inode_bmap( } /* Got the inode, lock it and we're ready to go. */ - error = xfs_scrub_trans_alloc(sc, 0); + error = xchk_trans_alloc(sc, 0); if (error) goto out; sc->ilock_flags |= XFS_ILOCK_EXCL; @@ -78,27 +78,27 @@ out: * is in btree format. */ -struct xfs_scrub_bmap_info { - struct xfs_scrub_context *sc; - xfs_fileoff_t lastoff; - bool is_rt; - bool is_shared; - int whichfork; +struct xchk_bmap_info { + struct xfs_scrub *sc; + xfs_fileoff_t lastoff; + bool is_rt; + bool is_shared; + int whichfork; }; /* Look for a corresponding rmap for this irec. */ static inline bool -xfs_scrub_bmap_get_rmap( - struct xfs_scrub_bmap_info *info, - struct xfs_bmbt_irec *irec, - xfs_agblock_t agbno, - uint64_t owner, - struct xfs_rmap_irec *rmap) +xchk_bmap_get_rmap( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno, + uint64_t owner, + struct xfs_rmap_irec *rmap) { - xfs_fileoff_t offset; - unsigned int rflags = 0; - int has_rmap; - int error; + xfs_fileoff_t offset; + unsigned int rflags = 0; + int has_rmap; + int error; if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; @@ -120,7 +120,7 @@ xfs_scrub_bmap_get_rmap( if (info->is_shared) { error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, owner, offset, rflags, rmap, &has_rmap); - if (!xfs_scrub_should_check_xref(info->sc, &error, + if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) return false; goto out; @@ -131,36 +131,36 @@ xfs_scrub_bmap_get_rmap( */ error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner, offset, rflags, &has_rmap); - if (!xfs_scrub_should_check_xref(info->sc, &error, + if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) return false; if (!has_rmap) goto out; error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap); - if (!xfs_scrub_should_check_xref(info->sc, &error, + if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) return false; out: if (!has_rmap) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); return has_rmap; } /* Make sure that we have rmapbt records for this extent. */ STATIC void -xfs_scrub_bmap_xref_rmap( - struct xfs_scrub_bmap_info *info, - struct xfs_bmbt_irec *irec, - xfs_agblock_t agbno) +xchk_bmap_xref_rmap( + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec, + xfs_agblock_t agbno) { - struct xfs_rmap_irec rmap; - unsigned long long rmap_end; - uint64_t owner; + struct xfs_rmap_irec rmap; + unsigned long long rmap_end; + uint64_t owner; - if (!info->sc->sa.rmap_cur || xfs_scrub_skip_xref(info->sc->sm)) + if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) return; if (info->whichfork == XFS_COW_FORK) @@ -169,14 +169,14 @@ xfs_scrub_bmap_xref_rmap( owner = info->sc->ip->i_ino; /* Find the rmap record for this irec. */ - if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap)) + if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) return; /* Check the rmap. */ rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; if (rmap.rm_startblock > agbno || agbno + irec->br_blockcount > rmap_end) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* @@ -189,12 +189,12 @@ xfs_scrub_bmap_xref_rmap( rmap.rm_blockcount; if (rmap.rm_offset > irec->br_startoff || irec->br_startoff + irec->br_blockcount > rmap_end) - xfs_scrub_fblock_xref_set_corrupt(info->sc, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); } if (rmap.rm_owner != owner) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* @@ -207,46 +207,46 @@ xfs_scrub_bmap_xref_rmap( if (owner != XFS_RMAP_OWN_COW && irec->br_state == XFS_EXT_UNWRITTEN && !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (info->whichfork == XFS_ATTR_FORK && !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) - xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork, + xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); } /* Cross-reference a single rtdev extent record. */ STATIC void -xfs_scrub_bmap_rt_extent_xref( - struct xfs_scrub_bmap_info *info, - struct xfs_inode *ip, - struct xfs_btree_cur *cur, - struct xfs_bmbt_irec *irec) +xchk_bmap_rt_extent_xref( + struct xchk_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) { if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock, + xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, irec->br_blockcount); } /* Cross-reference a single datadev extent record. */ STATIC void -xfs_scrub_bmap_extent_xref( - struct xfs_scrub_bmap_info *info, - struct xfs_inode *ip, - struct xfs_btree_cur *cur, - struct xfs_bmbt_irec *irec) +xchk_bmap_extent_xref( + struct xchk_bmap_info *info, + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *irec) { - struct xfs_mount *mp = info->sc->mp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_extlen_t len; - int error; + struct xfs_mount *mp = info->sc->mp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_extlen_t len; + int error; if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; @@ -255,44 +255,44 @@ xfs_scrub_bmap_extent_xref( agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); len = irec->br_blockcount; - error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa); - if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork, + error = xchk_ag_init(info->sc, agno, &info->sc->sa); + if (!xchk_fblock_process_error(info->sc, info->whichfork, irec->br_startoff, &error)) return; - xfs_scrub_xref_is_used_space(info->sc, agbno, len); - xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len); - xfs_scrub_bmap_xref_rmap(info, irec, agbno); + xchk_xref_is_used_space(info->sc, agbno, len); + xchk_xref_is_not_inode_chunk(info->sc, agbno, len); + xchk_bmap_xref_rmap(info, irec, agbno); switch (info->whichfork) { case XFS_DATA_FORK: if (xfs_is_reflink_inode(info->sc->ip)) break; /* fall through */ case XFS_ATTR_FORK: - xfs_scrub_xref_is_not_shared(info->sc, agbno, + xchk_xref_is_not_shared(info->sc, agbno, irec->br_blockcount); break; case XFS_COW_FORK: - xfs_scrub_xref_is_cow_staging(info->sc, agbno, + xchk_xref_is_cow_staging(info->sc, agbno, irec->br_blockcount); break; } - xfs_scrub_ag_free(info->sc, &info->sc->sa); + xchk_ag_free(info->sc, &info->sc->sa); } /* Scrub a single extent record. */ STATIC int -xfs_scrub_bmap_extent( - struct xfs_inode *ip, - struct xfs_btree_cur *cur, - struct xfs_scrub_bmap_info *info, - struct xfs_bmbt_irec *irec) +xchk_bmap_extent( + struct xfs_inode *ip, + struct xfs_btree_cur *cur, + struct xchk_bmap_info *info, + struct xfs_bmbt_irec *irec) { - struct xfs_mount *mp = info->sc->mp; - struct xfs_buf *bp = NULL; - xfs_filblks_t end; - int error = 0; + struct xfs_mount *mp = info->sc->mp; + struct xfs_buf *bp = NULL; + xfs_filblks_t end; + int error = 0; if (cur) xfs_btree_get_block(cur, 0, &bp); @@ -302,12 +302,12 @@ xfs_scrub_bmap_extent( * from the incore list, for which there is no ordering check. */ if (irec->br_startoff < info->lastoff) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* There should never be a "hole" extent in either extent list. */ if (irec->br_startblock == HOLESTARTBLOCK) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* @@ -315,40 +315,40 @@ xfs_scrub_bmap_extent( * in-core extent scan, and we should never see these in the bmbt. */ if (isnullstartblock(irec->br_startblock)) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* Make sure the extent points to a valid place. */ if (irec->br_blockcount > MAXEXTLEN) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && (!xfs_verify_rtbno(mp, irec->br_startblock) || !xfs_verify_rtbno(mp, end))) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && (!xfs_verify_fsbno(mp, irec->br_startblock) || !xfs_verify_fsbno(mp, end) || XFS_FSB_TO_AGNO(mp, irec->br_startblock) != XFS_FSB_TO_AGNO(mp, end))) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); /* We don't allow unwritten extents on attr forks. */ if (irec->br_state == XFS_EXT_UNWRITTEN && info->whichfork == XFS_ATTR_FORK) - xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, + xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (info->is_rt) - xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec); + xchk_bmap_rt_extent_xref(info, ip, cur, irec); else - xfs_scrub_bmap_extent_xref(info, ip, cur, irec); + xchk_bmap_extent_xref(info, ip, cur, irec); info->lastoff = irec->br_startoff + irec->br_blockcount; return error; @@ -356,17 +356,17 @@ xfs_scrub_bmap_extent( /* Scrub a bmbt record. */ STATIC int -xfs_scrub_bmapbt_rec( - struct xfs_scrub_btree *bs, - union xfs_btree_rec *rec) +xchk_bmapbt_rec( + struct xchk_btree *bs, + union xfs_btree_rec *rec) { - struct xfs_bmbt_irec irec; - struct xfs_scrub_bmap_info *info = bs->private; - struct xfs_inode *ip = bs->cur->bc_private.b.ip; - struct xfs_buf *bp = NULL; - struct xfs_btree_block *block; - uint64_t owner; - int i; + struct xfs_bmbt_irec irec; + struct xchk_bmap_info *info = bs->private; + struct xfs_inode *ip = bs->cur->bc_private.b.ip; + struct xfs_buf *bp = NULL; + struct xfs_btree_block *block; + uint64_t owner; + int i; /* * Check the owners of the btree blocks up to the level below @@ -378,54 +378,53 @@ xfs_scrub_bmapbt_rec( block = xfs_btree_get_block(bs->cur, i, &bp); owner = be64_to_cpu(block->bb_u.l.bb_owner); if (owner != ip->i_ino) - xfs_scrub_fblock_set_corrupt(bs->sc, + xchk_fblock_set_corrupt(bs->sc, info->whichfork, 0); } } /* Set up the in-core record and scrub it. */ xfs_bmbt_disk_get_all(&rec->bmbt, &irec); - return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec); + return xchk_bmap_extent(ip, bs->cur, info, &irec); } /* Scan the btree records. */ STATIC int -xfs_scrub_bmap_btree( - struct xfs_scrub_context *sc, - int whichfork, - struct xfs_scrub_bmap_info *info) +xchk_bmap_btree( + struct xfs_scrub *sc, + int whichfork, + struct xchk_bmap_info *info) { - struct xfs_owner_info oinfo; - struct xfs_mount *mp = sc->mp; - struct xfs_inode *ip = sc->ip; - struct xfs_btree_cur *cur; - int error; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = sc->ip; + struct xfs_btree_cur *cur; + int error; cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : - XFS_BTREE_NOERROR); + error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); + xfs_btree_del_cursor(cur, error); return error; } -struct xfs_scrub_bmap_check_rmap_info { - struct xfs_scrub_context *sc; - int whichfork; - struct xfs_iext_cursor icur; +struct xchk_bmap_check_rmap_info { + struct xfs_scrub *sc; + int whichfork; + struct xfs_iext_cursor icur; }; /* Can we find bmaps that fit this rmap? */ STATIC int -xfs_scrub_bmap_check_rmap( +xchk_bmap_check_rmap( struct xfs_btree_cur *cur, struct xfs_rmap_irec *rec, void *priv) { struct xfs_bmbt_irec irec; - struct xfs_scrub_bmap_check_rmap_info *sbcri = priv; + struct xchk_bmap_check_rmap_info *sbcri = priv; struct xfs_ifork *ifp; - struct xfs_scrub_context *sc = sbcri->sc; + struct xfs_scrub *sc = sbcri->sc; bool have_map; /* Is this even the right fork? */ @@ -440,14 +439,14 @@ xfs_scrub_bmap_check_rmap( /* Now look up the bmbt record. */ ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork); if (!ifp) { - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); goto out; } have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, &sbcri->icur, &irec); if (!have_map) - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); /* * bmap extent record lengths are constrained to 2^21 blocks in length @@ -458,14 +457,14 @@ xfs_scrub_bmap_check_rmap( */ while (have_map) { if (irec.br_startoff != rec->rm_offset) - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, cur->bc_private.a.agno, rec->rm_startblock)) - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); if (irec.br_blockcount > rec->rm_blockcount) - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) break; @@ -476,7 +475,7 @@ xfs_scrub_bmap_check_rmap( break; have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); if (!have_map) - xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork, + xchk_fblock_set_corrupt(sc, sbcri->whichfork, rec->rm_offset); } @@ -488,12 +487,12 @@ out: /* Make sure each rmap has a corresponding bmbt entry. */ STATIC int -xfs_scrub_bmap_check_ag_rmaps( - struct xfs_scrub_context *sc, +xchk_bmap_check_ag_rmaps( + struct xfs_scrub *sc, int whichfork, xfs_agnumber_t agno) { - struct xfs_scrub_bmap_check_rmap_info sbcri; + struct xchk_bmap_check_rmap_info sbcri; struct xfs_btree_cur *cur; struct xfs_buf *agf; int error; @@ -510,11 +509,11 @@ xfs_scrub_bmap_check_ag_rmaps( sbcri.sc = sc; sbcri.whichfork = whichfork; - error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri); + error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); if (error == XFS_BTREE_QUERY_RANGE_ABORT) error = 0; - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); out_agf: xfs_trans_brelse(sc->tp, agf); return error; @@ -522,13 +521,13 @@ out_agf: /* Make sure each rmap has a corresponding bmbt entry. */ STATIC int -xfs_scrub_bmap_check_rmaps( - struct xfs_scrub_context *sc, - int whichfork) +xchk_bmap_check_rmaps( + struct xfs_scrub *sc, + int whichfork) { - loff_t size; - xfs_agnumber_t agno; - int error; + loff_t size; + xfs_agnumber_t agno; + int error; if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) || whichfork == XFS_COW_FORK || @@ -562,7 +561,7 @@ xfs_scrub_bmap_check_rmaps( return 0; for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) { - error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno); + error = xchk_bmap_check_ag_rmaps(sc, whichfork, agno); if (error) return error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) @@ -579,18 +578,18 @@ xfs_scrub_bmap_check_rmaps( * Then we unconditionally scan the incore extent cache. */ STATIC int -xfs_scrub_bmap( - struct xfs_scrub_context *sc, - int whichfork) +xchk_bmap( + struct xfs_scrub *sc, + int whichfork) { - struct xfs_bmbt_irec irec; - struct xfs_scrub_bmap_info info = { NULL }; - struct xfs_mount *mp = sc->mp; - struct xfs_inode *ip = sc->ip; - struct xfs_ifork *ifp; - xfs_fileoff_t endoff; - struct xfs_iext_cursor icur; - int error = 0; + struct xfs_bmbt_irec irec; + struct xchk_bmap_info info = { NULL }; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = sc->ip; + struct xfs_ifork *ifp; + xfs_fileoff_t endoff; + struct xfs_iext_cursor icur; + int error = 0; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -606,7 +605,7 @@ xfs_scrub_bmap( goto out; /* No CoW forks on non-reflink inodes/filesystems. */ if (!xfs_is_reflink_inode(ip)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); goto out; } break; @@ -615,7 +614,7 @@ xfs_scrub_bmap( goto out_check_rmap; if (!xfs_sb_version_hasattr(&mp->m_sb) && !xfs_sb_version_hasattr2(&mp->m_sb)) - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: ASSERT(whichfork == XFS_DATA_FORK); @@ -631,22 +630,22 @@ xfs_scrub_bmap( goto out; case XFS_DINODE_FMT_EXTENTS: if (!(ifp->if_flags & XFS_IFEXTENTS)) { - xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + xchk_fblock_set_corrupt(sc, whichfork, 0); goto out; } break; case XFS_DINODE_FMT_BTREE: if (whichfork == XFS_COW_FORK) { - xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + xchk_fblock_set_corrupt(sc, whichfork, 0); goto out; } - error = xfs_scrub_bmap_btree(sc, whichfork, &info); + error = xchk_bmap_btree(sc, whichfork, &info); if (error) goto out; break; default: - xfs_scrub_fblock_set_corrupt(sc, whichfork, 0); + xchk_fblock_set_corrupt(sc, whichfork, 0); goto out; } @@ -656,37 +655,37 @@ xfs_scrub_bmap( /* Now try to scrub the in-memory extent list. */ if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(sc->tp, ip, whichfork); - if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) + if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) goto out; } /* Find the offset of the last extent in the mapping. */ error = xfs_bmap_last_offset(ip, &endoff, whichfork); - if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error)) + if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) goto out; /* Scrub extent records. */ info.lastoff = 0; ifp = XFS_IFORK_PTR(ip, whichfork); for_each_xfs_iext(ifp, &icur, &irec) { - if (xfs_scrub_should_terminate(sc, &error) || + if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; if (isnullstartblock(irec.br_startblock)) continue; if (irec.br_startoff >= endoff) { - xfs_scrub_fblock_set_corrupt(sc, whichfork, + xchk_fblock_set_corrupt(sc, whichfork, irec.br_startoff); goto out; } - error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec); + error = xchk_bmap_extent(ip, NULL, &info, &irec); if (error) goto out; } out_check_rmap: - error = xfs_scrub_bmap_check_rmaps(sc, whichfork); - if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error)) + error = xchk_bmap_check_rmaps(sc, whichfork); + if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) goto out; out: return error; @@ -694,27 +693,27 @@ out: /* Scrub an inode's data fork. */ int -xfs_scrub_bmap_data( - struct xfs_scrub_context *sc) +xchk_bmap_data( + struct xfs_scrub *sc) { - return xfs_scrub_bmap(sc, XFS_DATA_FORK); + return xchk_bmap(sc, XFS_DATA_FORK); } /* Scrub an inode's attr fork. */ int -xfs_scrub_bmap_attr( - struct xfs_scrub_context *sc) +xchk_bmap_attr( + struct xfs_scrub *sc) { - return xfs_scrub_bmap(sc, XFS_ATTR_FORK); + return xchk_bmap(sc, XFS_ATTR_FORK); } /* Scrub an inode's CoW fork. */ int -xfs_scrub_bmap_cow( - struct xfs_scrub_context *sc) +xchk_bmap_cow( + struct xfs_scrub *sc) { if (!xfs_is_reflink_inode(sc->ip)) return -ENOENT; - return xfs_scrub_bmap(sc, XFS_COW_FORK); + return xchk_bmap(sc, XFS_COW_FORK); } diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 5b472045f036..4ae959f7ad2c 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -29,13 +29,13 @@ * operational errors in common.c. */ static bool -__xfs_scrub_btree_process_error( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level, - int *error, - __u32 errflag, - void *ret_ip) +__xchk_btree_process_error( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level, + int *error, + __u32 errflag, + void *ret_ip) { if (*error == 0) return true; @@ -43,7 +43,7 @@ __xfs_scrub_btree_process_error( switch (*error) { case -EDEADLOCK: /* Used to restart an op with deadlock avoidance. */ - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: @@ -53,10 +53,10 @@ __xfs_scrub_btree_process_error( /* fall through */ default: if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, + trace_xchk_ifork_btree_op_error(sc, cur, level, *error, ret_ip); else - trace_xfs_scrub_btree_op_error(sc, cur, level, + trace_xchk_btree_op_error(sc, cur, level, *error, ret_ip); break; } @@ -64,63 +64,63 @@ __xfs_scrub_btree_process_error( } bool -xfs_scrub_btree_process_error( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level, - int *error) +xchk_btree_process_error( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level, + int *error) { - return __xfs_scrub_btree_process_error(sc, cur, level, error, + return __xchk_btree_process_error(sc, cur, level, error, XFS_SCRUB_OFLAG_CORRUPT, __return_address); } bool -xfs_scrub_btree_xref_process_error( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level, - int *error) +xchk_btree_xref_process_error( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level, + int *error) { - return __xfs_scrub_btree_process_error(sc, cur, level, error, + return __xchk_btree_process_error(sc, cur, level, error, XFS_SCRUB_OFLAG_XFAIL, __return_address); } /* Record btree block corruption. */ static void -__xfs_scrub_btree_set_corrupt( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level, - __u32 errflag, - void *ret_ip) +__xchk_btree_set_corrupt( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level, + __u32 errflag, + void *ret_ip) { sc->sm->sm_flags |= errflag; if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - trace_xfs_scrub_ifork_btree_error(sc, cur, level, + trace_xchk_ifork_btree_error(sc, cur, level, ret_ip); else - trace_xfs_scrub_btree_error(sc, cur, level, + trace_xchk_btree_error(sc, cur, level, ret_ip); } void -xfs_scrub_btree_set_corrupt( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level) +xchk_btree_set_corrupt( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level) { - __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, + __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, __return_address); } void -xfs_scrub_btree_xref_set_corrupt( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - int level) +xchk_btree_xref_set_corrupt( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level) { - __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, + __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, __return_address); } @@ -129,8 +129,8 @@ xfs_scrub_btree_xref_set_corrupt( * keys. */ STATIC void -xfs_scrub_btree_rec( - struct xfs_scrub_btree *bs) +xchk_btree_rec( + struct xchk_btree *bs) { struct xfs_btree_cur *cur = bs->cur; union xfs_btree_rec *rec; @@ -144,11 +144,11 @@ xfs_scrub_btree_rec( block = xfs_btree_get_block(cur, 0, &bp); rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); - trace_xfs_scrub_btree_rec(bs->sc, cur, 0); + trace_xchk_btree_rec(bs->sc, cur, 0); /* If this isn't the first record, are they in order? */ if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) - xfs_scrub_btree_set_corrupt(bs->sc, cur, 0); + xchk_btree_set_corrupt(bs->sc, cur, 0); bs->firstrec = false; memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); @@ -160,7 +160,7 @@ xfs_scrub_btree_rec( keyblock = xfs_btree_get_block(cur, 1, &bp); keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + xchk_btree_set_corrupt(bs->sc, cur, 1); if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) return; @@ -169,7 +169,7 @@ xfs_scrub_btree_rec( cur->bc_ops->init_high_key_from_rec(&hkey, rec); keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + xchk_btree_set_corrupt(bs->sc, cur, 1); } /* @@ -177,8 +177,8 @@ xfs_scrub_btree_rec( * keys. */ STATIC void -xfs_scrub_btree_key( - struct xfs_scrub_btree *bs, +xchk_btree_key( + struct xchk_btree *bs, int level) { struct xfs_btree_cur *cur = bs->cur; @@ -191,12 +191,12 @@ xfs_scrub_btree_key( block = xfs_btree_get_block(cur, level, &bp); key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); - trace_xfs_scrub_btree_key(bs->sc, cur, level); + trace_xchk_btree_key(bs->sc, cur, level); /* If this isn't the first key, are they in order? */ if (!bs->firstkey[level] && !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); bs->firstkey[level] = false; memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); @@ -207,7 +207,7 @@ xfs_scrub_btree_key( keyblock = xfs_btree_get_block(cur, level + 1, &bp); keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) return; @@ -216,7 +216,7 @@ xfs_scrub_btree_key( key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); } /* @@ -224,12 +224,12 @@ xfs_scrub_btree_key( * Callers do not need to set the corrupt flag. */ static bool -xfs_scrub_btree_ptr_ok( - struct xfs_scrub_btree *bs, - int level, - union xfs_btree_ptr *ptr) +xchk_btree_ptr_ok( + struct xchk_btree *bs, + int level, + union xfs_btree_ptr *ptr) { - bool res; + bool res; /* A btree rooted in an inode has no block pointer to the root. */ if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && @@ -242,29 +242,29 @@ xfs_scrub_btree_ptr_ok( else res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level); if (!res) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); + xchk_btree_set_corrupt(bs->sc, bs->cur, level); return res; } /* Check that a btree block's sibling matches what we expect it. */ STATIC int -xfs_scrub_btree_block_check_sibling( - struct xfs_scrub_btree *bs, - int level, - int direction, - union xfs_btree_ptr *sibling) +xchk_btree_block_check_sibling( + struct xchk_btree *bs, + int level, + int direction, + union xfs_btree_ptr *sibling) { - struct xfs_btree_cur *cur = bs->cur; - struct xfs_btree_block *pblock; - struct xfs_buf *pbp; - struct xfs_btree_cur *ncur = NULL; - union xfs_btree_ptr *pp; - int success; - int error; + struct xfs_btree_cur *cur = bs->cur; + struct xfs_btree_block *pblock; + struct xfs_buf *pbp; + struct xfs_btree_cur *ncur = NULL; + union xfs_btree_ptr *pp; + int success; + int error; error = xfs_btree_dup_cursor(cur, &ncur); - if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) || + if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) || !ncur) return error; @@ -278,7 +278,7 @@ xfs_scrub_btree_block_check_sibling( else error = xfs_btree_decrement(ncur, level + 1, &success); if (error == 0 && success) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); error = 0; goto out; } @@ -288,23 +288,23 @@ xfs_scrub_btree_block_check_sibling( error = xfs_btree_increment(ncur, level + 1, &success); else error = xfs_btree_decrement(ncur, level + 1, &success); - if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error)) + if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error)) goto out; if (!success) { - xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1); + xchk_btree_set_corrupt(bs->sc, cur, level + 1); goto out; } /* Compare upper level pointer to sibling pointer. */ pblock = xfs_btree_get_block(ncur, level + 1, &pbp); pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); - if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) + if (!xchk_btree_ptr_ok(bs, level + 1, pp)) goto out; if (pbp) - xfs_scrub_buffer_recheck(bs->sc, pbp); + xchk_buffer_recheck(bs->sc, pbp); if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); out: xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); return error; @@ -312,15 +312,15 @@ out: /* Check the siblings of a btree block. */ STATIC int -xfs_scrub_btree_block_check_siblings( - struct xfs_scrub_btree *bs, - struct xfs_btree_block *block) +xchk_btree_block_check_siblings( + struct xchk_btree *bs, + struct xfs_btree_block *block) { - struct xfs_btree_cur *cur = bs->cur; - union xfs_btree_ptr leftsib; - union xfs_btree_ptr rightsib; - int level; - int error = 0; + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_ptr leftsib; + union xfs_btree_ptr rightsib; + int level; + int error = 0; xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB); xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB); @@ -330,7 +330,7 @@ xfs_scrub_btree_block_check_siblings( if (level == cur->bc_nlevels - 1) { if (!xfs_btree_ptr_is_null(cur, &leftsib) || !xfs_btree_ptr_is_null(cur, &rightsib)) - xfs_scrub_btree_set_corrupt(bs->sc, cur, level); + xchk_btree_set_corrupt(bs->sc, cur, level); goto out; } @@ -339,10 +339,10 @@ xfs_scrub_btree_block_check_siblings( * parent level pointers? * (These function absorbs error codes for us.) */ - error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib); + error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib); if (error) return error; - error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib); + error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib); if (error) return error; out: @@ -360,16 +360,16 @@ struct check_owner { * an rmap record for it. */ STATIC int -xfs_scrub_btree_check_block_owner( - struct xfs_scrub_btree *bs, - int level, - xfs_daddr_t daddr) +xchk_btree_check_block_owner( + struct xchk_btree *bs, + int level, + xfs_daddr_t daddr) { - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_btnum_t btnum; - bool init_sa; - int error = 0; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_btnum_t btnum; + bool init_sa; + int error = 0; if (!bs->cur) return 0; @@ -380,13 +380,13 @@ xfs_scrub_btree_check_block_owner( init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; if (init_sa) { - error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); - if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, + error = xchk_ag_init(bs->sc, agno, &bs->sc->sa); + if (!xchk_btree_xref_process_error(bs->sc, bs->cur, level, &error)) return error; } - xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); + xchk_xref_is_used_space(bs->sc, agbno, 1); /* * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we * have to nullify it (to shut down further block owner checks) if @@ -395,25 +395,25 @@ xfs_scrub_btree_check_block_owner( if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) bs->cur = NULL; - xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); + xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo); if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) bs->cur = NULL; if (init_sa) - xfs_scrub_ag_free(bs->sc, &bs->sc->sa); + xchk_ag_free(bs->sc, &bs->sc->sa); return error; } /* Check the owner of a btree block. */ STATIC int -xfs_scrub_btree_check_owner( - struct xfs_scrub_btree *bs, - int level, - struct xfs_buf *bp) +xchk_btree_check_owner( + struct xchk_btree *bs, + int level, + struct xfs_buf *bp) { - struct xfs_btree_cur *cur = bs->cur; - struct check_owner *co; + struct xfs_btree_cur *cur = bs->cur; + struct check_owner *co; if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) return 0; @@ -437,7 +437,7 @@ xfs_scrub_btree_check_owner( return 0; } - return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); + return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); } /* @@ -445,8 +445,8 @@ xfs_scrub_btree_check_owner( * special blocks that don't require that. */ STATIC void -xfs_scrub_btree_check_minrecs( - struct xfs_scrub_btree *bs, +xchk_btree_check_minrecs( + struct xchk_btree *bs, int level, struct xfs_btree_block *block) { @@ -475,7 +475,7 @@ xfs_scrub_btree_check_minrecs( if (level >= ok_level) return; - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); + xchk_btree_set_corrupt(bs->sc, bs->cur, level); } /* @@ -483,21 +483,21 @@ xfs_scrub_btree_check_minrecs( * and buffer pointers (if applicable) if they're ok to use. */ STATIC int -xfs_scrub_btree_get_block( - struct xfs_scrub_btree *bs, - int level, - union xfs_btree_ptr *pp, - struct xfs_btree_block **pblock, - struct xfs_buf **pbp) +xchk_btree_get_block( + struct xchk_btree *bs, + int level, + union xfs_btree_ptr *pp, + struct xfs_btree_block **pblock, + struct xfs_buf **pbp) { - void *failed_at; - int error; + xfs_failaddr_t failed_at; + int error; *pblock = NULL; *pbp = NULL; error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock); - if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) || + if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) || !*pblock) return error; @@ -509,19 +509,19 @@ xfs_scrub_btree_get_block( failed_at = __xfs_btree_check_sblock(bs->cur, *pblock, level, *pbp); if (failed_at) { - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); + xchk_btree_set_corrupt(bs->sc, bs->cur, level); return 0; } if (*pbp) - xfs_scrub_buffer_recheck(bs->sc, *pbp); + xchk_buffer_recheck(bs->sc, *pbp); - xfs_scrub_btree_check_minrecs(bs, level, *pblock); + xchk_btree_check_minrecs(bs, level, *pblock); /* * Check the block's owner; this function absorbs error codes * for us. */ - error = xfs_scrub_btree_check_owner(bs, level, *pbp); + error = xchk_btree_check_owner(bs, level, *pbp); if (error) return error; @@ -529,7 +529,7 @@ xfs_scrub_btree_get_block( * Check the block's siblings; this function absorbs error codes * for us. */ - return xfs_scrub_btree_block_check_siblings(bs, *pblock); + return xchk_btree_block_check_siblings(bs, *pblock); } /* @@ -537,18 +537,18 @@ xfs_scrub_btree_get_block( * in the parent block. */ STATIC void -xfs_scrub_btree_block_keys( - struct xfs_scrub_btree *bs, - int level, - struct xfs_btree_block *block) +xchk_btree_block_keys( + struct xchk_btree *bs, + int level, + struct xfs_btree_block *block) { - union xfs_btree_key block_keys; - struct xfs_btree_cur *cur = bs->cur; - union xfs_btree_key *high_bk; - union xfs_btree_key *parent_keys; - union xfs_btree_key *high_pk; - struct xfs_btree_block *parent_block; - struct xfs_buf *bp; + union xfs_btree_key block_keys; + struct xfs_btree_cur *cur = bs->cur; + union xfs_btree_key *high_bk; + union xfs_btree_key *parent_keys; + union xfs_btree_key *high_pk; + struct xfs_btree_block *parent_block; + struct xfs_buf *bp; if (level >= cur->bc_nlevels - 1) return; @@ -562,7 +562,7 @@ xfs_scrub_btree_block_keys( parent_block); if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + xchk_btree_set_corrupt(bs->sc, cur, 1); if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) return; @@ -573,7 +573,7 @@ xfs_scrub_btree_block_keys( parent_block); if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) - xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); + xchk_btree_set_corrupt(bs->sc, cur, 1); } /* @@ -582,24 +582,24 @@ xfs_scrub_btree_block_keys( * so that the caller can verify individual records. */ int -xfs_scrub_btree( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - xfs_scrub_btree_rec_fn scrub_fn, - struct xfs_owner_info *oinfo, - void *private) +xchk_btree( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + xchk_btree_rec_fn scrub_fn, + struct xfs_owner_info *oinfo, + void *private) { - struct xfs_scrub_btree bs = { NULL }; - union xfs_btree_ptr ptr; - union xfs_btree_ptr *pp; - union xfs_btree_rec *recp; - struct xfs_btree_block *block; - int level; - struct xfs_buf *bp; - struct check_owner *co; - struct check_owner *n; - int i; - int error = 0; + struct xchk_btree bs = { NULL }; + union xfs_btree_ptr ptr; + union xfs_btree_ptr *pp; + union xfs_btree_rec *recp; + struct xfs_btree_block *block; + int level; + struct xfs_buf *bp; + struct check_owner *co; + struct check_owner *n; + int i; + int error = 0; /* Initialize scrub state */ bs.cur = cur; @@ -614,7 +614,7 @@ xfs_scrub_btree( /* Don't try to check a tree with a height we can't handle. */ if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { - xfs_scrub_btree_set_corrupt(sc, cur, 0); + xchk_btree_set_corrupt(sc, cur, 0); goto out; } @@ -624,9 +624,9 @@ xfs_scrub_btree( */ level = cur->bc_nlevels - 1; cur->bc_ops->init_ptr_from_cur(cur, &ptr); - if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) goto out; - error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp); + error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp); if (error || !block) goto out; @@ -639,7 +639,7 @@ xfs_scrub_btree( /* End of leaf, pop back towards the root. */ if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xfs_scrub_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(&bs, level, block); if (level < cur->bc_nlevels - 1) cur->bc_ptrs[level + 1]++; level++; @@ -647,14 +647,14 @@ xfs_scrub_btree( } /* Records in order for scrub? */ - xfs_scrub_btree_rec(&bs); + xchk_btree_rec(&bs); /* Call out to the record checker. */ recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); error = bs.scrub_rec(&bs, recp); if (error) break; - if (xfs_scrub_should_terminate(sc, &error) || + if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; @@ -664,7 +664,7 @@ xfs_scrub_btree( /* End of node, pop back towards the root. */ if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xfs_scrub_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(&bs, level, block); if (level < cur->bc_nlevels - 1) cur->bc_ptrs[level + 1]++; level++; @@ -672,16 +672,16 @@ xfs_scrub_btree( } /* Keys in order for scrub? */ - xfs_scrub_btree_key(&bs, level); + xchk_btree_key(&bs, level); /* Drill another level deeper. */ pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); - if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) { + if (!xchk_btree_ptr_ok(&bs, level, pp)) { cur->bc_ptrs[level]++; continue; } level--; - error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp); + error = xchk_btree_get_block(&bs, level, pp, &block, &bp); if (error || !block) goto out; @@ -692,7 +692,7 @@ out: /* Process deferred owner checks on btree blocks. */ list_for_each_entry_safe(co, n, &bs.to_check, list) { if (!error && bs.cur) - error = xfs_scrub_btree_check_block_owner(&bs, + error = xchk_btree_check_block_owner(&bs, co->level, co->daddr); list_del(&co->list); kmem_free(co); diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index 956627500f2c..aada763cd006 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -9,44 +9,43 @@ /* btree scrub */ /* Check for btree operation errors. */ -bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, +bool xchk_btree_process_error(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, int *error); /* Check for btree xref operation errors. */ -bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, int level, - int *error); +bool xchk_btree_xref_process_error(struct xfs_scrub *sc, + struct xfs_btree_cur *cur, int level, int *error); /* Check for btree corruption. */ -void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, +void xchk_btree_set_corrupt(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level); /* Check for btree xref discrepancies. */ -void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc, +void xchk_btree_xref_set_corrupt(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level); -struct xfs_scrub_btree; -typedef int (*xfs_scrub_btree_rec_fn)( - struct xfs_scrub_btree *bs, +struct xchk_btree; +typedef int (*xchk_btree_rec_fn)( + struct xchk_btree *bs, union xfs_btree_rec *rec); -struct xfs_scrub_btree { +struct xchk_btree { /* caller-provided scrub state */ - struct xfs_scrub_context *sc; - struct xfs_btree_cur *cur; - xfs_scrub_btree_rec_fn scrub_rec; - struct xfs_owner_info *oinfo; - void *private; + struct xfs_scrub *sc; + struct xfs_btree_cur *cur; + xchk_btree_rec_fn scrub_rec; + struct xfs_owner_info *oinfo; + void *private; /* internal scrub state */ - union xfs_btree_rec lastrec; - bool firstrec; - union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; - bool firstkey[XFS_BTREE_MAXLEVELS]; - struct list_head to_check; + union xfs_btree_rec lastrec; + bool firstrec; + union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; + bool firstkey[XFS_BTREE_MAXLEVELS]; + struct list_head to_check; }; -int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, - xfs_scrub_btree_rec_fn scrub_fn, - struct xfs_owner_info *oinfo, void *private); +int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur, + xchk_btree_rec_fn scrub_fn, struct xfs_owner_info *oinfo, + void *private); #endif /* __XFS_SCRUB_BTREE_H__ */ diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 70e70c69f83f..346b02abccf7 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -68,20 +68,20 @@ /* Check for operational errors. */ static bool -__xfs_scrub_process_error( - struct xfs_scrub_context *sc, - xfs_agnumber_t agno, - xfs_agblock_t bno, - int *error, - __u32 errflag, - void *ret_ip) +__xchk_process_error( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: return true; case -EDEADLOCK: /* Used to restart an op with deadlock avoidance. */ - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: @@ -90,7 +90,7 @@ __xfs_scrub_process_error( *error = 0; /* fall through */ default: - trace_xfs_scrub_op_error(sc, agno, bno, *error, + trace_xchk_op_error(sc, agno, bno, *error, ret_ip); break; } @@ -98,43 +98,43 @@ __xfs_scrub_process_error( } bool -xfs_scrub_process_error( - struct xfs_scrub_context *sc, - xfs_agnumber_t agno, - xfs_agblock_t bno, - int *error) +xchk_process_error( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) { - return __xfs_scrub_process_error(sc, agno, bno, error, + return __xchk_process_error(sc, agno, bno, error, XFS_SCRUB_OFLAG_CORRUPT, __return_address); } bool -xfs_scrub_xref_process_error( - struct xfs_scrub_context *sc, - xfs_agnumber_t agno, - xfs_agblock_t bno, - int *error) +xchk_xref_process_error( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + xfs_agblock_t bno, + int *error) { - return __xfs_scrub_process_error(sc, agno, bno, error, + return __xchk_process_error(sc, agno, bno, error, XFS_SCRUB_OFLAG_XFAIL, __return_address); } /* Check for operational errors for a file offset. */ static bool -__xfs_scrub_fblock_process_error( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset, - int *error, - __u32 errflag, - void *ret_ip) +__xchk_fblock_process_error( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset, + int *error, + __u32 errflag, + void *ret_ip) { switch (*error) { case 0: return true; case -EDEADLOCK: /* Used to restart an op with deadlock avoidance. */ - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: @@ -143,7 +143,7 @@ __xfs_scrub_fblock_process_error( *error = 0; /* fall through */ default: - trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, + trace_xchk_file_op_error(sc, whichfork, offset, *error, ret_ip); break; } @@ -151,24 +151,24 @@ __xfs_scrub_fblock_process_error( } bool -xfs_scrub_fblock_process_error( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset, - int *error) +xchk_fblock_process_error( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) { - return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + return __xchk_fblock_process_error(sc, whichfork, offset, error, XFS_SCRUB_OFLAG_CORRUPT, __return_address); } bool -xfs_scrub_fblock_xref_process_error( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset, - int *error) +xchk_fblock_xref_process_error( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset, + int *error) { - return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error, + return __xchk_fblock_process_error(sc, whichfork, offset, error, XFS_SCRUB_OFLAG_XFAIL, __return_address); } @@ -186,12 +186,12 @@ xfs_scrub_fblock_xref_process_error( /* Record a block which could be optimized. */ void -xfs_scrub_block_set_preen( - struct xfs_scrub_context *sc, - struct xfs_buf *bp) +xchk_block_set_preen( + struct xfs_scrub *sc, + struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; - trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address); + trace_xchk_block_preen(sc, bp->b_bn, __return_address); } /* @@ -200,32 +200,32 @@ xfs_scrub_block_set_preen( * the block location of the inode record itself. */ void -xfs_scrub_ino_set_preen( - struct xfs_scrub_context *sc, - xfs_ino_t ino) +xchk_ino_set_preen( + struct xfs_scrub *sc, + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; - trace_xfs_scrub_ino_preen(sc, ino, __return_address); + trace_xchk_ino_preen(sc, ino, __return_address); } /* Record a corrupt block. */ void -xfs_scrub_block_set_corrupt( - struct xfs_scrub_context *sc, - struct xfs_buf *bp) +xchk_block_set_corrupt( + struct xfs_scrub *sc, + struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); + trace_xchk_block_error(sc, bp->b_bn, __return_address); } /* Record a corruption while cross-referencing. */ void -xfs_scrub_block_xref_set_corrupt( - struct xfs_scrub_context *sc, - struct xfs_buf *bp) +xchk_block_xref_set_corrupt( + struct xfs_scrub *sc, + struct xfs_buf *bp) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; - trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); + trace_xchk_block_error(sc, bp->b_bn, __return_address); } /* @@ -234,44 +234,44 @@ xfs_scrub_block_xref_set_corrupt( * inode record itself. */ void -xfs_scrub_ino_set_corrupt( - struct xfs_scrub_context *sc, - xfs_ino_t ino) +xchk_ino_set_corrupt( + struct xfs_scrub *sc, + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_ino_error(sc, ino, __return_address); + trace_xchk_ino_error(sc, ino, __return_address); } /* Record a corruption while cross-referencing with an inode. */ void -xfs_scrub_ino_xref_set_corrupt( - struct xfs_scrub_context *sc, - xfs_ino_t ino) +xchk_ino_xref_set_corrupt( + struct xfs_scrub *sc, + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; - trace_xfs_scrub_ino_error(sc, ino, __return_address); + trace_xchk_ino_error(sc, ino, __return_address); } /* Record corruption in a block indexed by a file fork. */ void -xfs_scrub_fblock_set_corrupt( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset) +xchk_fblock_set_corrupt( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); + trace_xchk_fblock_error(sc, whichfork, offset, __return_address); } /* Record a corruption while cross-referencing a fork block. */ void -xfs_scrub_fblock_xref_set_corrupt( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset) +xchk_fblock_xref_set_corrupt( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; - trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); + trace_xchk_fblock_error(sc, whichfork, offset, __return_address); } /* @@ -279,32 +279,32 @@ xfs_scrub_fblock_xref_set_corrupt( * incorrect. */ void -xfs_scrub_ino_set_warning( - struct xfs_scrub_context *sc, - xfs_ino_t ino) +xchk_ino_set_warning( + struct xfs_scrub *sc, + xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; - trace_xfs_scrub_ino_warning(sc, ino, __return_address); + trace_xchk_ino_warning(sc, ino, __return_address); } /* Warn about a block indexed by a file fork that needs review. */ void -xfs_scrub_fblock_set_warning( - struct xfs_scrub_context *sc, - int whichfork, - xfs_fileoff_t offset) +xchk_fblock_set_warning( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; - trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address); + trace_xchk_fblock_warning(sc, whichfork, offset, __return_address); } /* Signal an incomplete scrub. */ void -xfs_scrub_set_incomplete( - struct xfs_scrub_context *sc) +xchk_set_incomplete( + struct xfs_scrub *sc) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; - trace_xfs_scrub_incomplete(sc, __return_address); + trace_xchk_incomplete(sc, __return_address); } /* @@ -312,20 +312,20 @@ xfs_scrub_set_incomplete( * at least according to the reverse mapping data. */ -struct xfs_scrub_rmap_ownedby_info { +struct xchk_rmap_ownedby_info { struct xfs_owner_info *oinfo; xfs_filblks_t *blocks; }; STATIC int -xfs_scrub_count_rmap_ownedby_irec( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *rec, - void *priv) +xchk_count_rmap_ownedby_irec( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) { - struct xfs_scrub_rmap_ownedby_info *sroi = priv; - bool irec_attr; - bool oinfo_attr; + struct xchk_rmap_ownedby_info *sroi = priv; + bool irec_attr; + bool oinfo_attr; irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; @@ -344,19 +344,19 @@ xfs_scrub_count_rmap_ownedby_irec( * The caller should pass us an rmapbt cursor. */ int -xfs_scrub_count_rmap_ownedby_ag( - struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - struct xfs_owner_info *oinfo, - xfs_filblks_t *blocks) +xchk_count_rmap_ownedby_ag( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, + xfs_filblks_t *blocks) { - struct xfs_scrub_rmap_ownedby_info sroi; + struct xchk_rmap_ownedby_info sroi; sroi.oinfo = oinfo; *blocks = 0; sroi.blocks = blocks; - return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec, + return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec, &sroi); } @@ -371,8 +371,8 @@ xfs_scrub_count_rmap_ownedby_ag( /* Decide if we want to return an AG header read failure. */ static inline bool want_ag_read_header_failure( - struct xfs_scrub_context *sc, - unsigned int type) + struct xfs_scrub *sc, + unsigned int type) { /* Return all AG header read failures when scanning btrees. */ if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && @@ -392,20 +392,20 @@ want_ag_read_header_failure( /* * Grab all the headers for an AG. * - * The headers should be released by xfs_scrub_ag_free, but as a fail + * The headers should be released by xchk_ag_free, but as a fail * safe we attach all the buffers we grab to the scrub transaction so * they'll all be freed when we cancel it. */ int -xfs_scrub_ag_read_headers( - struct xfs_scrub_context *sc, - xfs_agnumber_t agno, - struct xfs_buf **agi, - struct xfs_buf **agf, - struct xfs_buf **agfl) -{ - struct xfs_mount *mp = sc->mp; - int error; +xchk_ag_read_headers( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xfs_buf **agi, + struct xfs_buf **agf, + struct xfs_buf **agfl) +{ + struct xfs_mount *mp = sc->mp; + int error; error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) @@ -425,8 +425,8 @@ out: /* Release all the AG btree cursors. */ void -xfs_scrub_ag_btcur_free( - struct xfs_scrub_ag *sa) +xchk_ag_btcur_free( + struct xchk_ag *sa) { if (sa->refc_cur) xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); @@ -451,12 +451,12 @@ xfs_scrub_ag_btcur_free( /* Initialize all the btree cursors for an AG. */ int -xfs_scrub_ag_btcur_init( - struct xfs_scrub_context *sc, - struct xfs_scrub_ag *sa) +xchk_ag_btcur_init( + struct xfs_scrub *sc, + struct xchk_ag *sa) { - struct xfs_mount *mp = sc->mp; - xfs_agnumber_t agno = sa->agno; + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sa->agno; if (sa->agf_bp) { /* Set up a bnobt cursor for cross-referencing. */ @@ -499,7 +499,7 @@ xfs_scrub_ag_btcur_init( /* Set up a refcountbt cursor for cross-referencing. */ if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) { sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, - sa->agf_bp, agno, NULL); + sa->agf_bp, agno); if (!sa->refc_cur) goto err; } @@ -511,11 +511,11 @@ err: /* Release the AG header context and btree cursors. */ void -xfs_scrub_ag_free( - struct xfs_scrub_context *sc, - struct xfs_scrub_ag *sa) +xchk_ag_free( + struct xfs_scrub *sc, + struct xchk_ag *sa) { - xfs_scrub_ag_btcur_free(sa); + xchk_ag_btcur_free(sa); if (sa->agfl_bp) { xfs_trans_brelse(sc->tp, sa->agfl_bp); sa->agfl_bp = NULL; @@ -543,30 +543,30 @@ xfs_scrub_ag_free( * transaction ourselves. */ int -xfs_scrub_ag_init( - struct xfs_scrub_context *sc, - xfs_agnumber_t agno, - struct xfs_scrub_ag *sa) +xchk_ag_init( + struct xfs_scrub *sc, + xfs_agnumber_t agno, + struct xchk_ag *sa) { - int error; + int error; sa->agno = agno; - error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp, + error = xchk_ag_read_headers(sc, agno, &sa->agi_bp, &sa->agf_bp, &sa->agfl_bp); if (error) return error; - return xfs_scrub_ag_btcur_init(sc, sa); + return xchk_ag_btcur_init(sc, sa); } /* * Grab the per-ag structure if we haven't already gotten it. Teardown of the - * xfs_scrub_ag will release it for us. + * xchk_ag will release it for us. */ void -xfs_scrub_perag_get( +xchk_perag_get( struct xfs_mount *mp, - struct xfs_scrub_ag *sa) + struct xchk_ag *sa) { if (!sa->pag) sa->pag = xfs_perag_get(mp, sa->agno); @@ -585,9 +585,9 @@ xfs_scrub_perag_get( * the metadata object. */ int -xfs_scrub_trans_alloc( - struct xfs_scrub_context *sc, - uint resblks) +xchk_trans_alloc( + struct xfs_scrub *sc, + uint resblks) { if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, @@ -598,25 +598,25 @@ xfs_scrub_trans_alloc( /* Set us up with a transaction and an empty context. */ int -xfs_scrub_setup_fs( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_fs( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - uint resblks; + uint resblks; - resblks = xfs_repair_calc_ag_resblks(sc); - return xfs_scrub_trans_alloc(sc, resblks); + resblks = xrep_calc_ag_resblks(sc); + return xchk_trans_alloc(sc, resblks); } /* Set us up with AG headers and btree cursors. */ int -xfs_scrub_setup_ag_btree( - struct xfs_scrub_context *sc, - struct xfs_inode *ip, - bool force_log) +xchk_setup_ag_btree( + struct xfs_scrub *sc, + struct xfs_inode *ip, + bool force_log) { - struct xfs_mount *mp = sc->mp; - int error; + struct xfs_mount *mp = sc->mp; + int error; /* * If the caller asks us to checkpont the log, do so. This @@ -625,21 +625,21 @@ xfs_scrub_setup_ag_btree( * document why they need to do so. */ if (force_log) { - error = xfs_scrub_checkpoint_log(mp); + error = xchk_checkpoint_log(mp); if (error) return error; } - error = xfs_scrub_setup_fs(sc, ip); + error = xchk_setup_fs(sc, ip); if (error) return error; - return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa); + return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa); } /* Push everything out of the log onto disk. */ int -xfs_scrub_checkpoint_log( +xchk_checkpoint_log( struct xfs_mount *mp) { int error; @@ -657,14 +657,14 @@ xfs_scrub_checkpoint_log( * The inode is not locked. */ int -xfs_scrub_get_inode( - struct xfs_scrub_context *sc, - struct xfs_inode *ip_in) +xchk_get_inode( + struct xfs_scrub *sc, + struct xfs_inode *ip_in) { - struct xfs_imap imap; - struct xfs_mount *mp = sc->mp; - struct xfs_inode *ip = NULL; - int error; + struct xfs_imap imap; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = NULL; + int error; /* We want to scan the inode we already had opened. */ if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { @@ -704,14 +704,14 @@ xfs_scrub_get_inode( error = -EFSCORRUPTED; /* fall through */ default: - trace_xfs_scrub_op_error(sc, + trace_xchk_op_error(sc, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error, __return_address); return error; } if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { - iput(VFS_I(ip)); + xfs_irele(ip); return -ENOENT; } @@ -721,21 +721,21 @@ xfs_scrub_get_inode( /* Set us up to scrub a file's contents. */ int -xfs_scrub_setup_inode_contents( - struct xfs_scrub_context *sc, - struct xfs_inode *ip, - unsigned int resblks) +xchk_setup_inode_contents( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int resblks) { - int error; + int error; - error = xfs_scrub_get_inode(sc, ip); + error = xchk_get_inode(sc, ip); if (error) return error; /* Got the inode, lock it and we're ready to go. */ sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_ilock(sc->ip, sc->ilock_flags); - error = xfs_scrub_trans_alloc(sc, resblks); + error = xchk_trans_alloc(sc, resblks); if (error) goto out; sc->ilock_flags |= XFS_ILOCK_EXCL; @@ -752,13 +752,13 @@ out: * the cursor and skip the check. */ bool -xfs_scrub_should_check_xref( - struct xfs_scrub_context *sc, - int *error, - struct xfs_btree_cur **curpp) +xchk_should_check_xref( + struct xfs_scrub *sc, + int *error, + struct xfs_btree_cur **curpp) { /* No point in xref if we already know we're corrupt. */ - if (xfs_scrub_skip_xref(sc->sm)) + if (xchk_skip_xref(sc->sm)) return false; if (*error == 0) @@ -775,7 +775,7 @@ xfs_scrub_should_check_xref( } sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; - trace_xfs_scrub_xref_error(sc, *error, __return_address); + trace_xchk_xref_error(sc, *error, __return_address); /* * Errors encountered during cross-referencing with another @@ -787,25 +787,25 @@ xfs_scrub_should_check_xref( /* Run the structure verifiers on in-memory buffers to detect bad memory. */ void -xfs_scrub_buffer_recheck( - struct xfs_scrub_context *sc, - struct xfs_buf *bp) +xchk_buffer_recheck( + struct xfs_scrub *sc, + struct xfs_buf *bp) { - xfs_failaddr_t fa; + xfs_failaddr_t fa; if (bp->b_ops == NULL) { - xfs_scrub_block_set_corrupt(sc, bp); + xchk_block_set_corrupt(sc, bp); return; } if (bp->b_ops->verify_struct == NULL) { - xfs_scrub_set_incomplete(sc); + xchk_set_incomplete(sc); return; } fa = bp->b_ops->verify_struct(bp); if (!fa) return; sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_block_error(sc, bp->b_bn, fa); + trace_xchk_block_error(sc, bp->b_bn, fa); } /* @@ -813,38 +813,38 @@ xfs_scrub_buffer_recheck( * pointed to by sc->ip and the ILOCK must be held. */ int -xfs_scrub_metadata_inode_forks( - struct xfs_scrub_context *sc) +xchk_metadata_inode_forks( + struct xfs_scrub *sc) { - __u32 smtype; - bool shared; - int error; + __u32 smtype; + bool shared; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return 0; /* Metadata inodes don't live on the rt device. */ if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); return 0; } /* They should never participate in reflink. */ if (xfs_is_reflink_inode(sc->ip)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); return 0; } /* They also should never have extended attributes. */ if (xfs_inode_hasattr(sc->ip)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); return 0; } /* Invoke the data fork scrubber. */ smtype = sc->sm->sm_type; sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD; - error = xfs_scrub_bmap_data(sc); + error = xchk_bmap_data(sc); sc->sm->sm_type = smtype; if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return error; @@ -853,11 +853,11 @@ xfs_scrub_metadata_inode_forks( if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, &shared); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) return error; if (shared) - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); } return error; @@ -871,7 +871,7 @@ xfs_scrub_metadata_inode_forks( * we can't. */ int -xfs_scrub_ilock_inverted( +xchk_ilock_inverted( struct xfs_inode *ip, uint lock_mode) { diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 2172bd5361e2..2d4324d12f9a 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -12,8 +12,8 @@ * Note that we're careful not to make any judgements about *error. */ static inline bool -xfs_scrub_should_terminate( - struct xfs_scrub_context *sc, +xchk_should_terminate( + struct xfs_scrub *sc, int *error) { if (fatal_signal_pending(current)) { @@ -24,121 +24,118 @@ xfs_scrub_should_terminate( return false; } -int xfs_scrub_trans_alloc(struct xfs_scrub_context *sc, uint resblks); -bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno, +int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks); +bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno, xfs_agblock_t bno, int *error); -bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, +bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset, int *error); -bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc, +bool xchk_xref_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno, xfs_agblock_t bno, int *error); -bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc, +bool xchk_fblock_xref_process_error(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset, int *error); -void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, +void xchk_block_set_preen(struct xfs_scrub *sc, struct xfs_buf *bp); -void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino); +void xchk_ino_set_preen(struct xfs_scrub *sc, xfs_ino_t ino); -void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc, +void xchk_block_set_corrupt(struct xfs_scrub *sc, struct xfs_buf *bp); -void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino); -void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, +void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino); +void xchk_fblock_set_corrupt(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset); -void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc, +void xchk_block_xref_set_corrupt(struct xfs_scrub *sc, struct xfs_buf *bp); -void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, +void xchk_ino_xref_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino); -void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc, +void xchk_fblock_xref_set_corrupt(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset); -void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino); -void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, +void xchk_ino_set_warning(struct xfs_scrub *sc, xfs_ino_t ino); +void xchk_fblock_set_warning(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset); -void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); -int xfs_scrub_checkpoint_log(struct xfs_mount *mp); +void xchk_set_incomplete(struct xfs_scrub *sc); +int xchk_checkpoint_log(struct xfs_mount *mp); /* Are we set up for a cross-referencing check? */ -bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error, +bool xchk_should_check_xref(struct xfs_scrub *sc, int *error, struct xfs_btree_cur **curpp); /* Setup functions */ -int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, +int xchk_setup_fs(struct xfs_scrub *sc, struct xfs_inode *ip); +int xchk_setup_ag_allocbt(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, +int xchk_setup_ag_iallocbt(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc, +int xchk_setup_ag_rmapbt(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc, +int xchk_setup_ag_refcountbt(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_inode(struct xfs_scrub_context *sc, +int xchk_setup_inode(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc, +int xchk_setup_inode_bmap(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc, +int xchk_setup_inode_bmap_data(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_directory(struct xfs_scrub_context *sc, +int xchk_setup_directory(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc, +int xchk_setup_xattr(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc, +int xchk_setup_symlink(struct xfs_scrub *sc, struct xfs_inode *ip); -int xfs_scrub_setup_parent(struct xfs_scrub_context *sc, +int xchk_setup_parent(struct xfs_scrub *sc, struct xfs_inode *ip); #ifdef CONFIG_XFS_RT -int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip); +int xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip); #else static inline int -xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip) +xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip) { return -ENOENT; } #endif #ifdef CONFIG_XFS_QUOTA -int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip); +int xchk_setup_quota(struct xfs_scrub *sc, struct xfs_inode *ip); #else static inline int -xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip) +xchk_setup_quota(struct xfs_scrub *sc, struct xfs_inode *ip) { return -ENOENT; } #endif -void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa); -int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno, - struct xfs_scrub_ag *sa); -void xfs_scrub_perag_get(struct xfs_mount *mp, struct xfs_scrub_ag *sa); -int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno, - struct xfs_buf **agi, struct xfs_buf **agf, - struct xfs_buf **agfl); -void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa); -int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc, - struct xfs_scrub_ag *sa); -int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc, - struct xfs_btree_cur *cur, - struct xfs_owner_info *oinfo, - xfs_filblks_t *blocks); +void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa); +int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno, + struct xchk_ag *sa); +void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa); +int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno, + struct xfs_buf **agi, struct xfs_buf **agf, + struct xfs_buf **agfl); +void xchk_ag_btcur_free(struct xchk_ag *sa); +int xchk_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa); +int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur, + struct xfs_owner_info *oinfo, xfs_filblks_t *blocks); -int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, - struct xfs_inode *ip, bool force_log); -int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); -int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, - struct xfs_inode *ip, unsigned int resblks); -void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp); +int xchk_setup_ag_btree(struct xfs_scrub *sc, struct xfs_inode *ip, + bool force_log); +int xchk_get_inode(struct xfs_scrub *sc, struct xfs_inode *ip_in); +int xchk_setup_inode_contents(struct xfs_scrub *sc, struct xfs_inode *ip, + unsigned int resblks); +void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp); /* * Don't bother cross-referencing if we already found corruption or cross * referencing discrepancies. */ -static inline bool xfs_scrub_skip_xref(struct xfs_scrub_metadata *sm) +static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) { return sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | XFS_SCRUB_OFLAG_XCORRUPT); } -int xfs_scrub_metadata_inode_forks(struct xfs_scrub_context *sc); -int xfs_scrub_ilock_inverted(struct xfs_inode *ip, uint lock_mode); +int xchk_metadata_inode_forks(struct xfs_scrub *sc); +int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode); #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index d700c4d4d4ef..f1260b4bfdee 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -35,12 +35,12 @@ * operational errors in common.c. */ bool -xfs_scrub_da_process_error( - struct xfs_scrub_da_btree *ds, - int level, - int *error) +xchk_da_process_error( + struct xchk_da_btree *ds, + int level, + int *error) { - struct xfs_scrub_context *sc = ds->sc; + struct xfs_scrub *sc = ds->sc; if (*error == 0) return true; @@ -48,7 +48,7 @@ xfs_scrub_da_process_error( switch (*error) { case -EDEADLOCK: /* Used to restart an op with deadlock avoidance. */ - trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); + trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: @@ -57,7 +57,7 @@ xfs_scrub_da_process_error( *error = 0; /* fall through */ default: - trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork, + trace_xchk_file_op_error(sc, ds->dargs.whichfork, xfs_dir2_da_to_db(ds->dargs.geo, ds->state->path.blk[level].blkno), *error, __return_address); @@ -71,15 +71,15 @@ xfs_scrub_da_process_error( * operational errors in common.c. */ void -xfs_scrub_da_set_corrupt( - struct xfs_scrub_da_btree *ds, - int level) +xchk_da_set_corrupt( + struct xchk_da_btree *ds, + int level) { - struct xfs_scrub_context *sc = ds->sc; + struct xfs_scrub *sc = ds->sc; sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork, + trace_xchk_fblock_error(sc, ds->dargs.whichfork, xfs_dir2_da_to_db(ds->dargs.geo, ds->state->path.blk[level].blkno), __return_address); @@ -87,14 +87,14 @@ xfs_scrub_da_set_corrupt( /* Find an entry at a certain level in a da btree. */ STATIC void * -xfs_scrub_da_btree_entry( - struct xfs_scrub_da_btree *ds, - int level, - int rec) +xchk_da_btree_entry( + struct xchk_da_btree *ds, + int level, + int rec) { - char *ents; - struct xfs_da_state_blk *blk; - void *baddr; + char *ents; + struct xfs_da_state_blk *blk; + void *baddr; /* Dispatch the entry finding function. */ blk = &ds->state->path.blk[level]; @@ -123,8 +123,8 @@ xfs_scrub_da_btree_entry( /* Scrub a da btree hash (key). */ int -xfs_scrub_da_btree_hash( - struct xfs_scrub_da_btree *ds, +xchk_da_btree_hash( + struct xchk_da_btree *ds, int level, __be32 *hashp) { @@ -136,7 +136,7 @@ xfs_scrub_da_btree_hash( /* Is this hash in order? */ hash = be32_to_cpu(*hashp); if (hash < ds->hashes[level]) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); ds->hashes[level] = hash; if (level == 0) @@ -144,10 +144,10 @@ xfs_scrub_da_btree_hash( /* Is this hash no larger than the parent hash? */ blks = ds->state->path.blk; - entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index); + entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index); parent_hash = be32_to_cpu(entry->hashval); if (parent_hash < hash) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); return 0; } @@ -157,13 +157,13 @@ xfs_scrub_da_btree_hash( * pointer. */ STATIC bool -xfs_scrub_da_btree_ptr_ok( - struct xfs_scrub_da_btree *ds, - int level, - xfs_dablk_t blkno) +xchk_da_btree_ptr_ok( + struct xchk_da_btree *ds, + int level, + xfs_dablk_t blkno) { if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); return false; } @@ -176,7 +176,7 @@ xfs_scrub_da_btree_ptr_ok( * leaf1, we must multiplex the verifiers. */ static void -xfs_scrub_da_btree_read_verify( +xchk_da_btree_read_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; @@ -198,7 +198,7 @@ xfs_scrub_da_btree_read_verify( } } static void -xfs_scrub_da_btree_write_verify( +xchk_da_btree_write_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; @@ -220,7 +220,7 @@ xfs_scrub_da_btree_write_verify( } } static void * -xfs_scrub_da_btree_verify( +xchk_da_btree_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; @@ -236,23 +236,23 @@ xfs_scrub_da_btree_verify( } } -static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { - .name = "xfs_scrub_da_btree", - .verify_read = xfs_scrub_da_btree_read_verify, - .verify_write = xfs_scrub_da_btree_write_verify, - .verify_struct = xfs_scrub_da_btree_verify, +static const struct xfs_buf_ops xchk_da_btree_buf_ops = { + .name = "xchk_da_btree", + .verify_read = xchk_da_btree_read_verify, + .verify_write = xchk_da_btree_write_verify, + .verify_struct = xchk_da_btree_verify, }; /* Check a block's sibling. */ STATIC int -xfs_scrub_da_btree_block_check_sibling( - struct xfs_scrub_da_btree *ds, - int level, - int direction, - xfs_dablk_t sibling) +xchk_da_btree_block_check_sibling( + struct xchk_da_btree *ds, + int level, + int direction, + xfs_dablk_t sibling) { - int retval; - int error; + int retval; + int error; memcpy(&ds->state->altpath, &ds->state->path, sizeof(ds->state->altpath)); @@ -265,7 +265,7 @@ xfs_scrub_da_btree_block_check_sibling( error = xfs_da3_path_shift(ds->state, &ds->state->altpath, direction, false, &retval); if (error == 0 && retval == 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); error = 0; goto out; } @@ -273,19 +273,19 @@ xfs_scrub_da_btree_block_check_sibling( /* Move the alternate cursor one block in the direction given. */ error = xfs_da3_path_shift(ds->state, &ds->state->altpath, direction, false, &retval); - if (!xfs_scrub_da_process_error(ds, level, &error)) + if (!xchk_da_process_error(ds, level, &error)) return error; if (retval) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); return error; } if (ds->state->altpath.blk[level].bp) - xfs_scrub_buffer_recheck(ds->sc, + xchk_buffer_recheck(ds->sc, ds->state->altpath.blk[level].bp); /* Compare upper level pointer to sibling pointer. */ if (ds->state->altpath.blk[level].blkno != sibling) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); out: return error; @@ -293,14 +293,14 @@ out: /* Check a block's sibling pointers. */ STATIC int -xfs_scrub_da_btree_block_check_siblings( - struct xfs_scrub_da_btree *ds, - int level, - struct xfs_da_blkinfo *hdr) +xchk_da_btree_block_check_siblings( + struct xchk_da_btree *ds, + int level, + struct xfs_da_blkinfo *hdr) { - xfs_dablk_t forw; - xfs_dablk_t back; - int error = 0; + xfs_dablk_t forw; + xfs_dablk_t back; + int error = 0; forw = be32_to_cpu(hdr->forw); back = be32_to_cpu(hdr->back); @@ -308,7 +308,7 @@ xfs_scrub_da_btree_block_check_siblings( /* Top level blocks should not have sibling pointers. */ if (level == 0) { if (forw != 0 || back != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); return 0; } @@ -316,10 +316,10 @@ xfs_scrub_da_btree_block_check_siblings( * Check back (left) and forw (right) pointers. These functions * absorb error codes for us. */ - error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back); + error = xchk_da_btree_block_check_sibling(ds, level, 0, back); if (error) goto out; - error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw); + error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); out: memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); @@ -328,8 +328,8 @@ out: /* Load a dir/attribute block from a btree. */ STATIC int -xfs_scrub_da_btree_block( - struct xfs_scrub_da_btree *ds, +xchk_da_btree_block( + struct xchk_da_btree *ds, int level, xfs_dablk_t blkno) { @@ -355,17 +355,17 @@ xfs_scrub_da_btree_block( /* Check the pointer. */ blk->blkno = blkno; - if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno)) + if (!xchk_da_btree_ptr_ok(ds, level, blkno)) goto out_nobuf; /* Read the buffer. */ error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2, &blk->bp, dargs->whichfork, - &xfs_scrub_da_btree_buf_ops); - if (!xfs_scrub_da_process_error(ds, level, &error)) + &xchk_da_btree_buf_ops); + if (!xchk_da_process_error(ds, level, &error)) goto out_nobuf; if (blk->bp) - xfs_scrub_buffer_recheck(ds->sc, blk->bp); + xchk_buffer_recheck(ds->sc, blk->bp); /* * We didn't find a dir btree root block, which means that @@ -378,7 +378,7 @@ xfs_scrub_da_btree_block( /* It's /not/ ok for attr trees not to have a da btree. */ if (blk->bp == NULL) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out_nobuf; } @@ -388,17 +388,17 @@ xfs_scrub_da_btree_block( /* We only started zeroing the header on v5 filesystems. */ if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); /* Check the owner. */ if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) { owner = be64_to_cpu(hdr3->owner); if (owner != ip->i_ino) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); } /* Check the siblings. */ - error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr); + error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); if (error) goto out; @@ -411,7 +411,7 @@ xfs_scrub_da_btree_block( blk->magic = XFS_ATTR_LEAF_MAGIC; blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); if (ds->tree_level != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); break; case XFS_DIR2_LEAFN_MAGIC: case XFS_DIR3_LEAFN_MAGIC: @@ -420,7 +420,7 @@ xfs_scrub_da_btree_block( blk->magic = XFS_DIR2_LEAFN_MAGIC; blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); if (ds->tree_level != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); break; case XFS_DIR2_LEAF1_MAGIC: case XFS_DIR3_LEAF1_MAGIC: @@ -429,7 +429,7 @@ xfs_scrub_da_btree_block( blk->magic = XFS_DIR2_LEAF1_MAGIC; blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); if (ds->tree_level != 0) - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); break; case XFS_DA_NODE_MAGIC: case XFS_DA3_NODE_MAGIC: @@ -443,13 +443,13 @@ xfs_scrub_da_btree_block( blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); if (level == 0) { if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out_freebp; } ds->tree_level = nodehdr.level; } else { if (ds->tree_level != nodehdr.level) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out_freebp; } } @@ -457,7 +457,7 @@ xfs_scrub_da_btree_block( /* XXX: Check hdr3.pad32 once we know how to fix it. */ break; default: - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out_freebp; } @@ -473,13 +473,13 @@ out_nobuf: /* Visit all nodes and leaves of a da btree. */ int -xfs_scrub_da_btree( - struct xfs_scrub_context *sc, +xchk_da_btree( + struct xfs_scrub *sc, int whichfork, - xfs_scrub_da_btree_rec_fn scrub_fn, + xchk_da_btree_rec_fn scrub_fn, void *private) { - struct xfs_scrub_da_btree ds = {}; + struct xchk_da_btree ds = {}; struct xfs_mount *mp = sc->mp; struct xfs_da_state_blk *blks; struct xfs_da_node_entry *key; @@ -517,7 +517,7 @@ xfs_scrub_da_btree( /* Find the root of the da tree, if present. */ blks = ds.state->path.blk; - error = xfs_scrub_da_btree_block(&ds, level, blkno); + error = xchk_da_btree_block(&ds, level, blkno); if (error) goto out_state; /* @@ -542,12 +542,12 @@ xfs_scrub_da_btree( } /* Dispatch record scrubbing. */ - rec = xfs_scrub_da_btree_entry(&ds, level, + rec = xchk_da_btree_entry(&ds, level, blks[level].index); error = scrub_fn(&ds, level, rec); if (error) break; - if (xfs_scrub_should_terminate(sc, &error) || + if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; @@ -566,8 +566,8 @@ xfs_scrub_da_btree( } /* Hashes in order for scrub? */ - key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index); - error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval); + key = xchk_da_btree_entry(&ds, level, blks[level].index); + error = xchk_da_btree_hash(&ds, level, &key->hashval); if (error) goto out; @@ -575,7 +575,7 @@ xfs_scrub_da_btree( blkno = be32_to_cpu(key->before); level++; ds.tree_level--; - error = xfs_scrub_da_btree_block(&ds, level, blkno); + error = xchk_da_btree_block(&ds, level, blkno); if (error) goto out; if (blks[level].bp == NULL) diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h index 365f9f0019e6..cb3f0003245b 100644 --- a/fs/xfs/scrub/dabtree.h +++ b/fs/xfs/scrub/dabtree.h @@ -8,13 +8,13 @@ /* dir/attr btree */ -struct xfs_scrub_da_btree { - struct xfs_da_args dargs; - xfs_dahash_t hashes[XFS_DA_NODE_MAXDEPTH]; - int maxrecs[XFS_DA_NODE_MAXDEPTH]; - struct xfs_da_state *state; - struct xfs_scrub_context *sc; - void *private; +struct xchk_da_btree { + struct xfs_da_args dargs; + xfs_dahash_t hashes[XFS_DA_NODE_MAXDEPTH]; + int maxrecs[XFS_DA_NODE_MAXDEPTH]; + struct xfs_da_state *state; + struct xfs_scrub *sc; + void *private; /* * Lowest and highest directory block address in which we expect @@ -22,24 +22,23 @@ struct xfs_scrub_da_btree { * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for * attributes there is no limit. */ - xfs_dablk_t lowest; - xfs_dablk_t highest; + xfs_dablk_t lowest; + xfs_dablk_t highest; - int tree_level; + int tree_level; }; -typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds, +typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds, int level, void *rec); /* Check for da btree operation errors. */ -bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error); +bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); /* Check for da btree corruption. */ -void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level); +void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level); -int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level, - __be32 *hashp); -int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork, - xfs_scrub_da_btree_rec_fn scrub_fn, void *private); +int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp); +int xchk_da_btree(struct xfs_scrub *sc, int whichfork, + xchk_da_btree_rec_fn scrub_fn, void *private); #endif /* __XFS_SCRUB_DABTREE_H__ */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 86324775fc9b..cd3e4d768a18 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -31,40 +31,40 @@ /* Set us up to scrub directories. */ int -xfs_scrub_setup_directory( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_directory( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_inode_contents(sc, ip, 0); + return xchk_setup_inode_contents(sc, ip, 0); } /* Directories */ /* Scrub a directory entry. */ -struct xfs_scrub_dir_ctx { +struct xchk_dir_ctx { /* VFS fill-directory iterator */ - struct dir_context dir_iter; + struct dir_context dir_iter; - struct xfs_scrub_context *sc; + struct xfs_scrub *sc; }; /* Check that an inode's mode matches a given DT_ type. */ STATIC int -xfs_scrub_dir_check_ftype( - struct xfs_scrub_dir_ctx *sdc, - xfs_fileoff_t offset, - xfs_ino_t inum, - int dtype) +xchk_dir_check_ftype( + struct xchk_dir_ctx *sdc, + xfs_fileoff_t offset, + xfs_ino_t inum, + int dtype) { - struct xfs_mount *mp = sdc->sc->mp; - struct xfs_inode *ip; - int ino_dtype; - int error = 0; + struct xfs_mount *mp = sdc->sc->mp; + struct xfs_inode *ip; + int ino_dtype; + int error = 0; if (!xfs_sb_version_hasftype(&mp->m_sb)) { if (dtype != DT_UNKNOWN && dtype != DT_DIR) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); goto out; } @@ -78,7 +78,7 @@ xfs_scrub_dir_check_ftype( * inodes can trigger immediate inactive cleanup of the inode. */ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); - if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, + if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -86,8 +86,8 @@ xfs_scrub_dir_check_ftype( ino_dtype = xfs_dir3_get_dtype(mp, xfs_mode_to_ftype(VFS_I(ip)->i_mode)); if (ino_dtype != dtype) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); - iput(VFS_I(ip)); + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + xfs_irele(ip); out: return error; } @@ -101,23 +101,23 @@ out: * we can look up this filename. Finally, we check the ftype. */ STATIC int -xfs_scrub_dir_actor( - struct dir_context *dir_iter, - const char *name, - int namelen, - loff_t pos, - u64 ino, - unsigned type) +xchk_dir_actor( + struct dir_context *dir_iter, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) { - struct xfs_mount *mp; - struct xfs_inode *ip; - struct xfs_scrub_dir_ctx *sdc; - struct xfs_name xname; - xfs_ino_t lookup_ino; - xfs_dablk_t offset; - int error = 0; - - sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter); + struct xfs_mount *mp; + struct xfs_inode *ip; + struct xchk_dir_ctx *sdc; + struct xfs_name xname; + xfs_ino_t lookup_ino; + xfs_dablk_t offset; + int error = 0; + + sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter); ip = sdc->sc->ip; mp = ip->i_mount; offset = xfs_dir2_db_to_da(mp->m_dir_geo, @@ -125,17 +125,17 @@ xfs_scrub_dir_actor( /* Does this inode number make sense? */ if (!xfs_verify_dir_ino(mp, ino)) { - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); goto out; } if (!strncmp(".", name, namelen)) { /* If this is "." then check that the inum matches the dir. */ if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); if (ino != ip->i_ino) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); } else if (!strncmp("..", name, namelen)) { /* @@ -143,10 +143,10 @@ xfs_scrub_dir_actor( * matches this dir. */ if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); } @@ -156,23 +156,23 @@ xfs_scrub_dir_actor( xname.type = XFS_DIR3_FT_UNKNOWN; error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL); - if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, + if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; if (lookup_ino != ino) { - xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); goto out; } /* Verify the file type. This function absorbs error codes. */ - error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type); + error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); if (error) goto out; out: /* * A negative error code returned here is supposed to cause the * dir_emit caller (xfs_readdir) to abort the directory iteration - * and return zero to xfs_scrub_directory. + * and return zero to xchk_directory. */ if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return -EFSCORRUPTED; @@ -181,8 +181,8 @@ out: /* Scrub a directory btree record. */ STATIC int -xfs_scrub_dir_rec( - struct xfs_scrub_da_btree *ds, +xchk_dir_rec( + struct xchk_da_btree *ds, int level, void *rec) { @@ -203,7 +203,7 @@ xfs_scrub_dir_rec( int error; /* Check the hash of the entry. */ - error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval); + error = xchk_da_btree_hash(ds, level, &ent->hashval); if (error) goto out; @@ -218,18 +218,18 @@ xfs_scrub_dir_rec( rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db); if (rec_bno >= mp->m_dir_geo->leafblk) { - xfs_scrub_da_set_corrupt(ds, level); + xchk_da_set_corrupt(ds, level); goto out; } error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp); - if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno, + if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno, &error)) goto out; if (!bp) { - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out; } - xfs_scrub_buffer_recheck(ds->sc, bp); + xchk_buffer_recheck(ds->sc, bp); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out_relse; @@ -240,7 +240,7 @@ xfs_scrub_dir_rec( p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr); endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr); if (!endp) { - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out_relse; } while (p < endp) { @@ -258,7 +258,7 @@ xfs_scrub_dir_rec( p += mp->m_dir_inode_ops->data_entsize(dep->namelen); } if (p >= endp) { - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out_relse; } @@ -267,14 +267,14 @@ xfs_scrub_dir_rec( hash = be32_to_cpu(ent->hashval); tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); if (!xfs_verify_dir_ino(mp, ino) || tag != off) - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); if (dent->namelen == 0) { - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); goto out_relse; } calc_hash = xfs_da_hashname(dent->name, dent->namelen); if (calc_hash != hash) - xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); + xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); out_relse: xfs_trans_brelse(ds->dargs.trans, bp); @@ -288,8 +288,8 @@ out: * shortest, and that there aren't any bogus entries. */ STATIC void -xfs_scrub_directory_check_free_entry( - struct xfs_scrub_context *sc, +xchk_directory_check_free_entry( + struct xfs_scrub *sc, xfs_dablk_t lblk, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup) @@ -308,13 +308,13 @@ xfs_scrub_directory_check_free_entry( return; /* Unused entry should be in the bestfrees but wasn't found. */ - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); } /* Check free space info in a directory data block. */ STATIC int -xfs_scrub_directory_data_bestfree( - struct xfs_scrub_context *sc, +xchk_directory_data_bestfree( + struct xfs_scrub *sc, xfs_dablk_t lblk, bool is_block) { @@ -339,15 +339,15 @@ xfs_scrub_directory_data_bestfree( if (is_block) { /* dir block format */ if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); error = xfs_dir3_block_read(sc->tp, sc->ip, &bp); } else { /* dir data format */ error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp); } - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; - xfs_scrub_buffer_recheck(sc, bp); + xchk_buffer_recheck(sc, bp); /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ @@ -362,7 +362,7 @@ xfs_scrub_directory_data_bestfree( if (offset == 0) continue; if (offset >= mp->m_dir_geo->blksize) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset); @@ -372,13 +372,13 @@ xfs_scrub_directory_data_bestfree( if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) || be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) || tag != ((char *)dup - (char *)bp->b_addr)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } /* bestfree records should be ordered largest to smallest */ if (smallest_bestfree < be16_to_cpu(dfp->length)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } @@ -400,7 +400,7 @@ xfs_scrub_directory_data_bestfree( dep = (struct xfs_dir2_data_entry *)ptr; newlen = d_ops->data_entsize(dep->namelen); if (newlen <= 0) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } @@ -411,7 +411,7 @@ xfs_scrub_directory_data_bestfree( /* Spot check this free entry */ tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)); if (tag != ((char *)dup - (char *)bp->b_addr)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } @@ -419,14 +419,14 @@ xfs_scrub_directory_data_bestfree( * Either this entry is a bestfree or it's smaller than * any of the bestfrees. */ - xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup); + xchk_directory_check_free_entry(sc, lblk, bf, dup); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out_buf; /* Move on. */ newlen = be16_to_cpu(dup->length); if (newlen <= 0) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out_buf; } ptr += newlen; @@ -436,11 +436,11 @@ xfs_scrub_directory_data_bestfree( /* We're required to fill all the space. */ if (ptr != endptr) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); /* Did we see at least as many free slots as there are bestfrees? */ if (nr_frees < nr_bestfrees) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); out_buf: xfs_trans_brelse(sc->tp, bp); out: @@ -454,8 +454,8 @@ out: * array is in order. */ STATIC void -xfs_scrub_directory_check_freesp( - struct xfs_scrub_context *sc, +xchk_directory_check_freesp( + struct xfs_scrub *sc, xfs_dablk_t lblk, struct xfs_buf *dbp, unsigned int len) @@ -465,16 +465,16 @@ xfs_scrub_directory_check_freesp( dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr); if (len != be16_to_cpu(dfp->length)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); if (len > 0 && be16_to_cpu(dfp->offset) == 0) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); } /* Check free space info in a directory leaf1 block. */ STATIC int -xfs_scrub_directory_leaf1_bestfree( - struct xfs_scrub_context *sc, +xchk_directory_leaf1_bestfree( + struct xfs_scrub *sc, struct xfs_da_args *args, xfs_dablk_t lblk) { @@ -497,9 +497,9 @@ xfs_scrub_directory_leaf1_bestfree( /* Read the free space block. */ error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; - xfs_scrub_buffer_recheck(sc, bp); + xchk_buffer_recheck(sc, bp); leaf = bp->b_addr; d_ops->leaf_hdr_from_disk(&leafhdr, leaf); @@ -512,7 +512,7 @@ xfs_scrub_directory_leaf1_bestfree( struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (hdr3->pad != cpu_to_be32(0)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); } /* @@ -520,19 +520,19 @@ xfs_scrub_directory_leaf1_bestfree( * blocks that can fit under i_size. */ if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } /* Is the leaf count even remotely sane? */ if (leafhdr.count > d_ops->leaf_max_ents(geo)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } /* Leaves and bests don't overlap in leaf format. */ if ((char *)&ents[leafhdr.count] > (char *)bestp) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } @@ -540,13 +540,13 @@ xfs_scrub_directory_leaf1_bestfree( for (i = 0; i < leafhdr.count; i++) { hash = be32_to_cpu(ents[i].hashval); if (i > 0 && lasthash > hash) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); lasthash = hash; if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (leafhdr.stale != stale) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -557,10 +557,10 @@ xfs_scrub_directory_leaf1_bestfree( continue; error = xfs_dir3_data_read(sc->tp, sc->ip, i * args->geo->fsbcount, -1, &dbp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; - xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); + xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -571,8 +571,8 @@ out: /* Check free space info in a directory freespace block. */ STATIC int -xfs_scrub_directory_free_bestfree( - struct xfs_scrub_context *sc, +xchk_directory_free_bestfree( + struct xfs_scrub *sc, struct xfs_da_args *args, xfs_dablk_t lblk) { @@ -587,15 +587,15 @@ xfs_scrub_directory_free_bestfree( /* Read the free space block */ error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; - xfs_scrub_buffer_recheck(sc, bp); + xchk_buffer_recheck(sc, bp); if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (hdr3->pad != cpu_to_be32(0)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); } /* Check all the entries. */ @@ -610,36 +610,36 @@ xfs_scrub_directory_free_bestfree( error = xfs_dir3_data_read(sc->tp, sc->ip, (freehdr.firstdb + i) * args->geo->fsbcount, -1, &dbp); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) break; - xfs_scrub_directory_check_freesp(sc, lblk, dbp, best); + xchk_directory_check_freesp(sc, lblk, dbp, best); xfs_trans_brelse(sc->tp, dbp); } if (freehdr.nused + stale != freehdr.nvalid) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); out: return error; } /* Check free space information in directories. */ STATIC int -xfs_scrub_directory_blocks( - struct xfs_scrub_context *sc) +xchk_directory_blocks( + struct xfs_scrub *sc) { - struct xfs_bmbt_irec got; - struct xfs_da_args args; - struct xfs_ifork *ifp; - struct xfs_mount *mp = sc->mp; - xfs_fileoff_t leaf_lblk; - xfs_fileoff_t free_lblk; - xfs_fileoff_t lblk; - struct xfs_iext_cursor icur; - xfs_dablk_t dabno; - bool found; - int is_block = 0; - int error; + struct xfs_bmbt_irec got; + struct xfs_da_args args; + struct xfs_ifork *ifp; + struct xfs_mount *mp = sc->mp; + xfs_fileoff_t leaf_lblk; + xfs_fileoff_t free_lblk; + xfs_fileoff_t lblk; + struct xfs_iext_cursor icur; + xfs_dablk_t dabno; + bool found; + int is_block = 0; + int error; /* Ignore local format directories. */ if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && @@ -656,7 +656,7 @@ xfs_scrub_directory_blocks( args.geo = mp->m_dir_geo; args.trans = sc->tp; error = xfs_dir2_isblock(&args, &is_block); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) goto out; /* Iterate all the data extents in the directory... */ @@ -666,7 +666,7 @@ xfs_scrub_directory_blocks( if (is_block && (got.br_startoff > 0 || got.br_blockcount != args.geo->fsbcount)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, got.br_startoff); break; } @@ -690,7 +690,7 @@ xfs_scrub_directory_blocks( args.geo->fsbcount); lblk < got.br_startoff + got.br_blockcount; lblk += args.geo->fsbcount) { - error = xfs_scrub_directory_data_bestfree(sc, lblk, + error = xchk_directory_data_bestfree(sc, lblk, is_block); if (error) goto out; @@ -709,10 +709,10 @@ xfs_scrub_directory_blocks( got.br_blockcount == args.geo->fsbcount && !xfs_iext_next_extent(ifp, &icur, &got)) { if (is_block) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } - error = xfs_scrub_directory_leaf1_bestfree(sc, &args, + error = xchk_directory_leaf1_bestfree(sc, &args, leaf_lblk); if (error) goto out; @@ -731,11 +731,11 @@ xfs_scrub_directory_blocks( */ lblk = got.br_startoff; if (lblk & ~0xFFFFFFFFULL) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } if (is_block) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); goto out; } @@ -754,7 +754,7 @@ xfs_scrub_directory_blocks( args.geo->fsbcount); lblk < got.br_startoff + got.br_blockcount; lblk += args.geo->fsbcount) { - error = xfs_scrub_directory_free_bestfree(sc, &args, + error = xchk_directory_free_bestfree(sc, &args, lblk); if (error) goto out; @@ -769,29 +769,29 @@ out: /* Scrub a whole directory. */ int -xfs_scrub_directory( - struct xfs_scrub_context *sc) +xchk_directory( + struct xfs_scrub *sc) { - struct xfs_scrub_dir_ctx sdc = { - .dir_iter.actor = xfs_scrub_dir_actor, + struct xchk_dir_ctx sdc = { + .dir_iter.actor = xchk_dir_actor, .dir_iter.pos = 0, .sc = sc, }; - size_t bufsize; - loff_t oldpos; - int error = 0; + size_t bufsize; + loff_t oldpos; + int error = 0; if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) return -ENOENT; /* Plausible size? */ if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) { - xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_set_corrupt(sc, sc->ip->i_ino); goto out; } /* Check directory tree structure */ - error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL); + error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL); if (error) return error; @@ -799,7 +799,7 @@ xfs_scrub_directory( return error; /* Check the freespace. */ - error = xfs_scrub_directory_blocks(sc); + error = xchk_directory_blocks(sc); if (error) return error; @@ -816,7 +816,7 @@ xfs_scrub_directory( /* * Look up every name in this directory by hash. * - * Use the xfs_readdir function to call xfs_scrub_dir_actor on + * Use the xfs_readdir function to call xchk_dir_actor on * every directory entry in this directory. In _actor, we check * the name, inode number, and ftype (if applicable) of the * entry. xfs_readdir uses the VFS filldir functions to provide @@ -834,7 +834,7 @@ xfs_scrub_directory( xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); while (true) { error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (oldpos == sdc.dir_iter.pos) diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 13d43d108574..224dba937492 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -35,11 +35,11 @@ * try again after forcing logged inode cores out to disk. */ int -xfs_scrub_setup_ag_iallocbt( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_ag_iallocbt( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder); + return xchk_setup_ag_btree(sc, ip, sc->try_harder); } /* Inode btree scrubber. */ @@ -50,8 +50,8 @@ xfs_scrub_setup_ag_iallocbt( * we have a record or not depending on freecount. */ static inline void -xfs_scrub_iallocbt_chunk_xref_other( - struct xfs_scrub_context *sc, +xchk_iallocbt_chunk_xref_other( + struct xfs_scrub *sc, struct xfs_inobt_rec_incore *irec, xfs_agino_t agino) { @@ -66,17 +66,17 @@ xfs_scrub_iallocbt_chunk_xref_other( if (!(*pcur)) return; error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec); - if (!xfs_scrub_should_check_xref(sc, &error, pcur)) + if (!xchk_should_check_xref(sc, &error, pcur)) return; if (((irec->ir_freecount > 0 && !has_irec) || (irec->ir_freecount == 0 && has_irec))) - xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0); + xchk_btree_xref_set_corrupt(sc, *pcur, 0); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_iallocbt_chunk_xref( - struct xfs_scrub_context *sc, +xchk_iallocbt_chunk_xref( + struct xfs_scrub *sc, struct xfs_inobt_rec_incore *irec, xfs_agino_t agino, xfs_agblock_t agbno, @@ -87,17 +87,17 @@ xfs_scrub_iallocbt_chunk_xref( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_xref_is_used_space(sc, agbno, len); - xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino); + xchk_xref_is_used_space(sc, agbno, len); + xchk_iallocbt_chunk_xref_other(sc, irec, agino); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); - xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, len); + xchk_xref_is_owned_by(sc, agbno, len, &oinfo); + xchk_xref_is_not_shared(sc, agbno, len); } /* Is this chunk worth checking? */ STATIC bool -xfs_scrub_iallocbt_chunk( - struct xfs_scrub_btree *bs, +xchk_iallocbt_chunk( + struct xchk_btree *bs, struct xfs_inobt_rec_incore *irec, xfs_agino_t agino, xfs_extlen_t len) @@ -110,16 +110,16 @@ xfs_scrub_iallocbt_chunk( if (bno + len <= bno || !xfs_verify_agbno(mp, agno, bno) || !xfs_verify_agbno(mp, agno, bno + len - 1)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); + xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); return true; } /* Count the number of free inodes. */ static unsigned int -xfs_scrub_iallocbt_freecount( +xchk_iallocbt_freecount( xfs_inofree_t freemask) { BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64)); @@ -128,8 +128,8 @@ xfs_scrub_iallocbt_freecount( /* Check a particular inode with ir_free. */ STATIC int -xfs_scrub_iallocbt_check_cluster_freemask( - struct xfs_scrub_btree *bs, +xchk_iallocbt_check_cluster_freemask( + struct xchk_btree *bs, xfs_ino_t fsino, xfs_agino_t chunkino, xfs_agino_t clusterino, @@ -143,14 +143,14 @@ xfs_scrub_iallocbt_check_cluster_freemask( bool inuse; int error = 0; - if (xfs_scrub_should_terminate(bs->sc, &error)) + if (xchk_should_terminate(bs->sc, &error)) return error; dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize); if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino + clusterino)) { - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); goto out; } @@ -175,15 +175,15 @@ xfs_scrub_iallocbt_check_cluster_freemask( freemask_ok = inode_is_free ^ inuse; } if (!freemask_ok) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); out: return 0; } /* Make sure the free mask is consistent with what the inodes think. */ STATIC int -xfs_scrub_iallocbt_check_freemask( - struct xfs_scrub_btree *bs, +xchk_iallocbt_check_freemask( + struct xchk_btree *bs, struct xfs_inobt_rec_incore *irec) { struct xfs_owner_info oinfo; @@ -223,18 +223,18 @@ xfs_scrub_iallocbt_check_freemask( /* The whole cluster must be a hole or not a hole. */ ir_holemask = (irec->ir_holemask & holemask); if (ir_holemask != holemask && ir_holemask != 0) { - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); continue; } /* If any part of this is a hole, skip it. */ if (ir_holemask) { - xfs_scrub_xref_is_not_owned_by(bs->sc, agbno, + xchk_xref_is_not_owned_by(bs->sc, agbno, blks_per_cluster, &oinfo); continue; } - xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, + xchk_xref_is_owned_by(bs->sc, agbno, blks_per_cluster, &oinfo); /* Grab the inode cluster buffer. */ @@ -245,13 +245,13 @@ xfs_scrub_iallocbt_check_freemask( error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &bp, 0, 0); - if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0, + if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error)) continue; /* Which inodes are free? */ for (clusterino = 0; clusterino < nr_inodes; clusterino++) { - error = xfs_scrub_iallocbt_check_cluster_freemask(bs, + error = xchk_iallocbt_check_cluster_freemask(bs, fsino, chunkino, clusterino, irec, bp); if (error) { xfs_trans_brelse(bs->cur->bc_tp, bp); @@ -267,8 +267,8 @@ xfs_scrub_iallocbt_check_freemask( /* Scrub an inobt/finobt record. */ STATIC int -xfs_scrub_iallocbt_rec( - struct xfs_scrub_btree *bs, +xchk_iallocbt_rec( + struct xchk_btree *bs, union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; @@ -289,18 +289,18 @@ xfs_scrub_iallocbt_rec( if (irec.ir_count > XFS_INODES_PER_CHUNK || irec.ir_freecount > XFS_INODES_PER_CHUNK) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); real_freecount = irec.ir_freecount + (XFS_INODES_PER_CHUNK - irec.ir_count); - if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + if (real_freecount != xchk_iallocbt_freecount(irec.ir_free)) + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); agino = irec.ir_startino; /* Record has to be properly aligned within the AG. */ if (!xfs_verify_agino(mp, agno, agino) || !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) { - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); goto out; } @@ -308,7 +308,7 @@ xfs_scrub_iallocbt_rec( agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino); if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || (agbno & (xfs_icluster_size_fsb(mp) - 1))) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); *inode_blocks += XFS_B_TO_FSB(mp, irec.ir_count * mp->m_sb.sb_inodesize); @@ -318,9 +318,9 @@ xfs_scrub_iallocbt_rec( len = XFS_B_TO_FSB(mp, XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize); if (irec.ir_count != XFS_INODES_PER_CHUNK) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) + if (!xchk_iallocbt_chunk(bs, &irec, agino, len)) goto out; goto check_freemask; } @@ -333,12 +333,12 @@ xfs_scrub_iallocbt_rec( holes = ~xfs_inobt_irec_to_allocmask(&irec); if ((holes & irec.ir_free) != holes || irec.ir_freecount > irec.ir_count) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) { if (holemask & 1) holecount += XFS_INODES_PER_HOLEMASK_BIT; - else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) + else if (!xchk_iallocbt_chunk(bs, &irec, agino, len)) break; holemask >>= 1; agino += XFS_INODES_PER_HOLEMASK_BIT; @@ -346,10 +346,10 @@ xfs_scrub_iallocbt_rec( if (holecount > XFS_INODES_PER_CHUNK || holecount + irec.ir_count != XFS_INODES_PER_CHUNK) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); check_freemask: - error = xfs_scrub_iallocbt_check_freemask(bs, &irec); + error = xchk_iallocbt_check_freemask(bs, &irec); if (error) goto out; @@ -362,39 +362,39 @@ out: * Don't bother if we're missing btree cursors, as we're already corrupt. */ STATIC void -xfs_scrub_iallocbt_xref_rmap_btreeblks( - struct xfs_scrub_context *sc, - int which) +xchk_iallocbt_xref_rmap_btreeblks( + struct xfs_scrub *sc, + int which) { - struct xfs_owner_info oinfo; - xfs_filblks_t blocks; - xfs_extlen_t inobt_blocks = 0; - xfs_extlen_t finobt_blocks = 0; - int error; + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + xfs_extlen_t inobt_blocks = 0; + xfs_extlen_t finobt_blocks = 0; + int error; if (!sc->sa.ino_cur || !sc->sa.rmap_cur || (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) || - xfs_scrub_skip_xref(sc->sm)) + xchk_skip_xref(sc->sm)) return; /* Check that we saw as many inobt blocks as the rmap says. */ error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks); - if (!xfs_scrub_process_error(sc, 0, 0, &error)) + if (!xchk_process_error(sc, 0, 0, &error)) return; if (sc->sa.fino_cur) { error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks); - if (!xfs_scrub_process_error(sc, 0, 0, &error)) + if (!xchk_process_error(sc, 0, 0, &error)) return; } xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); - error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (blocks != inobt_blocks + finobt_blocks) - xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0); + xchk_btree_set_corrupt(sc, sc->sa.ino_cur, 0); } /* @@ -402,47 +402,47 @@ xfs_scrub_iallocbt_xref_rmap_btreeblks( * the rmap says are owned by inodes. */ STATIC void -xfs_scrub_iallocbt_xref_rmap_inodes( - struct xfs_scrub_context *sc, - int which, - xfs_filblks_t inode_blocks) +xchk_iallocbt_xref_rmap_inodes( + struct xfs_scrub *sc, + int which, + xfs_filblks_t inode_blocks) { - struct xfs_owner_info oinfo; - xfs_filblks_t blocks; - int error; + struct xfs_owner_info oinfo; + xfs_filblks_t blocks; + int error; - if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) return; /* Check that we saw as many inode blocks as the rmap knows about. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); - error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, + error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (blocks != inode_blocks) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); } /* Scrub the inode btrees for some AG. */ STATIC int -xfs_scrub_iallocbt( - struct xfs_scrub_context *sc, - xfs_btnum_t which) +xchk_iallocbt( + struct xfs_scrub *sc, + xfs_btnum_t which) { - struct xfs_btree_cur *cur; - struct xfs_owner_info oinfo; - xfs_filblks_t inode_blocks = 0; - int error; + struct xfs_btree_cur *cur; + struct xfs_owner_info oinfo; + xfs_filblks_t inode_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; - error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, + error = xchk_btree(sc, cur, xchk_iallocbt_rec, &oinfo, &inode_blocks); if (error) return error; - xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which); + xchk_iallocbt_xref_rmap_btreeblks(sc, which); /* * If we're scrubbing the inode btree, inode_blocks is the number of @@ -452,64 +452,64 @@ xfs_scrub_iallocbt( * to inode chunks with free inodes. */ if (which == XFS_BTNUM_INO) - xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); + xchk_iallocbt_xref_rmap_inodes(sc, which, inode_blocks); return error; } int -xfs_scrub_inobt( - struct xfs_scrub_context *sc) +xchk_inobt( + struct xfs_scrub *sc) { - return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO); + return xchk_iallocbt(sc, XFS_BTNUM_INO); } int -xfs_scrub_finobt( - struct xfs_scrub_context *sc) +xchk_finobt( + struct xfs_scrub *sc) { - return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); + return xchk_iallocbt(sc, XFS_BTNUM_FINO); } /* See if an inode btree has (or doesn't have) an inode chunk record. */ static inline void -xfs_scrub_xref_inode_check( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - struct xfs_btree_cur **icur, - bool should_have_inodes) +xchk_xref_inode_check( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + struct xfs_btree_cur **icur, + bool should_have_inodes) { - bool has_inodes; - int error; + bool has_inodes; + int error; - if (!(*icur) || xfs_scrub_skip_xref(sc->sm)) + if (!(*icur) || xchk_skip_xref(sc->sm)) return; error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes); - if (!xfs_scrub_should_check_xref(sc, &error, icur)) + if (!xchk_should_check_xref(sc, &error, icur)) return; if (has_inodes != should_have_inodes) - xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0); + xchk_btree_xref_set_corrupt(sc, *icur, 0); } /* xref check that the extent is not covered by inodes */ void -xfs_scrub_xref_is_not_inode_chunk( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_xref_is_not_inode_chunk( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { - xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); - xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); + xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false); + xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false); } /* xref check that the extent is covered by inodes */ void -xfs_scrub_xref_is_inode_chunk( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_xref_is_inode_chunk( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { - xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); + xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true); } diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 7a6208505980..5b3b177c0fc9 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -37,23 +37,23 @@ * the goal. */ int -xfs_scrub_setup_inode( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_inode( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - int error; + int error; /* * Try to get the inode. If the verifiers fail, we try again * in raw mode. */ - error = xfs_scrub_get_inode(sc, ip); + error = xchk_get_inode(sc, ip); switch (error) { case 0: break; case -EFSCORRUPTED: case -EFSBADCRC: - return xfs_scrub_trans_alloc(sc, 0); + return xchk_trans_alloc(sc, 0); default: return error; } @@ -61,7 +61,7 @@ xfs_scrub_setup_inode( /* Got the inode, lock it and we're ready to go. */ sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_ilock(sc->ip, sc->ilock_flags); - error = xfs_scrub_trans_alloc(sc, 0); + error = xchk_trans_alloc(sc, 0); if (error) goto out; sc->ilock_flags |= XFS_ILOCK_EXCL; @@ -76,19 +76,19 @@ out: /* Validate di_extsize hint. */ STATIC void -xfs_scrub_inode_extsize( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip, - xfs_ino_t ino, - uint16_t mode, - uint16_t flags) +xchk_inode_extsize( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags) { - xfs_failaddr_t fa; + xfs_failaddr_t fa; fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), mode, flags); if (fa) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } /* @@ -98,33 +98,33 @@ xfs_scrub_inode_extsize( * These functions must be kept in sync with each other. */ STATIC void -xfs_scrub_inode_cowextsize( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip, - xfs_ino_t ino, - uint16_t mode, - uint16_t flags, - uint64_t flags2) +xchk_inode_cowextsize( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags, + uint64_t flags2) { - xfs_failaddr_t fa; + xfs_failaddr_t fa; fa = xfs_inode_validate_cowextsize(sc->mp, be32_to_cpu(dip->di_cowextsize), mode, flags, flags2); if (fa) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } /* Make sure the di_flags make sense for the inode. */ STATIC void -xfs_scrub_inode_flags( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip, - xfs_ino_t ino, - uint16_t mode, - uint16_t flags) +xchk_inode_flags( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags) { - struct xfs_mount *mp = sc->mp; + struct xfs_mount *mp = sc->mp; if (flags & ~XFS_DIFLAG_ANY) goto bad; @@ -157,20 +157,20 @@ xfs_scrub_inode_flags( return; bad: - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } /* Make sure the di_flags2 make sense for the inode. */ STATIC void -xfs_scrub_inode_flags2( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip, - xfs_ino_t ino, - uint16_t mode, - uint16_t flags, - uint64_t flags2) +xchk_inode_flags2( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + xfs_ino_t ino, + uint16_t mode, + uint16_t flags, + uint64_t flags2) { - struct xfs_mount *mp = sc->mp; + struct xfs_mount *mp = sc->mp; if (flags2 & ~XFS_DIFLAG2_ANY) goto bad; @@ -200,23 +200,23 @@ xfs_scrub_inode_flags2( return; bad: - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } /* Scrub all the ondisk inode fields. */ STATIC void -xfs_scrub_dinode( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip, - xfs_ino_t ino) +xchk_dinode( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + xfs_ino_t ino) { - struct xfs_mount *mp = sc->mp; - size_t fork_recs; - unsigned long long isize; - uint64_t flags2; - uint32_t nextents; - uint16_t flags; - uint16_t mode; + struct xfs_mount *mp = sc->mp; + size_t fork_recs; + unsigned long long isize; + uint64_t flags2; + uint32_t nextents; + uint16_t flags; + uint16_t mode; flags = be16_to_cpu(dip->di_flags); if (dip->di_version >= 3) @@ -237,7 +237,7 @@ xfs_scrub_dinode( /* mode is recognized */ break; default: - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; } @@ -248,22 +248,22 @@ xfs_scrub_dinode( * We autoconvert v1 inodes into v2 inodes on writeout, * so just mark this inode for preening. */ - xfs_scrub_ino_set_preen(sc, ino); + xchk_ino_set_preen(sc, ino); break; case 2: case 3: if (dip->di_onlink != 0) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (dip->di_mode == 0 && sc->ip) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (dip->di_projid_hi != 0 && !xfs_sb_version_hasprojid32bit(&mp->m_sb)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; default: - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); return; } @@ -273,40 +273,40 @@ xfs_scrub_dinode( */ if (dip->di_uid == cpu_to_be32(-1U) || dip->di_gid == cpu_to_be32(-1U)) - xfs_scrub_ino_set_warning(sc, ino); + xchk_ino_set_warning(sc, ino); /* di_format */ switch (dip->di_format) { case XFS_DINODE_FMT_DEV: if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode) && !S_ISSOCK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_LOCAL: if (!S_ISDIR(mode) && !S_ISLNK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_EXTENTS: if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (!S_ISREG(mode) && !S_ISDIR(mode)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_UUID: default: - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; } /* di_[amc]time.nsec */ if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* * di_size. xfs_dinode_verify checks for things that screw up @@ -315,19 +315,19 @@ xfs_scrub_dinode( */ isize = be64_to_cpu(dip->di_size); if (isize & (1ULL << 63)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* Devices, fifos, and sockets must have zero size */ if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* Directories can't be larger than the data section size (32G) */ if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* Symlinks can't be larger than SYMLINK_MAXLEN */ if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* * Warn if the running kernel can't handle the kinds of offsets @@ -336,7 +336,7 @@ xfs_scrub_dinode( * overly large offsets, flag the inode for admin review. */ if (isize >= mp->m_super->s_maxbytes) - xfs_scrub_ino_set_warning(sc, ino); + xchk_ino_set_warning(sc, ino); /* di_nblocks */ if (flags2 & XFS_DIFLAG2_REFLINK) { @@ -351,15 +351,15 @@ xfs_scrub_dinode( */ if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } else { if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } - xfs_scrub_inode_flags(sc, dip, ino, mode, flags); + xchk_inode_flags(sc, dip, ino, mode, flags); - xfs_scrub_inode_extsize(sc, dip, ino, mode, flags); + xchk_inode_extsize(sc, dip, ino, mode, flags); /* di_nextents */ nextents = be32_to_cpu(dip->di_nextents); @@ -367,31 +367,31 @@ xfs_scrub_dinode( switch (dip->di_format) { case XFS_DINODE_FMT_EXTENTS: if (nextents > fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (nextents <= fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; default: if (nextents != 0) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; } /* di_forkoff */ if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (dip->di_anextents != 0 && dip->di_forkoff == 0) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* di_aformat */ if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && dip->di_aformat != XFS_DINODE_FMT_EXTENTS && dip->di_aformat != XFS_DINODE_FMT_BTREE) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); /* di_anextents */ nextents = be16_to_cpu(dip->di_anextents); @@ -399,22 +399,22 @@ xfs_scrub_dinode( switch (dip->di_aformat) { case XFS_DINODE_FMT_EXTENTS: if (nextents > fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; case XFS_DINODE_FMT_BTREE: if (nextents <= fork_recs) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); break; default: if (nextents != 0) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } if (dip->di_version >= 3) { if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) - xfs_scrub_ino_set_corrupt(sc, ino); - xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2); - xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags, + xchk_ino_set_corrupt(sc, ino); + xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); + xchk_inode_cowextsize(sc, dip, ino, mode, flags, flags2); } } @@ -425,8 +425,8 @@ xfs_scrub_dinode( * IGET_UNTRUSTED, which checks the inobt for us. */ static void -xfs_scrub_inode_xref_finobt( - struct xfs_scrub_context *sc, +xchk_inode_xref_finobt( + struct xfs_scrub *sc, xfs_ino_t ino) { struct xfs_inobt_rec_incore rec; @@ -434,7 +434,7 @@ xfs_scrub_inode_xref_finobt( int has_record; int error; - if (!sc->sa.fino_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) return; agino = XFS_INO_TO_AGINO(sc->mp, ino); @@ -445,12 +445,12 @@ xfs_scrub_inode_xref_finobt( */ error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, &has_record); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || !has_record) return; error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || + if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || !has_record) return; @@ -463,54 +463,54 @@ xfs_scrub_inode_xref_finobt( return; if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); } /* Cross reference the inode fields with the forks. */ STATIC void -xfs_scrub_inode_xref_bmap( - struct xfs_scrub_context *sc, - struct xfs_dinode *dip) +xchk_inode_xref_bmap( + struct xfs_scrub *sc, + struct xfs_dinode *dip) { - xfs_extnum_t nextents; - xfs_filblks_t count; - xfs_filblks_t acount; - int error; + xfs_extnum_t nextents; + xfs_filblks_t count; + xfs_filblks_t acount; + int error; - if (xfs_scrub_skip_xref(sc->sm)) + if (xchk_skip_xref(sc->sm)) return; /* Walk all the extents to check nextents/naextents/nblocks. */ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, &nextents, &count); - if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + if (!xchk_should_check_xref(sc, &error, NULL)) return; if (nextents < be32_to_cpu(dip->di_nextents)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, &nextents, &acount); - if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + if (!xchk_should_check_xref(sc, &error, NULL)) return; if (nextents != be16_to_cpu(dip->di_anextents)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); /* Check nblocks against the inode. */ if (count + acount != be64_to_cpu(dip->di_nblocks)) - xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); + xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_inode_xref( - struct xfs_scrub_context *sc, - xfs_ino_t ino, - struct xfs_dinode *dip) +xchk_inode_xref( + struct xfs_scrub *sc, + xfs_ino_t ino, + struct xfs_dinode *dip) { - struct xfs_owner_info oinfo; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - int error; + struct xfs_owner_info oinfo; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + int error; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; @@ -518,18 +518,18 @@ xfs_scrub_inode_xref( agno = XFS_INO_TO_AGNO(sc->mp, ino); agbno = XFS_INO_TO_AGBNO(sc->mp, ino); - error = xfs_scrub_ag_init(sc, agno, &sc->sa); - if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) + error = xchk_ag_init(sc, agno, &sc->sa); + if (!xchk_xref_process_error(sc, agno, agbno, &error)) return; - xfs_scrub_xref_is_used_space(sc, agbno, 1); - xfs_scrub_inode_xref_finobt(sc, ino); + xchk_xref_is_used_space(sc, agbno, 1); + xchk_inode_xref_finobt(sc, ino); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); - xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); - xfs_scrub_xref_is_not_shared(sc, agbno, 1); - xfs_scrub_inode_xref_bmap(sc, dip); + xchk_xref_is_owned_by(sc, agbno, 1, &oinfo); + xchk_xref_is_not_shared(sc, agbno, 1); + xchk_inode_xref_bmap(sc, dip); - xfs_scrub_ag_free(sc, &sc->sa); + xchk_ag_free(sc, &sc->sa); } /* @@ -539,35 +539,35 @@ xfs_scrub_inode_xref( * reflink filesystem. */ static void -xfs_scrub_inode_check_reflink_iflag( - struct xfs_scrub_context *sc, - xfs_ino_t ino) +xchk_inode_check_reflink_iflag( + struct xfs_scrub *sc, + xfs_ino_t ino) { - struct xfs_mount *mp = sc->mp; - bool has_shared; - int error; + struct xfs_mount *mp = sc->mp; + bool has_shared; + int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return; error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, &has_shared); - if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), + if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), XFS_INO_TO_AGBNO(mp, ino), &error)) return; if (xfs_is_reflink_inode(sc->ip) && !has_shared) - xfs_scrub_ino_set_preen(sc, ino); + xchk_ino_set_preen(sc, ino); else if (!xfs_is_reflink_inode(sc->ip) && has_shared) - xfs_scrub_ino_set_corrupt(sc, ino); + xchk_ino_set_corrupt(sc, ino); } /* Scrub an inode. */ int -xfs_scrub_inode( - struct xfs_scrub_context *sc) +xchk_inode( + struct xfs_scrub *sc) { - struct xfs_dinode di; - int error = 0; + struct xfs_dinode di; + int error = 0; /* * If sc->ip is NULL, that means that the setup function called @@ -575,13 +575,13 @@ xfs_scrub_inode( * and a NULL inode, so flag the corruption error and return. */ if (!sc->ip) { - xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino); + xchk_ino_set_corrupt(sc, sc->sm->sm_ino); return 0; } /* Scrub the inode core. */ xfs_inode_to_disk(sc->ip, &di, 0); - xfs_scrub_dinode(sc, &di, sc->ip->i_ino); + xchk_dinode(sc, &di, sc->ip->i_ino); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -591,9 +591,9 @@ xfs_scrub_inode( * we scrubbed the dinode. */ if (S_ISREG(VFS_I(sc->ip)->i_mode)) - xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino); + xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); - xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di); + xchk_inode_xref(sc, sc->ip->i_ino, &di); out: return error; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index e2bda58c32f0..1c9d7c7f64f5 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -27,36 +27,36 @@ /* Set us up to scrub parents. */ int -xfs_scrub_setup_parent( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_parent( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_inode_contents(sc, ip, 0); + return xchk_setup_inode_contents(sc, ip, 0); } /* Parent pointers */ /* Look for an entry in a parent pointing to this inode. */ -struct xfs_scrub_parent_ctx { - struct dir_context dc; - xfs_ino_t ino; - xfs_nlink_t nlink; +struct xchk_parent_ctx { + struct dir_context dc; + xfs_ino_t ino; + xfs_nlink_t nlink; }; /* Look for a single entry in a directory pointing to an inode. */ STATIC int -xfs_scrub_parent_actor( - struct dir_context *dc, - const char *name, - int namelen, - loff_t pos, - u64 ino, - unsigned type) +xchk_parent_actor( + struct dir_context *dc, + const char *name, + int namelen, + loff_t pos, + u64 ino, + unsigned type) { - struct xfs_scrub_parent_ctx *spc; + struct xchk_parent_ctx *spc; - spc = container_of(dc, struct xfs_scrub_parent_ctx, dc); + spc = container_of(dc, struct xchk_parent_ctx, dc); if (spc->ino == ino) spc->nlink++; return 0; @@ -64,21 +64,21 @@ xfs_scrub_parent_actor( /* Count the number of dentries in the parent dir that point to this inode. */ STATIC int -xfs_scrub_parent_count_parent_dentries( - struct xfs_scrub_context *sc, - struct xfs_inode *parent, - xfs_nlink_t *nlink) +xchk_parent_count_parent_dentries( + struct xfs_scrub *sc, + struct xfs_inode *parent, + xfs_nlink_t *nlink) { - struct xfs_scrub_parent_ctx spc = { - .dc.actor = xfs_scrub_parent_actor, + struct xchk_parent_ctx spc = { + .dc.actor = xchk_parent_actor, .dc.pos = 0, .ino = sc->ip->i_ino, .nlink = 0, }; - size_t bufsize; - loff_t oldpos; - uint lock_mode; - int error = 0; + size_t bufsize; + loff_t oldpos; + uint lock_mode; + int error = 0; /* * If there are any blocks, read-ahead block 0 as we're almost @@ -120,16 +120,16 @@ out: * entry pointing back to the inode being scrubbed. */ STATIC int -xfs_scrub_parent_validate( - struct xfs_scrub_context *sc, - xfs_ino_t dnum, - bool *try_again) +xchk_parent_validate( + struct xfs_scrub *sc, + xfs_ino_t dnum, + bool *try_again) { - struct xfs_mount *mp = sc->mp; - struct xfs_inode *dp = NULL; - xfs_nlink_t expected_nlink; - xfs_nlink_t nlink; - int error = 0; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *dp = NULL; + xfs_nlink_t expected_nlink; + xfs_nlink_t nlink; + int error = 0; *try_again = false; @@ -138,7 +138,7 @@ xfs_scrub_parent_validate( /* '..' must not point to ourselves. */ if (sc->ip->i_ino == dnum) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } @@ -165,13 +165,13 @@ xfs_scrub_parent_validate( error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); if (error == -EINVAL) { error = -EFSCORRUPTED; - xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); + xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); goto out; } - if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out_rele; } @@ -183,12 +183,12 @@ xfs_scrub_parent_validate( * the child inodes. */ if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { - error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, + error = xchk_parent_count_parent_dentries(sc, dp, &nlink); + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; if (nlink != expected_nlink) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out_unlock; } @@ -200,18 +200,18 @@ xfs_scrub_parent_validate( */ xfs_iunlock(sc->ip, sc->ilock_flags); sc->ilock_flags = 0; - error = xfs_scrub_ilock_inverted(dp, XFS_IOLOCK_SHARED); + error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED); if (error) goto out_rele; /* Go looking for our dentry. */ - error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); - if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + error = xchk_parent_count_parent_dentries(sc, dp, &nlink); + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_unlock; /* Drop the parent lock, relock this inode. */ xfs_iunlock(dp, XFS_IOLOCK_SHARED); - error = xfs_scrub_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); + error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); if (error) goto out_rele; sc->ilock_flags = XFS_IOLOCK_EXCL; @@ -225,43 +225,43 @@ xfs_scrub_parent_validate( /* Look up '..' to see if the inode changed. */ error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out_rele; /* Drat, parent changed. Try again! */ if (dnum != dp->i_ino) { - iput(VFS_I(dp)); + xfs_irele(dp); *try_again = true; return 0; } - iput(VFS_I(dp)); + xfs_irele(dp); /* * '..' didn't change, so check that there was only one entry * for us in the parent. */ if (nlink != expected_nlink) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return error; out_unlock: xfs_iunlock(dp, XFS_IOLOCK_SHARED); out_rele: - iput(VFS_I(dp)); + xfs_irele(dp); out: return error; } /* Scrub a parent pointer. */ int -xfs_scrub_parent( - struct xfs_scrub_context *sc) +xchk_parent( + struct xfs_scrub *sc) { - struct xfs_mount *mp = sc->mp; - xfs_ino_t dnum; - bool try_again; - int tries = 0; - int error = 0; + struct xfs_mount *mp = sc->mp; + xfs_ino_t dnum; + bool try_again; + int tries = 0; + int error = 0; /* * If we're a directory, check that the '..' link points up to @@ -272,7 +272,7 @@ xfs_scrub_parent( /* We're not a special inode, are we? */ if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } @@ -288,10 +288,10 @@ xfs_scrub_parent( /* Look up '..' */ error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (!xfs_verify_dir_ino(mp, dnum)) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } @@ -299,12 +299,12 @@ xfs_scrub_parent( if (sc->ip == mp->m_rootip) { if (sc->ip->i_ino != mp->m_sb.sb_rootino || sc->ip->i_ino != dnum) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } do { - error = xfs_scrub_parent_validate(sc, dnum, &try_again); + error = xchk_parent_validate(sc, dnum, &try_again); if (error) goto out; } while (try_again && ++tries < 20); @@ -314,7 +314,7 @@ xfs_scrub_parent( * incomplete. Userspace can decide if it wants to try again. */ if (try_again && tries == 20) - xfs_scrub_set_incomplete(sc); + xchk_set_incomplete(sc); out: /* * If we failed to lock the parent inode even after a retry, just mark @@ -322,7 +322,7 @@ out: */ if (sc->try_harder && error == -EDEADLOCK) { error = 0; - xfs_scrub_set_incomplete(sc); + xchk_set_incomplete(sc); } return error; } diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 6ff906aa0a3b..782d582d3edd 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -30,8 +30,8 @@ /* Convert a scrub type code to a DQ flag, or return 0 if error. */ static inline uint -xfs_scrub_quota_to_dqtype( - struct xfs_scrub_context *sc) +xchk_quota_to_dqtype( + struct xfs_scrub *sc) { switch (sc->sm->sm_type) { case XFS_SCRUB_TYPE_UQUOTA: @@ -47,24 +47,24 @@ xfs_scrub_quota_to_dqtype( /* Set us up to scrub a quota. */ int -xfs_scrub_setup_quota( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_quota( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - uint dqtype; - int error; + uint dqtype; + int error; if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp)) return -ENOENT; - dqtype = xfs_scrub_quota_to_dqtype(sc); + dqtype = xchk_quota_to_dqtype(sc); if (dqtype == 0) return -EINVAL; sc->has_quotaofflock = true; mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock); if (!xfs_this_quota_on(sc->mp, dqtype)) return -ENOENT; - error = xfs_scrub_setup_fs(sc, ip); + error = xchk_setup_fs(sc, ip); if (error) return error; sc->ip = xfs_quota_inode(sc->mp, dqtype); @@ -75,35 +75,35 @@ xfs_scrub_setup_quota( /* Quotas. */ -struct xfs_scrub_quota_info { - struct xfs_scrub_context *sc; - xfs_dqid_t last_id; +struct xchk_quota_info { + struct xfs_scrub *sc; + xfs_dqid_t last_id; }; /* Scrub the fields in an individual quota item. */ STATIC int -xfs_scrub_quota_item( - struct xfs_dquot *dq, - uint dqtype, - void *priv) +xchk_quota_item( + struct xfs_dquot *dq, + uint dqtype, + void *priv) { - struct xfs_scrub_quota_info *sqi = priv; - struct xfs_scrub_context *sc = sqi->sc; - struct xfs_mount *mp = sc->mp; - struct xfs_disk_dquot *d = &dq->q_core; - struct xfs_quotainfo *qi = mp->m_quotainfo; - xfs_fileoff_t offset; - unsigned long long bsoft; - unsigned long long isoft; - unsigned long long rsoft; - unsigned long long bhard; - unsigned long long ihard; - unsigned long long rhard; - unsigned long long bcount; - unsigned long long icount; - unsigned long long rcount; - xfs_ino_t fs_icount; - xfs_dqid_t id = be32_to_cpu(d->d_id); + struct xchk_quota_info *sqi = priv; + struct xfs_scrub *sc = sqi->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_disk_dquot *d = &dq->q_core; + struct xfs_quotainfo *qi = mp->m_quotainfo; + xfs_fileoff_t offset; + unsigned long long bsoft; + unsigned long long isoft; + unsigned long long rsoft; + unsigned long long bhard; + unsigned long long ihard; + unsigned long long rhard; + unsigned long long bcount; + unsigned long long icount; + unsigned long long rcount; + xfs_ino_t fs_icount; + xfs_dqid_t id = be32_to_cpu(d->d_id); /* * Except for the root dquot, the actual dquot we got must either have @@ -111,16 +111,16 @@ xfs_scrub_quota_item( */ offset = id / qi->qi_dqperchunk; if (id && id <= sqi->last_id) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); sqi->last_id = id; /* Did we get the dquot type we wanted? */ if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); /* Check the limits. */ bhard = be64_to_cpu(d->d_blk_hardlimit); @@ -140,19 +140,19 @@ xfs_scrub_quota_item( * the hard limit. */ if (bhard > mp->m_sb.sb_dblocks) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (bsoft > bhard) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); if (ihard > mp->m_maxicount) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (isoft > ihard) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); if (rhard > mp->m_sb.sb_rblocks) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (rsoft > rhard) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); /* Check the resource counts. */ bcount = be64_to_cpu(d->d_bcount); @@ -167,15 +167,15 @@ xfs_scrub_quota_item( */ if (xfs_sb_version_hasreflink(&mp->m_sb)) { if (mp->m_sb.sb_dblocks < bcount) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); } else { if (mp->m_sb.sb_dblocks < bcount) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); /* * We can violate the hard limits if the admin suddenly sets a @@ -183,29 +183,29 @@ xfs_scrub_quota_item( * admin review. */ if (id != 0 && bhard != 0 && bcount > bhard) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (id != 0 && ihard != 0 && icount > ihard) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (id != 0 && rhard != 0 && rcount > rhard) - xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset); + xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); return 0; } /* Check the quota's data fork. */ STATIC int -xfs_scrub_quota_data_fork( - struct xfs_scrub_context *sc) +xchk_quota_data_fork( + struct xfs_scrub *sc) { - struct xfs_bmbt_irec irec = { 0 }; - struct xfs_iext_cursor icur; - struct xfs_quotainfo *qi = sc->mp->m_quotainfo; - struct xfs_ifork *ifp; - xfs_fileoff_t max_dqid_off; - int error = 0; + struct xfs_bmbt_irec irec = { 0 }; + struct xfs_iext_cursor icur; + struct xfs_quotainfo *qi = sc->mp->m_quotainfo; + struct xfs_ifork *ifp; + xfs_fileoff_t max_dqid_off; + int error = 0; /* Invoke the fork scrubber. */ - error = xfs_scrub_metadata_inode_forks(sc); + error = xchk_metadata_inode_forks(sc); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return error; @@ -213,7 +213,7 @@ xfs_scrub_quota_data_fork( max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk; ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); for_each_xfs_iext(ifp, &icur, &irec) { - if (xfs_scrub_should_terminate(sc, &error)) + if (xchk_should_terminate(sc, &error)) break; /* * delalloc extents or blocks mapped above the highest @@ -222,7 +222,7 @@ xfs_scrub_quota_data_fork( if (isnullstartblock(irec.br_startblock) || irec.br_startoff > max_dqid_off || irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, irec.br_startoff); break; } @@ -233,19 +233,19 @@ xfs_scrub_quota_data_fork( /* Scrub all of a quota type's items. */ int -xfs_scrub_quota( - struct xfs_scrub_context *sc) +xchk_quota( + struct xfs_scrub *sc) { - struct xfs_scrub_quota_info sqi; - struct xfs_mount *mp = sc->mp; - struct xfs_quotainfo *qi = mp->m_quotainfo; - uint dqtype; - int error = 0; + struct xchk_quota_info sqi; + struct xfs_mount *mp = sc->mp; + struct xfs_quotainfo *qi = mp->m_quotainfo; + uint dqtype; + int error = 0; - dqtype = xfs_scrub_quota_to_dqtype(sc); + dqtype = xchk_quota_to_dqtype(sc); /* Look for problem extents. */ - error = xfs_scrub_quota_data_fork(sc); + error = xchk_quota_data_fork(sc); if (error) goto out; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) @@ -260,10 +260,10 @@ xfs_scrub_quota( sc->ilock_flags = 0; sqi.sc = sc; sqi.last_id = 0; - error = xfs_qm_dqiterate(mp, dqtype, xfs_scrub_quota_item, &sqi); + error = xfs_qm_dqiterate(mp, dqtype, xchk_quota_item, &sqi); sc->ilock_flags = XFS_ILOCK_EXCL; xfs_ilock(sc->ip, sc->ilock_flags); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, sqi.last_id * qi->qi_dqperchunk, &error)) goto out; diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 607a9faa8ecc..e8c82b026083 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -28,11 +28,11 @@ * Set us up to scrub reference count btrees. */ int -xfs_scrub_setup_ag_refcountbt( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_ag_refcountbt( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_ag_btree(sc, ip, false); + return xchk_setup_ag_btree(sc, ip, false); } /* Reference count btree scrubber. */ @@ -73,22 +73,22 @@ xfs_scrub_setup_ag_refcountbt( * If the refcount is correct, all the check conditions in the algorithm * should always hold true. If not, the refcount is incorrect. */ -struct xfs_scrub_refcnt_frag { - struct list_head list; - struct xfs_rmap_irec rm; +struct xchk_refcnt_frag { + struct list_head list; + struct xfs_rmap_irec rm; }; -struct xfs_scrub_refcnt_check { - struct xfs_scrub_context *sc; - struct list_head fragments; +struct xchk_refcnt_check { + struct xfs_scrub *sc; + struct list_head fragments; /* refcount extent we're examining */ - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; /* number of owners seen */ - xfs_nlink_t seen; + xfs_nlink_t seen; }; /* @@ -99,18 +99,18 @@ struct xfs_scrub_refcnt_check { * fragments as the refcountbt says we should have. */ STATIC int -xfs_scrub_refcountbt_rmap_check( +xchk_refcountbt_rmap_check( struct xfs_btree_cur *cur, struct xfs_rmap_irec *rec, void *priv) { - struct xfs_scrub_refcnt_check *refchk = priv; - struct xfs_scrub_refcnt_frag *frag; + struct xchk_refcnt_check *refchk = priv; + struct xchk_refcnt_frag *frag; xfs_agblock_t rm_last; xfs_agblock_t rc_last; int error = 0; - if (xfs_scrub_should_terminate(refchk->sc, &error)) + if (xchk_should_terminate(refchk->sc, &error)) return error; rm_last = rec->rm_startblock + rec->rm_blockcount - 1; @@ -118,7 +118,7 @@ xfs_scrub_refcountbt_rmap_check( /* Confirm that a single-owner refc extent is a CoW stage. */ if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { - xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); + xchk_btree_xref_set_corrupt(refchk->sc, cur, 0); return 0; } @@ -135,7 +135,7 @@ xfs_scrub_refcountbt_rmap_check( * is healthy each rmap_irec we see will be in agbno order * so we don't need insertion sort here. */ - frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), + frag = kmem_alloc(sizeof(struct xchk_refcnt_frag), KM_MAYFAIL); if (!frag) return -ENOMEM; @@ -154,12 +154,12 @@ xfs_scrub_refcountbt_rmap_check( * we have a refcountbt error. */ STATIC void -xfs_scrub_refcountbt_process_rmap_fragments( - struct xfs_scrub_refcnt_check *refchk) +xchk_refcountbt_process_rmap_fragments( + struct xchk_refcnt_check *refchk) { struct list_head worklist; - struct xfs_scrub_refcnt_frag *frag; - struct xfs_scrub_refcnt_frag *n; + struct xchk_refcnt_frag *frag; + struct xchk_refcnt_frag *n; xfs_agblock_t bno; xfs_agblock_t rbno; xfs_agblock_t next_rbno; @@ -277,13 +277,13 @@ done: /* Use the rmap entries covering this extent to verify the refcount. */ STATIC void -xfs_scrub_refcountbt_xref_rmap( - struct xfs_scrub_context *sc, +xchk_refcountbt_xref_rmap( + struct xfs_scrub *sc, xfs_agblock_t bno, xfs_extlen_t len, xfs_nlink_t refcount) { - struct xfs_scrub_refcnt_check refchk = { + struct xchk_refcnt_check refchk = { .sc = sc, .bno = bno, .len = len, @@ -292,11 +292,11 @@ xfs_scrub_refcountbt_xref_rmap( }; struct xfs_rmap_irec low; struct xfs_rmap_irec high; - struct xfs_scrub_refcnt_frag *frag; - struct xfs_scrub_refcnt_frag *n; + struct xchk_refcnt_frag *frag; + struct xchk_refcnt_frag *n; int error; - if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) return; /* Cross-reference with the rmapbt to confirm the refcount. */ @@ -307,13 +307,13 @@ xfs_scrub_refcountbt_xref_rmap( INIT_LIST_HEAD(&refchk.fragments); error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, - &xfs_scrub_refcountbt_rmap_check, &refchk); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + &xchk_refcountbt_rmap_check, &refchk); + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) goto out_free; - xfs_scrub_refcountbt_process_rmap_fragments(&refchk); + xchk_refcountbt_process_rmap_fragments(&refchk); if (refcount != refchk.seen) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); out_free: list_for_each_entry_safe(frag, n, &refchk.fragments, list) { @@ -324,34 +324,34 @@ out_free: /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_refcountbt_xref( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - xfs_nlink_t refcount) +xchk_refcountbt_xref( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len, + xfs_nlink_t refcount) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_xref_is_used_space(sc, agbno, len); - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); - xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); + xchk_xref_is_used_space(sc, agbno, len); + xchk_xref_is_not_inode_chunk(sc, agbno, len); + xchk_refcountbt_xref_rmap(sc, agbno, len, refcount); } /* Scrub a refcountbt record. */ STATIC int -xfs_scrub_refcountbt_rec( - struct xfs_scrub_btree *bs, - union xfs_btree_rec *rec) +xchk_refcountbt_rec( + struct xchk_btree *bs, + union xfs_btree_rec *rec) { - struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agblock_t *cow_blocks = bs->private; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; - bool has_cowflag; - int error = 0; + struct xfs_mount *mp = bs->cur->bc_mp; + xfs_agblock_t *cow_blocks = bs->private; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + xfs_agblock_t bno; + xfs_extlen_t len; + xfs_nlink_t refcount; + bool has_cowflag; + int error = 0; bno = be32_to_cpu(rec->refc.rc_startblock); len = be32_to_cpu(rec->refc.rc_blockcount); @@ -360,7 +360,7 @@ xfs_scrub_refcountbt_rec( /* Only CoW records can have refcount == 1. */ has_cowflag = (bno & XFS_REFC_COW_START); if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (has_cowflag) (*cow_blocks) += len; @@ -369,75 +369,75 @@ xfs_scrub_refcountbt_rec( if (bno + len <= bno || !xfs_verify_agbno(mp, agno, bno) || !xfs_verify_agbno(mp, agno, bno + len - 1)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (refcount == 0) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); + xchk_refcountbt_xref(bs->sc, bno, len, refcount); return error; } /* Make sure we have as many refc blocks as the rmap says. */ STATIC void -xfs_scrub_refcount_xref_rmap( - struct xfs_scrub_context *sc, - struct xfs_owner_info *oinfo, - xfs_filblks_t cow_blocks) +xchk_refcount_xref_rmap( + struct xfs_scrub *sc, + struct xfs_owner_info *oinfo, + xfs_filblks_t cow_blocks) { - xfs_extlen_t refcbt_blocks = 0; - xfs_filblks_t blocks; - int error; + xfs_extlen_t refcbt_blocks = 0; + xfs_filblks_t blocks; + int error; - if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) return; /* Check that we saw as many refcbt blocks as the rmap knows about. */ error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); - if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) + if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) return; - error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (blocks != refcbt_blocks) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); /* Check that we saw as many cow blocks as the rmap knows about. */ xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); - error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, + error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, &blocks); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (blocks != cow_blocks) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); } /* Scrub the refcount btree for some AG. */ int -xfs_scrub_refcountbt( - struct xfs_scrub_context *sc) +xchk_refcountbt( + struct xfs_scrub *sc) { - struct xfs_owner_info oinfo; - xfs_agblock_t cow_blocks = 0; - int error; + struct xfs_owner_info oinfo; + xfs_agblock_t cow_blocks = 0; + int error; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); - error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, + error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec, &oinfo, &cow_blocks); if (error) return error; - xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); + xchk_refcount_xref_rmap(sc, &oinfo, cow_blocks); return 0; } /* xref check that a cow staging extent is marked in the refcountbt. */ void -xfs_scrub_xref_is_cow_staging( - struct xfs_scrub_context *sc, +xchk_xref_is_cow_staging( + struct xfs_scrub *sc, xfs_agblock_t agbno, xfs_extlen_t len) { @@ -446,35 +446,35 @@ xfs_scrub_xref_is_cow_staging( int has_refcount; int error; - if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; /* Find the CoW staging extent. */ error = xfs_refcount_lookup_le(sc->sa.refc_cur, agbno + XFS_REFC_COW_START, &has_refcount); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); return; } error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); return; } /* CoW flag must be set, refcount must be 1. */ has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); if (!has_cowflag || rc.rc_refcount != 1) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ if (rc.rc_blockcount < len) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); } /* @@ -482,20 +482,20 @@ xfs_scrub_xref_is_cow_staging( * can have multiple owners. */ void -xfs_scrub_xref_is_not_shared( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno, - xfs_extlen_t len) +xchk_xref_is_not_shared( + struct xfs_scrub *sc, + xfs_agblock_t agbno, + xfs_extlen_t len) { - bool shared; - int error; + bool shared; + int error; - if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (shared) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 326be4e8b71e..17cf48564390 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -34,6 +34,7 @@ #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" +#include "scrub/bitmap.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -41,21 +42,21 @@ * and will set *fixed to true if it thinks it repaired anything. */ int -xfs_repair_attempt( - struct xfs_inode *ip, - struct xfs_scrub_context *sc, - bool *fixed) +xrep_attempt( + struct xfs_inode *ip, + struct xfs_scrub *sc, + bool *fixed) { - int error = 0; + int error = 0; - trace_xfs_repair_attempt(ip, sc->sm, error); + trace_xrep_attempt(ip, sc->sm, error); - xfs_scrub_ag_btcur_free(&sc->sa); + xchk_ag_btcur_free(&sc->sa); /* Repair whatever's broken. */ ASSERT(sc->ops->repair); error = sc->ops->repair(sc); - trace_xfs_repair_done(ip, sc->sm, error); + trace_xrep_done(ip, sc->sm, error); switch (error) { case 0: /* @@ -93,8 +94,8 @@ xfs_repair_attempt( * structure to track rate limiting information. */ void -xfs_repair_failure( - struct xfs_mount *mp) +xrep_failure( + struct xfs_mount *mp) { xfs_alert_ratelimited(mp, "Corruption not fixed during online repair. Unmount and run xfs_repair."); @@ -105,12 +106,12 @@ xfs_repair_failure( * given mountpoint. */ int -xfs_repair_probe( - struct xfs_scrub_context *sc) +xrep_probe( + struct xfs_scrub *sc) { - int error = 0; + int error = 0; - if (xfs_scrub_should_terminate(sc, &error)) + if (xchk_should_terminate(sc, &error)) return error; return 0; @@ -121,15 +122,18 @@ xfs_repair_probe( * the btree cursors. */ int -xfs_repair_roll_ag_trans( - struct xfs_scrub_context *sc) +xrep_roll_ag_trans( + struct xfs_scrub *sc) { - int error; + int error; /* Keep the AG header buffers locked so we can keep going. */ - xfs_trans_bhold(sc->tp, sc->sa.agi_bp); - xfs_trans_bhold(sc->tp, sc->sa.agf_bp); - xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bhold(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bhold(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bhold(sc->tp, sc->sa.agfl_bp); /* Roll the transaction. */ error = xfs_trans_roll(&sc->tp); @@ -137,9 +141,12 @@ xfs_repair_roll_ag_trans( goto out_release; /* Join AG headers to the new transaction. */ - xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); - xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); - xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp); return 0; @@ -149,9 +156,12 @@ out_release: * buffers will be released during teardown on our way out * of the kernel. */ - xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp); - xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp); - xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp); + if (sc->sa.agi_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp); + if (sc->sa.agfl_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp); return error; } @@ -162,10 +172,10 @@ out_release: * in AG reservations) to construct a whole btree. */ bool -xfs_repair_ag_has_space( - struct xfs_perag *pag, - xfs_extlen_t nr_blocks, - enum xfs_ag_resv_type type) +xrep_ag_has_space( + struct xfs_perag *pag, + xfs_extlen_t nr_blocks, + enum xfs_ag_resv_type type) { return !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) && !xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) && @@ -178,8 +188,8 @@ xfs_repair_ag_has_space( * any type of per-AG btree. */ xfs_extlen_t -xfs_repair_calc_ag_resblks( - struct xfs_scrub_context *sc) +xrep_calc_ag_resblks( + struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; struct xfs_scrub_metadata *sm = sc->sm; @@ -231,7 +241,7 @@ xfs_repair_calc_ag_resblks( } xfs_perag_put(pag); - trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen, + trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen, freelen, usedlen); /* @@ -270,7 +280,7 @@ xfs_repair_calc_ag_resblks( rmapbt_sz = 0; } - trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz, + trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz); return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz)); @@ -278,15 +288,15 @@ xfs_repair_calc_ag_resblks( /* Allocate a block in an AG. */ int -xfs_repair_alloc_ag_block( - struct xfs_scrub_context *sc, - struct xfs_owner_info *oinfo, - xfs_fsblock_t *fsbno, - enum xfs_ag_resv_type resv) +xrep_alloc_ag_block( + struct xfs_scrub *sc, + struct xfs_owner_info *oinfo, + xfs_fsblock_t *fsbno, + enum xfs_ag_resv_type resv) { - struct xfs_alloc_arg args = {0}; - xfs_agblock_t bno; - int error; + struct xfs_alloc_arg args = {0}; + xfs_agblock_t bno; + int error; switch (resv) { case XFS_AG_RESV_AGFL: @@ -329,8 +339,8 @@ xfs_repair_alloc_ag_block( /* Initialize a new AG btree root block with zero entries. */ int -xfs_repair_init_btblock( - struct xfs_scrub_context *sc, +xrep_init_btblock( + struct xfs_scrub *sc, xfs_fsblock_t fsb, struct xfs_buf **bpp, xfs_btnum_t btnum, @@ -340,7 +350,7 @@ xfs_repair_init_btblock( struct xfs_mount *mp = sc->mp; struct xfs_buf *bp; - trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb), + trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb), XFS_FSB_TO_AGBNO(mp, fsb), btnum); ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno); @@ -367,222 +377,29 @@ xfs_repair_init_btblock( * * However, that leaves the matter of removing all the metadata describing the * old broken structure. For primary metadata we use the rmap data to collect - * every extent with a matching rmap owner (exlist); we then iterate all other + * every extent with a matching rmap owner (bitmap); we then iterate all other * metadata structures with the same rmap owner to collect the extents that - * cannot be removed (sublist). We then subtract sublist from exlist to + * cannot be removed (sublist). We then subtract sublist from bitmap to * derive the blocks that were used by the old btree. These blocks can be * reaped. * * For rmapbt reconstructions we must use different tactics for extent * collection. First we iterate all primary metadata (this excludes the old * rmapbt, obviously) to generate new rmap records. The gaps in the rmap - * records are collected as exlist. The bnobt records are collected as - * sublist. As with the other btrees we subtract sublist from exlist, and the + * records are collected as bitmap. The bnobt records are collected as + * sublist. As with the other btrees we subtract sublist from bitmap, and the * result (since the rmapbt lives in the free space) are the blocks from the * old rmapbt. - */ - -/* Collect a dead btree extent for later disposal. */ -int -xfs_repair_collect_btree_extent( - struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist, - xfs_fsblock_t fsbno, - xfs_extlen_t len) -{ - struct xfs_repair_extent *rex; - - trace_xfs_repair_collect_btree_extent(sc->mp, - XFS_FSB_TO_AGNO(sc->mp, fsbno), - XFS_FSB_TO_AGBNO(sc->mp, fsbno), len); - - rex = kmem_alloc(sizeof(struct xfs_repair_extent), KM_MAYFAIL); - if (!rex) - return -ENOMEM; - - INIT_LIST_HEAD(&rex->list); - rex->fsbno = fsbno; - rex->len = len; - list_add_tail(&rex->list, &exlist->list); - - return 0; -} - -/* - * An error happened during the rebuild so the transaction will be cancelled. - * The fs will shut down, and the administrator has to unmount and run repair. - * Therefore, free all the memory associated with the list so we can die. - */ -void -xfs_repair_cancel_btree_extents( - struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist) -{ - struct xfs_repair_extent *rex; - struct xfs_repair_extent *n; - - for_each_xfs_repair_extent_safe(rex, n, exlist) { - list_del(&rex->list); - kmem_free(rex); - } -} - -/* Compare two btree extents. */ -static int -xfs_repair_btree_extent_cmp( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_repair_extent *ap; - struct xfs_repair_extent *bp; - - ap = container_of(a, struct xfs_repair_extent, list); - bp = container_of(b, struct xfs_repair_extent, list); - - if (ap->fsbno > bp->fsbno) - return 1; - if (ap->fsbno < bp->fsbno) - return -1; - return 0; -} - -/* - * Remove all the blocks mentioned in @sublist from the extents in @exlist. * - * The intent is that callers will iterate the rmapbt for all of its records - * for a given owner to generate @exlist; and iterate all the blocks of the - * metadata structures that are not being rebuilt and have the same rmapbt - * owner to generate @sublist. This routine subtracts all the extents - * mentioned in sublist from all the extents linked in @exlist, which leaves - * @exlist as the list of blocks that are not accounted for, which we assume - * are the dead blocks of the old metadata structure. The blocks mentioned in - * @exlist can be reaped. - */ -#define LEFT_ALIGNED (1 << 0) -#define RIGHT_ALIGNED (1 << 1) -int -xfs_repair_subtract_extents( - struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist, - struct xfs_repair_extent_list *sublist) -{ - struct list_head *lp; - struct xfs_repair_extent *ex; - struct xfs_repair_extent *newex; - struct xfs_repair_extent *subex; - xfs_fsblock_t sub_fsb; - xfs_extlen_t sub_len; - int state; - int error = 0; - - if (list_empty(&exlist->list) || list_empty(&sublist->list)) - return 0; - ASSERT(!list_empty(&sublist->list)); - - list_sort(NULL, &exlist->list, xfs_repair_btree_extent_cmp); - list_sort(NULL, &sublist->list, xfs_repair_btree_extent_cmp); - - /* - * Now that we've sorted both lists, we iterate exlist once, rolling - * forward through sublist and/or exlist as necessary until we find an - * overlap or reach the end of either list. We do not reset lp to the - * head of exlist nor do we reset subex to the head of sublist. The - * list traversal is similar to merge sort, but we're deleting - * instead. In this manner we avoid O(n^2) operations. - */ - subex = list_first_entry(&sublist->list, struct xfs_repair_extent, - list); - lp = exlist->list.next; - while (lp != &exlist->list) { - ex = list_entry(lp, struct xfs_repair_extent, list); - - /* - * Advance subex and/or ex until we find a pair that - * intersect or we run out of extents. - */ - while (subex->fsbno + subex->len <= ex->fsbno) { - if (list_is_last(&subex->list, &sublist->list)) - goto out; - subex = list_next_entry(subex, list); - } - if (subex->fsbno >= ex->fsbno + ex->len) { - lp = lp->next; - continue; - } - - /* trim subex to fit the extent we have */ - sub_fsb = subex->fsbno; - sub_len = subex->len; - if (subex->fsbno < ex->fsbno) { - sub_len -= ex->fsbno - subex->fsbno; - sub_fsb = ex->fsbno; - } - if (sub_len > ex->len) - sub_len = ex->len; - - state = 0; - if (sub_fsb == ex->fsbno) - state |= LEFT_ALIGNED; - if (sub_fsb + sub_len == ex->fsbno + ex->len) - state |= RIGHT_ALIGNED; - switch (state) { - case LEFT_ALIGNED: - /* Coincides with only the left. */ - ex->fsbno += sub_len; - ex->len -= sub_len; - break; - case RIGHT_ALIGNED: - /* Coincides with only the right. */ - ex->len -= sub_len; - lp = lp->next; - break; - case LEFT_ALIGNED | RIGHT_ALIGNED: - /* Total overlap, just delete ex. */ - lp = lp->next; - list_del(&ex->list); - kmem_free(ex); - break; - case 0: - /* - * Deleting from the middle: add the new right extent - * and then shrink the left extent. - */ - newex = kmem_alloc(sizeof(struct xfs_repair_extent), - KM_MAYFAIL); - if (!newex) { - error = -ENOMEM; - goto out; - } - INIT_LIST_HEAD(&newex->list); - newex->fsbno = sub_fsb + sub_len; - newex->len = ex->fsbno + ex->len - newex->fsbno; - list_add(&newex->list, &ex->list); - ex->len = sub_fsb - ex->fsbno; - lp = lp->next; - break; - default: - ASSERT(0); - break; - } - } - -out: - return error; -} -#undef LEFT_ALIGNED -#undef RIGHT_ALIGNED - -/* * Disposal of Blocks from Old per-AG Btrees * * Now that we've constructed a new btree to replace the damaged one, we want * to dispose of the blocks that (we think) the old btree was using. - * Previously, we used the rmapbt to collect the extents (exlist) with the + * Previously, we used the rmapbt to collect the extents (bitmap) with the * rmap owner corresponding to the tree we rebuilt, collected extents for any * blocks with the same rmap owner that are owned by another data structure - * (sublist), and subtracted sublist from exlist. In theory the extents - * remaining in exlist are the old btree's blocks. + * (sublist), and subtracted sublist from bitmap. In theory the extents + * remaining in bitmap are the old btree's blocks. * * Unfortunately, it's possible that the btree was crosslinked with other * blocks on disk. The rmap data can tell us if there are multiple owners, so @@ -598,7 +415,7 @@ out: * If there are no rmap records at all, we also free the block. If the btree * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't * supposed to be a rmap record and everything is ok. For other btrees there - * had to have been an rmap entry for the block to have ended up on @exlist, + * had to have been an rmap entry for the block to have ended up on @bitmap, * so if it's gone now there's something wrong and the fs will shut down. * * Note: If there are multiple rmap records with only the same rmap owner as @@ -611,7 +428,7 @@ out: * The caller is responsible for locking the AG headers for the entire rebuild * operation so that nothing else can sneak in and change the AG state while * we're not looking. We also assume that the caller already invalidated any - * buffers associated with @exlist. + * buffers associated with @bitmap. */ /* @@ -619,15 +436,14 @@ out: * is not intended for use with file data repairs; we have bunmapi for that. */ int -xfs_repair_invalidate_blocks( - struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist) +xrep_invalidate_blocks( + struct xfs_scrub *sc, + struct xfs_bitmap *bitmap) { - struct xfs_repair_extent *rex; - struct xfs_repair_extent *n; - struct xfs_buf *bp; - xfs_fsblock_t fsbno; - xfs_agblock_t i; + struct xfs_bitmap_range *bmr; + struct xfs_bitmap_range *n; + struct xfs_buf *bp; + xfs_fsblock_t fsbno; /* * For each block in each extent, see if there's an incore buffer for @@ -637,18 +453,16 @@ xfs_repair_invalidate_blocks( * because we never own those; and if we can't TRYLOCK the buffer we * assume it's owned by someone else. */ - for_each_xfs_repair_extent_safe(rex, n, exlist) { - for (fsbno = rex->fsbno, i = rex->len; i > 0; fsbno++, i--) { - /* Skip AG headers and post-EOFS blocks */ - if (!xfs_verify_fsbno(sc->mp, fsbno)) - continue; - bp = xfs_buf_incore(sc->mp->m_ddev_targp, - XFS_FSB_TO_DADDR(sc->mp, fsbno), - XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK); - if (bp) { - xfs_trans_bjoin(sc->tp, bp); - xfs_trans_binval(sc->tp, bp); - } + for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) { + /* Skip AG headers and post-EOFS blocks */ + if (!xfs_verify_fsbno(sc->mp, fsbno)) + continue; + bp = xfs_buf_incore(sc->mp->m_ddev_targp, + XFS_FSB_TO_DADDR(sc->mp, fsbno), + XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK); + if (bp) { + xfs_trans_bjoin(sc->tp, bp); + xfs_trans_binval(sc->tp, bp); } } @@ -657,11 +471,11 @@ xfs_repair_invalidate_blocks( /* Ensure the freelist is the correct size. */ int -xfs_repair_fix_freelist( - struct xfs_scrub_context *sc, - bool can_shrink) +xrep_fix_freelist( + struct xfs_scrub *sc, + bool can_shrink) { - struct xfs_alloc_arg args = {0}; + struct xfs_alloc_arg args = {0}; args.mp = sc->mp; args.tp = sc->tp; @@ -677,15 +491,15 @@ xfs_repair_fix_freelist( * Put a block back on the AGFL. */ STATIC int -xfs_repair_put_freelist( - struct xfs_scrub_context *sc, - xfs_agblock_t agbno) +xrep_put_freelist( + struct xfs_scrub *sc, + xfs_agblock_t agbno) { - struct xfs_owner_info oinfo; - int error; + struct xfs_owner_info oinfo; + int error; /* Make sure there's space on the freelist. */ - error = xfs_repair_fix_freelist(sc, true); + error = xrep_fix_freelist(sc, true); if (error) return error; @@ -711,20 +525,20 @@ xfs_repair_put_freelist( return 0; } -/* Dispose of a single metadata block. */ +/* Dispose of a single block. */ STATIC int -xfs_repair_dispose_btree_block( - struct xfs_scrub_context *sc, - xfs_fsblock_t fsbno, - struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type resv) +xrep_reap_block( + struct xfs_scrub *sc, + xfs_fsblock_t fsbno, + struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type resv) { - struct xfs_btree_cur *cur; - struct xfs_buf *agf_bp = NULL; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - bool has_other_rmap; - int error; + struct xfs_btree_cur *cur; + struct xfs_buf *agf_bp = NULL; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + bool has_other_rmap; + int error; agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); @@ -747,9 +561,9 @@ xfs_repair_dispose_btree_block( /* Can we find any other rmappings? */ error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo, &has_other_rmap); + xfs_btree_del_cursor(cur, error); if (error) - goto out_cur; - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + goto out_free; /* * If there are other rmappings, this block is cross linked and must @@ -767,7 +581,7 @@ xfs_repair_dispose_btree_block( if (has_other_rmap) error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo); else if (resv == XFS_AG_RESV_AGFL) - error = xfs_repair_put_freelist(sc, agbno); + error = xrep_put_freelist(sc, agbno); else error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv); if (agf_bp != sc->sa.agf_bp) @@ -777,50 +591,43 @@ xfs_repair_dispose_btree_block( if (sc->ip) return xfs_trans_roll_inode(&sc->tp, sc->ip); - return xfs_repair_roll_ag_trans(sc); + return xrep_roll_ag_trans(sc); -out_cur: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +out_free: if (agf_bp != sc->sa.agf_bp) xfs_trans_brelse(sc->tp, agf_bp); return error; } -/* Dispose of btree blocks from an old per-AG btree. */ +/* Dispose of every block of every extent in the bitmap. */ int -xfs_repair_reap_btree_extents( - struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist, - struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type type) +xrep_reap_extents( + struct xfs_scrub *sc, + struct xfs_bitmap *bitmap, + struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) { - struct xfs_repair_extent *rex; - struct xfs_repair_extent *n; - int error = 0; + struct xfs_bitmap_range *bmr; + struct xfs_bitmap_range *n; + xfs_fsblock_t fsbno; + int error = 0; ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb)); - /* Dispose of every block from the old btree. */ - for_each_xfs_repair_extent_safe(rex, n, exlist) { + for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) { ASSERT(sc->ip != NULL || - XFS_FSB_TO_AGNO(sc->mp, rex->fsbno) == sc->sa.agno); + XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno); + trace_xrep_dispose_btree_extent(sc->mp, + XFS_FSB_TO_AGNO(sc->mp, fsbno), + XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1); - trace_xfs_repair_dispose_btree_extent(sc->mp, - XFS_FSB_TO_AGNO(sc->mp, rex->fsbno), - XFS_FSB_TO_AGBNO(sc->mp, rex->fsbno), rex->len); - - for (; rex->len > 0; rex->len--, rex->fsbno++) { - error = xfs_repair_dispose_btree_block(sc, rex->fsbno, - oinfo, type); - if (error) - goto out; - } - list_del(&rex->list); - kmem_free(rex); + error = xrep_reap_block(sc, fsbno, oinfo, type); + if (error) + goto out; } out: - xfs_repair_cancel_btree_extents(sc, exlist); + xfs_bitmap_destroy(bitmap); return error; } @@ -832,12 +639,12 @@ out: * btree roots. This is not guaranteed to work if the AG is heavily damaged * or the rmap data are corrupt. * - * Callers of xfs_repair_find_ag_btree_roots must lock the AGF and AGFL + * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL * buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the * AGI is being rebuilt. It must maintain these locks until it's safe for * other threads to change the btrees' shapes. The caller provides * information about the btrees to look for by passing in an array of - * xfs_repair_find_ag_btree with the (rmap owner, buf_ops, magic) fields set. + * xrep_find_ag_btree with the (rmap owner, buf_ops, magic) fields set. * The (root, height) fields will be set on return if anything is found. The * last element of the array should have a NULL buf_ops to mark the end of the * array. @@ -851,30 +658,30 @@ out: * should be the roots. */ -struct xfs_repair_findroot { - struct xfs_scrub_context *sc; +struct xrep_findroot { + struct xfs_scrub *sc; struct xfs_buf *agfl_bp; struct xfs_agf *agf; - struct xfs_repair_find_ag_btree *btree_info; + struct xrep_find_ag_btree *btree_info; }; /* See if our block is in the AGFL. */ STATIC int -xfs_repair_findroot_agfl_walk( - struct xfs_mount *mp, - xfs_agblock_t bno, - void *priv) +xrep_findroot_agfl_walk( + struct xfs_mount *mp, + xfs_agblock_t bno, + void *priv) { - xfs_agblock_t *agbno = priv; + xfs_agblock_t *agbno = priv; return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; } /* Does this block match the btree information passed in? */ STATIC int -xfs_repair_findroot_block( - struct xfs_repair_findroot *ri, - struct xfs_repair_find_ag_btree *fab, +xrep_findroot_block( + struct xrep_findroot *ri, + struct xrep_find_ag_btree *fab, uint64_t owner, xfs_agblock_t agbno, bool *found_it) @@ -895,7 +702,7 @@ xfs_repair_findroot_block( */ if (owner == XFS_RMAP_OWN_AG) { error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, - xfs_repair_findroot_agfl_walk, &agbno); + xrep_findroot_agfl_walk, &agbno); if (error == XFS_BTREE_QUERY_RANGE_ABORT) return 0; if (error) @@ -933,7 +740,7 @@ xfs_repair_findroot_block( fab->height = xfs_btree_get_level(btblock) + 1; *found_it = true; - trace_xfs_repair_findroot_block(mp, ri->sc->sa.agno, agbno, + trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno, be32_to_cpu(btblock->bb_magic), fab->height - 1); out: xfs_trans_brelse(ri->sc->tp, bp); @@ -945,13 +752,13 @@ out: * looking for? */ STATIC int -xfs_repair_findroot_rmap( +xrep_findroot_rmap( struct xfs_btree_cur *cur, struct xfs_rmap_irec *rec, void *priv) { - struct xfs_repair_findroot *ri = priv; - struct xfs_repair_find_ag_btree *fab; + struct xrep_findroot *ri = priv; + struct xrep_find_ag_btree *fab; xfs_agblock_t b; bool found_it; int error = 0; @@ -966,7 +773,7 @@ xfs_repair_findroot_rmap( for (fab = ri->btree_info; fab->buf_ops; fab++) { if (rec->rm_owner != fab->rmap_owner) continue; - error = xfs_repair_findroot_block(ri, fab, + error = xrep_findroot_block(ri, fab, rec->rm_owner, rec->rm_startblock + b, &found_it); if (error) @@ -981,15 +788,15 @@ xfs_repair_findroot_rmap( /* Find the roots of the per-AG btrees described in btree_info. */ int -xfs_repair_find_ag_btree_roots( - struct xfs_scrub_context *sc, +xrep_find_ag_btree_roots( + struct xfs_scrub *sc, struct xfs_buf *agf_bp, - struct xfs_repair_find_ag_btree *btree_info, + struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp) { struct xfs_mount *mp = sc->mp; - struct xfs_repair_findroot ri; - struct xfs_repair_find_ag_btree *fab; + struct xrep_findroot ri; + struct xrep_find_ag_btree *fab; struct xfs_btree_cur *cur; int error; @@ -1008,19 +815,19 @@ xfs_repair_find_ag_btree_roots( } cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno); - error = xfs_rmap_query_all(cur, xfs_repair_findroot_rmap, &ri); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri); + xfs_btree_del_cursor(cur, error); return error; } /* Force a quotacheck the next time we mount. */ void -xfs_repair_force_quotacheck( - struct xfs_scrub_context *sc, - uint dqtype) +xrep_force_quotacheck( + struct xfs_scrub *sc, + uint dqtype) { - uint flag; + uint flag; flag = xfs_quota_chkd_flag(dqtype); if (!(flag & sc->mp->m_qflags)) @@ -1044,10 +851,10 @@ xfs_repair_force_quotacheck( * repair corruptions in the quota metadata. */ int -xfs_repair_ino_dqattach( - struct xfs_scrub_context *sc) +xrep_ino_dqattach( + struct xfs_scrub *sc) { - int error; + int error; error = xfs_qm_dqattach_locked(sc->ip, false); switch (error) { @@ -1058,11 +865,11 @@ xfs_repair_ino_dqattach( "inode %llu repair encountered quota error %d, quotacheck forced.", (unsigned long long)sc->ip->i_ino, error); if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot) - xfs_repair_force_quotacheck(sc, XFS_DQ_USER); + xrep_force_quotacheck(sc, XFS_DQ_USER); if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot) - xfs_repair_force_quotacheck(sc, XFS_DQ_GROUP); + xrep_force_quotacheck(sc, XFS_DQ_GROUP); if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot) - xfs_repair_force_quotacheck(sc, XFS_DQ_PROJ); + xrep_force_quotacheck(sc, XFS_DQ_PROJ); /* fall through */ case -ESRCH: error = 0; diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index ef47826b6725..9de321eee4ab 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -6,7 +6,7 @@ #ifndef __XFS_SCRUB_REPAIR_H__ #define __XFS_SCRUB_REPAIR_H__ -static inline int xfs_repair_notsupported(struct xfs_scrub_context *sc) +static inline int xrep_notsupported(struct xfs_scrub *sc) { return -EOPNOTSUPP; } @@ -15,55 +15,26 @@ static inline int xfs_repair_notsupported(struct xfs_scrub_context *sc) /* Repair helpers */ -int xfs_repair_attempt(struct xfs_inode *ip, struct xfs_scrub_context *sc, - bool *fixed); -void xfs_repair_failure(struct xfs_mount *mp); -int xfs_repair_roll_ag_trans(struct xfs_scrub_context *sc); -bool xfs_repair_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks, +int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc, bool *fixed); +void xrep_failure(struct xfs_mount *mp); +int xrep_roll_ag_trans(struct xfs_scrub *sc); +bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks, enum xfs_ag_resv_type type); -xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc); -int xfs_repair_alloc_ag_block(struct xfs_scrub_context *sc, - struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno, - enum xfs_ag_resv_type resv); -int xfs_repair_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb, +xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc); +int xrep_alloc_ag_block(struct xfs_scrub *sc, struct xfs_owner_info *oinfo, + xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv); +int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb, struct xfs_buf **bpp, xfs_btnum_t btnum, const struct xfs_buf_ops *ops); -struct xfs_repair_extent { - struct list_head list; - xfs_fsblock_t fsbno; - xfs_extlen_t len; -}; - -struct xfs_repair_extent_list { - struct list_head list; -}; - -static inline void -xfs_repair_init_extent_list( - struct xfs_repair_extent_list *exlist) -{ - INIT_LIST_HEAD(&exlist->list); -} +struct xfs_bitmap; -#define for_each_xfs_repair_extent_safe(rbe, n, exlist) \ - list_for_each_entry_safe((rbe), (n), &(exlist)->list, list) -int xfs_repair_collect_btree_extent(struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *btlist, xfs_fsblock_t fsbno, - xfs_extlen_t len); -void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *btlist); -int xfs_repair_subtract_extents(struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist, - struct xfs_repair_extent_list *sublist); -int xfs_repair_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink); -int xfs_repair_invalidate_blocks(struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *btlist); -int xfs_repair_reap_btree_extents(struct xfs_scrub_context *sc, - struct xfs_repair_extent_list *exlist, +int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink); +int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xfs_bitmap *btlist); +int xrep_reap_extents(struct xfs_scrub *sc, struct xfs_bitmap *exlist, struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type); -struct xfs_repair_find_ag_btree { +struct xrep_find_ag_btree { /* in: rmap owner of the btree we're looking for */ uint64_t rmap_owner; @@ -78,40 +49,44 @@ struct xfs_repair_find_ag_btree { unsigned int height; }; -int xfs_repair_find_ag_btree_roots(struct xfs_scrub_context *sc, - struct xfs_buf *agf_bp, - struct xfs_repair_find_ag_btree *btree_info, - struct xfs_buf *agfl_bp); -void xfs_repair_force_quotacheck(struct xfs_scrub_context *sc, uint dqtype); -int xfs_repair_ino_dqattach(struct xfs_scrub_context *sc); +int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp, + struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp); +void xrep_force_quotacheck(struct xfs_scrub *sc, uint dqtype); +int xrep_ino_dqattach(struct xfs_scrub *sc); /* Metadata repairers */ -int xfs_repair_probe(struct xfs_scrub_context *sc); -int xfs_repair_superblock(struct xfs_scrub_context *sc); +int xrep_probe(struct xfs_scrub *sc); +int xrep_superblock(struct xfs_scrub *sc); +int xrep_agf(struct xfs_scrub *sc); +int xrep_agfl(struct xfs_scrub *sc); +int xrep_agi(struct xfs_scrub *sc); #else -static inline int xfs_repair_attempt( - struct xfs_inode *ip, - struct xfs_scrub_context *sc, - bool *fixed) +static inline int xrep_attempt( + struct xfs_inode *ip, + struct xfs_scrub *sc, + bool *fixed) { return -EOPNOTSUPP; } -static inline void xfs_repair_failure(struct xfs_mount *mp) {} +static inline void xrep_failure(struct xfs_mount *mp) {} static inline xfs_extlen_t -xfs_repair_calc_ag_resblks( - struct xfs_scrub_context *sc) +xrep_calc_ag_resblks( + struct xfs_scrub *sc) { ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)); return 0; } -#define xfs_repair_probe xfs_repair_notsupported -#define xfs_repair_superblock xfs_repair_notsupported +#define xrep_probe xrep_notsupported +#define xrep_superblock xrep_notsupported +#define xrep_agf xrep_notsupported +#define xrep_agfl xrep_notsupported +#define xrep_agi xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index c6d763236ba7..5e293c129813 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -29,30 +29,30 @@ * Set us up to scrub reverse mapping btrees. */ int -xfs_scrub_setup_ag_rmapbt( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_ag_rmapbt( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - return xfs_scrub_setup_ag_btree(sc, ip, false); + return xchk_setup_ag_btree(sc, ip, false); } /* Reverse-mapping scrubber. */ /* Cross-reference a rmap against the refcount btree. */ STATIC void -xfs_scrub_rmapbt_xref_refc( - struct xfs_scrub_context *sc, - struct xfs_rmap_irec *irec) +xchk_rmapbt_xref_refc( + struct xfs_scrub *sc, + struct xfs_rmap_irec *irec) { - xfs_agblock_t fbno; - xfs_extlen_t flen; - bool non_inode; - bool is_bmbt; - bool is_attr; - bool is_unwritten; - int error; - - if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm)) + xfs_agblock_t fbno; + xfs_extlen_t flen; + bool non_inode; + bool is_bmbt; + bool is_attr; + bool is_unwritten; + int error; + + if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner); @@ -63,58 +63,58 @@ xfs_scrub_rmapbt_xref_refc( /* If this is shared, must be a data fork extent. */ error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock, irec->rm_blockcount, &fbno, &flen, false); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten)) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); } /* Cross-reference with the other btrees. */ STATIC void -xfs_scrub_rmapbt_xref( - struct xfs_scrub_context *sc, - struct xfs_rmap_irec *irec) +xchk_rmapbt_xref( + struct xfs_scrub *sc, + struct xfs_rmap_irec *irec) { - xfs_agblock_t agbno = irec->rm_startblock; - xfs_extlen_t len = irec->rm_blockcount; + xfs_agblock_t agbno = irec->rm_startblock; + xfs_extlen_t len = irec->rm_blockcount; if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xfs_scrub_xref_is_used_space(sc, agbno, len); + xchk_xref_is_used_space(sc, agbno, len); if (irec->rm_owner == XFS_RMAP_OWN_INODES) - xfs_scrub_xref_is_inode_chunk(sc, agbno, len); + xchk_xref_is_inode_chunk(sc, agbno, len); else - xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); + xchk_xref_is_not_inode_chunk(sc, agbno, len); if (irec->rm_owner == XFS_RMAP_OWN_COW) - xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock, + xchk_xref_is_cow_staging(sc, irec->rm_startblock, irec->rm_blockcount); else - xfs_scrub_rmapbt_xref_refc(sc, irec); + xchk_rmapbt_xref_refc(sc, irec); } /* Scrub an rmapbt record. */ STATIC int -xfs_scrub_rmapbt_rec( - struct xfs_scrub_btree *bs, - union xfs_btree_rec *rec) +xchk_rmapbt_rec( + struct xchk_btree *bs, + union xfs_btree_rec *rec) { - struct xfs_mount *mp = bs->cur->bc_mp; - struct xfs_rmap_irec irec; - xfs_agnumber_t agno = bs->cur->bc_private.a.agno; - bool non_inode; - bool is_unwritten; - bool is_bmbt; - bool is_attr; - int error; + struct xfs_mount *mp = bs->cur->bc_mp; + struct xfs_rmap_irec irec; + xfs_agnumber_t agno = bs->cur->bc_private.a.agno; + bool non_inode; + bool is_unwritten; + bool is_bmbt; + bool is_attr; + int error; error = xfs_rmap_btrec_to_irec(rec, &irec); - if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error)) + if (!xchk_btree_process_error(bs->sc, bs->cur, 0, &error)) goto out; /* Check extent. */ if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (irec.rm_owner == XFS_RMAP_OWN_FS) { /* @@ -124,7 +124,7 @@ xfs_scrub_rmapbt_rec( */ if (irec.rm_startblock != 0 || irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); } else { /* * Otherwise we must point somewhere past the static metadata @@ -133,7 +133,7 @@ xfs_scrub_rmapbt_rec( if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) || !xfs_verify_agbno(mp, agno, irec.rm_startblock + irec.rm_blockcount - 1)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); } /* Check flags. */ @@ -143,105 +143,105 @@ xfs_scrub_rmapbt_rec( is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN; if (is_bmbt && irec.rm_offset != 0) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (non_inode && irec.rm_offset != 0) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (is_unwritten && (is_bmbt || non_inode || is_attr)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (non_inode && (is_bmbt || is_unwritten || is_attr)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (!non_inode) { if (!xfs_verify_ino(mp, irec.rm_owner)) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); } else { /* Non-inode owner within the magic values? */ if (irec.rm_owner <= XFS_RMAP_OWN_MIN || irec.rm_owner > XFS_RMAP_OWN_FS) - xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); } - xfs_scrub_rmapbt_xref(bs->sc, &irec); + xchk_rmapbt_xref(bs->sc, &irec); out: return error; } /* Scrub the rmap btree for some AG. */ int -xfs_scrub_rmapbt( - struct xfs_scrub_context *sc) +xchk_rmapbt( + struct xfs_scrub *sc) { - struct xfs_owner_info oinfo; + struct xfs_owner_info oinfo; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); - return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, + return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec, &oinfo, NULL); } /* xref check that the extent is owned by a given owner */ static inline void -xfs_scrub_xref_check_owner( - struct xfs_scrub_context *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - struct xfs_owner_info *oinfo, - bool should_have_rmap) +xchk_xref_check_owner( + struct xfs_scrub *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool should_have_rmap) { - bool has_rmap; - int error; + bool has_rmap; + int error; - if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) return; error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo, &has_rmap); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (has_rmap != should_have_rmap) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); } /* xref check that the extent is owned by a given owner */ void -xfs_scrub_xref_is_owned_by( - struct xfs_scrub_context *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - struct xfs_owner_info *oinfo) +xchk_xref_is_owned_by( + struct xfs_scrub *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) { - xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true); + xchk_xref_check_owner(sc, bno, len, oinfo, true); } /* xref check that the extent is not owned by a given owner */ void -xfs_scrub_xref_is_not_owned_by( - struct xfs_scrub_context *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - struct xfs_owner_info *oinfo) +xchk_xref_is_not_owned_by( + struct xfs_scrub *sc, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) { - xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false); + xchk_xref_check_owner(sc, bno, len, oinfo, false); } /* xref check that the extent has no reverse mapping at all */ void -xfs_scrub_xref_has_no_owner( - struct xfs_scrub_context *sc, - xfs_agblock_t bno, - xfs_extlen_t len) +xchk_xref_has_no_owner( + struct xfs_scrub *sc, + xfs_agblock_t bno, + xfs_extlen_t len) { - bool has_rmap; - int error; + bool has_rmap; + int error; - if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm)) + if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) return; error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap); - if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) + if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) return; if (has_rmap) - xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); + xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); } diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 1f86e02a07ca..665d4bbb17cc 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -25,13 +25,13 @@ /* Set us up with the realtime metadata locked. */ int -xfs_scrub_setup_rt( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_rt( + struct xfs_scrub *sc, + struct xfs_inode *ip) { - int error; + int error; - error = xfs_scrub_setup_fs(sc, ip); + error = xchk_setup_fs(sc, ip); if (error) return error; @@ -46,14 +46,14 @@ xfs_scrub_setup_rt( /* Scrub a free extent record from the realtime bitmap. */ STATIC int -xfs_scrub_rtbitmap_rec( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv) +xchk_rtbitmap_rec( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) { - struct xfs_scrub_context *sc = priv; - xfs_rtblock_t startblock; - xfs_rtblock_t blockcount; + struct xfs_scrub *sc = priv; + xfs_rtblock_t startblock; + xfs_rtblock_t blockcount; startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize; blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize; @@ -61,24 +61,24 @@ xfs_scrub_rtbitmap_rec( if (startblock + blockcount <= startblock || !xfs_verify_rtbno(sc->mp, startblock) || !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1)) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } /* Scrub the realtime bitmap. */ int -xfs_scrub_rtbitmap( - struct xfs_scrub_context *sc) +xchk_rtbitmap( + struct xfs_scrub *sc) { - int error; + int error; /* Invoke the fork scrubber. */ - error = xfs_scrub_metadata_inode_forks(sc); + error = xchk_metadata_inode_forks(sc); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) return error; - error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + error = xfs_rtalloc_query_all(sc->tp, xchk_rtbitmap_rec, sc); + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; out: @@ -87,13 +87,13 @@ out: /* Scrub the realtime summary. */ int -xfs_scrub_rtsummary( - struct xfs_scrub_context *sc) +xchk_rtsummary( + struct xfs_scrub *sc) { - struct xfs_inode *rsumip = sc->mp->m_rsumip; - struct xfs_inode *old_ip = sc->ip; - uint old_ilock_flags = sc->ilock_flags; - int error = 0; + struct xfs_inode *rsumip = sc->mp->m_rsumip; + struct xfs_inode *old_ip = sc->ip; + uint old_ilock_flags = sc->ilock_flags; + int error = 0; /* * We ILOCK'd the rt bitmap ip in the setup routine, now lock the @@ -107,12 +107,12 @@ xfs_scrub_rtsummary( xfs_ilock(sc->ip, sc->ilock_flags); /* Invoke the fork scrubber. */ - error = xfs_scrub_metadata_inode_forks(sc); + error = xchk_metadata_inode_forks(sc); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) goto out; /* XXX: implement this some day */ - xfs_scrub_set_incomplete(sc); + xchk_set_incomplete(sc); out: /* Switch back to the rtbitmap inode and lock flags. */ xfs_iunlock(sc->ip, sc->ilock_flags); @@ -124,18 +124,18 @@ out: /* xref check that the extent is not free in the rtbitmap */ void -xfs_scrub_xref_is_used_rt_space( - struct xfs_scrub_context *sc, - xfs_rtblock_t fsbno, - xfs_extlen_t len) +xchk_xref_is_used_rt_space( + struct xfs_scrub *sc, + xfs_rtblock_t fsbno, + xfs_extlen_t len) { - xfs_rtblock_t startext; - xfs_rtblock_t endext; - xfs_rtblock_t extcount; - bool is_free; - int error; + xfs_rtblock_t startext; + xfs_rtblock_t endext; + xfs_rtblock_t extcount; + bool is_free; + int error; - if (xfs_scrub_skip_xref(sc->sm)) + if (xchk_skip_xref(sc->sm)) return; startext = fsbno; @@ -147,10 +147,10 @@ xfs_scrub_xref_is_used_rt_space( xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount, &is_free); - if (!xfs_scrub_should_check_xref(sc, &error, NULL)) + if (!xchk_should_check_xref(sc, &error, NULL)) goto out_unlock; if (is_free) - xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino); + xchk_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino); out_unlock: xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 58ae76b3a421..4bfae1e61d30 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -131,6 +131,12 @@ * optimize the structure so that the rebuild knows what to do. The * second check evaluates the completeness of the repair; that is what * is reported to userspace. + * + * A quick note on symbol prefixes: + * - "xfs_" are general XFS symbols. + * - "xchk_" are symbols related to metadata checking. + * - "xrep_" are symbols related to metadata repair. + * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS. */ /* @@ -144,12 +150,12 @@ * supported by the running kernel. */ static int -xfs_scrub_probe( - struct xfs_scrub_context *sc) +xchk_probe( + struct xfs_scrub *sc) { - int error = 0; + int error = 0; - if (xfs_scrub_should_terminate(sc, &error)) + if (xchk_should_terminate(sc, &error)) return error; return 0; @@ -159,12 +165,12 @@ xfs_scrub_probe( /* Free all the resources and finish the transactions. */ STATIC int -xfs_scrub_teardown( - struct xfs_scrub_context *sc, - struct xfs_inode *ip_in, - int error) +xchk_teardown( + struct xfs_scrub *sc, + struct xfs_inode *ip_in, + int error) { - xfs_scrub_ag_free(sc, &sc->sa); + xchk_ag_free(sc, &sc->sa); if (sc->tp) { if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)) error = xfs_trans_commit(sc->tp); @@ -177,7 +183,7 @@ xfs_scrub_teardown( xfs_iunlock(sc->ip, sc->ilock_flags); if (sc->ip != ip_in && !xfs_internal_inum(sc->mp, sc->ip->i_ino)) - iput(VFS_I(sc->ip)); + xfs_irele(sc->ip); sc->ip = NULL; } if (sc->has_quotaofflock) @@ -191,165 +197,165 @@ xfs_scrub_teardown( /* Scrubbing dispatch. */ -static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { +static const struct xchk_meta_ops meta_scrub_ops[] = { [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ .type = ST_NONE, - .setup = xfs_scrub_setup_fs, - .scrub = xfs_scrub_probe, - .repair = xfs_repair_probe, + .setup = xchk_setup_fs, + .scrub = xchk_probe, + .repair = xrep_probe, }, [XFS_SCRUB_TYPE_SB] = { /* superblock */ .type = ST_PERAG, - .setup = xfs_scrub_setup_fs, - .scrub = xfs_scrub_superblock, - .repair = xfs_repair_superblock, + .setup = xchk_setup_fs, + .scrub = xchk_superblock, + .repair = xrep_superblock, }, [XFS_SCRUB_TYPE_AGF] = { /* agf */ .type = ST_PERAG, - .setup = xfs_scrub_setup_fs, - .scrub = xfs_scrub_agf, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_fs, + .scrub = xchk_agf, + .repair = xrep_agf, }, [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ .type = ST_PERAG, - .setup = xfs_scrub_setup_fs, - .scrub = xfs_scrub_agfl, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_fs, + .scrub = xchk_agfl, + .repair = xrep_agfl, }, [XFS_SCRUB_TYPE_AGI] = { /* agi */ .type = ST_PERAG, - .setup = xfs_scrub_setup_fs, - .scrub = xfs_scrub_agi, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_fs, + .scrub = xchk_agi, + .repair = xrep_agi, }, [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_allocbt, - .scrub = xfs_scrub_bnobt, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_ag_allocbt, + .scrub = xchk_bnobt, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_allocbt, - .scrub = xfs_scrub_cntbt, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_ag_allocbt, + .scrub = xchk_cntbt, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_iallocbt, - .scrub = xfs_scrub_inobt, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_ag_iallocbt, + .scrub = xchk_inobt, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_iallocbt, - .scrub = xfs_scrub_finobt, + .setup = xchk_setup_ag_iallocbt, + .scrub = xchk_finobt, .has = xfs_sb_version_hasfinobt, - .repair = xfs_repair_notsupported, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_rmapbt, - .scrub = xfs_scrub_rmapbt, + .setup = xchk_setup_ag_rmapbt, + .scrub = xchk_rmapbt, .has = xfs_sb_version_hasrmapbt, - .repair = xfs_repair_notsupported, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ .type = ST_PERAG, - .setup = xfs_scrub_setup_ag_refcountbt, - .scrub = xfs_scrub_refcountbt, + .setup = xchk_setup_ag_refcountbt, + .scrub = xchk_refcountbt, .has = xfs_sb_version_hasreflink, - .repair = xfs_repair_notsupported, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_INODE] = { /* inode record */ .type = ST_INODE, - .setup = xfs_scrub_setup_inode, - .scrub = xfs_scrub_inode, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_inode, + .scrub = xchk_inode, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ .type = ST_INODE, - .setup = xfs_scrub_setup_inode_bmap, - .scrub = xfs_scrub_bmap_data, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_inode_bmap, + .scrub = xchk_bmap_data, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ .type = ST_INODE, - .setup = xfs_scrub_setup_inode_bmap, - .scrub = xfs_scrub_bmap_attr, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_inode_bmap, + .scrub = xchk_bmap_attr, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ .type = ST_INODE, - .setup = xfs_scrub_setup_inode_bmap, - .scrub = xfs_scrub_bmap_cow, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_inode_bmap, + .scrub = xchk_bmap_cow, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_DIR] = { /* directory */ .type = ST_INODE, - .setup = xfs_scrub_setup_directory, - .scrub = xfs_scrub_directory, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_directory, + .scrub = xchk_directory, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ .type = ST_INODE, - .setup = xfs_scrub_setup_xattr, - .scrub = xfs_scrub_xattr, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_xattr, + .scrub = xchk_xattr, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .type = ST_INODE, - .setup = xfs_scrub_setup_symlink, - .scrub = xfs_scrub_symlink, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_symlink, + .scrub = xchk_symlink, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ .type = ST_INODE, - .setup = xfs_scrub_setup_parent, - .scrub = xfs_scrub_parent, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_parent, + .scrub = xchk_parent, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ .type = ST_FS, - .setup = xfs_scrub_setup_rt, - .scrub = xfs_scrub_rtbitmap, + .setup = xchk_setup_rt, + .scrub = xchk_rtbitmap, .has = xfs_sb_version_hasrealtime, - .repair = xfs_repair_notsupported, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ .type = ST_FS, - .setup = xfs_scrub_setup_rt, - .scrub = xfs_scrub_rtsummary, + .setup = xchk_setup_rt, + .scrub = xchk_rtsummary, .has = xfs_sb_version_hasrealtime, - .repair = xfs_repair_notsupported, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ .type = ST_FS, - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_quota, + .scrub = xchk_quota, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ .type = ST_FS, - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_quota, + .scrub = xchk_quota, + .repair = xrep_notsupported, }, [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ .type = ST_FS, - .setup = xfs_scrub_setup_quota, - .scrub = xfs_scrub_quota, - .repair = xfs_repair_notsupported, + .setup = xchk_setup_quota, + .scrub = xchk_quota, + .repair = xrep_notsupported, }, }; /* This isn't a stable feature, warn once per day. */ static inline void -xfs_scrub_experimental_warning( +xchk_experimental_warning( struct xfs_mount *mp) { static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( - "xfs_scrub_warning", 86400 * HZ, 1); + "xchk_warning", 86400 * HZ, 1); ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); if (__ratelimit(&scrub_warning)) @@ -358,12 +364,12 @@ xfs_scrub_experimental_warning( } static int -xfs_scrub_validate_inputs( +xchk_validate_inputs( struct xfs_mount *mp, struct xfs_scrub_metadata *sm) { int error; - const struct xfs_scrub_meta_ops *ops; + const struct xchk_meta_ops *ops; error = -EINVAL; /* Check our inputs. */ @@ -441,7 +447,7 @@ out: } #ifdef CONFIG_XFS_ONLINE_REPAIR -static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc) +static inline void xchk_postmortem(struct xfs_scrub *sc) { /* * Userspace asked us to repair something, we repaired it, rescanned @@ -451,10 +457,10 @@ static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc) if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | XFS_SCRUB_OFLAG_XCORRUPT))) - xfs_repair_failure(sc->mp); + xrep_failure(sc->mp); } #else -static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc) +static inline void xchk_postmortem(struct xfs_scrub *sc) { /* * Userspace asked us to scrub something, it's broken, and we have no @@ -473,16 +479,16 @@ xfs_scrub_metadata( struct xfs_inode *ip, struct xfs_scrub_metadata *sm) { - struct xfs_scrub_context sc; + struct xfs_scrub sc; struct xfs_mount *mp = ip->i_mount; bool try_harder = false; bool already_fixed = false; int error = 0; BUILD_BUG_ON(sizeof(meta_scrub_ops) != - (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); + (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR)); - trace_xfs_scrub_start(ip, sm, error); + trace_xchk_start(ip, sm, error); /* Forbidden if we are shut down or mounted norecovery. */ error = -ESHUTDOWN; @@ -492,11 +498,11 @@ xfs_scrub_metadata( if (mp->m_flags & XFS_MOUNT_NORECOVERY) goto out; - error = xfs_scrub_validate_inputs(mp, sm); + error = xchk_validate_inputs(mp, sm); if (error) goto out; - xfs_scrub_experimental_warning(mp); + xchk_experimental_warning(mp); retry_op: /* Set up for the operation. */ @@ -518,7 +524,7 @@ retry_op: * Tear down everything we hold, then set up again with * preparation for worst-case scenarios. */ - error = xfs_scrub_teardown(&sc, ip, 0); + error = xchk_teardown(&sc, ip, 0); if (error) goto out; try_harder = true; @@ -549,13 +555,13 @@ retry_op: * If it's broken, userspace wants us to fix it, and we haven't * already tried to fix it, then attempt a repair. */ - error = xfs_repair_attempt(ip, &sc, &already_fixed); + error = xrep_attempt(ip, &sc, &already_fixed); if (error == -EAGAIN) { if (sc.try_harder) try_harder = true; - error = xfs_scrub_teardown(&sc, ip, 0); + error = xchk_teardown(&sc, ip, 0); if (error) { - xfs_repair_failure(mp); + xrep_failure(mp); goto out; } goto retry_op; @@ -563,11 +569,11 @@ retry_op: } out_nofix: - xfs_scrub_postmortem(&sc); + xchk_postmortem(&sc); out_teardown: - error = xfs_scrub_teardown(&sc, ip, error); + error = xchk_teardown(&sc, ip, error); out: - trace_xfs_scrub_done(ip, sm, error); + trace_xchk_done(ip, sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; error = 0; diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index b295edd5fc0e..af323b229c4b 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -6,58 +6,58 @@ #ifndef __XFS_SCRUB_SCRUB_H__ #define __XFS_SCRUB_SCRUB_H__ -struct xfs_scrub_context; +struct xfs_scrub; /* Type info and names for the scrub types. */ -enum xfs_scrub_type { +enum xchk_type { ST_NONE = 1, /* disabled */ ST_PERAG, /* per-AG metadata */ ST_FS, /* per-FS metadata */ ST_INODE, /* per-inode metadata */ }; -struct xfs_scrub_meta_ops { +struct xchk_meta_ops { /* Acquire whatever resources are needed for the operation. */ - int (*setup)(struct xfs_scrub_context *, + int (*setup)(struct xfs_scrub *, struct xfs_inode *); /* Examine metadata for errors. */ - int (*scrub)(struct xfs_scrub_context *); + int (*scrub)(struct xfs_scrub *); /* Repair or optimize the metadata. */ - int (*repair)(struct xfs_scrub_context *); + int (*repair)(struct xfs_scrub *); /* Decide if we even have this piece of metadata. */ bool (*has)(struct xfs_sb *); /* type describing required/allowed inputs */ - enum xfs_scrub_type type; + enum xchk_type type; }; /* Buffer pointers and btree cursors for an entire AG. */ -struct xfs_scrub_ag { - xfs_agnumber_t agno; - struct xfs_perag *pag; +struct xchk_ag { + xfs_agnumber_t agno; + struct xfs_perag *pag; /* AG btree roots */ - struct xfs_buf *agf_bp; - struct xfs_buf *agfl_bp; - struct xfs_buf *agi_bp; + struct xfs_buf *agf_bp; + struct xfs_buf *agfl_bp; + struct xfs_buf *agi_bp; /* AG btrees */ - struct xfs_btree_cur *bno_cur; - struct xfs_btree_cur *cnt_cur; - struct xfs_btree_cur *ino_cur; - struct xfs_btree_cur *fino_cur; - struct xfs_btree_cur *rmap_cur; - struct xfs_btree_cur *refc_cur; + struct xfs_btree_cur *bno_cur; + struct xfs_btree_cur *cnt_cur; + struct xfs_btree_cur *ino_cur; + struct xfs_btree_cur *fino_cur; + struct xfs_btree_cur *rmap_cur; + struct xfs_btree_cur *refc_cur; }; -struct xfs_scrub_context { +struct xfs_scrub { /* General scrub state. */ struct xfs_mount *mp; struct xfs_scrub_metadata *sm; - const struct xfs_scrub_meta_ops *ops; + const struct xchk_meta_ops *ops; struct xfs_trans *tp; struct xfs_inode *ip; void *buf; @@ -66,78 +66,76 @@ struct xfs_scrub_context { bool has_quotaofflock; /* State tracking for single-AG operations. */ - struct xfs_scrub_ag sa; + struct xchk_ag sa; }; /* Metadata scrubbers */ -int xfs_scrub_tester(struct xfs_scrub_context *sc); -int xfs_scrub_superblock(struct xfs_scrub_context *sc); -int xfs_scrub_agf(struct xfs_scrub_context *sc); -int xfs_scrub_agfl(struct xfs_scrub_context *sc); -int xfs_scrub_agi(struct xfs_scrub_context *sc); -int xfs_scrub_bnobt(struct xfs_scrub_context *sc); -int xfs_scrub_cntbt(struct xfs_scrub_context *sc); -int xfs_scrub_inobt(struct xfs_scrub_context *sc); -int xfs_scrub_finobt(struct xfs_scrub_context *sc); -int xfs_scrub_rmapbt(struct xfs_scrub_context *sc); -int xfs_scrub_refcountbt(struct xfs_scrub_context *sc); -int xfs_scrub_inode(struct xfs_scrub_context *sc); -int xfs_scrub_bmap_data(struct xfs_scrub_context *sc); -int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc); -int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc); -int xfs_scrub_directory(struct xfs_scrub_context *sc); -int xfs_scrub_xattr(struct xfs_scrub_context *sc); -int xfs_scrub_symlink(struct xfs_scrub_context *sc); -int xfs_scrub_parent(struct xfs_scrub_context *sc); +int xchk_tester(struct xfs_scrub *sc); +int xchk_superblock(struct xfs_scrub *sc); +int xchk_agf(struct xfs_scrub *sc); +int xchk_agfl(struct xfs_scrub *sc); +int xchk_agi(struct xfs_scrub *sc); +int xchk_bnobt(struct xfs_scrub *sc); +int xchk_cntbt(struct xfs_scrub *sc); +int xchk_inobt(struct xfs_scrub *sc); +int xchk_finobt(struct xfs_scrub *sc); +int xchk_rmapbt(struct xfs_scrub *sc); +int xchk_refcountbt(struct xfs_scrub *sc); +int xchk_inode(struct xfs_scrub *sc); +int xchk_bmap_data(struct xfs_scrub *sc); +int xchk_bmap_attr(struct xfs_scrub *sc); +int xchk_bmap_cow(struct xfs_scrub *sc); +int xchk_directory(struct xfs_scrub *sc); +int xchk_xattr(struct xfs_scrub *sc); +int xchk_symlink(struct xfs_scrub *sc); +int xchk_parent(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT -int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc); -int xfs_scrub_rtsummary(struct xfs_scrub_context *sc); +int xchk_rtbitmap(struct xfs_scrub *sc); +int xchk_rtsummary(struct xfs_scrub *sc); #else static inline int -xfs_scrub_rtbitmap(struct xfs_scrub_context *sc) +xchk_rtbitmap(struct xfs_scrub *sc) { return -ENOENT; } static inline int -xfs_scrub_rtsummary(struct xfs_scrub_context *sc) +xchk_rtsummary(struct xfs_scrub *sc) { return -ENOENT; } #endif #ifdef CONFIG_XFS_QUOTA -int xfs_scrub_quota(struct xfs_scrub_context *sc); +int xchk_quota(struct xfs_scrub *sc); #else static inline int -xfs_scrub_quota(struct xfs_scrub_context *sc) +xchk_quota(struct xfs_scrub *sc) { return -ENOENT; } #endif /* cross-referencing helpers */ -void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len); -void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len); -void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len); -void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len, - struct xfs_owner_info *oinfo); -void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len, - struct xfs_owner_info *oinfo); -void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc, - xfs_agblock_t agbno, xfs_extlen_t len); -void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc, - xfs_agblock_t bno, xfs_extlen_t len); -void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc, - xfs_agblock_t bno, xfs_extlen_t len); +void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len, struct xfs_owner_info *oinfo); +void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len, struct xfs_owner_info *oinfo); +void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno, + xfs_extlen_t len); +void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno, + xfs_extlen_t len); +void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno, + xfs_extlen_t len); #ifdef CONFIG_XFS_RT -void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc, - xfs_rtblock_t rtbno, xfs_extlen_t len); +void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno, + xfs_extlen_t len); #else -# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) +# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0) #endif #endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 570a89812116..f7ebaa946999 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -25,28 +25,28 @@ /* Set us up to scrub a symbolic link. */ int -xfs_scrub_setup_symlink( - struct xfs_scrub_context *sc, - struct xfs_inode *ip) +xchk_setup_symlink( + struct xfs_scrub *sc, + struct xfs_inode *ip) { /* Allocate the buffer without the inode lock held. */ sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP); if (!sc->buf) return -ENOMEM; - return xfs_scrub_setup_inode_contents(sc, ip, 0); + return xchk_setup_inode_contents(sc, ip, 0); } /* Symbolic links. */ int -xfs_scrub_symlink( - struct xfs_scrub_context *sc) +xchk_symlink( + struct xfs_scrub *sc) { - struct xfs_inode *ip = sc->ip; - struct xfs_ifork *ifp; - loff_t len; - int error = 0; + struct xfs_inode *ip = sc->ip; + struct xfs_ifork *ifp; + loff_t len; + int error = 0; if (!S_ISLNK(VFS_I(ip)->i_mode)) return -ENOENT; @@ -55,7 +55,7 @@ xfs_scrub_symlink( /* Plausible size? */ if (len > XFS_SYMLINK_MAXLEN || len <= 0) { - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } @@ -63,16 +63,16 @@ xfs_scrub_symlink( if (ifp->if_flags & XFS_IFINLINE) { if (len > XFS_IFORK_DSIZE(ip) || len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip))) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); goto out; } /* Remote symlink; must read the contents. */ error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf); - if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) goto out; if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len) - xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); out: return error; } diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 7c76d8b5cb05..96feaf8dcdec 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -22,9 +22,9 @@ /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t -xfs_scrub_btree_cur_fsbno( - struct xfs_btree_cur *cur, - int level) +xchk_btree_cur_fsbno( + struct xfs_btree_cur *cur, + int level) { if (level < cur->bc_nlevels && cur->bc_bufs[level]) return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn); diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cec3e5ece5a1..4e20f0e48232 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -12,7 +12,7 @@ #include <linux/tracepoint.h> #include "xfs_bit.h" -DECLARE_EVENT_CLASS(xfs_scrub_class, +DECLARE_EVENT_CLASS(xchk_class, TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, int error), TP_ARGS(ip, sm, error), @@ -47,19 +47,19 @@ DECLARE_EVENT_CLASS(xfs_scrub_class, __entry->error) ) #define DEFINE_SCRUB_EVENT(name) \ -DEFINE_EVENT(xfs_scrub_class, name, \ +DEFINE_EVENT(xchk_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \ int error), \ TP_ARGS(ip, sm, error)) -DEFINE_SCRUB_EVENT(xfs_scrub_start); -DEFINE_SCRUB_EVENT(xfs_scrub_done); -DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry); -DEFINE_SCRUB_EVENT(xfs_repair_attempt); -DEFINE_SCRUB_EVENT(xfs_repair_done); +DEFINE_SCRUB_EVENT(xchk_start); +DEFINE_SCRUB_EVENT(xchk_done); +DEFINE_SCRUB_EVENT(xchk_deadlock_retry); +DEFINE_SCRUB_EVENT(xrep_attempt); +DEFINE_SCRUB_EVENT(xrep_done); -TRACE_EVENT(xfs_scrub_op_error, - TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno, +TRACE_EVENT(xchk_op_error, + TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno, xfs_agblock_t bno, int error, void *ret_ip), TP_ARGS(sc, agno, bno, error, ret_ip), TP_STRUCT__entry( @@ -87,8 +87,8 @@ TRACE_EVENT(xfs_scrub_op_error, __entry->ret_ip) ); -TRACE_EVENT(xfs_scrub_file_op_error, - TP_PROTO(struct xfs_scrub_context *sc, int whichfork, +TRACE_EVENT(xchk_file_op_error, + TP_PROTO(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset, int error, void *ret_ip), TP_ARGS(sc, whichfork, offset, error, ret_ip), TP_STRUCT__entry( @@ -119,8 +119,8 @@ TRACE_EVENT(xfs_scrub_file_op_error, __entry->ret_ip) ); -DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, - TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip), +DECLARE_EVENT_CLASS(xchk_block_error_class, + TP_PROTO(struct xfs_scrub *sc, xfs_daddr_t daddr, void *ret_ip), TP_ARGS(sc, daddr, ret_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -153,16 +153,16 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class, ) #define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \ -DEFINE_EVENT(xfs_scrub_block_error_class, name, \ - TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \ +DEFINE_EVENT(xchk_block_error_class, name, \ + TP_PROTO(struct xfs_scrub *sc, xfs_daddr_t daddr, \ void *ret_ip), \ TP_ARGS(sc, daddr, ret_ip)) -DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error); -DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen); +DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_error); +DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_preen); -DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, - TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip), +DECLARE_EVENT_CLASS(xchk_ino_error_class, + TP_PROTO(struct xfs_scrub *sc, xfs_ino_t ino, void *ret_ip), TP_ARGS(sc, ino, ret_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -184,17 +184,17 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class, ) #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \ -DEFINE_EVENT(xfs_scrub_ino_error_class, name, \ - TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \ +DEFINE_EVENT(xchk_ino_error_class, name, \ + TP_PROTO(struct xfs_scrub *sc, xfs_ino_t ino, \ void *ret_ip), \ TP_ARGS(sc, ino, ret_ip)) -DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error); -DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen); -DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning); +DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_error); +DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_preen); +DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_warning); -DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, - TP_PROTO(struct xfs_scrub_context *sc, int whichfork, +DECLARE_EVENT_CLASS(xchk_fblock_error_class, + TP_PROTO(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset, void *ret_ip), TP_ARGS(sc, whichfork, offset, ret_ip), TP_STRUCT__entry( @@ -223,16 +223,16 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class, ); #define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \ -DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \ - TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \ +DEFINE_EVENT(xchk_fblock_error_class, name, \ + TP_PROTO(struct xfs_scrub *sc, int whichfork, \ xfs_fileoff_t offset, void *ret_ip), \ TP_ARGS(sc, whichfork, offset, ret_ip)) -DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error); -DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning); +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error); +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning); -TRACE_EVENT(xfs_scrub_incomplete, - TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip), +TRACE_EVENT(xchk_incomplete, + TP_PROTO(struct xfs_scrub *sc, void *ret_ip), TP_ARGS(sc, ret_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -250,8 +250,8 @@ TRACE_EVENT(xfs_scrub_incomplete, __entry->ret_ip) ); -TRACE_EVENT(xfs_scrub_btree_op_error, - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, +TRACE_EVENT(xchk_btree_op_error, + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, int error, void *ret_ip), TP_ARGS(sc, cur, level, error, ret_ip), TP_STRUCT__entry( @@ -266,7 +266,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error, __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->type = sc->sm->sm_type; @@ -290,8 +290,8 @@ TRACE_EVENT(xfs_scrub_btree_op_error, __entry->ret_ip) ); -TRACE_EVENT(xfs_scrub_ifork_btree_op_error, - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, +TRACE_EVENT(xchk_ifork_btree_op_error, + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, int error, void *ret_ip), TP_ARGS(sc, cur, level, error, ret_ip), TP_STRUCT__entry( @@ -308,7 +308,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error, __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->ino = sc->ip->i_ino; __entry->whichfork = cur->bc_private.b.whichfork; @@ -335,8 +335,8 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error, __entry->ret_ip) ); -TRACE_EVENT(xfs_scrub_btree_error, - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, +TRACE_EVENT(xchk_btree_error, + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, void *ret_ip), TP_ARGS(sc, cur, level, ret_ip), TP_STRUCT__entry( @@ -350,7 +350,7 @@ TRACE_EVENT(xfs_scrub_btree_error, __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; @@ -371,8 +371,8 @@ TRACE_EVENT(xfs_scrub_btree_error, __entry->ret_ip) ); -TRACE_EVENT(xfs_scrub_ifork_btree_error, - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, +TRACE_EVENT(xchk_ifork_btree_error, + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, void *ret_ip), TP_ARGS(sc, cur, level, ret_ip), TP_STRUCT__entry( @@ -388,7 +388,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error, __field(void *, ret_ip) ), TP_fast_assign( - xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->ino = sc->ip->i_ino; __entry->whichfork = cur->bc_private.b.whichfork; @@ -413,8 +413,8 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error, __entry->ret_ip) ); -DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class, - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, +DECLARE_EVENT_CLASS(xchk_sbtree_class, + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level), TP_ARGS(sc, cur, level), TP_STRUCT__entry( @@ -428,7 +428,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class, __field(int, ptr) ), TP_fast_assign( - xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level); + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; __entry->type = sc->sm->sm_type; @@ -450,16 +450,16 @@ DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class, __entry->ptr) ) #define DEFINE_SCRUB_SBTREE_EVENT(name) \ -DEFINE_EVENT(xfs_scrub_sbtree_class, name, \ - TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \ +DEFINE_EVENT(xchk_sbtree_class, name, \ + TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, \ int level), \ TP_ARGS(sc, cur, level)) -DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); -DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); +DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_rec); +DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_key); -TRACE_EVENT(xfs_scrub_xref_error, - TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip), +TRACE_EVENT(xchk_xref_error, + TP_PROTO(struct xfs_scrub *sc, int error, void *ret_ip), TP_ARGS(sc, error, ret_ip), TP_STRUCT__entry( __field(dev_t, dev) @@ -483,7 +483,7 @@ TRACE_EVENT(xfs_scrub_xref_error, /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) -DECLARE_EVENT_CLASS(xfs_repair_extent_class, +DECLARE_EVENT_CLASS(xrep_extent_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len), TP_ARGS(mp, agno, agbno, len), @@ -506,15 +506,14 @@ DECLARE_EVENT_CLASS(xfs_repair_extent_class, __entry->len) ); #define DEFINE_REPAIR_EXTENT_EVENT(name) \ -DEFINE_EVENT(xfs_repair_extent_class, name, \ +DEFINE_EVENT(xrep_extent_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ xfs_agblock_t agbno, xfs_extlen_t len), \ TP_ARGS(mp, agno, agbno, len)) -DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_dispose_btree_extent); -DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_collect_btree_extent); -DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_agfl_insert); +DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent); +DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert); -DECLARE_EVENT_CLASS(xfs_repair_rmap_class, +DECLARE_EVENT_CLASS(xrep_rmap_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner, uint64_t offset, unsigned int flags), @@ -547,17 +546,17 @@ DECLARE_EVENT_CLASS(xfs_repair_rmap_class, __entry->flags) ); #define DEFINE_REPAIR_RMAP_EVENT(name) \ -DEFINE_EVENT(xfs_repair_rmap_class, name, \ +DEFINE_EVENT(xrep_rmap_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ xfs_agblock_t agbno, xfs_extlen_t len, \ uint64_t owner, uint64_t offset, unsigned int flags), \ TP_ARGS(mp, agno, agbno, len, owner, offset, flags)) -DEFINE_REPAIR_RMAP_EVENT(xfs_repair_alloc_extent_fn); -DEFINE_REPAIR_RMAP_EVENT(xfs_repair_ialloc_extent_fn); -DEFINE_REPAIR_RMAP_EVENT(xfs_repair_rmap_extent_fn); -DEFINE_REPAIR_RMAP_EVENT(xfs_repair_bmap_extent_fn); +DEFINE_REPAIR_RMAP_EVENT(xrep_alloc_extent_fn); +DEFINE_REPAIR_RMAP_EVENT(xrep_ialloc_extent_fn); +DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn); +DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_extent_fn); -TRACE_EVENT(xfs_repair_refcount_extent_fn, +TRACE_EVENT(xrep_refcount_extent_fn, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_refcount_irec *irec), TP_ARGS(mp, agno, irec), @@ -583,7 +582,7 @@ TRACE_EVENT(xfs_repair_refcount_extent_fn, __entry->refcount) ) -TRACE_EVENT(xfs_repair_init_btblock, +TRACE_EVENT(xrep_init_btblock, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_btnum_t btnum), TP_ARGS(mp, agno, agbno, btnum), @@ -605,7 +604,7 @@ TRACE_EVENT(xfs_repair_init_btblock, __entry->agbno, __entry->btnum) ) -TRACE_EVENT(xfs_repair_findroot_block, +TRACE_EVENT(xrep_findroot_block, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, uint32_t magic, uint16_t level), TP_ARGS(mp, agno, agbno, magic, level), @@ -630,7 +629,7 @@ TRACE_EVENT(xfs_repair_findroot_block, __entry->magic, __entry->level) ) -TRACE_EVENT(xfs_repair_calc_ag_resblks, +TRACE_EVENT(xrep_calc_ag_resblks, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t icount, xfs_agblock_t aglen, xfs_agblock_t freelen, xfs_agblock_t usedlen), @@ -659,7 +658,7 @@ TRACE_EVENT(xfs_repair_calc_ag_resblks, __entry->freelen, __entry->usedlen) ) -TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize, +TRACE_EVENT(xrep_calc_ag_resblks_btsize, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bnobt_sz, xfs_agblock_t inobt_sz, xfs_agblock_t rmapbt_sz, xfs_agblock_t refcbt_sz), @@ -688,7 +687,7 @@ TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize, __entry->rmapbt_sz, __entry->refcbt_sz) ) -TRACE_EVENT(xfs_repair_reset_counters, +TRACE_EVENT(xrep_reset_counters, TP_PROTO(struct xfs_mount *mp), TP_ARGS(mp), TP_STRUCT__entry( @@ -701,7 +700,7 @@ TRACE_EVENT(xfs_repair_reset_counters, MAJOR(__entry->dev), MINOR(__entry->dev)) ) -TRACE_EVENT(xfs_repair_ialloc_insert, +TRACE_EVENT(xrep_ialloc_insert, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t startino, uint16_t holemask, uint8_t count, uint8_t freecount, uint64_t freemask), diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 583a9f539bf1..f6ffb4f248f7 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -8,7 +8,6 @@ #ifdef CONFIG_XFS_DEBUG #define DEBUG 1 -#define XFS_BUF_LOCK_TRACKING 1 #endif #ifdef CONFIG_XFS_ASSERT_FATAL diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8eb3ba3d4d00..49f5f5896a43 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ #include "xfs.h" @@ -20,9 +21,6 @@ #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" -#include <linux/gfp.h> -#include <linux/mpage.h> -#include <linux/pagevec.h> #include <linux/writeback.h> /* @@ -30,31 +28,11 @@ */ struct xfs_writepage_ctx { struct xfs_bmbt_irec imap; - bool imap_valid; unsigned int io_type; + unsigned int cow_seq; struct xfs_ioend *ioend; - sector_t last_block; }; -void -xfs_count_page_state( - struct page *page, - int *delalloc, - int *unwritten) -{ - struct buffer_head *bh, *head; - - *delalloc = *unwritten = 0; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - (*unwritten) = 1; - else if (buffer_delay(bh)) - (*delalloc) = 1; - } while ((bh = bh->b_this_page) != head); -} - struct block_device * xfs_find_bdev_for_inode( struct inode *inode) @@ -81,60 +59,23 @@ xfs_find_daxdev_for_inode( return mp->m_ddev_targp->bt_daxdev; } -/* - * We're now finished for good with this page. Update the page state via the - * associated buffer_heads, paying attention to the start and end offsets that - * we need to process on the page. - * - * Note that we open code the action in end_buffer_async_write here so that we - * only have to iterate over the buffers attached to the page once. This is not - * only more efficient, but also ensures that we only calls end_page_writeback - * at the end of the iteration, and thus avoids the pitfall of having the page - * and buffers potentially freed after every call to end_buffer_async_write. - */ static void xfs_finish_page_writeback( struct inode *inode, struct bio_vec *bvec, int error) { - struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head; - bool busy = false; - unsigned int off = 0; - unsigned long flags; - - ASSERT(bvec->bv_offset < PAGE_SIZE); - ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); - ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); - ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); - - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); - do { - if (off >= bvec->bv_offset && - off < bvec->bv_offset + bvec->bv_len) { - ASSERT(buffer_async_write(bh)); - ASSERT(bh->b_end_io == NULL); - - if (error) { - mark_buffer_write_io_error(bh); - clear_buffer_uptodate(bh); - SetPageError(bvec->bv_page); - } else { - set_buffer_uptodate(bh); - } - clear_buffer_async_write(bh); - unlock_buffer(bh); - } else if (buffer_async_write(bh)) { - ASSERT(buffer_locked(bh)); - busy = true; - } - off += bh->b_size; - } while ((bh = bh->b_this_page) != head); - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); - local_irq_restore(flags); + struct iomap_page *iop = to_iomap_page(bvec->bv_page); + + if (error) { + SetPageError(bvec->bv_page); + mapping_set_error(inode->i_mapping, -EIO); + } + + ASSERT(iop || i_blocksize(inode) == PAGE_SIZE); + ASSERT(!iop || atomic_read(&iop->write_count) > 0); - if (!busy) + if (!iop || atomic_dec_and_test(&iop->write_count)) end_page_writeback(bvec->bv_page); } @@ -170,7 +111,6 @@ xfs_destroy_ioend( /* walk each page on bio, ending page IO on them */ bio_for_each_segment_all(bvec, bio, i) xfs_finish_page_writeback(inode, bvec, error); - bio_put(bio); } @@ -363,39 +303,66 @@ xfs_end_bio( STATIC int xfs_map_blocks( + struct xfs_writepage_ctx *wpc, struct inode *inode, - loff_t offset, - struct xfs_bmbt_irec *imap, - int type) + loff_t offset) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t count = i_blocksize(inode); - xfs_fileoff_t offset_fsb, end_fsb; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb; + xfs_fileoff_t cow_fsb = NULLFILEOFF; + struct xfs_bmbt_irec imap; + int whichfork = XFS_DATA_FORK; + struct xfs_iext_cursor icur; + bool imap_valid; int error = 0; - int bmapi_flags = XFS_BMAPI_ENTIRE; - int nimaps = 1; - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(&wpc->imap, ip); /* - * Truncate can race with writeback since writeback doesn't take the - * iolock and truncate decreases the file size before it starts - * truncating the pages between new_size and old_size. Therefore, we - * can end up in the situation where writeback gets a CoW fork mapping - * but the truncate makes the mapping invalid and we end up in here - * trying to get a new mapping. Bail out here so that we simply never - * get a valid mapping and so we drop the write altogether. The page - * truncation will kill the contents anyway. + * COW fork blocks can overlap data fork blocks even if the blocks + * aren't shared. COW I/O always takes precedent, so we must always + * check for overlap on reflink inodes unless the mapping is already a + * COW one, or the COW fork hasn't changed from the last time we looked + * at it. + * + * It's safe to check the COW fork if_seq here without the ILOCK because + * we've indirectly protected against concurrent updates: writeback has + * the page locked, which prevents concurrent invalidations by reflink + * and directio and prevents concurrent buffered writes to the same + * page. Changes to if_seq always happen under i_lock, which protects + * against concurrent updates and provides a memory barrier on the way + * out that ensures that we always see the current value. */ - if (type == XFS_IO_COW && offset > i_size_read(inode)) + imap_valid = offset_fsb >= wpc->imap.br_startoff && + offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount; + if (imap_valid && + (!xfs_inode_has_cow_data(ip) || + wpc->io_type == XFS_IO_COW || + wpc->cow_seq == READ_ONCE(ip->i_cowfp->if_seq))) return 0; - ASSERT(type != XFS_IO_COW); - if (type == XFS_IO_UNWRITTEN) - bmapi_flags |= XFS_BMAPI_IGSTATE; + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + /* + * If we don't have a valid map, now it's time to get a new one for this + * offset. This will convert delayed allocations (including COW ones) + * into real extents. If we return without a valid map, it means we + * landed in a hole and we skip the block. + */ xfs_ilock(ip, XFS_ILOCK_SHARED); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || (ip->i_df.if_flags & XFS_IFEXTENTS)); @@ -404,109 +371,96 @@ xfs_map_blocks( if (offset > mp->m_super->s_maxbytes - count) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, - imap, &nimaps, bmapi_flags); + /* - * Truncate an overwrite extent if there's a pending CoW - * reservation before the end of this extent. This forces us - * to come back to writepage to take care of the CoW. + * Check if this is offset is covered by a COW extents, and if yes use + * it directly instead of looking up anything in the data fork. */ - if (nimaps && type == XFS_IO_OVERWRITE) - xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (error) - return error; - - if (type == XFS_IO_DELALLOC && - (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset, - imap); - if (!error) - trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return error; - } - -#ifdef DEBUG - if (type == XFS_IO_UNWRITTEN) { - ASSERT(nimaps); - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + if (xfs_inode_has_cow_data(ip) && + xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap)) + cow_fsb = imap.br_startoff; + if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { + wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* + * Truncate can race with writeback since writeback doesn't + * take the iolock and truncate decreases the file size before + * it starts truncating the pages between new_size and old_size. + * Therefore, we can end up in the situation where writeback + * gets a CoW fork mapping but the truncate makes the mapping + * invalid and we end up in here trying to get a new mapping. + * bail out here so that we simply never get a valid mapping + * and so we drop the write altogether. The page truncation + * will kill the contents anyway. + */ + if (offset > i_size_read(inode)) { + wpc->io_type = XFS_IO_HOLE; + return 0; + } + whichfork = XFS_COW_FORK; + wpc->io_type = XFS_IO_COW; + goto allocate_blocks; } -#endif - if (nimaps) - trace_xfs_map_blocks_found(ip, offset, count, type, imap); - return 0; -} - -STATIC bool -xfs_imap_valid( - struct inode *inode, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - offset >>= inode->i_blkbits; /* - * We have to make sure the cached mapping is within EOF to protect - * against eofblocks trimming on file release leaving us with a stale - * mapping. Otherwise, a page for a subsequent file extending buffered - * write could get picked up by this writeback cycle and written to the - * wrong blocks. - * - * Note that what we really want here is a generic mapping invalidation - * mechanism to protect us from arbitrary extent modifying contexts, not - * just eofblocks. + * Map valid and no COW extent in the way? We're done. */ - xfs_trim_extent_eof(imap, XFS_I(inode)); - - return offset >= imap->br_startoff && - offset < imap->br_startoff + imap->br_blockcount; -} - -STATIC void -xfs_start_buffer_writeback( - struct buffer_head *bh) -{ - ASSERT(buffer_mapped(bh)); - ASSERT(buffer_locked(bh)); - ASSERT(!buffer_delay(bh)); - ASSERT(!buffer_unwritten(bh)); - - bh->b_end_io = NULL; - set_buffer_async_write(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); -} - -STATIC void -xfs_start_page_writeback( - struct page *page, - int clear_dirty) -{ - ASSERT(PageLocked(page)); - ASSERT(!PageWriteback(page)); + if (imap_valid) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return 0; + } /* - * if the page was not fully cleaned, we need to ensure that the higher - * layers come back to it correctly. That means we need to keep the page - * dirty, and for WB_SYNC_ALL writeback we need to ensure the - * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to - * write this page in this writeback sweep will be made. + * If we don't have a valid map, now it's time to get a new one for this + * offset. This will convert delayed allocations (including COW ones) + * into real extents. */ - if (clear_dirty) { - clear_page_dirty_for_io(page); - set_page_writeback(page); - } else - set_page_writeback_keepwrite(page); + if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) + imap.br_startoff = end_fsb; /* fake a hole past EOF */ + xfs_iunlock(ip, XFS_ILOCK_SHARED); - unlock_page(page); -} + if (imap.br_startoff > offset_fsb) { + /* landed in a hole or beyond EOF */ + imap.br_blockcount = imap.br_startoff - offset_fsb; + imap.br_startoff = offset_fsb; + imap.br_startblock = HOLESTARTBLOCK; + wpc->io_type = XFS_IO_HOLE; + } else { + /* + * Truncate to the next COW extent if there is one. This is the + * only opportunity to do this because we can skip COW fork + * lookups for the subsequent blocks in the mapping; however, + * the requirement to treat the COW range separately remains. + */ + if (cow_fsb != NULLFILEOFF && + cow_fsb < imap.br_startoff + imap.br_blockcount) + imap.br_blockcount = cow_fsb - imap.br_startoff; + + if (isnullstartblock(imap.br_startblock)) { + /* got a delalloc extent */ + wpc->io_type = XFS_IO_DELALLOC; + goto allocate_blocks; + } -static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh) -{ - return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); + if (imap.br_state == XFS_EXT_UNWRITTEN) + wpc->io_type = XFS_IO_UNWRITTEN; + else + wpc->io_type = XFS_IO_OVERWRITE; + } + + wpc->imap = imap; + trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap); + return 0; +allocate_blocks: + error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap, + &wpc->cow_seq); + if (error) + return error; + ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF || + imap.br_startoff + imap.br_blockcount <= cow_fsb); + wpc->imap = imap; + trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap); + return 0; } /* @@ -574,27 +528,20 @@ xfs_submit_ioend( return 0; } -static void -xfs_init_bio_from_bh( - struct bio *bio, - struct buffer_head *bh) -{ - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); -} - static struct xfs_ioend * xfs_alloc_ioend( struct inode *inode, unsigned int type, xfs_off_t offset, - struct buffer_head *bh) + struct block_device *bdev, + sector_t sector) { struct xfs_ioend *ioend; struct bio *bio; bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); - xfs_init_bio_from_bh(bio, bh); + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = sector; ioend = container_of(bio, struct xfs_ioend, io_inline_bio); INIT_LIST_HEAD(&ioend->io_list); @@ -619,13 +566,14 @@ static void xfs_chain_bio( struct xfs_ioend *ioend, struct writeback_control *wbc, - struct buffer_head *bh) + struct block_device *bdev, + sector_t sector) { struct bio *new; new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); - xfs_init_bio_from_bh(new, bh); - + bio_set_dev(new, bdev); + new->bi_iter.bi_sector = sector; bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); @@ -635,122 +583,47 @@ xfs_chain_bio( } /* - * Test to see if we've been building up a completion structure for - * earlier buffers -- if so, we try to append to this ioend if we - * can, otherwise we finish off any current ioend and start another. - * Return the ioend we finished off so that the caller can submit it - * once it has finished processing the dirty page. + * Test to see if we have an existing ioend structure that we could append to + * first, otherwise finish off the current ioend and start another. */ STATIC void xfs_add_to_ioend( struct inode *inode, - struct buffer_head *bh, xfs_off_t offset, + struct page *page, + struct iomap_page *iop, struct xfs_writepage_ctx *wpc, struct writeback_control *wbc, struct list_head *iolist) { + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct block_device *bdev = xfs_find_bdev_for_inode(inode); + unsigned len = i_blocksize(inode); + unsigned poff = offset & (PAGE_SIZE - 1); + sector_t sector; + + sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) + + ((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9); + if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || - bh->b_blocknr != wpc->last_block + 1 || + sector != bio_end_sector(wpc->ioend->io_bio) || offset != wpc->ioend->io_offset + wpc->ioend->io_size) { if (wpc->ioend) list_add(&wpc->ioend->io_list, iolist); - wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh); + wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, + bdev, sector); } - /* - * If the buffer doesn't fit into the bio we need to allocate a new - * one. This shouldn't happen more than once for a given buffer. - */ - while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) - xfs_chain_bio(wpc->ioend, wbc, bh); - - wpc->ioend->io_size += bh->b_size; - wpc->last_block = bh->b_blocknr; - xfs_start_buffer_writeback(bh); -} - -STATIC void -xfs_map_buffer( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - sector_t bn; - struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); - xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); - - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + - ((offset - iomap_offset) >> inode->i_blkbits); - - ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); - - bh->b_blocknr = bn; - set_buffer_mapped(bh); -} - -STATIC void -xfs_map_at_offset( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - xfs_map_buffer(inode, bh, imap, offset); - set_buffer_mapped(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); -} - -/* - * Test if a given page contains at least one buffer of a given @type. - * If @check_all_buffers is true, then we walk all the buffers in the page to - * try to find one of the type passed in. If it is not set, then the caller only - * needs to check the first buffer on the page for a match. - */ -STATIC bool -xfs_check_page_type( - struct page *page, - unsigned int type, - bool check_all_buffers) -{ - struct buffer_head *bh; - struct buffer_head *head; - - if (PageWriteback(page)) - return false; - if (!page->mapping) - return false; - if (!page_has_buffers(page)) - return false; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) { - if (type == XFS_IO_UNWRITTEN) - return true; - } else if (buffer_delay(bh)) { - if (type == XFS_IO_DELALLOC) - return true; - } else if (buffer_dirty(bh) && buffer_mapped(bh)) { - if (type == XFS_IO_OVERWRITE) - return true; - } - - /* If we are only checking the first buffer, we are done now. */ - if (!check_all_buffers) - break; - } while ((bh = bh->b_this_page) != head); + if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) { + if (iop) + atomic_inc(&iop->write_count); + if (bio_full(wpc->ioend->io_bio)) + xfs_chain_bio(wpc->ioend, wbc, bdev, sector); + __bio_add_page(wpc->ioend->io_bio, page, len, poff); + } - return false; + wpc->ioend->io_size += len; } STATIC void @@ -759,34 +632,20 @@ xfs_vm_invalidatepage( unsigned int offset, unsigned int length) { - trace_xfs_invalidatepage(page->mapping->host, page, offset, - length); - - /* - * If we are invalidating the entire page, clear the dirty state from it - * so that we can check for attempts to release dirty cached pages in - * xfs_vm_releasepage(). - */ - if (offset == 0 && length >= PAGE_SIZE) - cancel_dirty_page(page); - block_invalidatepage(page, offset, length); + trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + iomap_invalidatepage(page, offset, length); } /* - * If the page has delalloc buffers on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read - * is done on that same region - the delalloc extent is returned when none is - * supposed to be there. - * - * We prevent this by truncating away the delalloc regions on the page before - * invalidating it. Because they are delalloc, we can do this without needing a - * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this - * truncation without a transaction as there is no space left for block - * reservation (typically why we see a ENOSPC in writeback). + * If the page has delalloc blocks on it, we need to punch them out before we + * invalidate the page. If we don't, we leave a stale delalloc mapping on the + * inode that can trip up a later direct I/O read operation on the same region. * - * This is not a performance critical path, so for now just do the punching a - * buffer head at a time. + * We prevent this by truncating away the delalloc regions on the page. Because + * they are delalloc, we can do this without needing a transaction. Indeed - if + * we get ENOSPC errors, we have to be able to do this truncation without a + * transaction as there is no space left for block reservation (typically why we + * see a ENOSPC in writeback). */ STATIC void xfs_aops_discard_page( @@ -794,104 +653,31 @@ xfs_aops_discard_page( { struct inode *inode = page->mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct buffer_head *bh, *head; + struct xfs_mount *mp = ip->i_mount; loff_t offset = page_offset(page); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); + int error; - if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) - goto out_invalidate; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(mp)) goto out_invalidate; - xfs_alert(ip->i_mount, + xfs_alert(mp, "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", page, ip->i_ino, offset); - xfs_ilock(ip, XFS_ILOCK_EXCL); - bh = head = page_buffers(page); - do { - int error; - xfs_fileoff_t start_fsb; - - if (!buffer_delay(bh)) - goto next_buffer; - - start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "page discard unable to remove delalloc mapping."); - } - break; - } -next_buffer: - offset += i_blocksize(inode); - - } while ((bh = bh->b_this_page) != head); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, + PAGE_SIZE / i_blocksize(inode)); + if (error && !XFS_FORCED_SHUTDOWN(mp)) + xfs_alert(mp, "page discard unable to remove delalloc mapping."); out_invalidate: xfs_vm_invalidatepage(page, 0, PAGE_SIZE); - return; -} - -static int -xfs_map_cow( - struct xfs_writepage_ctx *wpc, - struct inode *inode, - loff_t offset, - unsigned int *new_type) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_bmbt_irec imap; - bool is_cow = false; - int error; - - /* - * If we already have a valid COW mapping keep using it. - */ - if (wpc->io_type == XFS_IO_COW) { - wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); - if (wpc->imap_valid) { - *new_type = XFS_IO_COW; - return 0; - } - } - - /* - * Else we need to check if there is a COW mapping at this offset. - */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (!is_cow) - return 0; - - /* - * And if the COW mapping has a delayed extent here we need to - * allocate real space for it now. - */ - if (isnullstartblock(imap.br_startblock)) { - error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, - &imap); - if (error) - return error; - } - - wpc->io_type = *new_type = XFS_IO_COW; - wpc->imap_valid = true; - wpc->imap = imap; - return 0; } /* * We implement an immediate ioend submission policy here to avoid needing to * chain multiple ioends and hence nest mempool allocations which can violate * forward progress guarantees we need to provide. The current ioend we are - * adding buffers to is cached on the writepage context, and if the new buffer + * adding blocks to is cached on the writepage context, and if the new block * does not append to the cached ioend it will create a new ioend and cache that * instead. * @@ -912,138 +698,99 @@ xfs_writepage_map( uint64_t end_offset) { LIST_HEAD(submit_list); + struct iomap_page *iop = to_iomap_page(page); + unsigned len = i_blocksize(inode); struct xfs_ioend *ioend, *next; - struct buffer_head *bh, *head; - ssize_t len = i_blocksize(inode); - uint64_t offset; - int error = 0; - int count = 0; - int uptodate = 1; - unsigned int new_type; - - bh = head = page_buffers(page); - offset = page_offset(page); - do { - if (offset >= end_offset) - break; - if (!buffer_uptodate(bh)) - uptodate = 0; + uint64_t file_offset; /* file offset of page */ + int error = 0, count = 0, i; - /* - * set_page_dirty dirties all buffers in a page, independent - * of their state. The dirty state however is entirely - * meaningless for holes (!mapped && uptodate), so skip - * buffers covering holes here. - */ - if (!buffer_mapped(bh) && buffer_uptodate(bh)) { - wpc->imap_valid = false; - continue; - } + ASSERT(iop || i_blocksize(inode) == PAGE_SIZE); + ASSERT(!iop || atomic_read(&iop->write_count) == 0); - if (buffer_unwritten(bh)) - new_type = XFS_IO_UNWRITTEN; - else if (buffer_delay(bh)) - new_type = XFS_IO_DELALLOC; - else if (buffer_uptodate(bh)) - new_type = XFS_IO_OVERWRITE; - else { - if (PageUptodate(page)) - ASSERT(buffer_mapped(bh)); - /* - * This buffer is not uptodate and will not be - * written to disk. Ensure that we will put any - * subsequent writeable buffers into a new - * ioend. - */ - wpc->imap_valid = false; + /* + * Walk through the page to find areas to write back. If we run off the + * end of the current map or find the current map invalid, grab a new + * one. + */ + for (i = 0, file_offset = page_offset(page); + i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset; + i++, file_offset += len) { + if (iop && !test_bit(i, iop->uptodate)) continue; - } - if (xfs_is_reflink_inode(XFS_I(inode))) { - error = xfs_map_cow(wpc, inode, offset, &new_type); - if (error) - goto out; - } - - if (wpc->io_type != new_type) { - wpc->io_type = new_type; - wpc->imap_valid = false; - } - - if (wpc->imap_valid) - wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, - offset); - if (!wpc->imap_valid) { - error = xfs_map_blocks(inode, offset, &wpc->imap, - wpc->io_type); - if (error) - goto out; - wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, - offset); - } - if (wpc->imap_valid) { - lock_buffer(bh); - if (wpc->io_type != XFS_IO_OVERWRITE) - xfs_map_at_offset(inode, bh, &wpc->imap, offset); - xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list); - count++; - } - - } while (offset += len, ((bh = bh->b_this_page) != head)); - - if (uptodate && bh == head) - SetPageUptodate(page); + error = xfs_map_blocks(wpc, inode, file_offset); + if (error) + break; + if (wpc->io_type == XFS_IO_HOLE) + continue; + xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc, + &submit_list); + count++; + } ASSERT(wpc->ioend || list_empty(&submit_list)); + ASSERT(PageLocked(page)); + ASSERT(!PageWriteback(page)); -out: /* - * On error, we have to fail the ioend here because we have locked - * buffers in the ioend. If we don't do this, we'll deadlock - * invalidating the page as that tries to lock the buffers on the page. - * Also, because we may have set pages under writeback, we have to make - * sure we run IO completion to mark the error state of the IO - * appropriately, so we can't cancel the ioend directly here. That means - * we have to mark this page as under writeback if we included any - * buffers from it in the ioend chain so that completion treats it - * correctly. + * On error, we have to fail the ioend here because we may have set + * pages under writeback, we have to make sure we run IO completion to + * mark the error state of the IO appropriately, so we can't cancel the + * ioend directly here. That means we have to mark this page as under + * writeback if we included any blocks from it in the ioend chain so + * that completion treats it correctly. * * If we didn't include the page in the ioend, the on error we can * simply discard and unlock it as there are no other users of the page - * or it's buffers right now. The caller will still need to trigger - * submission of outstanding ioends on the writepage context so they are - * treated correctly on error. + * now. The caller will still need to trigger submission of outstanding + * ioends on the writepage context so they are treated correctly on + * error. */ - if (count) { - xfs_start_page_writeback(page, !error); + if (unlikely(error)) { + if (!count) { + xfs_aops_discard_page(page); + ClearPageUptodate(page); + unlock_page(page); + goto done; + } /* - * Preserve the original error if there was one, otherwise catch - * submission errors here and propagate into subsequent ioend - * submissions. + * If the page was not fully cleaned, we need to ensure that the + * higher layers come back to it correctly. That means we need + * to keep the page dirty, and for WB_SYNC_ALL writeback we need + * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed + * so another attempt to write this page in this writeback sweep + * will be made. */ - list_for_each_entry_safe(ioend, next, &submit_list, io_list) { - int error2; - - list_del_init(&ioend->io_list); - error2 = xfs_submit_ioend(wbc, ioend, error); - if (error2 && !error) - error = error2; - } - } else if (error) { - xfs_aops_discard_page(page); - ClearPageUptodate(page); - unlock_page(page); + set_page_writeback_keepwrite(page); } else { - /* - * We can end up here with no error and nothing to write if we - * race with a partial page truncate on a sub-page block sized - * filesystem. In that case we need to mark the page clean. - */ - xfs_start_page_writeback(page, 1); - end_page_writeback(page); + clear_page_dirty_for_io(page); + set_page_writeback(page); } + unlock_page(page); + + /* + * Preserve the original error if there was one, otherwise catch + * submission errors here and propagate into subsequent ioend + * submissions. + */ + list_for_each_entry_safe(ioend, next, &submit_list, io_list) { + int error2; + + list_del_init(&ioend->io_list); + error2 = xfs_submit_ioend(wbc, ioend, error); + if (error2 && !error) + error = error2; + } + + /* + * We can end up here with no error and nothing to write only if we race + * with a partial page truncate on a sub-page block sized filesystem. + */ + if (!count) + end_page_writeback(page); +done: mapping_set_error(page->mapping, error); return error; } @@ -1054,7 +801,6 @@ out: * For delalloc space on the page we need to allocate space and flush it. * For unwritten space on the page we need to start the conversion to * regular allocated space. - * For any other dirty buffer heads on the page we should flush them. */ STATIC int xfs_do_writepage( @@ -1070,8 +816,6 @@ xfs_do_writepage( trace_xfs_writepage(inode, page, 0, 0); - ASSERT(page_has_buffers(page)); - /* * Refuse to write the page out if we are called from reclaim context. * @@ -1210,166 +954,13 @@ xfs_dax_writepages( xfs_find_bdev_for_inode(mapping->host), wbc); } -/* - * Called to move a page into cleanable state - and from there - * to be released. The page should already be clean. We always - * have buffer heads in this call. - * - * Returns 1 if the page is ok to release, 0 otherwise. - */ STATIC int xfs_vm_releasepage( struct page *page, gfp_t gfp_mask) { - int delalloc, unwritten; - trace_xfs_releasepage(page->mapping->host, page, 0, 0); - - /* - * mm accommodates an old ext3 case where clean pages might not have had - * the dirty bit cleared. Thus, it can send actual dirty pages to - * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty but - * otherwise have invalidated buffers. - * - * We want to release the latter to avoid unnecessary buildup of the - * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages - * that are entirely invalidated and need to be released. Hence the - * only time we should get dirty pages here is through - * shrink_active_list() and so we can simply skip those now. - * - * warn if we've left any lingering delalloc/unwritten buffers on clean - * or invalidated pages we are about to release. - */ - if (PageDirty(page)) - return 0; - - xfs_count_page_state(page, &delalloc, &unwritten); - - if (WARN_ON_ONCE(delalloc)) - return 0; - if (WARN_ON_ONCE(unwritten)) - return 0; - - return try_to_free_buffers(page); -} - -/* - * If this is O_DIRECT or the mpage code calling tell them how large the mapping - * is, so that we can avoid repeated get_blocks calls. - * - * If the mapping spans EOF, then we have to break the mapping up as the mapping - * for blocks beyond EOF must be marked new so that sub block regions can be - * correctly zeroed. We can't do this for mappings within EOF unless the mapping - * was just allocated or is unwritten, otherwise the callers would overwrite - * existing data with zeros. Hence we have to split the mapping into a range up - * to and including EOF, and a second mapping for beyond EOF. - */ -static void -xfs_map_trim_size( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - struct xfs_bmbt_irec *imap, - xfs_off_t offset, - ssize_t size) -{ - xfs_off_t mapping_size; - - mapping_size = imap->br_startoff + imap->br_blockcount - iblock; - mapping_size <<= inode->i_blkbits; - - ASSERT(mapping_size > 0); - if (mapping_size > size) - mapping_size = size; - if (offset < i_size_read(inode) && - (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { - /* limit mapping to block that spans EOF */ - mapping_size = roundup_64(i_size_read(inode) - offset, - i_blocksize(inode)); - } - if (mapping_size > LONG_MAX) - mapping_size = LONG_MAX; - - bh_result->b_size = mapping_size; -} - -static int -xfs_get_blocks( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; - struct xfs_bmbt_irec imap; - int nimaps = 1; - xfs_off_t offset; - ssize_t size; - - BUG_ON(create); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= i_blocksize(inode)); - size = bh_result->b_size; - - if (offset >= i_size_read(inode)) - return 0; - - /* - * Direct I/O is usually done on preallocated files, so try getting - * a block mapping without an exclusive lock first. - */ - lockmode = xfs_ilock_data_map_shared(ip); - - ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset > mp->m_super->s_maxbytes - size) - size = mp->m_super->s_maxbytes - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - - error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, - &nimaps, 0); - if (error) - goto out_unlock; - if (!nimaps) { - trace_xfs_get_blocks_notfound(ip, offset, size); - goto out_unlock; - } - - trace_xfs_get_blocks_found(ip, offset, size, - imap.br_state == XFS_EXT_UNWRITTEN ? - XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); - xfs_iunlock(ip, lockmode); - - /* trim mapping down to size requested */ - xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); - - /* - * For unwritten extents do not report a disk address in the buffered - * read case (treat as if we're reading into a hole). - */ - if (xfs_bmap_is_real_extent(&imap)) - xfs_map_buffer(inode, bh_result, &imap, offset); - - /* - * If this is a realtime file, data may be on a different device. - * to that pointed to from the buffer_head b_bdev currently. - */ - bh_result->b_bdev = xfs_find_bdev_for_inode(inode); - return 0; - -out_unlock: - xfs_iunlock(ip, lockmode); - return error; + return iomap_releasepage(page, gfp_mask); } STATIC sector_t @@ -1401,7 +992,7 @@ xfs_vm_readpage( struct page *page) { trace_xfs_vm_readpage(page->mapping->host, 1); - return mpage_readpage(page, xfs_get_blocks); + return iomap_readpage(page, &xfs_iomap_ops); } STATIC int @@ -1412,63 +1003,7 @@ xfs_vm_readpages( unsigned nr_pages) { trace_xfs_vm_readpages(mapping->host, nr_pages); - return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); -} - -/* - * This is basically a copy of __set_page_dirty_buffers() with one - * small tweak: buffers beyond EOF do not get marked dirty. If we mark them - * dirty, we'll never be able to clean them because we don't write buffers - * beyond EOF, and that means we can't invalidate pages that span EOF - * that have been marked dirty. Further, the dirty state can leak into - * the file interior if the file is extended, resulting in all sorts of - * bad things happening as the state does not match the underlying data. - * - * XXX: this really indicates that bufferheads in XFS need to die. Warts like - * this only exist because of bufferheads and how the generic code manages them. - */ -STATIC int -xfs_vm_set_page_dirty( - struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - loff_t end_offset; - loff_t offset; - int newly_dirty; - - if (unlikely(!mapping)) - return !TestSetPageDirty(page); - - end_offset = i_size_read(inode); - offset = page_offset(page); - - spin_lock(&mapping->private_lock); - if (page_has_buffers(page)) { - struct buffer_head *head = page_buffers(page); - struct buffer_head *bh = head; - - do { - if (offset < end_offset) - set_buffer_dirty(bh); - bh = bh->b_this_page; - offset += i_blocksize(inode); - } while (bh != head); - } - /* - * Lock out page->mem_cgroup migration to keep PageDirty - * synchronized with per-memcg dirty page counters. - */ - lock_page_memcg(page); - newly_dirty = !TestSetPageDirty(page); - spin_unlock(&mapping->private_lock); - - if (newly_dirty) - __set_page_dirty(page, mapping, 1); - unlock_page_memcg(page); - if (newly_dirty) - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return newly_dirty; + return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops); } static int @@ -1486,13 +1021,13 @@ const struct address_space_operations xfs_address_space_operations = { .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, - .set_page_dirty = xfs_vm_set_page_dirty, + .set_page_dirty = iomap_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .bmap = xfs_vm_bmap, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, + .migratepage = iomap_migrate_page, + .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = xfs_iomap_swapfile_activate, }; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 25bc6d4a1231..9af867951a10 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -17,6 +17,7 @@ enum { XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ XFS_IO_COW, /* covers copy-on-write extent */ + XFS_IO_HOLE, /* covers region without any block allocation */ }; #define XFS_IO_TYPES \ @@ -24,7 +25,8 @@ enum { { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ { XFS_IO_OVERWRITE, "overwrite" }, \ - { XFS_IO_COW, "CoW" } + { XFS_IO_COW, "CoW" }, \ + { XFS_IO_HOLE, "hole" } /* * Structure for buffered I/O completions. diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 7ce10055f275..228821b2ebe0 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -26,6 +26,7 @@ #include "xfs_quota.h" #include "xfs_trace.h" #include "xfs_dir2.h" +#include "xfs_defer.h" /* * Look at all the extents for this logical region, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index f9ca80154c9c..a58034049995 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -87,7 +87,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) */ if (context->bufsize == 0 || (XFS_ISRESET_CURSOR(cursor) && - (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { + (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { context->put_listent(context, sfe->flags, diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 956ebd583e27..ce45f066995e 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -375,9 +375,8 @@ xfs_bud_init( */ int xfs_bui_recover( - struct xfs_mount *mp, - struct xfs_bui_log_item *buip, - struct xfs_defer_ops *dfops) + struct xfs_trans *parent_tp, + struct xfs_bui_log_item *buip) { int error = 0; unsigned int bui_type; @@ -393,6 +392,7 @@ xfs_bui_recover( struct xfs_trans *tp; struct xfs_inode *ip = NULL; struct xfs_bmbt_irec irec; + struct xfs_mount *mp = parent_tp->t_mountp; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -441,6 +441,12 @@ xfs_bui_recover( XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) return error; + /* + * Recovery stashes all deferred ops during intent processing and + * finishes them on completion. Transfer current dfops state to this + * transaction and transfer the result back before we return. + */ + xfs_defer_move(tp, parent_tp); budp = xfs_trans_get_bud(tp, buip); /* Grab the inode. */ @@ -469,9 +475,8 @@ xfs_bui_recover( xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, - ip, whichfork, bmap->me_startoff, - bmap->me_startblock, &count, state); + error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork, + bmap->me_startoff, bmap->me_startblock, &count, state); if (error) goto err_inode; @@ -481,23 +486,25 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp, ip, &irec); if (error) goto err_inode; } set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); + xfs_defer_move(parent_tp, tp); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); + xfs_irele(ip); return error; err_inode: + xfs_defer_move(parent_tp, tp); xfs_trans_cancel(tp); if (ip) { xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); + xfs_irele(ip); } return error; } diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index fd1a1b13df51..89e043a88bb8 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -79,7 +79,6 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, - struct xfs_defer_ops *dfops); +int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 83b1e8c6c18f..addbd74ecd8e 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -702,16 +702,15 @@ xfs_bmap_punch_delalloc_range( struct xfs_iext_cursor icur; int error = 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - + xfs_ilock(ip, XFS_ILOCK_EXCL); if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); if (error) - return error; + goto out_unlock; } if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) - return 0; + goto out_unlock; while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; @@ -735,6 +734,8 @@ xfs_bmap_punch_delalloc_range( break; } +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -872,13 +873,11 @@ xfs_alloc_file_space( xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; - xfs_fsblock_t firstfsb; int nimaps; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; - struct xfs_defer_ops dfops; uint qblocks, resblks, resrtextents; int error; @@ -971,20 +970,15 @@ xfs_alloc_file_space( xfs_trans_ijoin(tp, ip, 0); - xfs_defer_init(&dfops, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, alloc_type, &firstfsb, - resblks, imapp, &nimaps, &dfops); + allocatesize_fsb, alloc_type, resblks, + imapp, &nimaps); if (error) goto error0; /* * Complete the transaction */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto error0; - error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) @@ -1003,8 +997,7 @@ xfs_alloc_file_space( return error; -error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ - xfs_defer_cancel(&dfops); +error0: /* unlock inode, unreserve quota blocks, cancel trans */ xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); error1: /* Just cancel transaction */ @@ -1022,8 +1015,6 @@ xfs_unmap_extent( { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - struct xfs_defer_ops dfops; - xfs_fsblock_t firstfsb; uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); int error; @@ -1041,24 +1032,15 @@ xfs_unmap_extent( xfs_trans_ijoin(tp, ip, 0); - xfs_defer_init(&dfops, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, - &dfops, done); - if (error) - goto out_bmap_cancel; - - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(&tp, &dfops); + error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; error = xfs_trans_commit(tp); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; -out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); goto out_unlock; @@ -1279,7 +1261,7 @@ xfs_prepare_shift( * we've flushed all the dirty data out to disk to avoid having * CoW extents at the wrong offsets. */ - if (xfs_is_reflink_inode(ip)) { + if (xfs_inode_has_cow_data(ip)) { error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, true); if (error) @@ -1310,8 +1292,6 @@ xfs_collapse_file_space( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); @@ -1344,22 +1324,16 @@ xfs_collapse_file_space( goto out_trans_cancel; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_defer_init(&dfops, &first_block); error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, - &done, &first_block, &dfops); + &done); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; error = xfs_trans_commit(tp); } return error; -out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); return error; @@ -1386,8 +1360,6 @@ xfs_insert_file_space( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, offset); xfs_fileoff_t next_fsb = NULLFSBLOCK; xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); @@ -1423,22 +1395,17 @@ xfs_insert_file_space( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_defer_init(&dfops, &first_block); error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, - &done, stop_fsb, &first_block, &dfops); + &done, stop_fsb); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; error = xfs_trans_commit(tp); } return error; -out_bmap_cancel: - xfs_defer_cancel(&dfops); +out_trans_cancel: xfs_trans_cancel(tp); return error; } @@ -1566,14 +1533,13 @@ xfs_swap_extent_rmap( struct xfs_inode *ip, struct xfs_inode *tip) { + struct xfs_trans *tp = *tpp; struct xfs_bmbt_irec irec; struct xfs_bmbt_irec uirec; struct xfs_bmbt_irec tirec; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; xfs_filblks_t count_fsb; - xfs_fsblock_t firstfsb; - struct xfs_defer_ops dfops; int error; xfs_filblks_t ilen; xfs_filblks_t rlen; @@ -1609,7 +1575,7 @@ xfs_swap_extent_rmap( /* Unmap the old blocks in the source file. */ while (tirec.br_blockcount) { - xfs_defer_init(&dfops, &firstfsb); + ASSERT(tp->t_firstblock == NULLFSBLOCK); trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); /* Read extent from the source file */ @@ -1631,33 +1597,29 @@ xfs_swap_extent_rmap( trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); /* Remove the mapping from the donor file. */ - error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, - tip, &uirec); + error = xfs_bmap_unmap_extent(tp, tip, &uirec); if (error) goto out_defer; /* Remove the mapping from the source file. */ - error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, - ip, &irec); + error = xfs_bmap_unmap_extent(tp, ip, &irec); if (error) goto out_defer; /* Map the donor file's blocks into the source file. */ - error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, - ip, &uirec); + error = xfs_bmap_map_extent(tp, ip, &uirec); if (error) goto out_defer; /* Map the source file's blocks into the donor file. */ - error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, - tip, &irec); + error = xfs_bmap_map_extent(tp, tip, &irec); if (error) goto out_defer; - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(tpp, &dfops); + error = xfs_defer_finish(tpp); + tp = *tpp; if (error) - goto out_defer; + goto out; tirec.br_startoff += rlen; if (tirec.br_startblock != HOLESTARTBLOCK && @@ -1675,7 +1637,7 @@ xfs_swap_extent_rmap( return 0; out_defer: - xfs_defer_cancel(&dfops); + xfs_defer_cancel(tp); out: trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); tip->i_d.di_flags2 = tip_flags2; @@ -1691,7 +1653,6 @@ xfs_swap_extent_forks( int *src_log_flags, int *target_log_flags) { - struct xfs_ifork tempifp, *ifp, *tifp; xfs_filblks_t aforkblks = 0; xfs_filblks_t taforkblks = 0; xfs_extnum_t junk; @@ -1733,11 +1694,7 @@ xfs_swap_extent_forks( /* * Swap the data forks of the inodes */ - ifp = &ip->i_df; - tifp = &tip->i_df; - tempifp = *ifp; /* struct copy */ - *ifp = *tifp; /* struct copy */ - *tifp = tempifp; /* struct copy */ + swap(ip->i_df, tip->i_df); /* * Fix the on-disk inode values @@ -1746,13 +1703,8 @@ xfs_swap_extent_forks( ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; - tmp = (uint64_t) ip->i_d.di_nextents; - ip->i_d.di_nextents = tip->i_d.di_nextents; - tip->i_d.di_nextents = tmp; - - tmp = (uint64_t) ip->i_d.di_format; - ip->i_d.di_format = tip->i_d.di_format; - tip->i_d.di_format = tmp; + swap(ip->i_d.di_nextents, tip->i_d.di_nextents); + swap(ip->i_d.di_format, tip->i_d.di_format); /* * The extents in the source inode could still contain speculative @@ -1846,7 +1798,6 @@ xfs_swap_extents( int src_log_flags, target_log_flags; int error = 0; int lock_flags; - struct xfs_ifork *cowfp; uint64_t f; int resblks = 0; @@ -1987,18 +1938,11 @@ xfs_swap_extents( /* Swap the cow forks. */ if (xfs_sb_version_hasreflink(&mp->m_sb)) { - xfs_extnum_t extnum; - ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); - extnum = ip->i_cnextents; - ip->i_cnextents = tip->i_cnextents; - tip->i_cnextents = extnum; - - cowfp = ip->i_cowfp; - ip->i_cowfp = tip->i_cowfp; - tip->i_cowfp = cowfp; + swap(ip->i_cnextents, tip->i_cnextents); + swap(ip->i_cowfp, tip->i_cowfp); if (ip->i_cowfp && ip->i_cowfp->if_bytes) xfs_inode_set_cowblocks_tag(ip); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index e9c058e3761c..e839907e8492 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -34,16 +34,6 @@ static kmem_zone_t *xfs_buf_zone; -#ifdef XFS_BUF_LOCK_TRACKING -# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) -# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) -# define XB_GET_OWNER(bp) ((bp)->b_last_holder) -#else -# define XB_SET_OWNER(bp) do { } while (0) -# define XB_CLEAR_OWNER(bp) do { } while (0) -# define XB_GET_OWNER(bp) do { } while (0) -#endif - #define xb_to_gfp(flags) \ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) @@ -226,7 +216,6 @@ _xfs_buf_alloc( INIT_LIST_HEAD(&bp->b_li_list); sema_init(&bp->b_sema, 0); /* held, no waiters */ spin_lock_init(&bp->b_lock); - XB_SET_OWNER(bp); bp->b_target = target; bp->b_flags = flags; @@ -757,11 +746,7 @@ _xfs_buf_read( bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - if (flags & XBF_ASYNC) { - xfs_buf_submit(bp); - return 0; - } - return xfs_buf_submit_wait(bp); + return xfs_buf_submit(bp); } xfs_buf_t * @@ -846,7 +831,7 @@ xfs_buf_read_uncached( bp->b_flags |= XBF_READ; bp->b_ops = ops; - xfs_buf_submit_wait(bp); + xfs_buf_submit(bp); if (bp->b_error) { int error = bp->b_error; xfs_buf_relse(bp); @@ -1095,12 +1080,10 @@ xfs_buf_trylock( int locked; locked = down_trylock(&bp->b_sema) == 0; - if (locked) { - XB_SET_OWNER(bp); + if (locked) trace_xfs_buf_trylock(bp, _RET_IP_); - } else { + else trace_xfs_buf_trylock_fail(bp, _RET_IP_); - } return locked; } @@ -1122,7 +1105,6 @@ xfs_buf_lock( if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); down(&bp->b_sema); - XB_SET_OWNER(bp); trace_xfs_buf_lock_done(bp, _RET_IP_); } @@ -1133,9 +1115,7 @@ xfs_buf_unlock( { ASSERT(xfs_buf_islocked(bp)); - XB_CLEAR_OWNER(bp); up(&bp->b_sema); - trace_xfs_buf_unlock(bp, _RET_IP_); } @@ -1249,7 +1229,7 @@ xfs_bwrite( bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL | XBF_DONE); - error = xfs_buf_submit_wait(bp); + error = xfs_buf_submit(bp); if (error) { xfs_force_shutdown(bp->b_target->bt_mount, SHUTDOWN_META_IO_ERROR); @@ -1453,29 +1433,55 @@ _xfs_buf_ioapply( } /* - * Asynchronous IO submission path. This transfers the buffer lock ownership and - * the current reference to the IO. It is not safe to reference the buffer after - * a call to this function unless the caller holds an additional reference - * itself. + * Wait for I/O completion of a sync buffer and return the I/O error code. */ -void -xfs_buf_submit( +static int +xfs_buf_iowait( struct xfs_buf *bp) { + ASSERT(!(bp->b_flags & XBF_ASYNC)); + + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + + return bp->b_error; +} + +/* + * Buffer I/O submission path, read or write. Asynchronous submission transfers + * the buffer lock ownership and the current reference to the IO. It is not + * safe to reference the buffer after a call to this function unless the caller + * holds an additional reference itself. + */ +int +__xfs_buf_submit( + struct xfs_buf *bp, + bool wait) +{ + int error = 0; + trace_xfs_buf_submit(bp, _RET_IP_); ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); - ASSERT(bp->b_flags & XBF_ASYNC); /* on shutdown we stale and complete the buffer immediately */ if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); - xfs_buf_ioend(bp); - return; + if (bp->b_flags & XBF_ASYNC) + xfs_buf_ioend(bp); + return -EIO; } + /* + * Grab a reference so the buffer does not go away underneath us. For + * async buffers, I/O completion drops the callers reference, which + * could occur before submission returns. + */ + xfs_buf_hold(bp); + if (bp->b_flags & XBF_WRITE) xfs_buf_wait_unpin(bp); @@ -1483,22 +1489,13 @@ xfs_buf_submit( bp->b_io_error = 0; /* - * The caller's reference is released during I/O completion. - * This occurs some time after the last b_io_remaining reference is - * released, so after we drop our Io reference we have to have some - * other reference to ensure the buffer doesn't go away from underneath - * us. Take a direct reference to ensure we have safe access to the - * buffer until we are finished with it. - */ - xfs_buf_hold(bp); - - /* * Set the count to 1 initially, this will stop an I/O completion * callout which happens before we have started all the I/O from calling * xfs_buf_ioend too early. */ atomic_set(&bp->b_io_remaining, 1); - xfs_buf_ioacct_inc(bp); + if (bp->b_flags & XBF_ASYNC) + xfs_buf_ioacct_inc(bp); _xfs_buf_ioapply(bp); /* @@ -1507,74 +1504,19 @@ xfs_buf_submit( * that we don't return to the caller with completion still pending. */ if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { - if (bp->b_error) + if (bp->b_error || !(bp->b_flags & XBF_ASYNC)) xfs_buf_ioend(bp); else xfs_buf_ioend_async(bp); } - xfs_buf_rele(bp); - /* Note: it is not safe to reference bp now we've dropped our ref */ -} - -/* - * Synchronous buffer IO submission path, read or write. - */ -int -xfs_buf_submit_wait( - struct xfs_buf *bp) -{ - int error; - - trace_xfs_buf_submit_wait(bp, _RET_IP_); - - ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC))); - - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - bp->b_flags &= ~XBF_DONE; - return -EIO; - } - - if (bp->b_flags & XBF_WRITE) - xfs_buf_wait_unpin(bp); - - /* clear the internal error state to avoid spurious errors */ - bp->b_io_error = 0; + if (wait) + error = xfs_buf_iowait(bp); /* - * For synchronous IO, the IO does not inherit the submitters reference - * count, nor the buffer lock. Hence we cannot release the reference we - * are about to take until we've waited for all IO completion to occur, - * including any xfs_buf_ioend_async() work that may be pending. - */ - xfs_buf_hold(bp); - - /* - * Set the count to 1 initially, this will stop an I/O completion - * callout which happens before we have started all the I/O from calling - * xfs_buf_ioend too early. - */ - atomic_set(&bp->b_io_remaining, 1); - _xfs_buf_ioapply(bp); - - /* - * make sure we run completion synchronously if it raced with us and is - * already complete. - */ - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) - xfs_buf_ioend(bp); - - /* wait for completion before gathering the error from the buffer */ - trace_xfs_buf_iowait(bp, _RET_IP_); - wait_for_completion(&bp->b_iowait); - trace_xfs_buf_iowait_done(bp, _RET_IP_); - error = bp->b_error; - - /* - * all done now, we can release the hold that keeps the buffer - * referenced for the entire IO. + * Release the hold that keeps the buffer referenced for the entire + * I/O. Note that if the buffer is async, it is not safe to reference + * after this release. */ xfs_buf_rele(bp); return error; @@ -1972,16 +1914,11 @@ xfs_buf_cmp( } /* - * submit buffers for write. - * - * When we have a large buffer list, we do not want to hold all the buffers - * locked while we block on the request queue waiting for IO dispatch. To avoid - * this problem, we lock and submit buffers in groups of 50, thereby minimising - * the lock hold times for lists which may contain thousands of objects. - * - * To do this, we sort the buffer list before we walk the list to lock and - * submit buffers, and we plug and unplug around each group of buffers we - * submit. + * Submit buffers for write. If wait_list is specified, the buffers are + * submitted using sync I/O and placed on the wait list such that the caller can + * iowait each buffer. Otherwise async I/O is used and the buffers are released + * at I/O completion time. In either case, buffers remain locked until I/O + * completes and the buffer is released from the queue. */ static int xfs_buf_delwri_submit_buffers( @@ -2023,21 +1960,21 @@ xfs_buf_delwri_submit_buffers( trace_xfs_buf_delwri_split(bp, _RET_IP_); /* - * We do all IO submission async. This means if we need - * to wait for IO completion we need to take an extra - * reference so the buffer is still valid on the other - * side. We need to move the buffer onto the io_list - * at this point so the caller can still access it. + * If we have a wait list, each buffer (and associated delwri + * queue reference) transfers to it and is submitted + * synchronously. Otherwise, drop the buffer from the delwri + * queue and submit async. */ bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL); - bp->b_flags |= XBF_WRITE | XBF_ASYNC; + bp->b_flags |= XBF_WRITE; if (wait_list) { - xfs_buf_hold(bp); + bp->b_flags &= ~XBF_ASYNC; list_move_tail(&bp->b_list, wait_list); - } else + } else { + bp->b_flags |= XBF_ASYNC; list_del_init(&bp->b_list); - - xfs_buf_submit(bp); + } + __xfs_buf_submit(bp, false); } blk_finish_plug(&plug); @@ -2084,9 +2021,11 @@ xfs_buf_delwri_submit( list_del_init(&bp->b_list); - /* locking the buffer will wait for async IO completion. */ - xfs_buf_lock(bp); - error2 = bp->b_error; + /* + * Wait on the locked buffer, check for errors and unlock and + * release the delwri queue reference. + */ + error2 = xfs_buf_iowait(bp); xfs_buf_relse(bp); if (!error) error = error2; @@ -2132,23 +2071,18 @@ xfs_buf_delwri_pushbuf( /* * Delwri submission clears the DELWRI_Q buffer flag and returns with - * the buffer on the wait list with an associated reference. Rather than + * the buffer on the wait list with the original reference. Rather than * bounce the buffer from a local wait list back to the original list * after I/O completion, reuse the original list as the wait list. */ xfs_buf_delwri_submit_buffers(&submit_list, buffer_list); /* - * The buffer is now under I/O and wait listed as during typical delwri - * submission. Lock the buffer to wait for I/O completion. Rather than - * remove the buffer from the wait list and release the reference, we - * want to return with the buffer queued to the original list. The - * buffer already sits on the original list with a wait list reference, - * however. If we let the queue inherit that wait list reference, all we - * need to do is reset the DELWRI_Q flag. + * The buffer is now locked, under I/O and wait listed on the original + * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and + * return with the buffer unlocked and on the original queue. */ - xfs_buf_lock(bp); - error = bp->b_error; + error = xfs_buf_iowait(bp); bp->b_flags |= _XBF_DELWRI_Q; xfs_buf_unlock(bp); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d24dbd4dac39..4e3171acd0f8 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -12,7 +12,6 @@ #include <linux/mm.h> #include <linux/fs.h> #include <linux/dax.h> -#include <linux/buffer_head.h> #include <linux/uio.h> #include <linux/list_lru.h> @@ -199,10 +198,6 @@ typedef struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; - -#ifdef XFS_BUF_LOCK_TRACKING - int b_last_holder; -#endif } xfs_buf_t; /* Finding and Reading Buffers */ @@ -298,8 +293,14 @@ extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error, xfs_failaddr_t failaddr); #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); -extern void xfs_buf_submit(struct xfs_buf *bp); -extern int xfs_buf_submit_wait(struct xfs_buf *bp); + +extern int __xfs_buf_submit(struct xfs_buf *bp, bool); +static inline int xfs_buf_submit(struct xfs_buf *bp) +{ + bool wait = bp->b_flags & XBF_ASYNC ? false : true; + return __xfs_buf_submit(bp, wait); +} + extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); #define xfs_buf_zero(bp, off, len) \ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 678a5fcd7576..93f07edafd81 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -128,7 +128,7 @@ next_extent: } out_del_cursor: - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); xfs_buf_relse(agbp); out_put_perag: xfs_perag_put(pag); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 0973a0423bed..87e6dd5326d5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -286,17 +286,15 @@ xfs_dquot_disk_alloc( struct xfs_buf **bpp) { struct xfs_bmbt_irec map; - struct xfs_defer_ops dfops; - struct xfs_mount *mp = (*tpp)->t_mountp; + struct xfs_trans *tp = *tpp; + struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp; struct xfs_inode *quotip = xfs_quota_inode(mp, dqp->dq_flags); - xfs_fsblock_t firstblock; int nmaps = 1; int error; trace_xfs_dqalloc(dqp); - xfs_defer_init(&dfops, &firstblock); xfs_ilock(quotip, XFS_ILOCK_EXCL); if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { /* @@ -308,13 +306,12 @@ xfs_dquot_disk_alloc( } /* Create the block mapping. */ - xfs_trans_ijoin(*tpp, quotip, XFS_ILOCK_EXCL); - error = xfs_bmapi_write(*tpp, quotip, dqp->q_fileoffset, + xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); + error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &dfops); + XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps); if (error) - goto error0; + return error; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && @@ -326,19 +323,17 @@ xfs_dquot_disk_alloc( dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ - bp = xfs_trans_get_buf(*tpp, mp->m_ddev_targp, dqp->q_blkno, + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); - if (!bp) { - error = -ENOMEM; - goto error1; - } + if (!bp) + return -ENOMEM; bp->b_ops = &xfs_dquot_buf_ops; /* * Make a chunk of dquots out of this buffer and log * the entire thing. */ - xfs_qm_init_dquot_blk(*tpp, mp, be32_to_cpu(dqp->q_core.d_id), + xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), dqp->dq_flags & XFS_DQ_ALLTYPES, bp); xfs_buf_set_ref(bp, XFS_DQUOT_REF); @@ -352,10 +347,8 @@ xfs_dquot_disk_alloc( * the buffer locked across the _defer_finish call. We can now do * this correctly with xfs_defer_bjoin. * - * Above, we allocated a disk block for the dquot information and - * used get_buf to initialize the dquot. If the _defer_bjoin fails, - * the buffer is still locked to *tpp, so we must _bhold_release and - * then _trans_brelse the buffer. If the _defer_finish fails, the old + * Above, we allocated a disk block for the dquot information and used + * get_buf to initialize the dquot. If the _defer_finish fails, the old * transaction is gone but the new buffer is not joined or held to any * transaction, so we must _buf_relse it. * @@ -364,25 +357,15 @@ xfs_dquot_disk_alloc( * is responsible for unlocking any buffer passed back, either * manually or by committing the transaction. */ - xfs_trans_bhold(*tpp, bp); - error = xfs_defer_bjoin(&dfops, bp); - if (error) { - xfs_trans_bhold_release(*tpp, bp); - xfs_trans_brelse(*tpp, bp); - goto error1; - } - error = xfs_defer_finish(tpp, &dfops); + xfs_trans_bhold(tp, bp); + error = xfs_defer_finish(tpp); + tp = *tpp; if (error) { xfs_buf_relse(bp); - goto error1; + return error; } *bpp = bp; return 0; - -error1: - xfs_defer_cancel(&dfops); -error0: - return error; } /* diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 0470114a8d80..9866f542e77b 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -50,6 +50,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, XFS_RANDOM_FORCE_SCRUB_REPAIR, + XFS_RANDOM_FORCE_SUMMARY_RECALC, }; struct xfs_errortag_attr { @@ -157,6 +158,7 @@ XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR); +XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), @@ -192,6 +194,7 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(log_item_pin), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), XFS_ERRORTAG_ATTR_LIST(force_repair), + XFS_ERRORTAG_ATTR_LIST(bad_summary), NULL, }; diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 3cf4682e2510..f2284ceb129f 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -150,7 +150,7 @@ xfs_nfs_get_inode( } if (VFS_I(ip)->i_generation != generation) { - IRELE(ip); + xfs_irele(ip); return ERR_PTR(-ESTALE); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a3e7767a5715..5eaef2c17293 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -721,12 +721,10 @@ xfs_file_write_iter( static void xfs_wait_dax_page( - struct inode *inode, - bool *did_unlock) + struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); - *did_unlock = true; xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); schedule(); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); @@ -735,8 +733,7 @@ xfs_wait_dax_page( static int xfs_break_dax_layouts( struct inode *inode, - uint iolock, - bool *did_unlock) + bool *retry) { struct page *page; @@ -746,9 +743,10 @@ xfs_break_dax_layouts( if (!page) return 0; + *retry = true; return ___wait_var_event(&page->_refcount, atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, - 0, 0, xfs_wait_dax_page(inode, did_unlock)); + 0, 0, xfs_wait_dax_page(inode)); } int @@ -766,7 +764,7 @@ xfs_break_layouts( retry = false; switch (reason) { case BREAK_UNMAP: - error = xfs_break_dax_layouts(inode, *iolock, &retry); + error = xfs_break_dax_layouts(inode, &retry); if (error || retry) break; /* fall through */ @@ -1171,7 +1169,7 @@ xfs_file_mmap( file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + vma->vm_flags |= VM_HUGEPAGE; return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 2d2c5ab9143c..182501373af2 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -19,6 +19,8 @@ #include "xfs_filestream.h" #include "xfs_trace.h" #include "xfs_ag_resv.h" +#include "xfs_trans.h" +#include "xfs_shared.h" struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; @@ -339,7 +341,7 @@ xfs_filestream_lookup_ag( if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) ag = NULLAGNUMBER; out: - IRELE(pip); + xfs_irele(pip); return ag; } @@ -377,7 +379,7 @@ xfs_filestream_new_ag( if (xfs_alloc_is_userdata(ap->datatype)) flags |= XFS_PICK_USERDATA; - if (ap->dfops->dop_low) + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) flags |= XFS_PICK_LOWSPACE; err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); @@ -388,7 +390,7 @@ xfs_filestream_new_ag( if (mru) xfs_fstrm_free_func(mp, mru); - IRELE(pip); + xfs_irele(pip); exit: if (*agp == NULLAGNUMBER) *agp = 0; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index c7157bc48bd1..3d76a9e35870 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -214,12 +214,12 @@ xfs_getfsmap_is_shared( /* Are there any shared blocks here? */ flen = 0; cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, - info->agno, NULL); + info->agno); error = xfs_refcount_find_shared(cur, rec->rm_startblock, rec->rm_blockcount, &fbno, &flen, false); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); if (error) return error; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3f2bd6032cf8..7c00b8bedfe3 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -536,7 +536,7 @@ xfs_fs_reserve_ag_blocks( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { pag = xfs_perag_get(mp, agno); - err2 = xfs_ag_resv_init(pag); + err2 = xfs_ag_resv_init(pag, NULL); xfs_perag_put(pag); if (err2 && !error) error = err2; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 47f417d20a30..245483cc282b 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -66,7 +66,7 @@ xfs_inode_alloc( ip->i_cowfp = NULL; ip->i_cnextents = 0; ip->i_cformat = XFS_DINODE_FMT_EXTENTS; - memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); + memset(&ip->i_df, 0, sizeof(ip->i_df)); ip->i_flags = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(ip->i_d)); @@ -716,7 +716,7 @@ xfs_icache_inode_is_allocated( return error; *inuse = !!(VFS_I(ip)->i_mode); - IRELE(ip); + xfs_irele(ip); return 0; } @@ -856,7 +856,7 @@ restart: xfs_iflags_test(batch[i], XFS_INEW)) xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); - IRELE(batch[i]); + xfs_irele(batch[i]); if (error == -EAGAIN) { skipped++; continue; @@ -1697,14 +1697,13 @@ xfs_inode_clear_eofblocks_tag( */ static bool xfs_prep_free_cowblocks( - struct xfs_inode *ip, - struct xfs_ifork *ifp) + struct xfs_inode *ip) { /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. */ - if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { + if (!xfs_inode_has_cow_data(ip)) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return false; @@ -1742,11 +1741,10 @@ xfs_inode_free_cowblocks( void *args) { struct xfs_eofblocks *eofb = args; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); int match; int ret = 0; - if (!xfs_prep_free_cowblocks(ip, ifp)) + if (!xfs_prep_free_cowblocks(ip)) return 0; if (eofb) { @@ -1771,7 +1769,7 @@ xfs_inode_free_cowblocks( * Check again, nobody else should be able to dirty blocks or change * the reflink iflag now that we have the first two locks held. */ - if (xfs_prep_free_cowblocks(ip, ifp)) + if (xfs_prep_free_cowblocks(ip)) ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5df4de666cc1..d957a46dc1cb 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -927,7 +927,7 @@ xfs_ialloc( case S_IFLNK: ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_df.if_flags = XFS_IFEXTENTS; - ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; + ip->i_df.if_bytes = 0; ip->i_df.if_u1.if_root = NULL; break; default: @@ -1142,8 +1142,6 @@ xfs_create( struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; int error; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; bool unlock_dp_on_error = false; prid_t prid; struct xfs_dquot *udqp = NULL; @@ -1195,9 +1193,6 @@ xfs_create( xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - /* * Reserve disk quota and the inode. */ @@ -1226,7 +1221,7 @@ xfs_create( unlock_dp_on_error = false; error = xfs_dir_createname(tp, dp, name, ip->i_ino, - &first_block, &dfops, resblks ? + resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != -ENOSPC); @@ -1238,11 +1233,11 @@ xfs_create( if (is_dir) { error = xfs_dir_init(tp, ip, dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; error = xfs_bumplink(tp, dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } /* @@ -1260,10 +1255,6 @@ xfs_create( */ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; - error = xfs_trans_commit(tp); if (error) goto out_release_inode; @@ -1275,8 +1266,6 @@ xfs_create( *ipp = ip; return 0; - out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); out_release_inode: @@ -1287,7 +1276,7 @@ xfs_create( */ if (ip) { xfs_finish_inode_setup(ip); - IRELE(ip); + xfs_irele(ip); } xfs_qm_dqrele(udqp); @@ -1382,7 +1371,7 @@ xfs_create_tmpfile( */ if (ip) { xfs_finish_inode_setup(ip); - IRELE(ip); + xfs_irele(ip); } xfs_qm_dqrele(udqp); @@ -1401,8 +1390,6 @@ xfs_link( xfs_mount_t *mp = tdp->i_mount; xfs_trans_t *tp; int error; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; int resblks; trace_xfs_link(tdp, target_name); @@ -1451,9 +1438,6 @@ xfs_link( goto error_return; } - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - /* * Handle initial link state of O_TMPFILE inode */ @@ -1464,7 +1448,7 @@ xfs_link( } error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, - &first_block, &dfops, resblks); + resblks); if (error) goto error_return; xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -1482,12 +1466,6 @@ xfs_link( if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) xfs_trans_set_sync(tp); - error = xfs_defer_finish(&tp, &dfops); - if (error) { - xfs_defer_cancel(&dfops); - goto error_return; - } - return xfs_trans_commit(tp); error_return: @@ -1545,8 +1523,6 @@ xfs_itruncate_extents_flags( { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp = *tpp; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; xfs_fileoff_t first_unmap_block; xfs_fileoff_t last_block; xfs_filblks_t unmap_len; @@ -1583,10 +1559,9 @@ xfs_itruncate_extents_flags( ASSERT(first_unmap_block < last_block); unmap_len = last_block - first_unmap_block + 1; while (!done) { - xfs_defer_init(&dfops, &first_block); + ASSERT(tp->t_firstblock == NULLFSBLOCK); error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, - XFS_ITRUNC_MAX_EXTENTS, &first_block, - &dfops, &done); + XFS_ITRUNC_MAX_EXTENTS, &done); if (error) goto out_bmap_cancel; @@ -1594,10 +1569,9 @@ xfs_itruncate_extents_flags( * Duplicate the transaction that has the permanent * reservation and commit the old transaction. */ - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(&tp, &dfops); + error = xfs_defer_finish(&tp); if (error) - goto out_bmap_cancel; + goto out; error = xfs_trans_roll_inode(&tp, ip); if (error) @@ -1631,7 +1605,7 @@ out_bmap_cancel: * the transaction can be properly aborted. We just need to make sure * we're not holding any resources that we were not when we came in. */ - xfs_defer_cancel(&dfops); + xfs_defer_cancel(tp); goto out; } @@ -1733,7 +1707,6 @@ xfs_inactive_truncate( ASSERT(XFS_FORCED_SHUTDOWN(mp)); return error; } - xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -1774,8 +1747,6 @@ STATIC int xfs_inactive_ifree( struct xfs_inode *ip) { - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; @@ -1812,9 +1783,7 @@ xfs_inactive_ifree( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - error = xfs_ifree(tp, ip, &dfops); + error = xfs_ifree(tp, ip); if (error) { /* * If we fail to free the inode, shut down. The cancel @@ -1840,12 +1809,6 @@ xfs_inactive_ifree( * Just ignore errors at this point. There is nothing we can do except * to try to keep going. Make sure it's not a silent error. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) { - xfs_notice(mp, "%s: xfs_defer_finish returned error %d", - __func__, error); - xfs_defer_cancel(&dfops); - } error = xfs_trans_commit(tp); if (error) xfs_notice(mp, "%s: xfs_trans_commit returned error %d", @@ -1868,7 +1831,6 @@ xfs_inactive( xfs_inode_t *ip) { struct xfs_mount *mp; - struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); int error; int truncate = 0; @@ -1877,7 +1839,6 @@ xfs_inactive( * to clean up here. */ if (VFS_I(ip)->i_mode == 0) { - ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); return; } @@ -1890,7 +1851,7 @@ xfs_inactive( return; /* Try to clean out the cow blocks if there are any. */ - if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0) + if (xfs_inode_has_cow_data(ip)) xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (VFS_I(ip)->i_nlink != 0) { @@ -2445,9 +2406,8 @@ xfs_ifree_local_data( */ int xfs_ifree( - xfs_trans_t *tp, - xfs_inode_t *ip, - struct xfs_defer_ops *dfops) + struct xfs_trans *tp, + struct xfs_inode *ip) { int error; struct xfs_icluster xic = { 0 }; @@ -2466,7 +2426,7 @@ xfs_ifree( if (error) return error; - error = xfs_difree(tp, ip->i_ino, dfops, &xic); + error = xfs_difree(tp, ip->i_ino, &xic); if (error) return error; @@ -2577,8 +2537,6 @@ xfs_remove( xfs_trans_t *tp = NULL; int is_dir = S_ISDIR(VFS_I(ip)->i_mode); int error = 0; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; uint resblks; trace_xfs_remove(dp, name); @@ -2658,13 +2616,10 @@ xfs_remove( if (error) goto out_trans_cancel; - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - error = xfs_dir_removename(tp, dp, name, ip->i_ino, - &first_block, &dfops, resblks); + error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); if (error) { ASSERT(error != -ENOENT); - goto out_bmap_cancel; + goto out_trans_cancel; } /* @@ -2675,10 +2630,6 @@ xfs_remove( if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) xfs_trans_set_sync(tp); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; - error = xfs_trans_commit(tp); if (error) goto std_return; @@ -2688,8 +2639,6 @@ xfs_remove( return 0; - out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); std_return: @@ -2749,11 +2698,8 @@ xfs_sort_for_rename( static int xfs_finish_rename( - struct xfs_trans *tp, - struct xfs_defer_ops *dfops) + struct xfs_trans *tp) { - int error; - /* * If this is a synchronous mount, make sure that the rename transaction * goes to disk before returning to the user. @@ -2761,13 +2707,6 @@ xfs_finish_rename( if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) xfs_trans_set_sync(tp); - error = xfs_defer_finish(&tp, dfops); - if (error) { - xfs_defer_cancel(dfops); - xfs_trans_cancel(tp); - return error; - } - return xfs_trans_commit(tp); } @@ -2785,8 +2724,6 @@ xfs_cross_rename( struct xfs_inode *dp2, struct xfs_name *name2, struct xfs_inode *ip2, - struct xfs_defer_ops *dfops, - xfs_fsblock_t *first_block, int spaceres) { int error = 0; @@ -2795,16 +2732,12 @@ xfs_cross_rename( int dp2_flags = 0; /* Swap inode number for dirent in first parent */ - error = xfs_dir_replace(tp, dp1, name1, - ip2->i_ino, - first_block, dfops, spaceres); + error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); if (error) goto out_trans_abort; /* Swap inode number for dirent in second parent */ - error = xfs_dir_replace(tp, dp2, name2, - ip1->i_ino, - first_block, dfops, spaceres); + error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); if (error) goto out_trans_abort; @@ -2818,8 +2751,7 @@ xfs_cross_rename( if (S_ISDIR(VFS_I(ip2)->i_mode)) { error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, - dp1->i_ino, first_block, - dfops, spaceres); + dp1->i_ino, spaceres); if (error) goto out_trans_abort; @@ -2845,8 +2777,7 @@ xfs_cross_rename( if (S_ISDIR(VFS_I(ip1)->i_mode)) { error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, - dp2->i_ino, first_block, - dfops, spaceres); + dp2->i_ino, spaceres); if (error) goto out_trans_abort; @@ -2885,10 +2816,9 @@ xfs_cross_rename( } xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); - return xfs_finish_rename(tp, dfops); + return xfs_finish_rename(tp); out_trans_abort: - xfs_defer_cancel(dfops); xfs_trans_cancel(tp); return error; } @@ -2943,8 +2873,6 @@ xfs_rename( { struct xfs_mount *mp = src_dp->i_mount; struct xfs_trans *tp; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; struct xfs_inode *wip = NULL; /* whiteout inode */ struct xfs_inode *inodes[__XFS_SORT_INODES]; int num_inodes = __XFS_SORT_INODES; @@ -3026,14 +2954,11 @@ xfs_rename( goto out_trans_cancel; } - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - /* RENAME_EXCHANGE is unique from here on. */ if (flags & RENAME_EXCHANGE) return xfs_cross_rename(tp, src_dp, src_name, src_ip, target_dp, target_name, target_ip, - &dfops, &first_block, spaceres); + spaceres); /* * Set up the target. @@ -3054,10 +2979,9 @@ xfs_rename( * to account for the ".." reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, - src_ip->i_ino, &first_block, - &dfops, spaceres); + src_ip->i_ino, spaceres); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3065,7 +2989,7 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } } else { /* target_ip != NULL */ /* @@ -3094,10 +3018,9 @@ xfs_rename( * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, - src_ip->i_ino, - &first_block, &dfops, spaceres); + src_ip->i_ino, spaceres); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3108,7 +3031,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; if (src_is_directory) { /* @@ -3116,7 +3039,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } } /* target_ip != NULL */ @@ -3129,11 +3052,10 @@ xfs_rename( * directory. */ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, - target_dp->i_ino, - &first_block, &dfops, spaceres); + target_dp->i_ino, spaceres); ASSERT(error != -EEXIST); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } /* @@ -3159,7 +3081,7 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } /* @@ -3169,12 +3091,12 @@ xfs_rename( */ if (wip) { error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, - &first_block, &dfops, spaceres); + spaceres); } else error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, - &first_block, &dfops, spaceres); + spaceres); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; /* * For whiteouts, we need to bump the link count on the whiteout inode. @@ -3188,10 +3110,10 @@ xfs_rename( ASSERT(VFS_I(wip)->i_nlink == 0); error = xfs_bumplink(tp, wip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; error = xfs_iunlink_remove(tp, wip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); /* @@ -3207,18 +3129,16 @@ xfs_rename( if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - error = xfs_finish_rename(tp, &dfops); + error = xfs_finish_rename(tp); if (wip) - IRELE(wip); + xfs_irele(wip); return error; -out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); out_release_wip: if (wip) - IRELE(wip); + xfs_irele(wip); return error; } @@ -3674,3 +3594,12 @@ xfs_iflush_int( corrupt_out: return -EFSCORRUPTED; } + +/* Release an inode. */ +void +xfs_irele( + struct xfs_inode *ip) +{ + trace_xfs_irele(ip, _RET_IP_); + iput(VFS_I(ip)); +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2ed63a49e890..be2014520155 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -15,7 +15,6 @@ struct xfs_dinode; struct xfs_inode; struct xfs_buf; -struct xfs_defer_ops; struct xfs_bmbt_irec; struct xfs_inode_log_item; struct xfs_mount; @@ -34,9 +33,9 @@ typedef struct xfs_inode { struct xfs_imap i_imap; /* location for xfs_imap() */ /* Extent information. */ - xfs_ifork_t *i_afp; /* attribute fork pointer */ - xfs_ifork_t *i_cowfp; /* copy on write extents */ - xfs_ifork_t i_df; /* data fork */ + struct xfs_ifork *i_afp; /* attribute fork pointer */ + struct xfs_ifork *i_cowfp; /* copy on write extents */ + struct xfs_ifork i_df; /* data fork */ /* operations vectors */ const struct xfs_dir_ops *d_ops; /* directory ops vector */ @@ -199,6 +198,15 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) } /* + * Check if an inode has any data in the COW fork. This might be often false + * even for inodes with the reflink flag when there is no pending COW operation. + */ +static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) +{ + return ip->i_cowfp && ip->i_cowfp->if_bytes; +} + +/* * In-core inode flags. */ #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ @@ -415,8 +423,7 @@ uint xfs_ilock_data_map_shared(struct xfs_inode *); uint xfs_ilock_attr_map_shared(struct xfs_inode *); uint xfs_ip2xflags(struct xfs_inode *); -int xfs_ifree(struct xfs_trans *, xfs_inode_t *, - struct xfs_defer_ops *); +int xfs_ifree(struct xfs_trans *, struct xfs_inode *); int xfs_itruncate_extents_flags(struct xfs_trans **, struct xfs_inode *, int, xfs_fsize_t, int); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -484,18 +491,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip) xfs_finish_inode_setup(ip); } -#define IHOLD(ip) \ -do { \ - ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - ihold(VFS_I(ip)); \ - trace_xfs_ihold(ip, _THIS_IP_); \ -} while (0) - -#define IRELE(ip) \ -do { \ - trace_xfs_irele(ip, _THIS_IP_); \ - iput(VFS_I(ip)); \ -} while (0) +void xfs_irele(struct xfs_inode *ip); extern struct kmem_zone *xfs_inode_zone; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2389c34c172d..fa1c4fe2ffbf 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -194,8 +194,6 @@ xfs_inode_item_format_data_fork( * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); - ASSERT(ip->i_df.if_real_bytes == 0 || - ip->i_df.if_real_bytes >= data_bytes); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, @@ -280,8 +278,6 @@ xfs_inode_item_format_attr_fork( * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); - ASSERT(ip->i_afp->if_real_bytes == 0 || - ip->i_afp->if_real_bytes >= data_bytes); ASSERT(ip->i_afp->if_u1.if_data != NULL); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, ip->i_afp->if_u1.if_data, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 55876dd02f0c..6320aca39f39 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * Copyright (c) 2016 Christoph Hellwig. + * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ #include <linux/iomap.h> @@ -152,13 +152,11 @@ xfs_iomap_write_direct( xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; - xfs_fsblock_t firstfsb; xfs_extlen_t extsz; int nimaps; int quota_flag; int rt; xfs_trans_t *tp; - struct xfs_defer_ops dfops; uint qblocks, resblks, resrtextents; int error; int lockmode; @@ -254,21 +252,15 @@ xfs_iomap_write_direct( * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ - xfs_defer_init(&dfops, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - bmapi_flags, &firstfsb, resblks, imap, - &nimaps, &dfops); + bmapi_flags, resblks, imap, &nimaps); if (error) - goto out_bmap_cancel; + goto out_res_cancel; /* * Complete the transaction */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; - error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -288,8 +280,7 @@ out_unlock: xfs_iunlock(ip, lockmode); return error; -out_bmap_cancel: - xfs_defer_cancel(&dfops); +out_res_cancel: xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); out_trans_cancel: xfs_trans_cancel(tp); @@ -626,7 +617,7 @@ retry: * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. */ - iomap->flags = IOMAP_F_NEW; + iomap->flags |= IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -660,13 +651,13 @@ xfs_iomap_write_allocate( xfs_inode_t *ip, int whichfork, xfs_off_t offset, - xfs_bmbt_irec_t *imap) + xfs_bmbt_irec_t *imap, + unsigned int *cow_seq) { xfs_mount_t *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; - xfs_fsblock_t first_block; - struct xfs_defer_ops dfops; xfs_filblks_t count_fsb; xfs_trans_t *tp; int nimaps; @@ -716,8 +707,6 @@ xfs_iomap_write_allocate( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - xfs_defer_init(&dfops, &first_block); - /* * it is possible that the extents have changed since * we did the read call as we dropped the ilock for a @@ -770,13 +759,8 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, flags, &first_block, - nres, imap, &nimaps, - &dfops); - if (error) - goto trans_cancel; - - error = xfs_defer_finish(&tp, &dfops); + count_fsb, flags, nres, imap, + &nimaps); if (error) goto trans_cancel; @@ -784,6 +768,8 @@ xfs_iomap_write_allocate( if (error) goto error0; + if (whichfork == XFS_COW_FORK) + *cow_seq = READ_ONCE(ifp->if_seq); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -810,7 +796,6 @@ xfs_iomap_write_allocate( } trans_cancel: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -828,11 +813,9 @@ xfs_iomap_write_unwritten( xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; - xfs_fsblock_t firstfsb; int nimaps; xfs_trans_t *tp; xfs_bmbt_irec_t imap; - struct xfs_defer_ops dfops; struct inode *inode = VFS_I(ip); xfs_fsize_t i_size; uint resblks; @@ -877,11 +860,10 @@ xfs_iomap_write_unwritten( /* * Modify the unwritten extent state of the buffer. */ - xfs_defer_init(&dfops, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_CONVERT, &firstfsb, resblks, - &imap, &nimaps, &dfops); + XFS_BMAPI_CONVERT, resblks, &imap, + &nimaps); if (error) goto error_on_bmapi_transaction; @@ -901,10 +883,6 @@ xfs_iomap_write_unwritten( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto error_on_bmapi_transaction; - error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) @@ -928,7 +906,6 @@ xfs_iomap_write_unwritten( return 0; error_on_bmapi_transaction: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -1132,7 +1109,7 @@ xfs_file_iomap_begin( if (error) return error; - iomap->flags = IOMAP_F_NEW; + iomap->flags |= IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); out_finish: @@ -1202,11 +1179,8 @@ xfs_file_iomap_end_delalloc( truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), XFS_FSB_TO_B(mp, end_fsb) - 1); - xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmap_punch_delalloc_range(ip, start_fsb, end_fsb - start_fsb); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error && !XFS_FORCED_SHUTDOWN(mp)) { xfs_alert(mp, "%s: unable to clean up ino %lld", __func__, ip->i_ino); diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 83474c9cede9..c6170548831b 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -14,7 +14,7 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, - struct xfs_bmbt_irec *); + struct xfs_bmbt_irec *, unsigned int *); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 0fa29f39d658..c3e74f9128e8 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -26,6 +26,7 @@ #include "xfs_dir2.h" #include "xfs_trans_space.h" #include "xfs_iomap.h" +#include "xfs_defer.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -208,7 +209,7 @@ xfs_generic_create( xfs_finish_inode_setup(ip); if (!tmpfile) xfs_cleanup_inode(dir, inode, dentry); - iput(inode); + xfs_irele(ip); goto out_free_acl; } @@ -390,7 +391,7 @@ xfs_vn_symlink( out_cleanup_inode: xfs_finish_inode_setup(cip); xfs_cleanup_inode(dir, inode, dentry); - iput(inode); + xfs_irele(cip); out: return error; } @@ -1253,7 +1254,7 @@ xfs_setup_inode( inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 24f4f1c555b5..e9508ba01ed1 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -114,7 +114,7 @@ xfs_bulkstat_one_int( break; } xfs_iunlock(ip, XFS_ILOCK_SHARED); - IRELE(ip); + xfs_irele(ip); error = formatter(buffer, ubsize, ubused, buf); if (!error) @@ -458,8 +458,7 @@ xfs_bulkstat( * pending error, then we are done. */ del_cursor: - xfs_btree_del_cursor(cur, error ? - XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); xfs_buf_relse(agbp); if (error) break; @@ -632,8 +631,7 @@ next_ag: kmem_free(buffer); if (cur) - xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : - XFS_BTREE_NOERROR)); + xfs_btree_del_cursor(cur, error); if (agbp) xfs_buf_relse(agbp); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 5e56f3b93d4b..c3b610b687d1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -410,7 +410,7 @@ out_error: } /* - * Reserve log space and return a ticket corresponding the reservation. + * Reserve log space and return a ticket corresponding to the reservation. * * Each reservation is going to reserve extra space for a log record header. * When writes happen to the on-disk log, we don't subtract the length of the @@ -826,6 +826,88 @@ xfs_log_mount_cancel( * deallocation must not be done until source-end. */ +/* Actually write the unmount record to disk. */ +static void +xfs_log_write_unmount_record( + struct xfs_mount *mp) +{ + /* the data section must be 32 bit size aligned */ + struct xfs_unmount_log_format magic = { + .magic = XLOG_UNMOUNT_TYPE, + }; + struct xfs_log_iovec reg = { + .i_addr = &magic, + .i_len = sizeof(magic), + .i_type = XLOG_REG_TYPE_UNMOUNT, + }; + struct xfs_log_vec vec = { + .lv_niovecs = 1, + .lv_iovecp = ®, + }; + struct xlog *log = mp->m_log; + struct xlog_in_core *iclog; + struct xlog_ticket *tic = NULL; + xfs_lsn_t lsn; + uint flags = XLOG_UNMOUNT_TRANS; + int error; + + error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); + if (error) + goto out_err; + + /* + * If we think the summary counters are bad, clear the unmount header + * flag in the unmount record so that the summary counters will be + * recalculated during log recovery at next mount. Refer to + * xlog_check_unmount_rec for more details. + */ + if (XFS_TEST_ERROR((mp->m_flags & XFS_MOUNT_BAD_SUMMARY), mp, + XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { + xfs_alert(mp, "%s: will fix summary counters at next mount", + __func__); + flags &= ~XLOG_UNMOUNT_TRANS; + } + + /* remove inited flag, and account for space used */ + tic->t_flags = 0; + tic->t_curr_res -= sizeof(magic); + error = xlog_write(log, &vec, tic, &lsn, NULL, flags); + /* + * At this point, we're umounting anyway, so there's no point in + * transitioning log state to IOERROR. Just continue... + */ +out_err: + if (error) + xfs_alert(mp, "%s: unmount record failed", __func__); + + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + atomic_inc(&iclog->ic_refcnt); + xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); + error = xlog_state_release_iclog(log, iclog); + + spin_lock(&log->l_icloglock); + switch (iclog->ic_state) { + default: + if (!XLOG_FORCED_SHUTDOWN(log)) { + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); + break; + } + /* fall through */ + case XLOG_STATE_ACTIVE: + case XLOG_STATE_DIRTY: + spin_unlock(&log->l_icloglock); + break; + } + + if (tic) { + trace_xfs_log_umount_write(log, tic); + xlog_ungrant_log_space(log, tic); + xfs_log_ticket_put(tic); + } +} + /* * Unmount record used to have a string "Unmount filesystem--" in the * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE). @@ -842,8 +924,6 @@ xfs_log_unmount_write(xfs_mount_t *mp) #ifdef DEBUG xlog_in_core_t *first_iclog; #endif - xlog_ticket_t *tic = NULL; - xfs_lsn_t lsn; int error; /* @@ -870,66 +950,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) } while (iclog != first_iclog); #endif if (! (XLOG_FORCED_SHUTDOWN(log))) { - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); - if (!error) { - /* the data section must be 32 bit size aligned */ - struct { - uint16_t magic; - uint16_t pad1; - uint32_t pad2; /* may as well make it 64 bits */ - } magic = { - .magic = XLOG_UNMOUNT_TYPE, - }; - struct xfs_log_iovec reg = { - .i_addr = &magic, - .i_len = sizeof(magic), - .i_type = XLOG_REG_TYPE_UNMOUNT, - }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = ®, - }; - - /* remove inited flag, and account for space used */ - tic->t_flags = 0; - tic->t_curr_res -= sizeof(magic); - error = xlog_write(log, &vec, tic, &lsn, - NULL, XLOG_UNMOUNT_TRANS); - /* - * At this point, we're umounting anyway, - * so there's no point in transitioning log state - * to IOERROR. Just continue... - */ - } - - if (error) - xfs_alert(mp, "%s: unmount record failed", __func__); - - - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - xlog_state_want_sync(log, iclog); - spin_unlock(&log->l_icloglock); - error = xlog_state_release_iclog(log, iclog); - - spin_lock(&log->l_icloglock); - if (!(iclog->ic_state == XLOG_STATE_ACTIVE || - iclog->ic_state == XLOG_STATE_DIRTY)) { - if (!XLOG_FORCED_SHUTDOWN(log)) { - xlog_wait(&iclog->ic_force_wait, - &log->l_icloglock); - } else { - spin_unlock(&log->l_icloglock); - } - } else { - spin_unlock(&log->l_icloglock); - } - if (tic) { - trace_xfs_log_umount_write(log, tic); - xlog_ungrant_log_space(log, tic); - xfs_log_ticket_put(tic); - } + xfs_log_write_unmount_record(mp); } else { /* * We're already in forced_shutdown mode, couldn't @@ -4083,3 +4104,12 @@ xfs_log_check_lsn( return valid; } + +bool +xfs_log_in_recovery( + struct xfs_mount *mp) +{ + struct xlog *log = mp->m_log; + + return log->l_flags & XLOG_ACTIVE_RECOVERY; +} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3c1f6a8b4b70..73a64bf32f6f 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -153,5 +153,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); void xfs_log_quiesce(struct xfs_mount *mp); bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); +bool xfs_log_in_recovery(struct xfs_mount *); #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b181b5f57a19..a21dc61ec09e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -196,7 +196,7 @@ xlog_bread_noalign( bp->b_io_length = nbblks; bp->b_error = 0; - error = xfs_buf_submit_wait(bp); + error = xfs_buf_submit(bp); if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) xfs_buf_ioerror_alert(bp, __func__); return error; @@ -4733,10 +4733,9 @@ xlog_recover_cancel_rui( /* Recover the CUI if necessary. */ STATIC int xlog_recover_process_cui( - struct xfs_mount *mp, + struct xfs_trans *parent_tp, struct xfs_ail *ailp, - struct xfs_log_item *lip, - struct xfs_defer_ops *dfops) + struct xfs_log_item *lip) { struct xfs_cui_log_item *cuip; int error; @@ -4749,7 +4748,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->ail_lock); - error = xfs_cui_recover(mp, cuip, dfops); + error = xfs_cui_recover(parent_tp, cuip); spin_lock(&ailp->ail_lock); return error; @@ -4774,10 +4773,9 @@ xlog_recover_cancel_cui( /* Recover the BUI if necessary. */ STATIC int xlog_recover_process_bui( - struct xfs_mount *mp, + struct xfs_trans *parent_tp, struct xfs_ail *ailp, - struct xfs_log_item *lip, - struct xfs_defer_ops *dfops) + struct xfs_log_item *lip) { struct xfs_bui_log_item *buip; int error; @@ -4790,7 +4788,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->ail_lock); - error = xfs_bui_recover(mp, buip, dfops); + error = xfs_bui_recover(parent_tp, buip); spin_lock(&ailp->ail_lock); return error; @@ -4829,9 +4827,9 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) /* Take all the collected deferred ops and finish them in order. */ static int xlog_finish_defer_ops( - struct xfs_mount *mp, - struct xfs_defer_ops *dfops) + struct xfs_trans *parent_tp) { + struct xfs_mount *mp = parent_tp->t_mountp; struct xfs_trans *tp; int64_t freeblks; uint resblks; @@ -4854,16 +4852,10 @@ xlog_finish_defer_ops( 0, XFS_TRANS_RESERVE, &tp); if (error) return error; - - error = xfs_defer_finish(&tp, dfops); - if (error) - goto out_cancel; + /* transfer all collected dfops to this transaction */ + xfs_defer_move(tp, parent_tp); return xfs_trans_commit(tp); - -out_cancel: - xfs_trans_cancel(tp); - return error; } /* @@ -4886,23 +4878,34 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_defer_ops dfops; + struct xfs_trans *parent_tp; struct xfs_ail_cursor cur; struct xfs_log_item *lip; struct xfs_ail *ailp; - xfs_fsblock_t firstfsb; - int error = 0; + int error; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif + /* + * The intent recovery handlers commit transactions to complete recovery + * for individual intents, but any new deferred operations that are + * queued during that process are held off until the very end. The + * purpose of this transaction is to serve as a container for deferred + * operations. Each intent recovery handler must transfer dfops here + * before its local transaction commits, and we'll finish the entire + * list below. + */ + error = xfs_trans_alloc_empty(log->l_mp, &parent_tp); + if (error) + return error; + ailp = log->l_ailp; spin_lock(&ailp->ail_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif - xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4937,12 +4940,10 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip, - &dfops); + error = xlog_recover_process_cui(parent_tp, ailp, lip); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip, - &dfops); + error = xlog_recover_process_bui(parent_tp, ailp, lip); break; } if (error) @@ -4952,10 +4953,9 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); - if (error) - xfs_defer_cancel(&dfops); - else - error = xlog_finish_defer_ops(log->l_mp, &dfops); + if (!error) + error = xlog_finish_defer_ops(parent_tp); + xfs_trans_cancel(parent_tp); return error; } @@ -5094,11 +5094,11 @@ xlog_recover_process_one_iunlink( */ ip->i_d.di_dmevmask = 0; - IRELE(ip); + xfs_irele(ip); return agino; fail_iput: - IRELE(ip); + xfs_irele(ip); fail: /* * We can't read in the inode this bucket points to, or this inode @@ -5707,7 +5707,7 @@ xlog_do_recover( bp->b_flags |= XBF_READ; bp->b_ops = &xfs_sb_buf_ops; - error = xfs_buf_submit_wait(bp); + error = xfs_buf_submit(bp); if (error) { if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_buf_ioerror_alert(bp, __func__); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a3378252baa1..99db27d6ac8a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -207,6 +207,9 @@ xfs_initialize_perag( if (xfs_buf_hash_init(pag)) goto out_free_pag; init_waitqueue_head(&pag->pagb_wait); + spin_lock_init(&pag->pagb_lock); + pag->pagb_count = 0; + pag->pagb_tree = RB_ROOT; if (radix_tree_preload(GFP_NOFS)) goto out_hash_destroy; @@ -606,6 +609,56 @@ xfs_default_resblks(xfs_mount_t *mp) return resblks; } +/* Ensure the summary counts are correct. */ +STATIC int +xfs_check_summary_counts( + struct xfs_mount *mp) +{ + /* + * The AG0 superblock verifier rejects in-progress filesystems, + * so we should never see the flag set this far into mounting. + */ + if (mp->m_sb.sb_inprogress) { + xfs_err(mp, "sb_inprogress set after log recovery??"); + WARN_ON(1); + return -EFSCORRUPTED; + } + + /* + * Now the log is mounted, we know if it was an unclean shutdown or + * not. If it was, with the first phase of recovery has completed, we + * have consistent AG blocks on disk. We have not recovered EFIs yet, + * but they are recovered transactionally in the second recovery phase + * later. + * + * If the log was clean when we mounted, we can check the summary + * counters. If any of them are obviously incorrect, we can recompute + * them from the AGF headers in the next step. + */ + if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && + (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks || + mp->m_sb.sb_ifree > mp->m_sb.sb_icount)) + mp->m_flags |= XFS_MOUNT_BAD_SUMMARY; + + /* + * We can safely re-initialise incore superblock counters from the + * per-ag data. These may not be correct if the filesystem was not + * cleanly unmounted, so we waited for recovery to finish before doing + * this. + * + * If the filesystem was cleanly unmounted or the previous check did + * not flag anything weird, then we can trust the values in the + * superblock to be correct and we don't need to do anything here. + * Otherwise, recalculate the summary counters. + */ + if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) || + XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) && + !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY)) + return 0; + + return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -831,32 +884,10 @@ xfs_mountfs( goto out_fail_wait; } - /* - * Now the log is mounted, we know if it was an unclean shutdown or - * not. If it was, with the first phase of recovery has completed, we - * have consistent AG blocks on disk. We have not recovered EFIs yet, - * but they are recovered transactionally in the second recovery phase - * later. - * - * Hence we can safely re-initialise incore superblock counters from - * the per-ag data. These may not be correct if the filesystem was not - * cleanly unmounted, so we need to wait for recovery to finish before - * doing this. - * - * If the filesystem was cleanly unmounted, then we can trust the - * values in the superblock to be correct and we don't need to do - * anything here. - * - * If we are currently making the filesystem, the initialisation will - * fail as the perag data is in an undefined state. - */ - if (xfs_sb_version_haslazysbcount(&mp->m_sb) && - !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && - !mp->m_sb.sb_inprogress) { - error = xfs_initialize_perag_data(mp, sbp->sb_agcount); - if (error) - goto out_log_dealloc; - } + /* Make sure the summary counts are ok. */ + error = xfs_check_summary_counts(mp); + if (error) + goto out_log_dealloc; /* * Get and sanity-check the root inode. @@ -1011,7 +1042,7 @@ xfs_mountfs( out_rtunmount: xfs_rtunmount_inodes(mp); out_rele_rip: - IRELE(rip); + xfs_irele(rip); /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); /* @@ -1067,7 +1098,7 @@ xfs_unmountfs( xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); - IRELE(mp->m_rootip); + xfs_irele(mp->m_rootip); /* * We can potentially deadlock here if we have an inode cluster @@ -1395,3 +1426,16 @@ xfs_dev_is_read_only( } return 0; } + +/* Force the summary counters to be recalculated at next mount. */ +void +xfs_force_summary_recalc( + struct xfs_mount *mp) +{ + if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) + return; + + spin_lock(&mp->m_sb_lock); + mp->m_flags |= XFS_MOUNT_BAD_SUMMARY; + spin_unlock(&mp->m_sb_lock); +} diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 245349d1e23f..7964513c3128 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -202,6 +202,7 @@ typedef struct xfs_mount { must be synchronous except for space allocations */ #define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */ +#define XFS_MOUNT_BAD_SUMMARY (1ULL << 2) /* summary counters are bad */ #define XFS_MOUNT_WAS_CLEAN (1ULL << 3) #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for @@ -216,7 +217,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_SMALL_INUMS (1ULL << 14) /* user wants 32bit inodes */ #define XFS_MOUNT_32BITINODES (1ULL << 15) /* inode32 allocator active */ #define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */ -#define XFS_MOUNT_BARRIER (1ULL << 17) #define XFS_MOUNT_IKEEP (1ULL << 18) /* keep empty inode clusters*/ #define XFS_MOUNT_SWALLOC (1ULL << 19) /* turn on stripe width * allocation */ @@ -434,5 +434,6 @@ int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); +void xfs_force_summary_recalc(struct xfs_mount *mp); #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 9ceb85cce33a..52ed7904df10 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -231,15 +231,15 @@ xfs_qm_unmount_quotas( */ if (mp->m_quotainfo) { if (mp->m_quotainfo->qi_uquotaip) { - IRELE(mp->m_quotainfo->qi_uquotaip); + xfs_irele(mp->m_quotainfo->qi_uquotaip); mp->m_quotainfo->qi_uquotaip = NULL; } if (mp->m_quotainfo->qi_gquotaip) { - IRELE(mp->m_quotainfo->qi_gquotaip); + xfs_irele(mp->m_quotainfo->qi_gquotaip); mp->m_quotainfo->qi_gquotaip = NULL; } if (mp->m_quotainfo->qi_pquotaip) { - IRELE(mp->m_quotainfo->qi_pquotaip); + xfs_irele(mp->m_quotainfo->qi_pquotaip); mp->m_quotainfo->qi_pquotaip = NULL; } } @@ -1200,12 +1200,12 @@ xfs_qm_dqusage_adjust( goto error0; } - IRELE(ip); + xfs_irele(ip); *res = BULKSTAT_RV_DIDONE; return 0; error0: - IRELE(ip); + xfs_irele(ip); *res = BULKSTAT_RV_GIVEUP; return error; } @@ -1575,11 +1575,11 @@ xfs_qm_init_quotainos( error_rele: if (uip) - IRELE(uip); + xfs_irele(uip); if (gip) - IRELE(gip); + xfs_irele(gip); if (pip) - IRELE(pip); + xfs_irele(pip); return error; } @@ -1588,15 +1588,15 @@ xfs_qm_destroy_quotainos( xfs_quotainfo_t *qi) { if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); + xfs_irele(qi->qi_uquotaip); qi->qi_uquotaip = NULL; /* paranoia */ } if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); + xfs_irele(qi->qi_gquotaip); qi->qi_gquotaip = NULL; } if (qi->qi_pquotaip) { - IRELE(qi->qi_pquotaip); + xfs_irele(qi->qi_pquotaip); qi->qi_pquotaip = NULL; } } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index abc8a21e3a82..b3190890f096 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -22,6 +22,7 @@ #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_defer.h" STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, @@ -189,15 +190,15 @@ xfs_qm_scall_quotaoff( * Release our quotainode references if we don't need them anymore. */ if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { - IRELE(q->qi_uquotaip); + xfs_irele(q->qi_uquotaip); q->qi_uquotaip = NULL; } if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) { - IRELE(q->qi_gquotaip); + xfs_irele(q->qi_gquotaip); q->qi_gquotaip = NULL; } if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) { - IRELE(q->qi_pquotaip); + xfs_irele(q->qi_pquotaip); q->qi_pquotaip = NULL; } @@ -250,7 +251,7 @@ xfs_qm_scall_trunc_qfile( out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); out_put: - IRELE(ip); + xfs_irele(ip); return error; } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 205fbb2a77e4..a7c0c657dfaf 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -45,7 +45,7 @@ xfs_qm_fill_state( tstate->ino_warnlimit = q->qi_iwarnlimit; tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit; if (tempqip) - IRELE(ip); + xfs_irele(ip); } /* diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 472a73e9d331..fce38b56b962 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -380,9 +380,8 @@ xfs_cud_init( */ int xfs_cui_recover( - struct xfs_mount *mp, - struct xfs_cui_log_item *cuip, - struct xfs_defer_ops *dfops) + struct xfs_trans *parent_tp, + struct xfs_cui_log_item *cuip) { int i; int error = 0; @@ -398,6 +397,7 @@ xfs_cui_recover( xfs_extlen_t new_len; struct xfs_bmbt_irec irec; bool requeue_only = false; + struct xfs_mount *mp = parent_tp->t_mountp; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -452,6 +452,12 @@ xfs_cui_recover( mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; + /* + * Recovery stashes all deferred ops during intent processing and + * finishes them on completion. Transfer current dfops state to this + * transaction and transfer the result back before we return. + */ + xfs_defer_move(tp, parent_tp); cudp = xfs_trans_get_cud(tp, cuip); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { @@ -473,7 +479,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - dfops, type, refc->pe_startblock, refc->pe_len, + type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -484,22 +490,18 @@ xfs_cui_recover( irec.br_blockcount = new_len; switch (type) { case XFS_REFCOUNT_INCREASE: - error = xfs_refcount_increase_extent( - tp->t_mountp, dfops, &irec); + error = xfs_refcount_increase_extent(tp, &irec); break; case XFS_REFCOUNT_DECREASE: - error = xfs_refcount_decrease_extent( - tp->t_mountp, dfops, &irec); + error = xfs_refcount_decrease_extent(tp, &irec); break; case XFS_REFCOUNT_ALLOC_COW: - error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, dfops, + error = xfs_refcount_alloc_cow_extent(tp, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: - error = xfs_refcount_free_cow_extent( - tp->t_mountp, dfops, + error = xfs_refcount_free_cow_extent(tp, irec.br_startblock, irec.br_blockcount); break; @@ -514,11 +516,13 @@ xfs_cui_recover( xfs_refcount_finish_one_cleanup(tp, rcur, error); set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); + xfs_defer_move(parent_tp, tp); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); + xfs_defer_move(parent_tp, tp); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index dd830b69cd1e..3896dcc2368f 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -82,7 +82,6 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, - struct xfs_defer_ops *dfops); +int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip); #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 592fb2071a03..38f405415b88 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -157,12 +157,12 @@ xfs_reflink_find_shared( if (!agbp) return -ENOMEM; - cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen, find_end_of_shared); - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); return error; @@ -312,10 +312,8 @@ xfs_reflink_convert_cow_extent( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, xfs_fileoff_t offset_fsb, - xfs_filblks_t count_fsb, - struct xfs_defer_ops *dfops) + xfs_filblks_t count_fsb) { - xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1; if (imap->br_state == XFS_EXT_NORM) @@ -326,8 +324,8 @@ xfs_reflink_convert_cow_extent( if (imap->br_blockcount == 0) return 0; return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, - 0, imap, &nimaps, dfops); + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, 0, imap, + &nimaps); } /* Convert all of the unwritten CoW extents in a file's range to real ones. */ @@ -342,8 +340,6 @@ xfs_reflink_convert_cow( xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_filblks_t count_fsb = end_fsb - offset_fsb; struct xfs_bmbt_irec imap; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1, error = 0; ASSERT(count != 0); @@ -351,8 +347,7 @@ xfs_reflink_convert_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb, XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT | - XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps, - &dfops); + XFS_BMAPI_CONVERT_ONLY, 0, &imap, &nimaps); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -369,9 +364,7 @@ xfs_reflink_allocate_cow( xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; struct xfs_bmbt_irec got; - struct xfs_defer_ops dfops; struct xfs_trans *tp = NULL; - xfs_fsblock_t first_block; int nimaps, error = 0; bool trimmed; xfs_filblks_t resaligned; @@ -430,23 +423,18 @@ retry: xfs_trans_ijoin(tp, ip, 0); - xfs_defer_init(&dfops, &first_block); nimaps = 1; /* Allocate the entire reservation as unwritten blocks. */ error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, - resblks, imap, &nimaps, &dfops); + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, + resblks, imap, &nimaps); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; xfs_inode_set_cowblocks_tag(ip); /* Finish up. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; - error = xfs_trans_commit(tp); if (error) return error; @@ -458,10 +446,8 @@ retry: if (nimaps == 0) return -ENOSPC; convert: - return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, - &dfops); -out_bmap_cancel: - xfs_defer_cancel(&dfops); + return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); +out_trans_cancel: xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, XFS_QMOPT_RES_REGBLKS); out: @@ -471,69 +457,6 @@ out: } /* - * Find the CoW reservation for a given byte offset of a file. - */ -bool -xfs_reflink_find_cow_mapping( - struct xfs_inode *ip, - xfs_off_t offset, - struct xfs_bmbt_irec *imap) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - xfs_fileoff_t offset_fsb; - struct xfs_bmbt_irec got; - struct xfs_iext_cursor icur; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - - if (!xfs_is_reflink_inode(ip)) - return false; - offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) - return false; - if (got.br_startoff > offset_fsb) - return false; - - trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, - &got); - *imap = got; - return true; -} - -/* - * Trim an extent to end at the next CoW reservation past offset_fsb. - */ -void -xfs_reflink_trim_irec_to_next_cow( - struct xfs_inode *ip, - xfs_fileoff_t offset_fsb, - struct xfs_bmbt_irec *imap) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - struct xfs_bmbt_irec got; - struct xfs_iext_cursor icur; - - if (!xfs_is_reflink_inode(ip)) - return; - - /* Find the extent in the CoW fork. */ - if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) - return; - - /* This is the extent before; try sliding up one. */ - if (got.br_startoff < offset_fsb) { - if (!xfs_iext_next_extent(ifp, &icur, &got)) - return; - } - - if (got.br_startoff >= imap->br_startoff + imap->br_blockcount) - return; - - imap->br_blockcount = got.br_startoff - imap->br_startoff; - trace_xfs_reflink_trim_irec(ip, imap); -} - -/* * Cancel CoW reservations for some block range of an inode. * * If cancel_real is true this function cancels all COW fork extents for the @@ -553,11 +476,9 @@ xfs_reflink_cancel_cow_blocks( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; - xfs_fsblock_t firstfsb; - struct xfs_defer_ops dfops; int error = 0; - if (!xfs_is_reflink_inode(ip)) + if (!xfs_inode_has_cow_data(ip)) return 0; if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) return 0; @@ -581,26 +502,21 @@ xfs_reflink_cancel_cow_blocks( if (error) break; } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { - xfs_defer_init(&dfops, &firstfsb); + ASSERT((*tpp)->t_firstblock == NULLFSBLOCK); /* Free the CoW orphan record. */ - error = xfs_refcount_free_cow_extent(ip->i_mount, - &dfops, del.br_startblock, - del.br_blockcount); + error = xfs_refcount_free_cow_extent(*tpp, + del.br_startblock, del.br_blockcount); if (error) break; - xfs_bmap_add_free(ip->i_mount, &dfops, - del.br_startblock, del.br_blockcount, - NULL); + xfs_bmap_add_free(*tpp, del.br_startblock, + del.br_blockcount, NULL); /* Roll the transaction */ - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(tpp, &dfops); - if (error) { - xfs_defer_cancel(&dfops); + error = xfs_defer_finish(tpp); + if (error) break; - } /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); @@ -623,7 +539,6 @@ next_extent: /* clear tag if cow fork is emptied */ if (!ifp->if_bytes) xfs_inode_clear_cowblocks_tag(ip); - return error; } @@ -696,8 +611,6 @@ xfs_reflink_end_cow( struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; - xfs_fsblock_t firstfsb; - struct xfs_defer_ops dfops; int error; unsigned int resblks; xfs_filblks_t rlen; @@ -764,12 +677,11 @@ xfs_reflink_end_cow( goto prev_extent; /* Unmap the old blocks in the data fork. */ - xfs_defer_init(&dfops, &firstfsb); + ASSERT(tp->t_firstblock == NULLFSBLOCK); rlen = del.br_blockcount; - error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, - &firstfsb, &dfops); + error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1); if (error) - goto out_defer; + goto out_cancel; /* Trim the extent to whatever got unmapped. */ if (rlen) { @@ -779,15 +691,15 @@ xfs_reflink_end_cow( trace_xfs_reflink_cow_remap(ip, &del); /* Free the CoW orphan record. */ - error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, - del.br_startblock, del.br_blockcount); + error = xfs_refcount_free_cow_extent(tp, del.br_startblock, + del.br_blockcount); if (error) - goto out_defer; + goto out_cancel; /* Map the new blocks into the data fork. */ - error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); + error = xfs_bmap_map_extent(tp, ip, &del); if (error) - goto out_defer; + goto out_cancel; /* Charge this new data fork mapping to the on-disk quota. */ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, @@ -796,10 +708,9 @@ xfs_reflink_end_cow( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(&tp, &dfops); + error = xfs_defer_finish(&tp); if (error) - goto out_defer; + goto out_cancel; if (!xfs_iext_get_extent(ifp, &icur, &got)) break; continue; @@ -814,8 +725,6 @@ prev_extent: goto out; return 0; -out_defer: - xfs_defer_cancel(&dfops); out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1070,9 +979,7 @@ xfs_reflink_remap_extent( struct xfs_mount *mp = ip->i_mount; bool real_extent = xfs_bmap_is_real_extent(irec); struct xfs_trans *tp; - xfs_fsblock_t firstfsb; unsigned int resblks; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec uirec; xfs_filblks_t rlen; xfs_filblks_t unmap_len; @@ -1113,11 +1020,10 @@ xfs_reflink_remap_extent( /* Unmap the old blocks in the data fork. */ rlen = unmap_len; while (rlen) { - xfs_defer_init(&dfops, &firstfsb); - error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, - &firstfsb, &dfops); + ASSERT(tp->t_firstblock == NULLFSBLOCK); + error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1); if (error) - goto out_defer; + goto out_cancel; /* * Trim the extent to whatever got unmapped. @@ -1136,14 +1042,14 @@ xfs_reflink_remap_extent( uirec.br_blockcount, uirec.br_startblock); /* Update the refcount tree */ - error = xfs_refcount_increase_extent(mp, &dfops, &uirec); + error = xfs_refcount_increase_extent(tp, &uirec); if (error) - goto out_defer; + goto out_cancel; /* Map the new blocks into the data fork. */ - error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec); + error = xfs_bmap_map_extent(tp, ip, &uirec); if (error) - goto out_defer; + goto out_cancel; /* Update quota accounting. */ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, @@ -1162,10 +1068,9 @@ xfs_reflink_remap_extent( next_extent: /* Process all the deferred stuff. */ - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(&tp, &dfops); + error = xfs_defer_finish(&tp); if (error) - goto out_defer; + goto out_cancel; } error = xfs_trans_commit(tp); @@ -1174,8 +1079,6 @@ next_extent: goto out; return 0; -out_defer: - xfs_defer_cancel(&dfops); out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 1532827ba911..c585ad9552b2 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -18,10 +18,6 @@ extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode); extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); -extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, - struct xfs_bmbt_irec *imap); -extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, - xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap); extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 329d4d26c13e..926ed314ffba 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -761,8 +761,6 @@ xfs_growfs_rt_alloc( struct xfs_buf *bp; /* temporary buffer for zeroing */ xfs_daddr_t d; /* disk block address */ int error; /* error return value */ - xfs_fsblock_t firstblock;/* first block allocated in xaction */ - struct xfs_defer_ops dfops; /* list of freed blocks */ xfs_fsblock_t fsbno; /* filesystem block for bno */ struct xfs_bmbt_irec map; /* block map output */ int nmap; /* number of block maps */ @@ -787,24 +785,20 @@ xfs_growfs_rt_alloc( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_defer_init(&dfops, &firstblock); /* * Allocate blocks to the bitmap file. */ nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, &firstblock, - resblks, &map, &nmap, &dfops); + XFS_BMAPI_METADATA, resblks, &map, + &nmap); if (!error && nmap < 1) error = -ENOSPC; if (error) - goto out_bmap_cancel; + goto out_trans_cancel; /* * Free any blocks freed up in the transaction, then commit. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; error = xfs_trans_commit(tp); if (error) return error; @@ -854,8 +848,6 @@ xfs_growfs_rt_alloc( return 0; -out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); return error; @@ -1215,7 +1207,7 @@ xfs_rtmount_inodes( ASSERT(sbp->sb_rsumino != NULLFSINO); error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); if (error) { - IRELE(mp->m_rbmip); + xfs_irele(mp->m_rbmip); return error; } ASSERT(mp->m_rsumip != NULL); @@ -1227,9 +1219,9 @@ xfs_rtunmount_inodes( struct xfs_mount *mp) { if (mp->m_rbmip) - IRELE(mp->m_rbmip); + xfs_irele(mp->m_rbmip); if (mp->m_rsumip) - IRELE(mp->m_rsumip); + xfs_irele(mp->m_rsumip); } /* diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9d791f158dfe..207ee302b1bb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -65,11 +65,10 @@ enum { Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, - Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier, - Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep, - Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams, - Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, - Opt_uquota, Opt_gquota, Opt_pquota, + Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep, + Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, + Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, + Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, }; @@ -118,14 +117,7 @@ static const match_table_t tokens = { {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */ {Opt_discard, "discard"}, /* Discard unused blocks */ {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ - {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ - - /* Deprecated mount options scheduled for removal */ - {Opt_barrier, "barrier"}, /* use writer barriers for log write and - * unwritten extent conversion */ - {Opt_nobarrier, "nobarrier"}, /* .. disable */ - {Opt_err, NULL}, }; @@ -209,7 +201,6 @@ xfs_parseargs( * Set some default flags that could be cleared by the mount option * parsing. */ - mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; /* @@ -362,14 +353,6 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DAX; break; #endif - case Opt_barrier: - xfs_warn(mp, "%s option is deprecated, ignoring.", p); - mp->m_flags |= XFS_MOUNT_BARRIER; - break; - case Opt_nobarrier: - xfs_warn(mp, "%s option is deprecated, ignoring.", p); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - break; default: xfs_warn(mp, "unknown mount option [%s].", p); return -EINVAL; @@ -487,7 +470,6 @@ xfs_showargs( static struct proc_xfs_info xfs_info_unset[] = { /* the few simple ones we can get from the mount struct */ { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" }, - { XFS_MOUNT_BARRIER, ",nobarrier" }, { XFS_MOUNT_SMALL_INUMS, ",inode64" }, { 0, NULL } }; @@ -1278,14 +1260,6 @@ xfs_fs_remount( token = match_token(p, tokens, args); switch (token) { - case Opt_barrier: - xfs_warn(mp, "%s option is deprecated, ignoring.", p); - mp->m_flags |= XFS_MOUNT_BARRIER; - break; - case Opt_nobarrier: - xfs_warn(mp, "%s option is deprecated, ignoring.", p); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - break; case Opt_inode64: mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); @@ -1860,7 +1834,7 @@ MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_zones(void) { - if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE, + if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), offsetof(struct xfs_ioend, io_inline_bio), BIOSET_NEED_BVECS)) goto out; @@ -1886,7 +1860,7 @@ xfs_init_zones(void) if (!xfs_da_state_zone) goto out_destroy_btree_cur_zone; - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork"); if (!xfs_ifork_zone) goto out_destroy_da_state_zone; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 3783afcb68d2..a3e98c64b6e3 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -163,8 +163,6 @@ xfs_symlink( struct xfs_inode *ip = NULL; int error = 0; int pathlen; - struct xfs_defer_ops dfops; - xfs_fsblock_t first_block; bool unlock_dp_on_error = false; xfs_fileoff_t first_fsb; xfs_filblks_t fs_blocks; @@ -243,13 +241,6 @@ xfs_symlink( goto out_trans_cancel; /* - * Initialize the bmap freelist prior to calling either - * bmapi or the directory create code. - */ - xfs_defer_init(&dfops, &first_block); - tp->t_agfl_dfops = &dfops; - - /* * Allocate an inode for the symlink. */ error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, @@ -290,10 +281,9 @@ xfs_symlink( nmaps = XFS_SYMLINK_MAPS; error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, - XFS_BMAPI_METADATA, &first_block, resblks, - mval, &nmaps, &dfops); + XFS_BMAPI_METADATA, resblks, mval, &nmaps); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; if (resblks) resblks -= fs_blocks; @@ -311,7 +301,7 @@ xfs_symlink( BTOBB(byte_cnt), 0); if (!bp) { error = -ENOMEM; - goto out_bmap_cancel; + goto out_trans_cancel; } bp->b_ops = &xfs_symlink_buf_ops; @@ -338,10 +328,9 @@ xfs_symlink( /* * Create the directory entry for the symlink. */ - error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, - &first_block, &dfops, resblks); + error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -354,10 +343,6 @@ xfs_symlink( xfs_trans_set_sync(tp); } - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto out_bmap_cancel; - error = xfs_trans_commit(tp); if (error) goto out_release_inode; @@ -369,8 +354,6 @@ xfs_symlink( *ipp = ip; return 0; -out_bmap_cancel: - xfs_defer_cancel(&dfops); out_trans_cancel: xfs_trans_cancel(tp); out_release_inode: @@ -381,7 +364,7 @@ out_release_inode: */ if (ip) { xfs_finish_inode_setup(ip); - IRELE(ip); + xfs_irele(ip); } xfs_qm_dqrele(udqp); @@ -403,8 +386,6 @@ xfs_inactive_symlink_rmt( xfs_buf_t *bp; int done; int error; - xfs_fsblock_t first_block; - struct xfs_defer_ops dfops; int i; xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; @@ -443,7 +424,6 @@ xfs_inactive_symlink_rmt( * Find the block(s) so we can inval and unmap them. */ done = 0; - xfs_defer_init(&dfops, &first_block); nmaps = ARRAY_SIZE(mval); error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); @@ -458,28 +438,21 @@ xfs_inactive_symlink_rmt( XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { error = -ENOMEM; - goto error_bmap_cancel; + goto error_trans_cancel; } xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the dfops. */ - error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, - &first_block, &dfops, &done); + error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &done); if (error) - goto error_bmap_cancel; + goto error_trans_cancel; ASSERT(done); - /* - * Commit the first transaction. This logs the EFI and the inode. - */ - xfs_defer_ijoin(&dfops, ip); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto error_bmap_cancel; /* - * Commit the transaction containing extent freeing and EFDs. + * Commit the transaction. This first logs the EFI and the inode, then + * rolls and commits the transaction that frees the extents. */ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp); @@ -498,8 +471,6 @@ xfs_inactive_symlink_rmt( xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; -error_bmap_cancel: - xfs_defer_cancel(&dfops); error_trans_cancel: xfs_trans_cancel(tp); error_unlock: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 972d45d28097..ad315e83bc02 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -310,7 +310,6 @@ DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); DEFINE_BUF_EVENT(xfs_buf_iodone); DEFINE_BUF_EVENT(xfs_buf_submit); -DEFINE_BUF_EVENT(xfs_buf_submit_wait); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); DEFINE_BUF_EVENT(xfs_buf_trylock_fail); @@ -1153,33 +1152,23 @@ DECLARE_EVENT_CLASS(xfs_page_class, __field(loff_t, size) __field(unsigned long, offset) __field(unsigned int, length) - __field(int, delalloc) - __field(int, unwritten) ), TP_fast_assign( - int delalloc = -1, unwritten = -1; - - if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, &unwritten); __entry->dev = inode->i_sb->s_dev; __entry->ino = XFS_I(inode)->i_ino; __entry->pgoff = page_offset(page); __entry->size = i_size_read(inode); __entry->offset = off; __entry->length = len; - __entry->delalloc = delalloc; - __entry->unwritten = unwritten; ), TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " - "length %x delalloc %d unwritten %d", + "length %x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pgoff, __entry->size, __entry->offset, - __entry->length, - __entry->delalloc, - __entry->unwritten) + __entry->length) ) #define DEFINE_PAGE_EVENT(name) \ @@ -1263,9 +1252,6 @@ DEFINE_EVENT(xfs_imap_class, name, \ TP_ARGS(ip, offset, count, type, irec)) DEFINE_IOMAP_EVENT(xfs_map_blocks_found); DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_get_blocks_found); -DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); DEFINE_IOMAP_EVENT(xfs_iomap_alloc); DEFINE_IOMAP_EVENT(xfs_iomap_found); @@ -1304,7 +1290,6 @@ DEFINE_EVENT(xfs_simple_io_class, name, \ TP_ARGS(ip, offset, count)) DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); -DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); @@ -1604,7 +1589,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->wasfromfl = args->wasfromfl; __entry->resv = args->resv; __entry->datatype = args->datatype; - __entry->firstblock = args->firstblock; + __entry->firstblock = args->tp->t_firstblock; ), TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " "prod %u minleft %u total %u alignment %u minalignslop %u " @@ -2228,67 +2213,54 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); /* deferred ops */ struct xfs_defer_pending; -struct xfs_defer_ops; DECLARE_EVENT_CLASS(xfs_defer_class, - TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, - unsigned long caller_ip), - TP_ARGS(mp, dop, caller_ip), + TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), + TP_ARGS(tp, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) - __field(void *, dop) + __field(struct xfs_trans *, tp) __field(char, committed) - __field(char, low) __field(unsigned long, caller_ip) ), TP_fast_assign( - __entry->dev = mp ? mp->m_super->s_dev : 0; - __entry->dop = dop; - __entry->committed = dop->dop_committed; - __entry->low = dop->dop_low; + __entry->dev = tp->t_mountp->m_super->s_dev; + __entry->tp = tp; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ops %p committed %d low %d, caller %pS", + TP_printk("dev %d:%d tp %p caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->dop, - __entry->committed, - __entry->low, + __entry->tp, (char *)__entry->caller_ip) ) #define DEFINE_DEFER_EVENT(name) \ DEFINE_EVENT(xfs_defer_class, name, \ - TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, \ - unsigned long caller_ip), \ - TP_ARGS(mp, dop, caller_ip)) + TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \ + TP_ARGS(tp, caller_ip)) DECLARE_EVENT_CLASS(xfs_defer_error_class, - TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), - TP_ARGS(mp, dop, error), + TP_PROTO(struct xfs_trans *tp, int error), + TP_ARGS(tp, error), TP_STRUCT__entry( __field(dev_t, dev) - __field(void *, dop) + __field(struct xfs_trans *, tp) __field(char, committed) - __field(char, low) __field(int, error) ), TP_fast_assign( - __entry->dev = mp ? mp->m_super->s_dev : 0; - __entry->dop = dop; - __entry->committed = dop->dop_committed; - __entry->low = dop->dop_low; + __entry->dev = tp->t_mountp->m_super->s_dev; + __entry->tp = tp; __entry->error = error; ), - TP_printk("dev %d:%d ops %p committed %d low %d err %d", + TP_printk("dev %d:%d tp %p err %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->dop, - __entry->committed, - __entry->low, + __entry->tp, __entry->error) ) #define DEFINE_DEFER_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_defer_error_class, name, \ - TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \ - TP_ARGS(mp, dop, error)) + TP_PROTO(struct xfs_trans *tp, int error), \ + TP_ARGS(tp, error)) DECLARE_EVENT_CLASS(xfs_defer_pending_class, TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), @@ -2407,7 +2379,6 @@ DEFINE_EVENT(xfs_map_extent_deferred_class, name, \ xfs_exntst_t state), \ TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state)) -DEFINE_DEFER_EVENT(xfs_defer_init); DEFINE_DEFER_EVENT(xfs_defer_cancel); DEFINE_DEFER_EVENT(xfs_defer_trans_roll); DEFINE_DEFER_EVENT(xfs_defer_trans_abort); @@ -2417,9 +2388,8 @@ DEFINE_DEFER_EVENT(xfs_defer_finish_done); DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error); DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error); -DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work); -DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel); -DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel); +DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent); +DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); @@ -3215,8 +3185,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); DEFINE_RW_EVENT(xfs_reflink_reserve_cow); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); -DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping); -DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 524f543c5b82..bedc5a5133a5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -100,6 +100,8 @@ xfs_trans_dup( ntp->t_mountp = tp->t_mountp; INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); + INIT_LIST_HEAD(&ntp->t_dfops); + ntp->t_firstblock = NULLFSBLOCK; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); @@ -118,7 +120,9 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; ntp->t_pflags = tp->t_pflags; - ntp->t_agfl_dfops = tp->t_agfl_dfops; + + /* move deferred ops over to the new tp */ + xfs_defer_move(ntp, tp); xfs_trans_dup_dqinfo(tp, ntp); @@ -273,6 +277,8 @@ xfs_trans_alloc( tp->t_mountp = mp; INIT_LIST_HEAD(&tp->t_items); INIT_LIST_HEAD(&tp->t_busy); + INIT_LIST_HEAD(&tp->t_dfops); + tp->t_firstblock = NULLFSBLOCK; error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error) { @@ -914,12 +920,21 @@ __xfs_trans_commit( int error = 0; int sync = tp->t_flags & XFS_TRANS_SYNC; - ASSERT(!tp->t_agfl_dfops || - !xfs_defer_has_unfinished_work(tp->t_agfl_dfops) || regrant); - trace_xfs_trans_commit(tp, _RET_IP_); /* + * Finish deferred items on final commit. Only permanent transactions + * should ever have deferred ops. + */ + WARN_ON_ONCE(!list_empty(&tp->t_dfops) && + !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { + error = xfs_defer_finish_noroll(&tp); + if (error) + goto out_unreserve; + } + + /* * If there is nothing to be logged by the transaction, * then unlock all of the items associated with the * transaction and free the transaction structure. @@ -1008,6 +1023,9 @@ xfs_trans_cancel( trace_xfs_trans_cancel(tp, _RET_IP_); + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) + xfs_defer_cancel(tp); + /* * See if the caller is relying on us to shut down the * filesystem. This happens in paths where we detect diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 6526314f0b8f..c3d278e96ad1 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -24,7 +24,6 @@ struct xfs_rui_log_item; struct xfs_btree_cur; struct xfs_cui_log_item; struct xfs_cud_log_item; -struct xfs_defer_ops; struct xfs_bui_log_item; struct xfs_bud_log_item; @@ -90,6 +89,11 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, #define XFS_ITEM_LOCKED 2 #define XFS_ITEM_FLUSHING 3 +/* + * Deferred operation item relogging limits. + */ +#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ /* * This is the structure maintained for every active transaction. @@ -102,11 +106,11 @@ typedef struct xfs_trans { unsigned int t_blk_res_used; /* # of resvd blocks used */ unsigned int t_rtx_res; /* # of rt extents resvd */ unsigned int t_rtx_res_used; /* # of resvd rt extents used */ + unsigned int t_flags; /* misc flags */ + xfs_fsblock_t t_firstblock; /* first block allocated */ struct xlog_ticket *t_ticket; /* log mgr ticket */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ - struct xfs_defer_ops *t_agfl_dfops; /* optional agfl fixup dfops */ - unsigned int t_flags; /* misc flags */ int64_t t_icount_delta; /* superblock icount change */ int64_t t_ifree_delta; /* superblock ifree change */ int64_t t_fdblocks_delta; /* superblock fdblocks chg */ @@ -128,6 +132,7 @@ typedef struct xfs_trans { int64_t t_rextslog_delta;/* superblocks rextslog chg */ struct list_head t_items; /* log item descriptors */ struct list_head t_busy; /* list of busy extents */ + struct list_head t_dfops; /* deferred operations */ unsigned long t_pflags; /* saved process flags state */ } xfs_trans_t; @@ -258,7 +263,7 @@ void xfs_refcount_update_init_defer_op(void); struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, struct xfs_cui_log_item *cuip); int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, - struct xfs_cud_log_item *cudp, struct xfs_defer_ops *dfops, + struct xfs_cud_log_item *cudp, enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); @@ -270,9 +275,9 @@ void xfs_bmap_update_init_defer_op(void); struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, struct xfs_bui_log_item *buip); int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, - struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops, - enum xfs_bmap_intent_type type, struct xfs_inode *ip, - int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, xfs_exntst_t state); + struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type, + struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, + xfs_fsblock_t startblock, xfs_filblks_t *blockcount, + xfs_exntst_t state); #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c index a15a5cd867f9..741c558b2179 100644 --- a/fs/xfs/xfs_trans_bmap.c +++ b/fs/xfs/xfs_trans_bmap.c @@ -43,7 +43,6 @@ int xfs_trans_log_finish_bmap_update( struct xfs_trans *tp, struct xfs_bud_log_item *budp, - struct xfs_defer_ops *dop, enum xfs_bmap_intent_type type, struct xfs_inode *ip, int whichfork, @@ -54,7 +53,7 @@ xfs_trans_log_finish_bmap_update( { int error; - error = xfs_bmap_finish_one(tp, dop, ip, type, whichfork, startoff, + error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, startblock, blockcount, state); /* @@ -176,7 +175,6 @@ xfs_bmap_update_create_done( STATIC int xfs_bmap_update_finish_item( struct xfs_trans *tp, - struct xfs_defer_ops *dop, struct list_head *item, void *done_item, void **state) @@ -187,7 +185,7 @@ xfs_bmap_update_finish_item( bmap = container_of(item, struct xfs_bmap_intent, bi_list); count = bmap->bi_bmap.br_blockcount; - error = xfs_trans_log_finish_bmap_update(tp, done_item, dop, + error = xfs_trans_log_finish_bmap_update(tp, done_item, bmap->bi_type, bmap->bi_owner, bmap->bi_whichfork, bmap->bi_bmap.br_startoff, diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index bd66c76f55e6..855c0b651fd4 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -171,7 +171,6 @@ xfs_extent_free_create_done( STATIC int xfs_extent_free_finish_item( struct xfs_trans *tp, - struct xfs_defer_ops *dop, struct list_head *item, void *done_item, void **state) @@ -226,7 +225,6 @@ static const struct xfs_defer_op_type xfs_extent_free_defer_type = { STATIC int xfs_agfl_free_finish_item( struct xfs_trans *tp, - struct xfs_defer_ops *dop, struct list_head *item, void *done_item, void **state) diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c index 46dd4fca8aa7..523c55663954 100644 --- a/fs/xfs/xfs_trans_refcount.c +++ b/fs/xfs/xfs_trans_refcount.c @@ -42,7 +42,6 @@ int xfs_trans_log_finish_refcount_update( struct xfs_trans *tp, struct xfs_cud_log_item *cudp, - struct xfs_defer_ops *dop, enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, xfs_extlen_t blockcount, @@ -52,7 +51,7 @@ xfs_trans_log_finish_refcount_update( { int error; - error = xfs_refcount_finish_one(tp, dop, type, startblock, + error = xfs_refcount_finish_one(tp, type, startblock, blockcount, new_fsb, new_len, pcur); /* @@ -169,7 +168,6 @@ xfs_refcount_update_create_done( STATIC int xfs_refcount_update_finish_item( struct xfs_trans *tp, - struct xfs_defer_ops *dop, struct list_head *item, void *done_item, void **state) @@ -180,7 +178,7 @@ xfs_refcount_update_finish_item( int error; refc = container_of(item, struct xfs_refcount_intent, ri_list); - error = xfs_trans_log_finish_refcount_update(tp, done_item, dop, + error = xfs_trans_log_finish_refcount_update(tp, done_item, refc->ri_type, refc->ri_startblock, refc->ri_blockcount, diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c index 726d8e2c0558..05b00e40251f 100644 --- a/fs/xfs/xfs_trans_rmap.c +++ b/fs/xfs/xfs_trans_rmap.c @@ -193,7 +193,6 @@ xfs_rmap_update_create_done( STATIC int xfs_rmap_update_finish_item( struct xfs_trans *tp, - struct xfs_defer_ops *dop, struct list_head *item, void *done_item, void **state) |