From cc7e79b168a552152299bd8a8254dc099aacc993 Mon Sep 17 00:00:00 2001 From: Wendy Cheng Date: Fri, 5 Oct 2007 00:27:58 -0400 Subject: [GFS2] Handle multiple glock demote requests Fix a race condition where multiple glock demote requests are sent to a node back-to-back. This patch does a check inside handle_callback() to see whether a demote request is in progress. If true, it sets a flag to make sure run_queue() will loop again to handle the new request, instead of erronously setting gl_demote_state to a different state. Signed-off-by: S. Wendy Cheng Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 15 ++++++++++++++- fs/gfs2/incore.h | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a37efe4aae6f..104e83ff874f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -567,7 +567,10 @@ static int rq_demote(struct gfs2_glock *gl) gfs2_demote_wake(gl); return 0; } + set_bit(GLF_LOCK, &gl->gl_flags); + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_UNLOCKED || gl->gl_state != LM_ST_EXCLUSIVE) { spin_unlock(&gl->gl_spin); @@ -576,7 +579,9 @@ static int rq_demote(struct gfs2_glock *gl) spin_unlock(&gl->gl_spin); gfs2_glock_xmote_th(gl, NULL); } + spin_lock(&gl->gl_spin); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); return 0; } @@ -606,6 +611,11 @@ static void run_queue(struct gfs2_glock *gl) } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { blocked = rq_demote(gl); + if (gl->gl_waiters2 && !blocked) { + set_bit(GLF_DEMOTE, &gl->gl_flags); + gl->gl_demote_state = LM_ST_UNLOCKED; + } + gl->gl_waiters2 = 0; } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); @@ -722,7 +732,10 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, } } else if (gl->gl_demote_state != LM_ST_UNLOCKED && gl->gl_demote_state != state) { - gl->gl_demote_state = LM_ST_UNLOCKED; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gl->gl_waiters2 = 1; + else + gl->gl_demote_state = LM_ST_UNLOCKED; } spin_unlock(&gl->gl_spin); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index eaddfb5a8e6f..662182bfbff7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -171,6 +171,7 @@ enum { GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DIRTY = 5, + GLF_DEMOTE_IN_PROGRESS = 6, }; struct gfs2_glock { @@ -190,6 +191,7 @@ struct gfs2_glock { struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ struct list_head gl_waiters3; /* HIF_PROMOTE */ + int gl_waiters2; /* GIF_DEMOTE */ const struct gfs2_glock_operations *gl_ops; -- cgit v1.2.3 From 51ff87bdd9f21a5d3672517b75d25ab5842d94a8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 15 Oct 2007 14:42:35 +0100 Subject: [GFS2] Clean up internal read function As requested by Christoph, this patch cleans up GFS2's internal read function so that it no longer uses the do_generic_mapping_read function. This function is obsolete and GFS2 is the last user of it. As a side effect the internal read code gets smaller and easier to read and gfs2_readpage is split into two. One function has the locking and the other function has the rest of the logic. Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig --- fs/gfs2/inode.c | 1 - fs/gfs2/ops_address.c | 151 +++++++++++++++++++++++++++++++++----------------- fs/gfs2/ops_address.h | 3 + fs/gfs2/ops_file.c | 45 --------------- fs/gfs2/ops_file.h | 24 -------- fs/gfs2/ops_inode.h | 4 ++ fs/gfs2/quota.c | 7 +-- fs/gfs2/rgrp.c | 2 +- 8 files changed, 111 insertions(+), 126 deletions(-) delete mode 100644 fs/gfs2/ops_file.h diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5f6dc32946cd..ad0fe373dca5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -31,7 +31,6 @@ #include "log.h" #include "meta_io.h" #include "ops_address.h" -#include "ops_file.h" #include "ops_inode.h" #include "quota.h" #include "rgrp.h" diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9679f8b9870d..9bb24b1d9c05 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -32,7 +33,6 @@ #include "quota.h" #include "trans.h" #include "rgrp.h" -#include "ops_file.h" #include "super.h" #include "util.h" #include "glops.h" @@ -231,62 +231,115 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) /** - * gfs2_readpage - readpage with locking - * @file: The file to read a page for. N.B. This may be NULL if we are - * reading an internal file. + * __gfs2_readpage - readpage + * @file: The file to read a page for * @page: The page to read * - * Returns: errno + * This is the core of gfs2's readpage. Its used by the internal file + * reading code as in that case we already hold the glock. Also its + * called by gfs2_readpage() once the required lock has been granted. + * */ -static int gfs2_readpage(struct file *file, struct page *page) +static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - struct gfs2_file *gf = NULL; - struct gfs2_holder gh; int error; - int do_unlock = 0; - - if (likely(file != &gfs2_internal_file_sentinel)) { - if (file) { - gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); - do_unlock = 1; - error = gfs2_glock_nq_atime(&gh); - if (unlikely(error)) - goto out_unlock; - } -skip_lock: if (gfs2_is_stuffed(ip)) { error = stuffed_readpage(ip, page); unlock_page(page); - } else + } else { error = mpage_readpage(page, gfs2_get_block); + } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = -EIO; + return -EIO; - if (do_unlock) { - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); + return error; +} + +/** + * gfs2_readpage - read a page of a file + * @file: The file to read + * @page: The page of the file + * + * This deals with the locking required. If the GFF_EXLOCK flags is set + * then we already hold the glock (due to page fault) and thus we call + * __gfs2_readpage() directly. Otherwise we use a trylock in order to + * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE + * in the event that we are unable to get the lock. + */ + +static int gfs2_readpage(struct file *file, struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + struct gfs2_holder gh; + int error; + + if (file) { + struct gfs2_file *gf = file->private_data; + if (test_bit(GFF_EXLOCK, &gf->f_flags)) + return __gfs2_readpage(file, page); } + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); + error = gfs2_glock_nq_atime(&gh); + if (unlikely(error)) { + unlock_page(page); + goto out; + } + error = __gfs2_readpage(file, page); + gfs2_glock_dq(&gh); out: - return error; -out_unlock: - unlock_page(page); + gfs2_holder_uninit(&gh); if (error == GLR_TRYFAILED) { - error = AOP_TRUNCATED_PAGE; yield(); + return AOP_TRUNCATED_PAGE; } - if (do_unlock) - gfs2_holder_uninit(&gh); - goto out; + return error; +} + +/** + * gfs2_internal_read - read an internal file + * @ip: The gfs2 inode + * @ra_state: The readahead state (or NULL for no readahead) + * @buf: The buffer to fill + * @pos: The file position + * @size: The amount to read + * + */ + +int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size) +{ + struct address_space *mapping = ip->i_inode.i_mapping; + unsigned long index = *pos / PAGE_CACHE_SIZE; + unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); + unsigned copied = 0; + unsigned amt; + struct page *page; + void *p; + + do { + amt = size - copied; + if (offset + size > PAGE_CACHE_SIZE) + amt = PAGE_CACHE_SIZE - offset; + page = read_cache_page(mapping, index, __gfs2_readpage, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + p = kmap_atomic(page, KM_USER0); + memcpy(buf + copied, p + offset, amt); + kunmap_atomic(p, KM_USER0); + mark_page_accessed(page); + page_cache_release(page); + copied += amt; + index++; + offset = 0; + } while(copied < size); + (*pos) += size; + return size; } /** @@ -314,21 +367,19 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, int ret = 0; int do_unlock = 0; - if (likely(file != &gfs2_internal_file_sentinel)) { - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_TRY_1CB|GL_ATIME, &gh); - do_unlock = 1; - ret = gfs2_glock_nq_atime(&gh); - if (ret == GLR_TRYFAILED) - goto out_noerror; - if (unlikely(ret)) - goto out_unlock; + if (file) { + struct gfs2_file *gf = file->private_data; + if (test_bit(GFF_EXLOCK, &gf->f_flags)) + goto skip_lock; } + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, + LM_FLAG_TRY_1CB|GL_ATIME, &gh); + do_unlock = 1; + ret = gfs2_glock_nq_atime(&gh); + if (ret == GLR_TRYFAILED) + goto out_noerror; + if (unlikely(ret)) + goto out_unlock; skip_lock: if (!gfs2_is_stuffed(ip)) ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index fa1b5b3d28b9..e8fe83fcd583 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h @@ -18,5 +18,8 @@ extern const struct address_space_operations gfs2_file_aops; extern int gfs2_get_block(struct inode *inode, sector_t lblock, struct buffer_head *bh_result, int create); extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); +extern int gfs2_internal_read(struct gfs2_inode *ip, + struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size); #endif /* __OPS_ADDRESS_DOT_H__ */ diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index bb11fd6752d3..a729c86b8be1 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -33,7 +33,6 @@ #include "lm.h" #include "log.h" #include "meta_io.h" -#include "ops_file.h" #include "ops_vm.h" #include "quota.h" #include "rgrp.h" @@ -41,50 +40,6 @@ #include "util.h" #include "eaops.h" -/* - * Most fields left uninitialised to catch anybody who tries to - * use them. f_flags set to prevent file_accessed() from touching - * any other part of this. Its use is purely as a flag so that we - * know (in readpage()) whether or not do to locking. - */ -struct file gfs2_internal_file_sentinel = { - .f_flags = O_NOATIME|O_RDONLY, -}; - -static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long count = desc->count; - - if (size > count) - size = count; - - kaddr = kmap(page); - memcpy(desc->arg.data, kaddr + offset, size); - kunmap(page); - - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - struct inode *inode = &ip->i_inode; - read_descriptor_t desc; - desc.written = 0; - desc.arg.data = buf; - desc.count = size; - desc.error = 0; - do_generic_mapping_read(inode->i_mapping, ra_state, - &gfs2_internal_file_sentinel, pos, &desc, - gfs2_read_actor); - return desc.written ? desc.written : desc.error; -} - /** * gfs2_llseek - seek to a location in a file * @file: the file diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h deleted file mode 100644 index 7e5d8ec9c846..000000000000 --- a/fs/gfs2/ops_file.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_FILE_DOT_H__ -#define __OPS_FILE_DOT_H__ - -#include -struct gfs2_inode; - -extern struct file gfs2_internal_file_sentinel; -extern int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_inode_flags(struct inode *inode); -extern const struct file_operations gfs2_file_fops; -extern const struct file_operations gfs2_dir_fops; - -#endif /* __OPS_FILE_DOT_H__ */ diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index 34f0caac1a03..edb519cb05ee 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h @@ -16,5 +16,9 @@ extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; extern const struct inode_operations gfs2_symlink_iops; extern const struct inode_operations gfs2_dev_iops; +extern const struct file_operations gfs2_file_fops; +extern const struct file_operations gfs2_dir_fops; + +extern void gfs2_set_inode_flags(struct inode *inode); #endif /* __OPS_INODE_DOT_H__ */ diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index addb51e0f135..4996f0ef3007 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -59,7 +59,6 @@ #include "super.h" #include "trans.h" #include "inode.h" -#include "ops_file.h" #include "ops_address.h" #include "util.h" @@ -793,11 +792,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder i_gh; struct gfs2_quota_host q; char buf[sizeof(struct gfs2_quota)]; - struct file_ra_state ra_state; int error; struct gfs2_quota_lvb *qlvb; - file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping); restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); if (error) @@ -820,8 +817,8 @@ restart: memset(buf, 0, sizeof(struct gfs2_quota)); pos = qd2offset(qd); - error = gfs2_internal_read(ip, &ra_state, buf, - &pos, sizeof(struct gfs2_quota)); + error = gfs2_internal_read(ip, NULL, buf, &pos, + sizeof(struct gfs2_quota)); if (error < 0) goto fail_gunlock; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 708c287e1d0e..09848aac45f6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -25,10 +25,10 @@ #include "rgrp.h" #include "super.h" #include "trans.h" -#include "ops_file.h" #include "util.h" #include "log.h" #include "inode.h" +#include "ops_address.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -- cgit v1.2.3 From 3cc3f710ce0effe397b830826a1a081fa81f11c7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 15 Oct 2007 15:40:33 +0100 Subject: [GFS2] Use ->page_mkwrite() for mmap() This cleans up the mmap() code path for GFS2 by implementing the page_mkwrite function for GFS2. We are thus able to use the generic filemap_fault function for our ->fault() implementation. This now means that shared writable mappings will be much more efficiently shared across the cluster if there is a reasonable proportion of read activity (the greater proportion, the better). As a side effect, it also reduces the size of the code, removes special cases from readpage and readpages, and makes the code path easier to follow. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/glops.c | 9 +-- fs/gfs2/incore.h | 8 --- fs/gfs2/ops_address.c | 45 +++----------- fs/gfs2/ops_file.c | 131 +++++++++++++++++++++++++++++++++++--- fs/gfs2/ops_vm.c | 169 -------------------------------------------------- fs/gfs2/ops_vm.h | 18 ------ 7 files changed, 131 insertions(+), 251 deletions(-) delete mode 100644 fs/gfs2/ops_vm.c delete mode 100644 fs/gfs2/ops_vm.h diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 04ad0caebedb..8fff11058cee 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ - ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ + ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4670dcb2a877..110f03d66f4b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) if (!ip || !S_ISREG(inode->i_mode)) return; - if (!test_bit(GIF_PAGED, &ip->i_flags)) - return; - unmap_shared_mapping_range(inode->i_mapping, 0, 0); - if (test_bit(GIF_SW_PAGED, &ip->i_flags)) set_bit(GLF_DIRTY, &gl->gl_flags); - clear_bit(GIF_SW_PAGED, &ip->i_flags); } /** @@ -234,10 +229,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) set_bit(GIF_INVALID, &ip->i_flags); } - if (ip && S_ISREG(ip->i_inode.i_mode)) { + if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); - clear_bit(GIF_PAGED, &ip->i_flags); - } } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 662182bfbff7..55c72f01cf31 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -241,7 +241,6 @@ struct gfs2_alloc { enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, - GIF_PAGED = 2, GIF_SW_PAGED = 3, }; @@ -289,19 +288,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode) return container_of(inode, struct gfs2_inode, i_inode); } -/* To be removed? */ static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) { return inode->i_sb->s_fs_info; } -enum { - GFF_DID_DIRECT_ALLOC = 0, - GFF_EXLOCK = 1, -}; - struct gfs2_file { - unsigned long f_flags; /* GFF_... */ struct mutex f_fl_mutex; struct gfs2_holder f_fl_gh; }; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9bb24b1d9c05..1696e5d9d112 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -265,9 +265,7 @@ static int __gfs2_readpage(void *file, struct page *page) * @file: The file to read * @page: The page of the file * - * This deals with the locking required. If the GFF_EXLOCK flags is set - * then we already hold the glock (due to page fault) and thus we call - * __gfs2_readpage() directly. Otherwise we use a trylock in order to + * This deals with the locking required. We use a trylock in order to * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE * in the event that we are unable to get the lock. */ @@ -278,12 +276,6 @@ static int gfs2_readpage(struct file *file, struct page *page) struct gfs2_holder gh; int error; - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - return __gfs2_readpage(file, page); - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); error = gfs2_glock_nq_atime(&gh); if (unlikely(error)) { @@ -354,9 +346,8 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. - * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as - * well as read-ahead. */ + static int gfs2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { @@ -364,40 +355,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - int ret = 0; - int do_unlock = 0; + int ret; - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_TRY_1CB|GL_ATIME, &gh); - do_unlock = 1; + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); ret = gfs2_glock_nq_atime(&gh); - if (ret == GLR_TRYFAILED) - goto out_noerror; if (unlikely(ret)) - goto out_unlock; -skip_lock: + goto out_uninit; if (!gfs2_is_stuffed(ip)) ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); - - if (do_unlock) { - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); - } -out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) ret = -EIO; return ret; -out_noerror: - ret = 0; -out_unlock: - if (do_unlock) - gfs2_holder_uninit(&gh); - goto out; } /** diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index a729c86b8be1..6f3aeb059c61 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -33,7 +33,6 @@ #include "lm.h" #include "log.h" #include "meta_io.h" -#include "ops_vm.h" #include "quota.h" #include "rgrp.h" #include "trans.h" @@ -169,7 +168,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) if (put_user(fsflags, ptr)) error = -EFAULT; - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return error; } @@ -293,6 +292,125 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOTTY; } +/** + * gfs2_allocate_page_backing - Use bmap to allocate blocks + * @page: The (locked) page to allocate backing for + * + * We try to allocate all the blocks required for the page in + * one go. This might fail for various reasons, so we keep + * trying until all the blocks to back this page are allocated. + * If some of the blocks are already allocated, thats ok too. + */ + +static int gfs2_allocate_page_backing(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct buffer_head bh; + unsigned long size = PAGE_CACHE_SIZE; + u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + do { + bh.b_state = 0; + bh.b_size = size; + gfs2_block_map(inode, lblock, 1, &bh); + if (!buffer_mapped(&bh)) + return -EIO; + size -= bh.b_size; + lblock += (bh.b_size >> inode->i_blkbits); + } while(size > 0); + return 0; +} + +/** + * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable + * @vma: The virtual memory area + * @page: The page which is about to become writable + * + * When the page becomes writable, we need to ensure that we have + * blocks allocated on disk to back that page. + */ + +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned long last_index; + u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits); + unsigned int data_blocks, ind_blocks, rblocks; + int alloc_required = 0; + struct gfs2_holder gh; + struct gfs2_alloc *al; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); + ret = gfs2_glock_nq_atime(&gh); + if (ret) + goto out; + + set_bit(GIF_SW_PAGED, &ip->i_flags); + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); + ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); + if (ret || !alloc_required) + goto out_unlock; + + ip->i_alloc.al_requested = 0; + al = gfs2_alloc_get(ip); + ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (ret) + goto out_alloc_put; + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + if (ret) + goto out_quota_unlock; + al->al_requested = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip); + if (ret) + goto out_quota_unlock; + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + ret = gfs2_trans_begin(sdp, rblocks, 0); + if (ret) + goto out_trans_fail; + + lock_page(page); + ret = -EINVAL; + last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; + if (page->index > last_index) + goto out_unlock_page; + if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) + goto out_unlock_page; + if (gfs2_is_stuffed(ip)) { + ret = gfs2_unstuff_dinode(ip, page); + if (ret) + goto out_unlock_page; + } + ret = gfs2_allocate_page_backing(page); + +out_unlock_page: + unlock_page(page); + gfs2_trans_end(sdp); +out_trans_fail: + gfs2_inplace_release(ip); +out_quota_unlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&gh); +out: + gfs2_holder_uninit(&gh); + return ret; +} + +static struct vm_operations_struct gfs2_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = gfs2_page_mkwrite, +}; + /** * gfs2_mmap - @@ -315,14 +433,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) return error; } - /* This is VM_MAYWRITE instead of VM_WRITE because a call - to mprotect() can turn on VM_WRITE later. */ - - if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == - (VM_MAYSHARE | VM_MAYWRITE)) - vma->vm_ops = &gfs2_vm_ops_sharewrite; - else - vma->vm_ops = &gfs2_vm_ops_private; + vma->vm_ops = &gfs2_vm_ops; gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c deleted file mode 100644 index 927d739d4685..000000000000 --- a/fs/gfs2/ops_vm.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "bmap.h" -#include "glock.h" -#include "inode.h" -#include "ops_vm.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" - -static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); - - set_bit(GIF_PAGED, &ip->i_flags); - return filemap_fault(vma, vmf); -} - -static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned long index = page->index; - u64 lblock = index << (PAGE_CACHE_SHIFT - - sdp->sd_sb.sb_bsize_shift); - unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; - struct gfs2_alloc *al; - unsigned int data_blocks, ind_blocks; - unsigned int x; - int error; - - al = gfs2_alloc_get(ip); - - error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto out; - - error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); - if (error) - goto out_gunlock_q; - - gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - - error = gfs2_inplace_reserve(ip); - if (error) - goto out_gunlock_q; - - error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + - ind_blocks + RES_DINODE + - RES_STATFS + RES_QUOTA, 0); - if (error) - goto out_ipres; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (error) - goto out_trans; - } - - for (x = 0; x < blocks; ) { - u64 dblock; - unsigned int extlen; - int new = 1; - - error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); - if (error) - goto out_trans; - - lblock += extlen; - x += extlen; - } - - gfs2_assert_warn(sdp, al->al_alloced); - -out_trans: - gfs2_trans_end(sdp); -out_ipres: - gfs2_inplace_release(ip); -out_gunlock_q: - gfs2_quota_unlock(ip); -out: - gfs2_alloc_put(ip); - return error; -} - -static int gfs2_sharewrite_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct gfs2_file *gf = file->private_data; - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int alloc_required; - int error; - int ret = 0; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); - if (error) - goto out; - - set_bit(GIF_PAGED, &ip->i_flags); - set_bit(GIF_SW_PAGED, &ip->i_flags); - - error = gfs2_write_alloc_required(ip, - (u64)vmf->pgoff << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &alloc_required); - if (error) { - ret = VM_FAULT_OOM; /* XXX: are these right? */ - goto out_unlock; - } - - set_bit(GFF_EXLOCK, &gf->f_flags); - ret = filemap_fault(vma, vmf); - clear_bit(GFF_EXLOCK, &gf->f_flags); - if (ret & VM_FAULT_ERROR) - goto out_unlock; - - if (alloc_required) { - /* XXX: do we need to drop page lock around alloc_page_backing?*/ - error = alloc_page_backing(ip, vmf->page); - if (error) { - /* - * VM_FAULT_LOCKED should always be the case for - * filemap_fault, but it may not be in a future - * implementation. - */ - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - page_cache_release(vmf->page); - ret = VM_FAULT_OOM; - goto out_unlock; - } - set_page_dirty(vmf->page); - } - -out_unlock: - gfs2_glock_dq_uninit(&i_gh); -out: - return ret; -} - -struct vm_operations_struct gfs2_vm_ops_private = { - .fault = gfs2_private_fault, -}; - -struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .fault = gfs2_sharewrite_fault, -}; - diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h deleted file mode 100644 index 4ae8f43ed5e3..000000000000 --- a/fs/gfs2/ops_vm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_VM_DOT_H__ -#define __OPS_VM_DOT_H__ - -#include - -extern struct vm_operations_struct gfs2_vm_ops_private; -extern struct vm_operations_struct gfs2_vm_ops_sharewrite; - -#endif /* __OPS_VM_DOT_H__ */ -- cgit v1.2.3 From f91a0d3e24e4b0198be5fae20d45a35c40d1efce Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 15 Oct 2007 16:29:05 +0100 Subject: [GFS2] Remove useless i_cache from inodes The i_cache was designed to keep references to the indirect blocks used during block mapping so that they didn't have to be looked up continually. The idea failed because there are too many places where the i_cache needs to be freed, and this has in the past been the cause of many bugs. In addition there was no performance benefit being gained since the disk blocks in question were cached anyway. So this patch removes it in order to simplify the code to prepare for other changes which would otherwise have had to add further support for this feature. Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 21 +------------ fs/gfs2/incore.h | 2 -- fs/gfs2/inode.c | 13 ++++----- fs/gfs2/log.c | 6 ++-- fs/gfs2/log.h | 2 +- fs/gfs2/main.c | 1 - fs/gfs2/meta_io.c | 81 +++++++-------------------------------------------- fs/gfs2/meta_io.h | 1 - fs/gfs2/ops_address.c | 1 - fs/gfs2/super.c | 1 - 10 files changed, 19 insertions(+), 110 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 110f03d66f4b..ba124230393b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -56,7 +56,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) bd = list_entry(head->next, struct gfs2_bufdata, bd_ail_gl_list); bh = bd->bd_bh; - gfs2_remove_from_ail(NULL, bd); + gfs2_remove_from_ail(bd); bd->bd_bh = NULL; bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; @@ -286,23 +286,6 @@ static int inode_go_lock(struct gfs2_holder *gh) return error; } -/** - * inode_go_unlock - operation done before an inode lock is unlocked by a - * process - * @gl: the glock - * @flags: - * - */ - -static void inode_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_inode *ip = gl->gl_object; - - if (ip) - gfs2_meta_cache_flush(ip); -} - /** * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock * @gl: the glock @@ -377,7 +360,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode)); j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -437,7 +419,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, - .go_unlock = inode_go_unlock, .go_type = LM_TYPE_INODE, .go_min_hold_time = HZ / 10, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 55c72f01cf31..5662ff9f86e1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -275,8 +275,6 @@ struct gfs2_inode { spinlock_t i_spin; struct rw_semaphore i_rw_mutex; unsigned long i_last_pfault; - - struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; }; /* diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index ad0fe373dca5..af493fc6c8ce 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -293,11 +293,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) return 0; } -static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) -{ - ip->i_cache[0] = bh; -} - /** * gfs2_inode_refresh - Refresh the incore copy of the dinode * @ip: The GFS2 inode @@ -965,7 +960,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; u64 generation; - struct buffer_head *bh=NULL; + struct buffer_head *bh = NULL; if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -1002,8 +997,6 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (IS_ERR(inode)) goto fail_gunlock2; - gfs2_inode_bh(GFS2_I(inode), bh); - error = gfs2_inode_refresh(GFS2_I(inode)); if (error) goto fail_gunlock2; @@ -1020,6 +1013,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; + if (bh) + brelse(bh); if (!inode) return ERR_PTR(-ENOMEM); return inode; @@ -1031,6 +1026,8 @@ fail_gunlock2: fail_gunlock: gfs2_glock_dq(ghs); fail: + if (bh) + brelse(bh); return ERR_PTR(error); } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 7df702473252..70b404d2774b 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -68,14 +68,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, * */ -void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) +void gfs2_remove_from_ail(struct gfs2_bufdata *bd) { bd->bd_ail = NULL; list_del_init(&bd->bd_ail_st_list); list_del_init(&bd->bd_ail_gl_list); atomic_dec(&bd->bd_gl->gl_ail_count); - if (mapping) - gfs2_meta_cache_flush(GFS2_I(mapping->host)); brelse(bd->bd_bh); } @@ -248,7 +246,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) bd = list_entry(head->prev, struct gfs2_bufdata, bd_ail_st_list); gfs2_assert(sdp, bd->bd_ail == ai); - gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); + gfs2_remove_from_ail(bd); } } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index dae282400627..24e7161486e2 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -59,7 +59,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, struct buffer_head *real); void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); -void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); +void gfs2_remove_from_ail(struct gfs2_bufdata *bd); void gfs2_log_shutdown(struct gfs2_sbd *sdp); void gfs2_meta_syncfs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7ecfe0d3a491..653fd5a6203a 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -31,7 +31,6 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) inode_init_once(&ip->i_inode); spin_lock_init(&ip->i_spin); init_rwsem(&ip->i_rw_mutex); - memset(ip->i_cache, 0, sizeof(ip->i_cache)); } static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4da423985e4f..01ef90253ed1 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -317,7 +317,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int } if (bd) { if (bd->bd_ail) { - gfs2_remove_from_ail(NULL, bd); + gfs2_remove_from_ail(bd); bh->b_private = NULL; bd->bd_bh = NULL; bd->bd_blkno = bh->b_blocknr; @@ -357,32 +357,6 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) } } -/** - * gfs2_meta_cache_flush - get rid of any references on buffers for this inode - * @ip: The GFS2 inode - * - * This releases buffers that are in the most-recently-used array of - * blocks used for indirect block addressing for this inode. - */ - -void gfs2_meta_cache_flush(struct gfs2_inode *ip) -{ - struct buffer_head **bh_slot; - unsigned int x; - - spin_lock(&ip->i_spin); - - for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { - bh_slot = &ip->i_cache[x]; - if (*bh_slot) { - brelse(*bh_slot); - *bh_slot = NULL; - } - } - - spin_unlock(&ip->i_spin); -} - /** * gfs2_meta_indirect_buffer - Get a metadata buffer * @ip: The GFS2 inode @@ -391,8 +365,6 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip) * @new: Non-zero if we may create a new buffer * @bhp: the buffer is returned here * - * Try to use the gfs2_inode's MRU metadata tree cache. - * * Returns: errno */ @@ -401,58 +373,25 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_glock *gl = ip->i_gl; - struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; - int in_cache = 0; - - BUG_ON(!gl); - BUG_ON(!sdp); - - spin_lock(&ip->i_spin); - if (*bh_slot && (*bh_slot)->b_blocknr == num) { - bh = *bh_slot; - get_bh(bh); - in_cache = 1; - } - spin_unlock(&ip->i_spin); - - if (!bh) - bh = getbuf(gl, num, CREATE); - - if (!bh) - return -ENOBUFS; + struct buffer_head *bh; + int ret = 0; if (new) { - if (gfs2_assert_warn(sdp, height)) - goto err; - meta_prep_new(bh); + BUG_ON(height == 0); + bh = gfs2_meta_new(gl, num); gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); } else { u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; - if (!buffer_uptodate(bh)) { - ll_rw_block(READ_META, 1, &bh); - if (gfs2_meta_wait(sdp, bh)) - goto err; + ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); + if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { + brelse(bh); + ret = -EIO; } - if (gfs2_metatype_check(sdp, bh, mtype)) - goto err; - } - - if (!in_cache) { - spin_lock(&ip->i_spin); - if (*bh_slot) - brelse(*bh_slot); - *bh_slot = bh; - get_bh(bh); - spin_unlock(&ip->i_spin); } - *bhp = bh; - return 0; -err: - brelse(bh); - return -EIO; + return ret; } /** diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index b7048222ebb4..73e3b1c76fe1 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -56,7 +56,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_meta_cache_flush(struct gfs2_inode *ip); int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, int new, struct buffer_head **bhp); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 1696e5d9d112..4c4ef7f59909 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -154,7 +154,6 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); if (done_trans) gfs2_trans_end(sdp); - gfs2_meta_cache_flush(ip); return error; out_ignore: diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index dd3e737f528e..5183dfb9342a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -543,7 +543,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) if (error) return error; - gfs2_meta_cache_flush(ip); j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); -- cgit v1.2.3 From e7e36f143565d14950055c893cfaf4400ad64d34 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 16 Oct 2007 11:47:04 +0100 Subject: [GFS2] Remove unused field in struct gfs2_inode Removes a field that is not used. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - fs/gfs2/ops_super.c | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5662ff9f86e1..e53da7d4cfff 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -274,7 +274,6 @@ struct gfs2_inode { spinlock_t i_spin; struct rw_semaphore i_rw_mutex; - unsigned long i_last_pfault; }; /* diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 950f31460e8b..5e524217944a 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -487,7 +487,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) if (ip) { ip->i_flags = 0; ip->i_gl = NULL; - ip->i_last_pfault = jiffies; } return &ip->i_inode; } -- cgit v1.2.3 From bf36a713169432643d4fc7eeb4e0ace96d791d26 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 17 Oct 2007 08:35:19 +0100 Subject: [GFS2] Add gfs2_is_writeback() This adds a function "gfs2_is_writeback()" along the lines of the existing "gfs2_is_jdata()" in order to clean up the code and make the various tests for the inode mode more obvious. It also fixes the PageChecked() logic where we were resetting the flag too early in the case of an error path. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 6 ++---- fs/gfs2/incore.h | 2 +- fs/gfs2/inode.h | 6 ++++++ fs/gfs2/ops_address.c | 10 ++++------ 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93fa427bb5f5..1cfd493e30fb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -59,7 +59,6 @@ struct strip_mine { static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, u64 block, struct page *page) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; struct buffer_head *bh; int release = 0; @@ -95,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, set_buffer_uptodate(bh); if (!gfs2_is_jdata(ip)) mark_buffer_dirty(bh); - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); if (release) { @@ -879,7 +878,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping) { struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); loff_t from = inode->i_size; unsigned long index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -931,7 +929,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) err = 0; } - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); zero_user_page(page, offset, length, KM_USER0); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e53da7d4cfff..82dfe9bd270b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -285,7 +285,7 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode) return container_of(inode, struct gfs2_inode, i_inode); } -static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) +static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode) { return inode->i_sb->s_fs_info; } diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 351ac87ab384..bed3dc212a18 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -20,6 +20,12 @@ static inline int gfs2_is_jdata(const struct gfs2_inode *ip) return ip->i_di.di_flags & GFS2_DIF_JDATA; } +static inline int gfs2_is_writeback(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip); +} + static inline int gfs2_is_dir(const struct gfs2_inode *ip) { return S_ISDIR(ip->i_inode.i_mode); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 4c4ef7f59909..ed154af86171 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -138,12 +138,11 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) return 0; /* don't care */ } - if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && - PageChecked(page)) { - ClearPageChecked(page); + if (PageChecked(page)) { error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); if (error) goto out_ignore; + ClearPageChecked(page); if (!page_has_buffers(page)) { create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -180,9 +179,8 @@ static int gfs2_writepages(struct address_space *mapping, { struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) + if (gfs2_is_writeback(ip)) return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); return generic_writepages(mapping, wbc); @@ -606,7 +604,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, if (gfs2_is_stuffed(ip)) return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); -- cgit v1.2.3 From 5561093e2cac9f7d2a77e39cc689b8d2b7f9b2bc Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 17 Oct 2007 08:47:38 +0100 Subject: [GFS2] Introduce gfs2_set_aops() Just like ext3 we now have three sets of address space operations to cover the cases of writeback, ordered and journalled data writes. This means that the individual operations can now become less complicated as we are able to remove some of the tests for file data mode from the code. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 4 ++- fs/gfs2/inode.h | 6 ++++ fs/gfs2/ops_address.c | 78 +++++++++++++++++++++++++++++++++------------------ fs/gfs2/ops_address.h | 2 +- fs/gfs2/ops_file.c | 13 ++++++++- 5 files changed, 72 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index af493fc6c8ce..532784eb5ba4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -136,7 +136,6 @@ void gfs2_set_iop(struct inode *inode) if (S_ISREG(mode)) { inode->i_op = &gfs2_file_iops; inode->i_fop = &gfs2_file_fops; - inode->i_mapping->a_ops = &gfs2_file_aops; } else if (S_ISDIR(mode)) { inode->i_op = &gfs2_dir_iops; inode->i_fop = &gfs2_dir_fops; @@ -290,6 +289,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_entries = be32_to_cpu(str->di_entries); di->di_eattr = be64_to_cpu(str->di_eattr); + if (S_ISREG(ip->i_inode.i_mode)) + gfs2_set_aops(&ip->i_inode); + return 0; } diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index bed3dc212a18..d44650662615 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -26,6 +26,12 @@ static inline int gfs2_is_writeback(const struct gfs2_inode *ip) return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip); } +static inline int gfs2_is_ordered(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip); +} + static inline int gfs2_is_dir(const struct gfs2_inode *ip) { return S_ISDIR(ip->i_inode.i_mode); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index ed154af86171..207014f363d8 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -162,28 +162,18 @@ out_ignore: } /** - * gfs2_writepages - Write a bunch of dirty pages back to disk + * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk * @mapping: The mapping to write * @wbc: Write-back control * - * For journaled files and/or ordered writes this just falls back to the - * kernel's default writepages path for now. We will probably want to change - * that eventually (i.e. when we look at allocate on flush). - * - * For the data=writeback case though we can already ignore buffer heads + * For the data=writeback case we can already ignore buffer heads * and write whole extents at once. This is a big reduction in the * number of I/O requests we send and the bmap calls we make in this case. */ -static int gfs2_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int gfs2_writeback_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - struct inode *inode = mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - - if (gfs2_is_writeback(ip)) - return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); - - return generic_writepages(mapping, wbc); + return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); } /** @@ -644,11 +634,7 @@ failed: static int gfs2_set_page_dirty(struct page *page) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) - SetPageChecked(page); + SetPageChecked(page); return __set_page_dirty_buffers(page); } @@ -738,13 +724,9 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) { /* * Should we return an error here? I can't see that O_DIRECT for - * a journaled file makes any sense. For now we'll silently fall - * back to buffered I/O, likewise we do the same for stuffed - * files since they are (a) small and (b) unaligned. + * a stuffed file makes any sense. For now we'll silently fall + * back to buffered I/O */ - if (gfs2_is_jdata(ip)) - return 0; - if (gfs2_is_stuffed(ip)) return 0; @@ -855,9 +837,22 @@ cannot_release: return 0; } -const struct address_space_operations gfs2_file_aops = { +static const struct address_space_operations gfs2_writeback_aops = { + .writepage = gfs2_writepage, + .writepages = gfs2_writeback_writepages, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, + .direct_IO = gfs2_direct_IO, +}; + +static const struct address_space_operations gfs2_ordered_aops = { .writepage = gfs2_writepage, - .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, @@ -870,3 +865,30 @@ const struct address_space_operations gfs2_file_aops = { .direct_IO = gfs2_direct_IO, }; +static const struct address_space_operations gfs2_jdata_aops = { + .writepage = gfs2_writepage, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .set_page_dirty = gfs2_set_page_dirty, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, +}; + +void gfs2_set_aops(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + if (gfs2_is_writeback(ip)) + inode->i_mapping->a_ops = &gfs2_writeback_aops; + else if (gfs2_is_ordered(ip)) + inode->i_mapping->a_ops = &gfs2_ordered_aops; + else if (gfs2_is_jdata(ip)) + inode->i_mapping->a_ops = &gfs2_jdata_aops; + else + BUG(); +} + diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index e8fe83fcd583..d3b76d0cdc81 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h @@ -14,12 +14,12 @@ #include #include -extern const struct address_space_operations gfs2_file_aops; extern int gfs2_get_block(struct inode *inode, sector_t lblock, struct buffer_head *bh_result, int create); extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); extern int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, char *buf, loff_t *pos, unsigned size); +extern void gfs2_set_aops(struct inode *inode); #endif /* __OPS_ADDRESS_DOT_H__ */ diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 6f3aeb059c61..ad5daaa6babc 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -38,6 +38,7 @@ #include "trans.h" #include "util.h" #include "eaops.h" +#include "ops_address.h" /** * gfs2_llseek - seek to a location in a file @@ -245,7 +246,16 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) if (error) goto out; } - + if ((flags ^ new_flags) & GFS2_DIF_JDATA) { + if (flags & GFS2_DIF_JDATA) + gfs2_log_flush(sdp, ip->i_gl); + error = filemap_fdatawrite(inode->i_mapping); + if (error) + goto out; + error = filemap_fdatawait(inode->i_mapping); + if (error) + goto out; + } error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) goto out; @@ -257,6 +267,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) gfs2_dinode_out(ip, bh->b_data); brelse(bh); gfs2_set_inode_flags(inode); + gfs2_set_aops(inode); out_trans_end: gfs2_trans_end(sdp); out: -- cgit v1.2.3 From 9ff8ec32e58875022447af619bec6e5aee7c77e4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 28 Sep 2007 13:49:05 +0100 Subject: [GFS2] Split gfs2_writepage into three cases This patch splits gfs2_writepage into separate functions for each of the three cases: writeback, ordered and journalled. As a result it becomes a lot easier to see what each one is doing. The common code is moved into gfs2_writepage_common. This fixes a performance bug where we were doing more work than strictly required in the ordered write case. Signed-off-by: Steven Whitehouse --- fs/gfs2/lops.c | 17 ++++---- fs/gfs2/ops_address.c | 112 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 101 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6c27cea761c6..e901f8f7d650 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -556,17 +556,20 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (!list_empty(&bd->bd_list_tr)) - goto out; - tr->tr_touched = 1; - if (gfs2_is_jdata(ip)) { - tr->tr_num_buf++; - list_add(&bd->bd_list_tr, &tr->tr_list_buf); + if (tr) { + if (!list_empty(&bd->bd_list_tr)) + goto out; + tr->tr_touched = 1; + if (gfs2_is_jdata(ip)) { + tr->tr_num_buf++; + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + } } if (!list_empty(&le->le_list)) goto out; - __glock_lo_add(sdp, &bd->bd_gl->gl_le); + if (tr) + __glock_lo_add(sdp, &bd->bd_gl->gl_le); if (gfs2_is_jdata(ip)) { gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 207014f363d8..4bf73ed945ae 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -103,16 +103,15 @@ static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, } /** - * gfs2_writepage - Write complete page - * @page: Page to write - * - * Returns: errno + * gfs2_writepage_common - Common bits of writepage + * @page: The page to be written + * @wbc: The writeback control * - * Some of this is copied from block_write_full_page() although we still - * call it to do most of the work. + * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. */ -static int gfs2_writepage(struct page *page, struct writeback_control *wbc) +static int gfs2_writepage_common(struct page *page, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); @@ -120,23 +119,94 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - int error; - int done_trans = 0; + int ret = -EIO; - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { - unlock_page(page); - return -EIO; - } + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + goto out; + ret = 0; if (current->journal_info) - goto out_ignore; - + goto redirty; /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_CACHE_SIZE-1); if (page->index > end_index || (page->index == end_index && !offset)) { page->mapping->a_ops->invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ + goto out; } + return 1; +redirty: + redirty_page_for_writepage(wbc, page); +out: + unlock_page(page); + return 0; +} + +/** + * gfs2_writeback_writepage - Write page for writeback mappings + * @page: The page + * @wbc: The writeback control + * + */ + +static int gfs2_writeback_writepage(struct page *page, + struct writeback_control *wbc) +{ + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); + if (ret == -EAGAIN) + ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + return ret; +} + +/** + * gfs2_ordered_writepage - Write page for ordered data files + * @page: The page to write + * @wbc: The writeback control + * + */ + +static int gfs2_ordered_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** + * gfs2_jdata_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + * + */ + +static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + int done_trans = 0; + + error = gfs2_writepage_common(page, wbc); + if (error <= 0) + return error; if (PageChecked(page)) { error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); @@ -838,7 +908,7 @@ cannot_release: } static const struct address_space_operations gfs2_writeback_aops = { - .writepage = gfs2_writepage, + .writepage = gfs2_writeback_writepage, .writepages = gfs2_writeback_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, @@ -852,7 +922,7 @@ static const struct address_space_operations gfs2_writeback_aops = { }; static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_writepage, + .writepage = gfs2_ordered_writepage, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, @@ -866,7 +936,7 @@ static const struct address_space_operations gfs2_ordered_aops = { }; static const struct address_space_operations gfs2_jdata_aops = { - .writepage = gfs2_writepage, + .writepage = gfs2_jdata_writepage, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, -- cgit v1.2.3 From b8e7cbb65bcc99630e123422c6829ce3c0fcdf14 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 17 Oct 2007 09:04:24 +0100 Subject: [GFS2] Add writepages for GFS2 jdata This patch resolves a lock ordering issue where we had been getting a transaction lock in the wrong order with respect to the page lock. By using writepages rather than just writepage, it is then possible to start a transaction before locking the page, and thus matching the locking order elsewhere in the code. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 2 +- fs/gfs2/ops_address.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 206 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 70b404d2774b..1e1fe8def375 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -650,7 +650,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) get_bh(bh); gfs2_log_unlock(sdp); lock_buffer(bh); - if (test_clear_buffer_dirty(bh)) { + if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); } else { diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 4bf73ed945ae..48913e569907 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -188,6 +189,34 @@ static int gfs2_ordered_writepage(struct page *page, return block_write_full_page(page, gfs2_get_block_noalloc, wbc); } +/** + * __gfs2_jdata_writepage - The core of jdata writepage + * @page: The page to write + * @wbc: The writeback control + * + * This is shared between writepage and writepages and implements the + * core of the writepage operation. If a transaction is required then + * PageChecked will have been set and the transaction will have + * already been started before this is called. + */ + +static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (PageChecked(page)) { + ClearPageChecked(page); + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); + } + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + /** * gfs2_jdata_writepage - Write complete page * @page: Page to write @@ -199,7 +228,6 @@ static int gfs2_ordered_writepage(struct page *page, static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int done_trans = 0; @@ -209,18 +237,14 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc return error; if (PageChecked(page)) { + if (wbc->sync_mode != WB_SYNC_ALL) + goto out_ignore; error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); if (error) goto out_ignore; - ClearPageChecked(page); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); done_trans = 1; } - error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + error = __gfs2_jdata_writepage(page, wbc); if (done_trans) gfs2_trans_end(sdp); return error; @@ -246,6 +270,178 @@ static int gfs2_writeback_writepages(struct address_space *mapping, return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); } +/** + * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages + * @mapping: The mapping + * @wbc: The writeback control + * @writepage: The writepage function to call for each page + * @pvec: The vector of pages + * @nr_pages: The number of pages to write + * + * Returns: non-zero if loop should terminate, zero otherwise + */ + +static int gfs2_write_jdata_pagevec(struct address_space *mapping, + struct writeback_control *wbc, + struct pagevec *pvec, + int nr_pages, pgoff_t end) +{ + struct inode *inode = mapping->host; + struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset = i_size & (PAGE_CACHE_SIZE-1); + unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); + struct backing_dev_info *bdi = mapping->backing_dev_info; + int i; + int ret; + + ret = gfs2_trans_begin(sdp, nrblocks, 0); + if (ret < 0) + return ret; + + for(i = 0; i < nr_pages; i++) { + struct page *page = pvec->pages[i]; + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + ret = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + /* Is the page fully outside i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !offset)) { + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + continue; + } + + ret = __gfs2_jdata_writepage(page, wbc); + + if (ret || (--(wbc->nr_to_write) <= 0)) + ret = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + ret = 1; + } + + } + gfs2_trans_end(sdp); + return ret; +} + +/** + * gfs2_write_cache_jdata - Like write_cache_pages but different + * @mapping: The mapping to write + * @wbc: The writeback control + * @writepage: The writepage function to call + * @data: The data to pass to writepage + * + * The reason that we use our own function here is that we need to + * start transactions before we grab page locks. This allows us + * to get the ordering right. + */ + +static int gfs2_write_cache_jdata(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } + +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + scanned = 1; + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (ret) + done = 1; + if (ret > 0) + ret = 0; + + pagevec_release(&pvec); + cond_resched(); + } + + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + + +/** + * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: The writeback control + * + */ + +static int gfs2_jdata_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + int ret; + + ret = gfs2_write_cache_jdata(mapping, wbc); + if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { + gfs2_log_flush(sdp, ip->i_gl); + ret = gfs2_write_cache_jdata(mapping, wbc); + } + return ret; +} + /** * stuffed_readpage - Fill in a Linux page with stuffed file data * @ip: the inode @@ -937,6 +1133,7 @@ static const struct address_space_operations gfs2_ordered_aops = { static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, + .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, -- cgit v1.2.3 From c41d4f09f13671f98ba4b82fdc94420cdc09be08 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 17 Oct 2007 14:05:41 +0100 Subject: [GFS2] Don't hold page lock when starting transaction This is an addendum to the new AOPs work which moves the point at which we take the page lock so that we don't get it until the last possible moment. This resolves a conflict between starting transactions and the page lock. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 48913e569907..ae782d2cbdec 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -657,18 +657,10 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (unlikely(error)) goto out_uninit; - error = -ENOMEM; - page = __grab_cache_page(mapping, index); - *pagep = page; - if (!page) - goto out_unlock; - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); if (error) - goto out_putpage; - + goto out_unlock; ip->i_alloc.al_requested = 0; if (alloc_required) { @@ -699,40 +691,47 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_trans_fail; + error = -ENOMEM; + page = __grab_cache_page(mapping, index); + *pagep = page; + if (unlikely(!page)) + goto out_endtrans; + if (gfs2_is_stuffed(ip)) { + error = 0; if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { error = gfs2_unstuff_dinode(ip, page); if (error == 0) goto prepare_write; - } else if (!PageUptodate(page)) + } else if (!PageUptodate(page)) { error = stuffed_readpage(ip, page); + } goto out; } prepare_write: error = block_prepare_write(page, from, to, gfs2_get_block); - out: - if (error) { - gfs2_trans_end(sdp); + if (error == 0) + return 0; + + page_cache_release(page); + if (pos + len > ip->i_inode.i_size) + vmtruncate(&ip->i_inode, ip->i_inode.i_size); +out_endtrans: + gfs2_trans_end(sdp); out_trans_fail: - if (alloc_required) { - gfs2_inplace_release(ip); + if (alloc_required) { + gfs2_inplace_release(ip); out_qunlock: - gfs2_quota_unlock(ip); + gfs2_quota_unlock(ip); out_alloc_put: - gfs2_alloc_put(ip); - } -out_putpage: - page_cache_release(page); - if (pos + len > ip->i_inode.i_size) - vmtruncate(&ip->i_inode, ip->i_inode.i_size); + gfs2_alloc_put(ip); + } out_unlock: - gfs2_glock_dq_m(1, &ip->i_gh); + gfs2_glock_dq(&ip->i_gh); out_uninit: - gfs2_holder_uninit(&ip->i_gh); - } - + gfs2_holder_uninit(&ip->i_gh); return error; } -- cgit v1.2.3 From 47e83b509127f5e83ae5d93afd5c7cb9241acc38 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 18 Oct 2007 11:15:50 +0100 Subject: [GFS2] Use correct include file in ops_address.c Something changed in the upstream kernel, and it needs this one-liner to allow ops_address.c to build. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index ae782d2cbdec..7353933483bb 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "gfs2.h" -- cgit v1.2.3 From 60b0d0877986b8fa70148f06055422d2ed858e88 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 31 Oct 2007 14:24:33 +0000 Subject: [GFS2] Remove unused variables These haven't been used for some time, remove them. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 4 ---- fs/gfs2/super.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 82dfe9bd270b..f7a50fed4b52 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -412,9 +412,6 @@ struct gfs2_args { struct gfs2_tune { spinlock_t gt_spin; - unsigned int gt_ilimit; - unsigned int gt_ilimit_tries; - unsigned int gt_ilimit_min; unsigned int gt_demote_secs; /* Cache retention for unheld glock */ unsigned int gt_incore_log_blocks; unsigned int gt_log_flush_secs; @@ -434,7 +431,6 @@ struct gfs2_tune { unsigned int gt_new_files_jdata; unsigned int gt_new_files_directio; unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ - unsigned int gt_lockdump_size; unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5183dfb9342a..26edb7f9f4b8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -51,9 +51,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) { spin_lock_init(>->gt_spin); - gt->gt_ilimit = 100; - gt->gt_ilimit_tries = 3; - gt->gt_ilimit_min = 1; gt->gt_demote_secs = 300; gt->gt_incore_log_blocks = 1024; gt->gt_log_flush_secs = 60; @@ -71,7 +68,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_new_files_jdata = 0; gt->gt_new_files_directio = 0; gt->gt_max_readahead = 1 << 18; - gt->gt_lockdump_size = 131072; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; gt->gt_reclaim_limit = 5000; -- cgit v1.2.3 From c2932e03dbcfe7ea9052953dbd5f3157183c1e9b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 1 Nov 2007 09:26:54 +0000 Subject: [GFS2] Remove "reclaim limit" This call to reclaim glocks is not needed, and in particular we don't want it in the fast path for locking glocks. The limit was entirely arbitrary anyway and we can't expect users to adjust things like this, the remaining code will do the right thing on its own. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 --------- fs/gfs2/incore.h | 1 - fs/gfs2/super.c | 1 - fs/gfs2/sys.c | 2 -- 4 files changed, 13 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 104e83ff874f..159a5479c4e4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -507,21 +507,12 @@ static int rq_mutex(struct gfs2_holder *gh) static int rq_promote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { if (list_empty(&gl->gl_holders)) { gl->gl_req_gh = gh; set_bit(GLF_LOCK, &gl->gl_flags); spin_unlock(&gl->gl_spin); - - if (atomic_read(&sdp->sd_reclaim_count) > - gfs2_tune_get(sdp, gt_reclaim_limit) && - !(gh->gh_flags & LM_FLAG_PRIORITY)) { - gfs2_reclaim_glock(sdp); - gfs2_reclaim_glock(sdp); - } - gfs2_glock_xmote_th(gh->gh_gl, gh); spin_lock(&gl->gl_spin); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f7a50fed4b52..089dba412cc0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -433,7 +433,6 @@ struct gfs2_tune { unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; - unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; }; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 26edb7f9f4b8..548cc8ba0703 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -70,7 +70,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_max_readahead = 1 << 18; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; - gt->gt_reclaim_limit = 5000; gt->gt_statfs_quantum = 30; gt->gt_statfs_slow = 0; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 06e0b7768d97..1359198aed63 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -433,7 +433,6 @@ TUNE_ATTR(quota_quantum, 0); TUNE_ATTR(atime_quantum, 0); TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); -TUNE_ATTR(reclaim_limit, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(new_files_directio, 0); @@ -456,7 +455,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_atime_quantum.attr, &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, - &tune_attr_reclaim_limit.attr, &tune_attr_statfs_slow.attr, &tune_attr_quota_simul_sync.attr, &tune_attr_quota_cache_secs.attr, -- cgit v1.2.3 From 52d4c74b08bf859f698ddb4e8a43c0dc8d4a0685 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 1 Nov 2007 09:34:14 +0000 Subject: [GFS2] Add sync_page to metadata address space operations This set of address space operations was missing a sync_page operation. Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 01ef90253ed1..4b1aced9023d 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -50,6 +50,7 @@ static int gfs2_aspace_writepage(struct page *page, static const struct address_space_operations aspace_aops = { .writepage = gfs2_aspace_writepage, .releasepage = gfs2_releasepage, + .sync_page = block_sync_page, }; /** -- cgit v1.2.3 From 3042a2ccd68d2b609d283219e51cba363aa35c1d Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Nov 2007 08:39:34 +0000 Subject: [GFS2] Reorder writeback for glock sync Previously we were doing (write data, wait for data, write metadata, wait for metadata). After this patch we so (write metadata, write data, wait for data, wait for metadata) which should be more efficient. Also I noticed that the drop_bh and xmote_bh functions were almost identical. In fact the only difference was a single test, and that test is such that in the drop_bh case, it would always evaluate to the correct result. As such we can use the xmote_bh functions in all the places where we were using the drop_bh function and remove the drop_bh functions. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 13 +++++----- fs/gfs2/glops.c | 80 ++++++++++++--------------------------------------------- 2 files changed, 22 insertions(+), 71 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 159a5479c4e4..e668808b127f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -947,8 +947,8 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned int ret; - if (glops->go_drop_th) - glops->go_drop_th(gl); + if (glops->go_xmote_th) + glops->go_xmote_th(gl); gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); @@ -1252,12 +1252,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh) list_del_init(&gh->gh_list); if (list_empty(&gl->gl_holders)) { - spin_unlock(&gl->gl_spin); - - if (glops->go_unlock) + if (glops->go_unlock) { + spin_unlock(&gl->gl_spin); glops->go_unlock(gh); - - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_spin); + } gl->gl_stamp = jiffies; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index ba124230393b..c663b7a0f410 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -138,43 +138,33 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) static void inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; + struct address_space *metamapping = gl->gl_aspace->i_mapping; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED) + gfs2_pte_inval(gl); + if (gl->gl_state != LM_ST_EXCLUSIVE) + return; if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - if (ip && !gfs2_is_jdata(ip)) - filemap_fdatawrite(ip->i_inode.i_mapping); gfs2_log_flush(gl->gl_sbd, gl); - if (ip && gfs2_is_jdata(ip)) - filemap_fdatawrite(ip->i_inode.i_mapping); - gfs2_meta_sync(gl); + filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; - int error = filemap_fdatawait(mapping); + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); mapping_set_error(mapping, error); } + error = filemap_fdatawait(metamapping); + mapping_set_error(metamapping, error); clear_bit(GLF_DIRTY, &gl->gl_flags); gfs2_ail_empty_gl(gl); } } -/** - * inode_go_xmote_th - promote/demote a glock - * @gl: the glock - * @state: the requested state - * @flags: - * - */ - -static void inode_go_xmote_th(struct gfs2_glock *gl) -{ - if (gl->gl_state != LM_ST_UNLOCKED) - gfs2_pte_inval(gl); - if (gl->gl_state == LM_ST_EXCLUSIVE) - inode_go_sync(gl); -} - /** * inode_go_xmote_bh - After promoting/demoting a glock * @gl: the glock @@ -195,22 +185,6 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl) } } -/** - * inode_go_drop_th - unlock a glock - * @gl: the glock - * - * Invoked from rq_demote(). - * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) - * is being purged from our node's glock cache; we're dropping lock. - */ - -static void inode_go_drop_th(struct gfs2_glock *gl) -{ - gfs2_pte_inval(gl); - if (gl->gl_state == LM_ST_EXCLUSIVE) - inode_go_sync(gl); -} - /** * inode_go_inval - prepare a inode glock to be released * @gl: the glock @@ -326,14 +300,14 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) } /** - * trans_go_xmote_th - promote/demote the transaction glock + * trans_go_sync - promote/demote the transaction glock * @gl: the glock * @state: the requested state * @flags: * */ -static void trans_go_xmote_th(struct gfs2_glock *gl) +static void trans_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; @@ -376,24 +350,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) } } -/** - * trans_go_drop_th - unlock the transaction glock - * @gl: the glock - * - * We want to sync the device even with localcaching. Remember - * that localcaching journal replay only marks buffers dirty. - */ - -static void trans_go_drop_th(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - - if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - } -} - /** * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock * @gl: the glock @@ -408,14 +364,12 @@ static int quota_go_demote_ok(struct gfs2_glock *gl) const struct gfs2_glock_operations gfs2_meta_glops = { .go_xmote_th = meta_go_sync, - .go_drop_th = meta_go_sync, .go_type = LM_TYPE_META, }; const struct gfs2_glock_operations gfs2_inode_glops = { - .go_xmote_th = inode_go_xmote_th, + .go_xmote_th = inode_go_sync, .go_xmote_bh = inode_go_xmote_bh, - .go_drop_th = inode_go_drop_th, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, @@ -425,7 +379,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_xmote_th = meta_go_sync, - .go_drop_th = meta_go_sync, .go_inval = meta_go_inval, .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, @@ -435,9 +388,8 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { }; const struct gfs2_glock_operations gfs2_trans_glops = { - .go_xmote_th = trans_go_xmote_th, + .go_xmote_th = trans_go_sync, .go_xmote_bh = trans_go_xmote_bh, - .go_drop_th = trans_go_drop_th, .go_type = LM_TYPE_NONDISK, }; -- cgit v1.2.3 From e589665eb97b297412fb16b4c1737a01a91db903 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Nov 2007 09:14:31 +0000 Subject: [GFS2] Remove flags no longer required The HIF_MUTEX and HIF_PROMOTE flags were set on the glock holders depending upon which of the two waiters lists they were going to be queued upon. They were then tested when the holders were taken off the lists to ensure that the right type of holder was being dequeued. Since we are already using separate lists, there doesn't seem a lot of point having these flags as well, and since setting them and testing them is in the fast path for locking and unlocking glock, this patch removes them. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 17 ++--------------- fs/gfs2/incore.h | 4 ---- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e668808b127f..5fbd9d34ce23 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -594,12 +594,7 @@ static void run_queue(struct gfs2_glock *gl) if (!list_empty(&gl->gl_waiters1)) { gh = list_entry(gl->gl_waiters1.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_MUTEX, &gh->gh_iflags)) - blocked = rq_mutex(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_mutex(gh); } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { blocked = rq_demote(gl); if (gl->gl_waiters2 && !blocked) { @@ -610,12 +605,7 @@ static void run_queue(struct gfs2_glock *gl) } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_PROMOTE, &gh->gh_iflags)) - blocked = rq_promote(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_promote(gh); } else break; @@ -636,7 +626,6 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) struct gfs2_holder gh; gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_MUTEX, &gh.gh_iflags); if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) BUG(); @@ -1160,8 +1149,6 @@ restart: return -EIO; } - set_bit(HIF_PROMOTE, &gh->gh_iflags); - spin_lock(&gl->gl_spin); add_to_queue(gh); run_queue(gl); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 089dba412cc0..478023e9fda6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -141,10 +141,6 @@ struct gfs2_glock_operations { }; enum { - /* Actions */ - HIF_MUTEX = 0, - HIF_PROMOTE = 1, - /* States */ HIF_HOLDER = 6, HIF_FIRST = 7, -- cgit v1.2.3 From c7227e46423a57b4df27a2d75b5869bd3ae654d0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 2 Nov 2007 09:37:15 -0500 Subject: [GFS2] Given device ID rather than s_id in "id" sysfs file This patch changes the /sys/fs/gfs2//id file to give the device id "major:minor" rather than the s_id. That enables gfs2_tool to match devices properly (by id, not name) when locating the tuning files. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 1359198aed63..65dd0657e1f8 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -32,7 +32,8 @@ spinlock_t gfs2_sys_margs_lock; static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id); + return snprintf(buf, PAGE_SIZE, "%u:%u\n", + MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); } static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) -- cgit v1.2.3 From 8cbc4342478311c2a85260a7ca54d96cb7f71f7b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 7 Nov 2007 09:03:56 -0600 Subject: [GFS2] check kthread_should_stop when waiting Use wait_event_interruptible() in the lock_dlm thread instead of an open coded equivalent, and include a kthread_should_stop() check in the wait test so we don't miss a kthread_stop(). Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/thread.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index bd938f06481d..521694fc19d6 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c @@ -273,18 +273,13 @@ static int gdlm_thread(void *data, int blist) struct gdlm_ls *ls = (struct gdlm_ls *) data; struct gdlm_lock *lp = NULL; uint8_t complete, blocking, submit, drop; - DECLARE_WAITQUEUE(wait, current); /* Only thread1 is allowed to do blocking callbacks since gfs may wait for a completion callback within a blocking cb. */ while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ls->thread_wait, &wait); - if (no_work(ls, blist)) - schedule(); - remove_wait_queue(&ls->thread_wait, &wait); - set_current_state(TASK_RUNNING); + wait_event_interruptible(ls->thread_wait, + !no_work(ls, blist) || kthread_should_stop()); complete = blocking = submit = drop = 0; -- cgit v1.2.3 From 2bcd610d2fdea608a8fdac32788fc35a32a2327c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 8 Nov 2007 14:25:12 +0000 Subject: [GFS2] Don't add glocks to the journal The only reason for adding glocks to the journal was to keep track of which locks required a log flush prior to release. We add a flag to the glock to allow this check to be made in a simpler way. This reduces the size of a glock (by 12 bytes on i386, 24 on x86_64) and means that we can avoid extra work during the journal flush. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 3 --- fs/gfs2/incore.h | 4 +--- fs/gfs2/inode.c | 3 ++- fs/gfs2/log.c | 15 +++++--------- fs/gfs2/log.h | 9 +++++++- fs/gfs2/lops.c | 58 +++++----------------------------------------------- fs/gfs2/ops_fstype.c | 1 - fs/gfs2/trans.c | 5 ----- fs/gfs2/trans.h | 1 - 9 files changed, 21 insertions(+), 78 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5fbd9d34ce23..d83df6888402 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -346,7 +346,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_object = NULL; gl->gl_sbd = sdp; gl->gl_aspace = NULL; - lops_init_le(&gl->gl_le, &gfs2_glock_lops); INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); /* If this glock protects actual on-disk data or metadata blocks, @@ -1900,8 +1899,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); - print_dbg(gi, " le = %s\n", - (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); print_dbg(gi, " reclaim = %s\n", (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); if (gl->gl_aspace) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 478023e9fda6..911822d1e4c0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -168,6 +168,7 @@ enum { GLF_PENDING_DEMOTE = 4, GLF_DIRTY = 5, GLF_DEMOTE_IN_PROGRESS = 6, + GLF_LFLUSH = 7, }; struct gfs2_glock { @@ -208,7 +209,6 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; struct inode *gl_aspace; - struct gfs2_log_element gl_le; struct list_head gl_ail_list; atomic_t gl_ail_count; struct delayed_work gl_work; @@ -584,13 +584,11 @@ struct gfs2_sbd { unsigned int sd_log_commited_databuf; unsigned int sd_log_commited_revoke; - unsigned int sd_log_num_gl; unsigned int sd_log_num_buf; unsigned int sd_log_num_revoke; unsigned int sd_log_num_rg; unsigned int sd_log_num_databuf; - struct list_head sd_log_le_gl; struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; struct list_head sd_log_le_rg; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 532784eb5ba4..92959d093adf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -362,7 +362,8 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (error) goto out_rg_gunlock; - gfs2_trans_add_gl(ip->i_gl); + set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); + set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); gfs2_free_di(rgd, ip); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 1e1fe8def375..d24684330bc3 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -692,20 +692,16 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) { struct gfs2_ail *ai; down_write(&sdp->sd_log_flush_lock); - if (gl) { - gfs2_log_lock(sdp); - if (list_empty(&gl->gl_le.le_list)) { - gfs2_log_unlock(sdp); - up_write(&sdp->sd_log_flush_lock); - return; - } - gfs2_log_unlock(sdp); + /* Log might have been flushed while we waited for the flush lock */ + if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { + up_write(&sdp->sd_log_flush_lock); + return; } ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); @@ -823,7 +819,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) down_write(&sdp->sd_log_flush_lock); gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 24e7161486e2..4babd430b722 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -57,7 +57,14 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, struct buffer_head *real); -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); +void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); + +static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl) +{ + if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags)) + __gfs2_log_flush(sbd, gl); +} + void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); void gfs2_remove_from_ail(struct gfs2_bufdata *bd); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index e901f8f7d650..fae59d69d01a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -87,6 +87,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, } bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); gfs2_log_unlock(sdp); unlock_buffer(bh); } @@ -124,49 +125,6 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) return bh; } -static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) -{ - struct gfs2_glock *gl; - struct gfs2_trans *tr = current->journal_info; - - tr->tr_touched = 1; - - gl = container_of(le, struct gfs2_glock, gl_le); - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) - return; - - if (!list_empty(&le->le_list)) - return; - - gfs2_glock_hold(gl); - set_bit(GLF_DIRTY, &gl->gl_flags); - sdp->sd_log_num_gl++; - list_add(&le->le_list, &sdp->sd_log_le_gl); -} - -static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) -{ - gfs2_log_lock(sdp); - __glock_lo_add(sdp, le); - gfs2_log_unlock(sdp); -} - -static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) -{ - struct list_head *head = &sdp->sd_log_le_gl; - struct gfs2_glock *gl; - - while (!list_empty(head)) { - gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); - list_del_init(&gl->gl_le.le_list); - sdp->sd_log_num_gl--; - - gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); - gfs2_glock_put(gl); - } - gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); -} - static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); @@ -182,7 +140,8 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) list_add(&bd->bd_list_tr, &tr->tr_list_buf); if (!list_empty(&le->le_list)) goto out; - __glock_lo_add(sdp, &bd->bd_gl->gl_le); + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); sdp->sd_log_num_buf++; @@ -568,8 +527,8 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) if (!list_empty(&le->le_list)) goto out; - if (tr) - __glock_lo_add(sdp, &bd->bd_gl->gl_le); + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); if (gfs2_is_jdata(ip)) { gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; @@ -776,12 +735,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) } -const struct gfs2_log_operations gfs2_glock_lops = { - .lo_add = glock_lo_add, - .lo_after_commit = glock_lo_after_commit, - .lo_name = "glock", -}; - const struct gfs2_log_operations gfs2_buf_lops = { .lo_add = buf_lo_add, .lo_incore_commit = buf_lo_incore_commit, @@ -819,7 +772,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { }; const struct gfs2_log_operations *gfs2_log_ops[] = { - &gfs2_glock_lops, &gfs2_databuf_lops, &gfs2_buf_lops, &gfs2_rg_lops, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 17de58e83d92..52aaba96d5da 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -77,7 +77,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) spin_lock_init(&sdp->sd_log_lock); - INIT_LIST_HEAD(&sdp->sd_log_le_gl); INIT_LIST_HEAD(&sdp->sd_log_le_buf); INIT_LIST_HEAD(&sdp->sd_log_le_revoke); INIT_LIST_HEAD(&sdp->sd_log_le_rg); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 717983e2c2ae..73e5d92a657c 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -114,11 +114,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL); } -void gfs2_trans_add_gl(struct gfs2_glock *gl) -{ - lops_add(gl->gl_sbd, &gl->gl_le); -} - /** * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction * @gl: the glock the buffer belongs to diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 043d5f4b9c4c..e826f0dab80a 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -30,7 +30,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, void gfs2_trans_end(struct gfs2_sbd *sdp); -void gfs2_trans_add_gl(struct gfs2_glock *gl); void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); -- cgit v1.2.3 From fd041f0b4045db8646b36d393cbb274db60649f5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 8 Nov 2007 14:55:03 +0000 Subject: [GFS2] Use atomic_t for journal free blocks counter This patch changes the counter which keeps track of the free blocks in the journal to an atomic_t in preparation for the following patch which will update the log reservation code. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/log.c | 26 +++++++++++++------------- fs/gfs2/ops_fstype.c | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 911822d1e4c0..7ae0206e9a61 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -595,7 +595,7 @@ struct gfs2_sbd { struct list_head sd_log_le_databuf; struct list_head sd_log_le_ordered; - unsigned int sd_log_blks_free; + atomic_t sd_log_blks_free; struct mutex sd_log_reserve_mutex; u64 sd_log_sequence; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index d24684330bc3..9192398408f2 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -301,7 +301,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) mutex_lock(&sdp->sd_log_reserve_mutex); gfs2_log_lock(sdp); - while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { + while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) { gfs2_log_unlock(sdp); gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL); @@ -310,7 +310,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) gfs2_ail1_start(sdp, 0); gfs2_log_lock(sdp); } - sdp->sd_log_blks_free -= blks; + atomic_sub(blks, &sdp->sd_log_blks_free); gfs2_log_unlock(sdp); mutex_unlock(&sdp->sd_log_reserve_mutex); @@ -330,9 +330,9 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) { gfs2_log_lock(sdp); - sdp->sd_log_blks_free += blks; + atomic_add(blks, &sdp->sd_log_blks_free); gfs2_assert_withdraw(sdp, - sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); + atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); up_read(&sdp->sd_log_flush_lock); } @@ -559,8 +559,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) ail2_empty(sdp, new_tail); gfs2_log_lock(sdp); - sdp->sd_log_blks_free += dist; - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); + atomic_add(dist, &sdp->sd_log_blks_free); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); sdp->sd_log_tail = new_tail; @@ -733,7 +733,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) log_flush_commit(sdp); else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); - sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ + atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ gfs2_log_unlock(sdp); log_write_header(sdp, 0, PULL); } @@ -773,12 +773,12 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); reserved = calc_reserved(sdp); - old = sdp->sd_log_blks_free; - sdp->sd_log_blks_free += tr->tr_reserved - - (reserved - sdp->sd_log_blks_reserved); + old = atomic_read(&sdp->sd_log_blks_free); + atomic_add(tr->tr_reserved - (reserved - sdp->sd_log_blks_reserved), + &sdp->sd_log_blks_free); - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) >= old); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; @@ -831,7 +831,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); - gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 52aaba96d5da..1bba6ac0bcac 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -339,7 +339,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (sdp->sd_args.ar_spectator) { sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); - sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); } else { if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { fs_err(sdp, "can't mount journal #%u\n", @@ -376,7 +376,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) sdp->sd_jdesc->jd_jid, error); goto fail_jinode_gh; } - sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); } if (sdp->sd_lockstruct.ls_first) { -- cgit v1.2.3 From ec69b188837a347769e187997d040e84a683b38a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 9 Nov 2007 10:01:41 +0000 Subject: [GFS2] Move gfs2_logd into log.c This means that we can mark gfs2_ail1_empty static and prepares the way for further changes. Signed-off-by: Steven Whitehouse --- fs/gfs2/daemon.c | 50 -------------------------------------------------- fs/gfs2/daemon.h | 1 - fs/gfs2/log.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/gfs2/log.h | 3 +-- 4 files changed, 56 insertions(+), 54 deletions(-) diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 3731ab0771d5..e51991947d2c 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -82,56 +82,6 @@ int gfs2_recoverd(void *data) return 0; } -/** - * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks - * @sdp: Pointer to GFS2 superblock - * - * Also, periodically check to make sure that we're using the most recent - * journal index. - */ - -int gfs2_logd(void *data) -{ - struct gfs2_sbd *sdp = data; - struct gfs2_holder ji_gh; - unsigned long t; - int need_flush; - - while (!kthread_should_stop()) { - /* Advance the log tail */ - - t = sdp->sd_log_flush_time + - gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; - - gfs2_ail1_empty(sdp, DIO_ALL); - gfs2_log_lock(sdp); - need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); - gfs2_log_unlock(sdp); - if (need_flush || time_after_eq(jiffies, t)) { - gfs2_log_flush(sdp, NULL); - sdp->sd_log_flush_time = jiffies; - } - - /* Check for latest journal index */ - - t = sdp->sd_jindex_refresh_time + - gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; - - if (time_after_eq(jiffies, t)) { - if (!gfs2_jindex_hold(sdp, &ji_gh)) - gfs2_glock_dq_uninit(&ji_gh); - sdp->sd_jindex_refresh_time = jiffies; - } - - t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - - return 0; -} - /** * gfs2_quotad - Write cached quota changes into the quota file * @sdp: Pointer to GFS2 superblock diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h index 0de9b3557955..4be084fb6a62 100644 --- a/fs/gfs2/daemon.h +++ b/fs/gfs2/daemon.h @@ -12,7 +12,6 @@ int gfs2_glockd(void *data); int gfs2_recoverd(void *data); -int gfs2_logd(void *data); int gfs2_quotad(void *data); #endif /* __DAEMON_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9192398408f2..e88a684b2209 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -26,6 +28,7 @@ #include "meta_io.h" #include "util.h" #include "dir.h" +#include "super.h" #define PULL 1 @@ -208,7 +211,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) gfs2_log_unlock(sdp); } -int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) { struct gfs2_ail *ai, *s; int ret; @@ -859,3 +862,54 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) } } + +/** + * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks + * @sdp: Pointer to GFS2 superblock + * + * Also, periodically check to make sure that we're using the most recent + * journal index. + */ + +int gfs2_logd(void *data) +{ + struct gfs2_sbd *sdp = data; + struct gfs2_holder ji_gh; + unsigned long t; + int need_flush; + + while (!kthread_should_stop()) { + /* Advance the log tail */ + + t = sdp->sd_log_flush_time + + gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; + + gfs2_ail1_empty(sdp, DIO_ALL); + gfs2_log_lock(sdp); + need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); + gfs2_log_unlock(sdp); + if (need_flush || time_after_eq(jiffies, t)) { + gfs2_log_flush(sdp, NULL); + sdp->sd_log_flush_time = jiffies; + } + + /* Check for latest journal index */ + + t = sdp->sd_jindex_refresh_time + + gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; + + if (time_after_eq(jiffies, t)) { + if (!gfs2_jindex_hold(sdp, &ji_gh)) + gfs2_glock_dq_uninit(&ji_gh); + sdp->sd_jindex_refresh_time = jiffies; + } + + t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; + if (freezing(current)) + refrigerator(); + schedule_timeout_interruptible(t); + } + + return 0; +} + diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 4babd430b722..771152816508 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -48,8 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); -int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); - int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); void gfs2_log_incr_head(struct gfs2_sbd *sdp); @@ -70,5 +68,6 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd); void gfs2_log_shutdown(struct gfs2_sbd *sdp); void gfs2_meta_syncfs(struct gfs2_sbd *sdp); +int gfs2_logd(void *data); #endif /* __LOG_DOT_H__ */ -- cgit v1.2.3 From e35b921185728850c5db3b5d5b356178f931a157 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 9 Nov 2007 10:07:21 +0000 Subject: [GFS2] Don't periodically update the jindex We only care about the content of the jindex in two cases, one is when we mount the fs and the other is when we need to recover another journal. In both cases we have to update the jindex anyway, so there is no point in updating it periodically between times, so this removes it to simplify gfs2_logd. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - fs/gfs2/log.c | 13 ------------- fs/gfs2/super.c | 1 - fs/gfs2/sys.c | 2 -- 4 files changed, 17 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7ae0206e9a61..330f4c73d0e7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -411,7 +411,6 @@ struct gfs2_tune { unsigned int gt_demote_secs; /* Cache retention for unheld glock */ unsigned int gt_incore_log_blocks; unsigned int gt_log_flush_secs; - unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e88a684b2209..4dcc7a8cda22 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -28,7 +28,6 @@ #include "meta_io.h" #include "util.h" #include "dir.h" -#include "super.h" #define PULL 1 @@ -874,7 +873,6 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) int gfs2_logd(void *data) { struct gfs2_sbd *sdp = data; - struct gfs2_holder ji_gh; unsigned long t; int need_flush; @@ -893,17 +891,6 @@ int gfs2_logd(void *data) sdp->sd_log_flush_time = jiffies; } - /* Check for latest journal index */ - - t = sdp->sd_jindex_refresh_time + - gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; - - if (time_after_eq(jiffies, t)) { - if (!gfs2_jindex_hold(sdp, &ji_gh)) - gfs2_glock_dq_uninit(&ji_gh); - sdp->sd_jindex_refresh_time = jiffies; - } - t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; if (freezing(current)) refrigerator(); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 548cc8ba0703..2e74792ee487 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -54,7 +54,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_demote_secs = 300; gt->gt_incore_log_blocks = 1024; gt->gt_log_flush_secs = 60; - gt->gt_jindex_refresh_secs = 60; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quotad_secs = 5; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 65dd0657e1f8..7f828a2cc858 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -428,7 +428,6 @@ TUNE_ATTR_2(name, name##_store) TUNE_ATTR(demote_secs, 0); TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); -TUNE_ATTR(jindex_refresh_secs, 0); TUNE_ATTR(quota_warn_period, 0); TUNE_ATTR(quota_quantum, 0); TUNE_ATTR(atime_quantum, 0); @@ -450,7 +449,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_demote_secs.attr, &tune_attr_incore_log_blocks.attr, &tune_attr_log_flush_secs.attr, - &tune_attr_jindex_refresh_secs.attr, &tune_attr_quota_warn_period.attr, &tune_attr_quota_quantum.attr, &tune_attr_atime_quantum.attr, -- cgit v1.2.3 From 0b7580c786a5feda6291fe68ead3a1b92b6b35b8 Mon Sep 17 00:00:00 2001 From: Fabio Massimo Di Nitto Date: Thu, 15 Nov 2007 13:48:52 +0000 Subject: [GFS2] Check for installation of mount helpers for DLM mounts The patch is a fix to abort mount if the mount.gfs* and possible umount.* are missing from /sbin. While we do what we can to guarantee that they are installed properly in userland (CVS HEAD), we want to make sure that mount still aborts properly. The only sign of missing helpers is that lock_dlm will receive no mount options at all. According to David the problem does not exist for lock_nolock as the helpers are not required. The patch has been tested for both gfs and gfs2 and it works as expected. The lack of mount.gfs* will generate an error that is propagated to mount: oot@node1:~# mount -t gfs2 /dev/nbd2 /mnt/ mount: wrong fs type, bad option, bad superblock on /dev/nbd2, missing codepage or helper program, or other error In some cases useful info is found in syslog - try dmesg | tail or so [ 3513.303346] GFS2: fsid=: Trying to join cluster "lock_dlm", "gutsy:gfs2" [ 3513.304546] DLM/GFS2/GFS ERROR: (u)mount helpers are not installed properly! [ 3513.306290] GFS2: fsid=: can't mount proto=lock_dlm, table=gutsy:gfs2, hostdata= You might want to notice that it will also avoid mount to hang or fail silently or with strange errors that will require the cluster to reboot/restart before you can actually mount the filesystem again. Signed-off-by: Fabio M. Di Nitto Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/mount.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 41c5b04caaba..ab301023094f 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -67,6 +67,12 @@ static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir) memset(data, 0, 256); strncpy(data, data_arg, 255); + if (!strlen(data)) { + printk(KERN_ERR + "DLM/GFS2/GFS ERROR: (u)mount helpers are not installed!\n"); + return -EINVAL; + } + for (options = data; (x = strsep(&options, ":")); ) { if (!*x) continue; -- cgit v1.2.3 From 00c134756c5ad570a1ad3d6f93a67fc9c25a67ea Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Nov 2007 09:01:13 -0600 Subject: [GFS2] tidy up error message Print error with log_error() to be consistent with others. Signed-off-by: David Teigland Signed-off-by: Fabio M. Di Nitto Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/mount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index ab301023094f..f2efff424224 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c @@ -68,8 +68,7 @@ static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir) strncpy(data, data_arg, 255); if (!strlen(data)) { - printk(KERN_ERR - "DLM/GFS2/GFS ERROR: (u)mount helpers are not installed!\n"); + log_error("no mount options, (u)mount helpers not installed"); return -EINVAL; } -- cgit v1.2.3 From 1a2781cfa5ed8eb82bb311d684f268c1822dae69 Mon Sep 17 00:00:00 2001 From: Fabio Massimo Di Nitto Date: Fri, 16 Nov 2007 09:50:40 +0000 Subject: [GFS2] Fix runtime issue with UP kernels The issue is indeed UP vs SMP and it is totally random. spin_is_locked() is a bad assertion because there is no correct answer on UP. on UP spin_is_locked() has to return either one value or another, always. This means that in my setup I am lucky enough to trigger the issue and your you are lucky enough not to. the patch in attachment removes the bogus calls to BUG_ON and according to David (in CC and thanks for the long explanation on the problem) we can rely upon things like lockdep to find problem that might be trying to catch. Signed-off-by: Fabio M. Di Nitto Cc: David S. Miller Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 3 --- fs/gfs2/log.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d83df6888402..a7f3c462d4fe 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -217,7 +217,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) if (atomic_dec_and_test(&gl->gl_ref)) { hlist_del(&gl->gl_list); write_unlock(gl_lock_addr(gl->gl_hash)); - BUG_ON(spin_is_locked(&gl->gl_spin)); gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); gfs2_assert(sdp, list_empty(&gl->gl_holders)); @@ -460,7 +459,6 @@ static void wait_on_holder(struct gfs2_holder *gh) static void gfs2_demote_wake(struct gfs2_glock *gl) { - BUG_ON(!spin_is_locked(&gl->gl_spin)); gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); smp_mb__after_clear_bit(); @@ -680,7 +678,6 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) gl->gl_owner_pid = 0; gl->gl_ip = 0; run_queue(gl); - BUG_ON(!spin_is_locked(&gl->gl_spin)); spin_unlock(&gl->gl_spin); } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 4dcc7a8cda22..96dcf050e6c9 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -92,8 +92,6 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) struct buffer_head *bh; int retry; - BUG_ON(!spin_is_locked(&sdp->sd_log_lock)); - do { retry = 0; -- cgit v1.2.3 From 002ef1dc63ded14507c110d3cf83d0c3f51374ab Mon Sep 17 00:00:00 2001 From: Ryan O'Hara Date: Wed, 21 Nov 2007 11:54:54 -0600 Subject: [GFS2] remove unnecessary permission checks Remove read/write permission() checks from xattr operations. VFS layer is already handling permission for xattrs via the xattr_permission() call, so there is no need for gfs2 to check permissions. Futhermore, using permission() for SELinux xattrs ops is incorrect. Signed-off-by: Ryan O'Hara Signed-off-by: Steven Whitehouse --- fs/gfs2/eaops.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index aa8dbf303f6d..ef91b6e893a0 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -59,9 +59,6 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_READ, NULL); - if (error) - return error; return gfs2_ea_get_i(ip, er); } @@ -70,14 +67,6 @@ static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - if (S_ISREG(inode->i_mode) || - (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - } else - return -EPERM; - return gfs2_ea_set_i(ip, er); } @@ -85,14 +74,6 @@ static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - if (S_ISREG(inode->i_mode) || - (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - } else - return -EPERM; - return gfs2_ea_remove_i(ip, er); } @@ -108,8 +89,6 @@ static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) return -EOPNOTSUPP; - - return gfs2_ea_get_i(ip, er); } @@ -173,9 +152,6 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_READ, NULL); - if (error) - return error; return gfs2_ea_get_i(ip, er); } @@ -183,9 +159,6 @@ static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; return gfs2_ea_set_i(ip, er); } @@ -193,9 +166,6 @@ static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) { struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; return gfs2_ea_remove_i(ip, er); } -- cgit v1.2.3 From 6a69a23f7df18f39e4a084e10b62ff4a144b05a5 Mon Sep 17 00:00:00 2001 From: Fabio Massimo Di Nitto Date: Tue, 27 Nov 2007 06:16:42 +0100 Subject: [GFS2] Fix build warnings Hi Steven, Steven Whitehouse wrote: > Hi, > > Now in the -nmw git tree. Thanks, > > Steve. > > On Wed, 2007-11-21 at 11:54 -0600, Ryan O'Hara wrote: this patch introduces a bunch of build warnings by leaving around struct inode *inode = &ip->i_inode; The patch in attachment cleans them up. Please apply. Signed-off-by: Fabio Massimo Di Nitto Signed-off-by: Steven Whitehouse --- fs/gfs2/eaops.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index ef91b6e893a0..14fbd95fd664 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -58,22 +58,16 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_get_i(ip, er); } static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_set_i(ip, er); } static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_remove_i(ip, er); } @@ -151,22 +145,16 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_get_i(ip, er); } static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_set_i(ip, er); } static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) { - struct inode *inode = &ip->i_inode; - return gfs2_ea_remove_i(ip, er); } -- cgit v1.2.3 From bcd405599faa16cf32a3d3f1ce6a1e12cb37fede Mon Sep 17 00:00:00 2001 From: "Fabio M. Di Nitto" Date: Wed, 28 Nov 2007 16:22:09 +0100 Subject: [GFS2] Remove unrequired code Signed-off-by: Fabio M. Di Nitto Signed-off-by: Steven Whitehouse --- fs/gfs2/eaops.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index 14fbd95fd664..f114ba2b3557 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -56,21 +56,6 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name) return type; } -static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_get_i(ip, er); -} - -static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_set_i(ip, er); -} - -static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_remove_i(ip, er); -} - static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && @@ -143,25 +128,10 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) return gfs2_ea_remove_i(ip, er); } -static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_get_i(ip, er); -} - -static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_set_i(ip, er); -} - -static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - return gfs2_ea_remove_i(ip, er); -} - static const struct gfs2_eattr_operations gfs2_user_eaops = { - .eo_get = user_eo_get, - .eo_set = user_eo_set, - .eo_remove = user_eo_remove, + .eo_get = gfs2_ea_get_i, + .eo_set = gfs2_ea_set_i, + .eo_remove = gfs2_ea_remove_i, .eo_name = "user", }; @@ -173,9 +143,9 @@ const struct gfs2_eattr_operations gfs2_system_eaops = { }; static const struct gfs2_eattr_operations gfs2_security_eaops = { - .eo_get = security_eo_get, - .eo_set = security_eo_set, - .eo_remove = security_eo_remove, + .eo_get = gfs2_ea_get_i, + .eo_set = gfs2_ea_set_i, + .eo_remove = gfs2_ea_remove_i, .eo_name = "security", }; -- cgit v1.2.3 From c97bfe4351771675963e02f34d31e206fd2d7150 Mon Sep 17 00:00:00 2001 From: Wendy Cheng Date: Thu, 29 Nov 2007 17:56:51 -0500 Subject: [GFS2] Remove lock methods for lock_nolock protocol GFS2 supports two modes of locking - lock_nolock for single node filesystem and lock_dlm for cluster mode locking. The gfs2 lock methods are removed from file operation table for lock_nolock protocol. This would allow VFS to handle posix lock and flock logics just like other in-tree filesystems without duplication. Signed-off-by: S. Wendy Cheng Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 11 +++++++++-- fs/gfs2/ops_file.c | 36 ++++++++++++++++++++++++------------ fs/gfs2/ops_inode.h | 2 ++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 92959d093adf..53bca9978fb5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -131,14 +131,21 @@ static struct inode *gfs2_iget_skip(struct super_block *sb, void gfs2_set_iop(struct inode *inode) { + struct gfs2_sbd *sdp = GFS2_SB(inode); umode_t mode = inode->i_mode; if (S_ISREG(mode)) { inode->i_op = &gfs2_file_iops; - inode->i_fop = &gfs2_file_fops; + if (sdp->sd_args.ar_localflocks) + inode->i_fop = &gfs2_file_fops_nolock; + else + inode->i_fop = &gfs2_file_fops; } else if (S_ISDIR(mode)) { inode->i_op = &gfs2_dir_iops; - inode->i_fop = &gfs2_dir_fops; + if (sdp->sd_args.ar_localflocks) + inode->i_fop = &gfs2_dir_fops_nolock; + else + inode->i_fop = &gfs2_dir_fops; } else if (S_ISLNK(mode)) { inode->i_op = &gfs2_symlink_iops; } else { diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index ad5daaa6babc..db76ac1947e7 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -615,15 +615,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; - if (sdp->sd_args.ar_localflocks) { - if (IS_GETLK(cmd)) { - posix_test_lock(file, fl); - return 0; - } else { - return posix_lock_file_wait(file, fl); - } - } - if (cmd == F_CANCELLK) { /* Hack: */ cmd = F_SETLK; @@ -716,9 +707,6 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; - if (sdp->sd_args.ar_localflocks) - return flock_lock_file_wait(file, fl); - if (fl->fl_type == F_UNLCK) { do_unflock(file, fl); return 0; @@ -755,3 +743,27 @@ const struct file_operations gfs2_dir_fops = { .flock = gfs2_flock, }; +const struct file_operations gfs2_file_fops_nolock = { + .llseek = gfs2_llseek, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, + .setlease = gfs2_setlease, +}; + +const struct file_operations gfs2_dir_fops_nolock = { + .readdir = gfs2_readdir, + .unlocked_ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, +}; + diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index edb519cb05ee..fd8cee231e1d 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h @@ -18,6 +18,8 @@ extern const struct inode_operations gfs2_symlink_iops; extern const struct inode_operations gfs2_dev_iops; extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; +extern const struct file_operations gfs2_file_fops_nolock; +extern const struct file_operations gfs2_dir_fops_nolock; extern void gfs2_set_inode_flags(struct inode *inode); -- cgit v1.2.3 From 292c8c14cace19c94c6abe25506310239daf949e Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Thu, 29 Nov 2007 14:13:54 -0600 Subject: [GFS2] patch to check for recursive lock requests in gfs2_rename code path A certain scenario in the rename code path triggers a kernel BUG() because it accidentally does recursive locking The first lock is requested to unlink an already existing inode (replacing a file) and the second lock is requested when the destination directory needs to alloc some space. It is rare that these two events happen during the same rename call, and even more rare that these two instances try to lock the same rgrp. It is, however, possible. https://bugzilla.redhat.com/show_bug.cgi?id=404711 Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 09848aac45f6..e0ee195558d3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1063,22 +1063,30 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) int flags = LM_FLAG_TRY; int skipped = 0; int loops = 0; - int error; + int error, rg_locked; /* Try recently successful rgrps */ rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); while (rgd) { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_TRY, &al->al_rgd_gh); + rg_locked = 0; + + if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + rg_locked = 1; + error = 0; + } else { + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_TRY, &al->al_rgd_gh); + } switch (error) { case 0: if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) inode = try_rgrp_unlink(rgd, last_unlinked); - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (!rg_locked) + gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) return inode; rgd = recent_rgrp_next(rgd, 1); @@ -1098,15 +1106,23 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) begin = rgd = forward_rgrp_get(sdp); for (;;) { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, - &al->al_rgd_gh); + rg_locked = 0; + + if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + rg_locked = 1; + error = 0; + } else { + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, + &al->al_rgd_gh); + } switch (error) { case 0: if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) inode = try_rgrp_unlink(rgd, last_unlinked); - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (!rg_locked) + gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) return inode; break; @@ -1213,7 +1229,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) al->al_line); al->al_rgd = NULL; - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (al->al_rgd_gh.gh_gl) + gfs2_glock_dq_uninit(&al->al_rgd_gh); if (ip != GFS2_I(sdp->sd_rindex)) gfs2_glock_dq_uninit(&al->al_ri_gh); } -- cgit v1.2.3 From dbee2199c37336e89060fbe9abdfd1ca8454372a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 30 Nov 2007 08:17:15 +0000 Subject: [GFS2] Remove unused variable Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index db76ac1947e7..2569c13eb108 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -700,7 +700,6 @@ static void do_unflock(struct file *file, struct file_lock *fl) static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; -- cgit v1.2.3 From 2066b58b0a038d7aedd24133677efb8856cac3a1 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 6 Dec 2007 09:35:25 -0600 Subject: [GFS2] use pid for plock owner for nfs clients The fl_owner is that of lockd when posix locks arrive from nfs clients, so it can't be used to distinguish between lock holders. Use fl_pid as owner instead; it's the pid of the process on the nfs client. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/locking/dlm/plock.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index 1f7b038530b4..2ebd374b3143 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c @@ -89,15 +89,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name, op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* fl_owner is lockd which doesn't distinguish + processes on the nfs client */ + op->info.owner = (__u64) fl->fl_pid; xop->callback = fl->fl_lmops->fl_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; xop->file = file; - } else + } else { + op->info.owner = (__u64)(long) fl->fl_owner; xop->callback = NULL; + } send_op(op); @@ -203,7 +207,10 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name, op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); @@ -242,7 +249,10 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name, op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); -- cgit v1.2.3 From e9e1ef2b6ee401d7c1e1eb38052857b4b206d172 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 10 Dec 2007 14:13:27 -0600 Subject: [GFS2] Remove function gfs2_get_block This patch is just a cleanup. Function gfs2_get_block() just calls function gfs2_block_map reversing the last two parameters. By reversing the parameters, gfs2_block_map() may be called directly and function gfs2_get_block may be eliminated altogether. Since this function is done for every block operation, this streamlines the code and makes it a little bit more efficient. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 8 ++++---- fs/gfs2/bmap.h | 2 +- fs/gfs2/log.c | 2 +- fs/gfs2/ops_address.c | 30 +++++++----------------------- fs/gfs2/ops_address.h | 2 -- fs/gfs2/ops_file.c | 2 +- fs/gfs2/quota.c | 4 ++-- fs/gfs2/recovery.c | 2 +- 8 files changed, 17 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1cfd493e30fb..49486029edc2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -452,8 +452,8 @@ static inline void bmap_unlock(struct inode *inode, int create) * Returns: errno */ -int gfs2_block_map(struct inode *inode, u64 lblock, int create, - struct buffer_head *bh_map) +int gfs2_block_map(struct inode *inode, sector_t lblock, + struct buffer_head *bh_map, int create) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -559,7 +559,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi BUG_ON(!new); bh.b_size = 1 << (inode->i_blkbits + 5); - ret = gfs2_block_map(inode, lblock, create, &bh); + ret = gfs2_block_map(inode, lblock, &bh, create); *extlen = bh.b_size >> inode->i_blkbits; *dblock = bh.b_blocknr; if (buffer_new(&bh)) @@ -909,7 +909,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) err = 0; if (!buffer_mapped(bh)) { - gfs2_get_block(inode, iblock, bh, 0); + gfs2_block_map(inode, iblock, bh, 0); /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) goto unlock; diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index ac2fd04370dc..4e6cde2943bd 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -15,7 +15,7 @@ struct gfs2_inode; struct page; int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); -int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh); +int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); int gfs2_truncatei(struct gfs2_inode *ip, u64 size); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 96dcf050e6c9..14333d81cf7d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -344,7 +344,7 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; bh_map.b_size = 1 << inode->i_blkbits; - error = gfs2_block_map(inode, lbn, 0, &bh_map); + error = gfs2_block_map(inode, lbn, &bh_map, 0); if (error || !bh_map.b_blocknr) printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, (unsigned long long)bh_map.b_blocknr, lbn); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 7353933483bb..8f94e306c862 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -58,22 +58,6 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, } } -/** - * gfs2_get_block - Fills in a buffer head with details about a block - * @inode: The inode - * @lblock: The block number to look up - * @bh_result: The buffer head to return the result in - * @create: Non-zero if we may add block to the file - * - * Returns: errno - */ - -int gfs2_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, create, bh_result); -} - /** * gfs2_get_block_noalloc - Fills in a buffer head with details about a block * @inode: The inode @@ -89,7 +73,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, { int error; - error = gfs2_block_map(inode, lblock, 0, bh_result); + error = gfs2_block_map(inode, lblock, bh_result, 0); if (error) return error; if (!buffer_mapped(bh_result)) @@ -100,7 +84,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, struct buffer_head *bh_result, int create) { - return gfs2_block_map(inode, lblock, 0, bh_result); + return gfs2_block_map(inode, lblock, bh_result, 0); } /** @@ -504,7 +488,7 @@ static int __gfs2_readpage(void *file, struct page *page) error = stuffed_readpage(ip, page); unlock_page(page); } else { - error = mpage_readpage(page, gfs2_get_block); + error = mpage_readpage(page, gfs2_block_map); } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) @@ -598,7 +582,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, * Any I/O we ignore at this time will be done via readpage later. * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. - * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. + * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. */ static int gfs2_readpages(struct file *file, struct address_space *mapping, @@ -615,7 +599,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, if (unlikely(ret)) goto out_uninit; if (!gfs2_is_stuffed(ip)) - ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); + ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); @@ -710,7 +694,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, } prepare_write: - error = block_prepare_write(page, from, to, gfs2_get_block); + error = block_prepare_write(page, from, to, gfs2_block_map); out: if (error == 0) return 0; @@ -923,7 +907,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) return 0; if (!gfs2_is_stuffed(ip)) - dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); + dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h index d3b76d0cdc81..5da21285bba4 100644 --- a/fs/gfs2/ops_address.h +++ b/fs/gfs2/ops_address.h @@ -14,8 +14,6 @@ #include #include -extern int gfs2_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create); extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); extern int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 2569c13eb108..597f7ff2bc11 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -323,7 +323,7 @@ static int gfs2_allocate_page_backing(struct page *page) do { bh.b_state = 0; bh.b_size = size; - gfs2_block_map(inode, lblock, 1, &bh); + gfs2_block_map(inode, lblock, &bh, 1); if (!buffer_mapped(&bh)) return -EIO; size -= bh.b_size; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 4996f0ef3007..8b4c20c49ca7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -276,7 +276,7 @@ static int bh_get(struct gfs2_quota_data *qd) offset = qd->qd_slot % sdp->sd_qc_per_block;; bh_map.b_size = 1 << ip->i_inode.i_blkbits; - error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map); + error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); if (error) goto fail; error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); @@ -645,7 +645,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, } if (!buffer_mapped(bh)) { - gfs2_get_block(inode, iblock, bh, 1); + gfs2_block_map(inode, iblock, bh, 1); if (!buffer_mapped(bh)) goto unlock; } diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index beb6c7ac0086..27c994f2d1f0 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -391,7 +391,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea lblock = head->lh_blkno; gfs2_replay_incr_blk(sdp, &lblock); bh_map.b_size = 1 << ip->i_inode.i_blkbits; - error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map); + error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) return error; if (!bh_map.b_blocknr) { -- cgit v1.2.3 From da6dd40d59fa9617ed697b90114e197036901632 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 18:49:21 -0600 Subject: [GFS2] Journal extent mapping This patch saves a little time when gfs2 writes to the journals by keeping a mapping between logical and physical blocks on disk. That's better than constantly looking up indirect pointers in buffers, when the journals are several levels of indirection (which they typically are). Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 11 ++++++++- fs/gfs2/log.c | 22 +++++++---------- fs/gfs2/ops_fstype.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++- fs/gfs2/super.c | 13 ++++++++-- 4 files changed, 97 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 330f4c73d0e7..51166c12c5d7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -360,8 +360,17 @@ struct gfs2_ail { u64 ai_sync_gen; }; +struct gfs2_journal_extent { + struct list_head extent_list; + + unsigned int lblock; /* First logical block */ + u64 dblock; /* First disk block */ + u64 blocks; +}; + struct gfs2_jdesc { struct list_head jd_list; + struct list_head extent_list; struct inode *jd_inode; unsigned int jd_jid; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 14333d81cf7d..69a583ec43c7 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -339,18 +339,14 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) { - struct inode *inode = sdp->sd_jdesc->jd_inode; - int error; - struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; - - bh_map.b_size = 1 << inode->i_blkbits; - error = gfs2_block_map(inode, lbn, &bh_map, 0); - if (error || !bh_map.b_blocknr) - printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, - (unsigned long long)bh_map.b_blocknr, lbn); - gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); - - return bh_map.b_blocknr; + struct gfs2_journal_extent *je; + + list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { + if (lbn >= je->lblock && lbn < je->lblock + je->blocks) + return je->dblock + lbn; + } + + return -1; } /** diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1bba6ac0bcac..0921f17a164c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -21,6 +21,7 @@ #include "gfs2.h" #include "incore.h" +#include "bmap.h" #include "daemon.h" #include "glock.h" #include "glops.h" @@ -302,6 +303,68 @@ out: return error; } +/** + * map_journal_extents - create a reusable "extent" mapping from all logical + * blocks to all physical blocks for the given journal. This will save + * us time when writing journal blocks. Most journals will have only one + * extent that maps all their logical blocks. That's because gfs2.mkfs + * arranges the journal blocks sequentially to maximize performance. + * So the extent would map the first block for the entire file length. + * However, gfs2_jadd can happen while file activity is happening, so + * those journals may not be sequential. Less likely is the case where + * the users created their own journals by mounting the metafs and + * laying it out. But it's still possible. These journals might have + * several extents. + * + * TODO: This should be done in bigger chunks rather than one block at a time, + * but since it's only done at mount time, I'm not worried about the + * time it takes. + */ +static int map_journal_extents(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd = sdp->sd_jdesc; + unsigned int lb; + u64 db, prev_db; /* logical block, disk block, prev disk block */ + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct gfs2_journal_extent *jext = NULL; + struct buffer_head bh; + int rc = 0; + + INIT_LIST_HEAD(&jd->extent_list); + prev_db = 0; + + for (lb = 0; lb < ip->i_di.di_size / sdp->sd_sb.sb_bsize; lb++) { + bh.b_state = 0; + bh.b_blocknr = 0; + bh.b_size = 1 << ip->i_inode.i_blkbits; + rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); + db = bh.b_blocknr; + if (rc || !db) { + printk(KERN_INFO "GFS2 journal mapping error %d: lb=" + "%u db=%llu\n", rc, lb, (unsigned long long)db); + break; + } + if (!prev_db || db != prev_db + 1) { + jext = kzalloc(sizeof(struct gfs2_journal_extent), + GFP_KERNEL); + if (!jext) { + printk(KERN_INFO "GFS2 error: out of memory " + "mapping journal extents.\n"); + rc = -ENOMEM; + break; + } + jext->dblock = db; + jext->lblock = lb; + jext->blocks = 1; + list_add_tail(&jext->extent_list, &jd->extent_list); + } else { + jext->blocks++; + } + prev_db = db; + } + return rc; +} + static int init_journal(struct gfs2_sbd *sdp, int undo) { struct gfs2_holder ji_gh; @@ -377,6 +440,9 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) goto fail_jinode_gh; } atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); + + /* Map the extents for this journal's blocks */ + map_journal_extents(sdp); } if (sdp->sd_lockstruct.ls_first) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2e74792ee487..22e09660d648 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -416,8 +416,9 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) void gfs2_jindex_free(struct gfs2_sbd *sdp) { - struct list_head list; + struct list_head list, *head; struct gfs2_jdesc *jd; + struct gfs2_journal_extent *jext; spin_lock(&sdp->sd_jindex_spin); list_add(&list, &sdp->sd_jindex_list); @@ -427,6 +428,14 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) while (!list_empty(&list)) { jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + head = &jd->extent_list; + while (!list_empty(head)) { + jext = list_entry(head->next, + struct gfs2_journal_extent, + extent_list); + list_del(&jext->extent_list); + kfree(jext); + } list_del(&jd->jd_list); iput(jd->jd_inode); kfree(jd); -- cgit v1.2.3 From 0d0868bde33273a200b33e54f4fad6099ad0c566 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 18:51:25 -0600 Subject: [GFS2] Get rid of useless "found" variable in quota.c This just eliminates an unused variable from the quota code. Not likely to be a time saver. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8b4c20c49ca7..60cc50fe15b4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -273,7 +273,7 @@ static int bh_get(struct gfs2_quota_data *qd) } block = qd->qd_slot / sdp->sd_qc_per_block; - offset = qd->qd_slot % sdp->sd_qc_per_block;; + offset = qd->qd_slot % sdp->sd_qc_per_block; bh_map.b_size = 1 << ip->i_inode.i_blkbits; error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); @@ -1016,7 +1016,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, struct gfs2_alloc *al = &ip->i_alloc; struct gfs2_quota_data *qd; unsigned int x; - unsigned int found = 0; if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) return; @@ -1029,7 +1028,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { do_qc(qd, change); - found++; } } } -- cgit v1.2.3 From 5fdc2eeb5d1d3800367f471690b01fcd1fd5b963 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 19:00:16 -0600 Subject: [GFS2] Run through full bitmaps quicker in gfs2_bitfit I eliminated the passing of an unused parameter into gfs2_bitfit called rgd. This also changes the gfs2_bitfit code that searches for free (or used) blocks. Before, the code was trying to check for bytes that indicated 4 blocks in the undesired state. The problem is, it was spending more time trying to do this than it actually was saving. This version only optimizes the case where we're looking for free blocks, and it checks a machine word at a time. So on 32-bit machines, it will check 32-bits (16 blocks) and on 64-bit machines, it will check 64-bits (32 blocks) at a time. The compiler optimizes that quite well and we save some time, especially when running through full bitmaps (like the bitmaps allocated for the journals). There's probably a more elegant or optimized way to do this, but I haven't thought of it yet. I'm open to suggestions. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 54 +++++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e0ee195558d3..d7ff9cf6653f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -126,41 +126,46 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, * Return: the block number (bitmap buffer scope) that was found */ -static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, - unsigned int buflen, u32 goal, - unsigned char old_state) +static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, + unsigned char old_state) { - unsigned char *byte, *end, alloc; + unsigned char *byte; u32 blk = goal; - unsigned int bit; + unsigned int bit, bitlong; + unsigned long *plong, plong55; + static int c = 0; byte = buffer + (goal / GFS2_NBBY); + plong = buffer + (goal / GFS2_NBBY); bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; - - while (byte < end) { - /* If we're looking for a free block we can eliminate all - bitmap settings with 0x55, which represents four data - blocks in a row. If we're looking for a data block, we can - eliminate 0x00 which corresponds to four free blocks. */ - if ((*byte & 0x55) == alloc) { - blk += (8 - bit) >> 1; - - bit = 0; - byte++; - + bitlong = bit; +#if BITS_PER_LONG == 32 + plong55 = 0x55555555; +#else + plong55 = 0x5555555555555555; +#endif + while (byte < buffer + buflen) { + + if (bitlong == 0 && old_state == 0 && *plong == plong55) { + plong++; + byte += sizeof(unsigned long); + blk += sizeof(unsigned long) * GFS2_NBBY; continue; } - - if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) + if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) { + c++; return blk; - + } bit += GFS2_BIT_SIZE; if (bit >= 8) { bit = 0; byte++; } + bitlong += GFS2_BIT_SIZE; + if (bitlong >= sizeof(unsigned long) * 8) { + bitlong = 0; + plong++; + } blk++; } @@ -1318,11 +1323,10 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) - blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, + blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, bi->bi_len, goal, old_state); else - blk = gfs2_bitfit(rgd, - bi->bi_bh->b_data + bi->bi_offset, + blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, bi->bi_len, goal, old_state); if (blk != BFITNOENT) break; -- cgit v1.2.3 From 398bbe68321947f6763fbc259a01eb548ce19408 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 19:13:54 -0600 Subject: [GFS2] Reorganize function gfs2_glmutex_lock This patch optimizes the function gfs2_glmutex_lock. The basic theory is: Why bother initializing a holder, setting up wait bits and then waiting on them, if you know the glock can be yours. So the holder stuff is placed inside the if checking if the glock is locked. This one needs careful scrutiny because changing anything to do with locking should strike terror into one's heart. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a7f3c462d4fe..80e09c50590a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -620,26 +620,21 @@ static void run_queue(struct gfs2_glock *gl) static void gfs2_glmutex_lock(struct gfs2_glock *gl) { - struct gfs2_holder gh; - - gfs2_holder_init(gl, 0, 0, &gh); - if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) - BUG(); - spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + struct gfs2_holder gh; + + gfs2_holder_init(gl, 0, 0, &gh); + set_bit(HIF_WAIT, &gh.gh_iflags); list_add_tail(&gh.gh_list, &gl->gl_waiters1); + spin_unlock(&gl->gl_spin); + wait_on_holder(&gh); + gfs2_holder_uninit(&gh); } else { gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); - clear_bit(HIF_WAIT, &gh.gh_iflags); - smp_mb(); - wake_up_bit(&gh.gh_iflags, HIF_WAIT); + spin_unlock(&gl->gl_spin); } - spin_unlock(&gl->gl_spin); - - wait_on_holder(&gh); - gfs2_holder_uninit(&gh); } /** -- cgit v1.2.3 From b0d5fd307463405fe1f57494fbb37f810715ed6d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 19:16:09 -0600 Subject: [GFS2] Only fetch the dinode once in block_map Function gfs2_block_map was often looking up the disk inode twice. This optimizes it so that only does it once. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49486029edc2..224114166529 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -469,6 +469,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; struct metapath mp; u64 size; + struct buffer_head *dibh = NULL; BUG_ON(maxlen == 0); @@ -499,6 +500,8 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_fail; + dibh = bh; + get_bh(dibh); for (x = 0; x < end_of_metadata; x++) { lookup_block(ip, bh, x, &mp, create, &new, &dblock); @@ -517,13 +520,8 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, if (boundary) set_buffer_boundary(bh_map); if (new) { - struct buffer_head *dibh; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - } + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); set_buffer_new(bh_map); goto out_brelse; } @@ -544,6 +542,8 @@ out_brelse: out_ok: error = 0; out_fail: + if (dibh) + brelse(dibh); bmap_unlock(inode, create); return error; } -- cgit v1.2.3 From 15c7cee7995a9013f1b2f31a15b70e1d2e8ae501 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 11 Dec 2007 19:29:17 -0600 Subject: [GFS2] Function meta_read optimization This patch optimizes function gfs2_meta_read. Basically, gfs2_meta_wait was being called regardless of whether a disk read was requested. This just pulls that wait into the if that triggers the read. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4b1aced9023d..3144d35a6261 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -222,13 +222,14 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { *bhp = getbuf(gl, blkno, CREATE); - if (!buffer_uptodate(*bhp)) + if (!buffer_uptodate(*bhp)) { ll_rw_block(READ_META, 1, bhp); - if (flags & DIO_WAIT) { - int error = gfs2_meta_wait(gl->gl_sbd, *bhp); - if (error) { - brelse(*bhp); - return error; + if (flags & DIO_WAIT) { + int error = gfs2_meta_wait(gl->gl_sbd, *bhp); + if (error) { + brelse(*bhp); + return error; + } } } -- cgit v1.2.3 From b3513fca7e41965d85125c9770ce5f8fd4ff509a Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Dec 2007 09:24:08 -0600 Subject: [GFS2] Incremental patch to fix compiler warning Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index d7ff9cf6653f..68c4bf363c46 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -133,10 +133,9 @@ static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, u32 blk = goal; unsigned int bit, bitlong; unsigned long *plong, plong55; - static int c = 0; byte = buffer + (goal / GFS2_NBBY); - plong = buffer + (goal / GFS2_NBBY); + plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; bitlong = bit; #if BITS_PER_LONG == 32 @@ -152,10 +151,8 @@ static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, blk += sizeof(unsigned long) * GFS2_NBBY; continue; } - if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) { - c++; + if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) return blk; - } bit += GFS2_BIT_SIZE; if (bit >= 8) { bit = 0; -- cgit v1.2.3 From c3f60b6e3a7667f78a63b15cf09655ecfca757fc Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Dec 2007 11:44:41 -0600 Subject: [GFS2] Eliminate the no longer needed sd_statfs_mutex This patch eliminates the unneeded sd_statfs_mutex mutex but preserves the ordering as discussed. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - fs/gfs2/ops_fstype.c | 1 - fs/gfs2/super.c | 4 ---- 3 files changed, 6 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 51166c12c5d7..350b5169a9a0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -529,7 +529,6 @@ struct gfs2_sbd { /* StatFS stuff */ spinlock_t sd_statfs_spin; - struct mutex sd_statfs_mutex; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; unsigned long sd_statfs_sync_time; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 0921f17a164c..79f9bb365f10 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -60,7 +60,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) mutex_init(&sdp->sd_inum_mutex); spin_lock_init(&sdp->sd_statfs_spin); - mutex_init(&sdp->sd_statfs_mutex); spin_lock_init(&sdp->sd_rindex_spin); mutex_init(&sdp->sd_rindex_mutex); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 22e09660d648..5d0017d313a3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -688,9 +688,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, if (error) return; - mutex_lock(&sdp->sd_statfs_mutex); gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); - mutex_unlock(&sdp->sd_statfs_mutex); spin_lock(&sdp->sd_statfs_spin); l_sc->sc_total += total; @@ -738,9 +736,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) if (error) goto out_bh2; - mutex_lock(&sdp->sd_statfs_mutex); gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); - mutex_unlock(&sdp->sd_statfs_mutex); spin_lock(&sdp->sd_statfs_spin); m_sc->sc_total += l_sc->sc_total; -- cgit v1.2.3 From fa3742fa8545df20e54aa0953a1873cca3a9bd92 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Dec 2007 17:52:13 -0600 Subject: [GFS2] Minor correction This is a small correction to my previously posted patch1. It just changes a divide to a shift. It's faster and doesn't introduce odd dependencies on 32-bit compiles. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 79f9bb365f10..5537798af381 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -332,7 +332,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) INIT_LIST_HEAD(&jd->extent_list); prev_db = 0; - for (lb = 0; lb < ip->i_di.di_size / sdp->sd_sb.sb_bsize; lb++) { + for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) { bh.b_state = 0; bh.b_blocknr = 0; bh.b_size = 1 << ip->i_inode.i_blkbits; -- cgit v1.2.3 From ff91cc9bb41b62bc4ea7d5ced396fabf97539df9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 14 Dec 2007 14:04:34 +0000 Subject: [GFS2] Fix log block mapper A missing offset in the calculation. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 69a583ec43c7..91645259e135 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -343,7 +343,7 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { if (lbn >= je->lblock && lbn < je->lblock + je->blocks) - return je->dblock + lbn; + return je->dblock + lbn - je->lblock; } return -1; -- cgit v1.2.3 From 65a6290998f3d38b5c5e84423ae9e08bdd957095 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 2 Jan 2008 10:16:56 +0000 Subject: [GFS2] Remove unused variable The go_drop_th function is never called or referenced. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 350b5169a9a0..745dada4085c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -131,7 +131,6 @@ struct gfs2_bufdata { struct gfs2_glock_operations { void (*go_xmote_th) (struct gfs2_glock *gl); void (*go_xmote_bh) (struct gfs2_glock *gl); - void (*go_drop_th) (struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); -- cgit v1.2.3 From e5d9dc278c7f79c220e4506cc1ade2efa2ca73fd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 3 Jan 2008 11:31:38 +0000 Subject: [GFS2] Allow page migration for writeback and ordered pages To improve performance on NUMA, we use the VM's standard page migration for writeback and ordered pages. Probably we could also do the same for journaled data, but that would need a careful audit of the code, so will be the subject of a later patch. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 8f94e306c862..e16ad8104495 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -1098,6 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = { .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, }; static const struct address_space_operations gfs2_ordered_aops = { @@ -1112,6 +1113,7 @@ static const struct address_space_operations gfs2_ordered_aops = { .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, }; static const struct address_space_operations gfs2_jdata_aops = { -- cgit v1.2.3 From 0811a127cb83ad2e0355e5e3e30164d7ef0f2d65 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 3 Jan 2008 09:24:53 -0600 Subject: [GFS2] Initialize extent_list earlier Here is a patch for the latest upstream GFS2 code: The journal extent map needs to be initialized sooner than it currently is. Otherwise failed mount attempts (e.g. not enough journals, etc.) may panic trying to access the uninitialized list. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 1 - fs/gfs2/super.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 5537798af381..43d511bba52d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -329,7 +329,6 @@ static int map_journal_extents(struct gfs2_sbd *sdp) struct buffer_head bh; int rc = 0; - INIT_LIST_HEAD(&jd->extent_list); prev_db = 0; for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5d0017d313a3..ef0562c3bc71 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -387,6 +387,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) if (!jd) break; + INIT_LIST_HEAD(&jd->extent_list); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) -- cgit v1.2.3 From 9656b2c14c6ee0806c90a6be41dec71117fc8f50 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 8 Jan 2008 08:14:30 +0000 Subject: [GFS2] Fix problems relating to execution of files on GFS2 This patch fixes a couple of problems which affected the execution of files on GFS2. The first is that there was a corner case where inodes were not always uptodate at the point at which permissions checks were being carried out, this was resulting in refusal of execute permission, but only on the first lookup, subsequent requests worked correctly. The second was a problem relating to incorrect updating of file sizes which was introduced with the write_begin/end code for GFS2 a little while back. Signed-off-by: Steven Whitehouse Cc: Abhijith Das --- fs/gfs2/ops_address.c | 13 +++++-------- fs/gfs2/ops_inode.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e16ad8104495..37406a379e7a 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -848,14 +848,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (likely(ret >= 0)) { - copied = ret; - if ((pos + copied) > inode->i_size) { - di = (struct gfs2_dinode *)dibh->b_data; - ip->i_di.di_size = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); - mark_inode_dirty(inode); - } + if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { + di = (struct gfs2_dinode *)dibh->b_data; + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); + mark_inode_dirty(inode); } if (inode == sdp->sd_rindex) diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 291f0c7eaa3b..8386ab323e33 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -113,8 +113,18 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, if (inode && IS_ERR(inode)) return ERR_PTR(PTR_ERR(inode)); - if (inode) + if (inode) { + struct gfs2_glock *gl = GFS2_I(inode)->i_gl; + struct gfs2_holder gh; + int error; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) { + iput(inode); + return ERR_PTR(error); + } + gfs2_glock_dq_uninit(&gh); return d_splice_alias(inode, dentry); + } d_add(dentry, inode); return NULL; -- cgit v1.2.3 From ac39aadd0440ae696e6dacaa8006ce1737b17008 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 10 Jan 2008 14:49:43 +0000 Subject: [GFS2] Fix assert in log code Although the values were all being calculated correctly, there was a race in the assert due to the way it was using atomic variables. This changes the value we assert on so that we get the same effect by testing a different variable. This prevents the assert triggering when it shouldn't. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 91645259e135..161ab6f2058e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -757,7 +757,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int reserved; - unsigned int old; + unsigned int unused; gfs2_log_lock(sdp); @@ -769,14 +769,11 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); reserved = calc_reserved(sdp); - old = atomic_read(&sdp->sd_log_blks_free); - atomic_add(tr->tr_reserved - (reserved - sdp->sd_log_blks_reserved), - &sdp->sd_log_blks_free); - - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) >= old); + unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; + gfs2_assert_withdraw(sdp, unused >= 0); + atomic_add(unused, &sdp->sd_log_blks_free); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); - sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); -- cgit v1.2.3 From 6dbd822487d0a9f14432cb4680415b80656b63a2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 10 Jan 2008 15:18:55 +0000 Subject: [GFS2] Reduce inode size by moving i_alloc out of line It is possible to reduce the size of GFS2 inodes by taking the i_alloc structure out of the gfs2_inode. This patch allocates the i_alloc structure whenever its needed, and frees it afterward. This decreases the amount of low memory we use at the expense of requiring a memory allocation for each page or partial page that we write. A quick test with postmark shows that the overhead is not measurable and I also note that OCFS2 use the same approach. In the future I'd like to solve the problem by shrinking down the size of the members of the i_alloc structure, but for now, this reduces the immediate problem of using too much low-memory on x86 and doesn't add too much overhead. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 4 ++-- fs/gfs2/dir.c | 4 ++-- fs/gfs2/eattr.c | 2 +- fs/gfs2/incore.h | 2 +- fs/gfs2/inode.c | 7 ++++--- fs/gfs2/main.c | 1 + fs/gfs2/ops_address.c | 5 ++--- fs/gfs2/ops_file.c | 6 ++++-- fs/gfs2/ops_inode.c | 8 ++++---- fs/gfs2/quota.c | 12 ++++++------ fs/gfs2/rgrp.c | 20 +++++++++----------- fs/gfs2/rgrp.h | 4 +++- 12 files changed, 39 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 224114166529..73dfad70de66 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -683,7 +683,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh); + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); if (error) return error; @@ -785,7 +785,7 @@ out_rg_gunlock: out_rlist: gfs2_rlist_free(&rlist); out: - gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh); + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); return error; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 9949bb746a52..57e2ed932adc 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1876,7 +1876,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (error) goto out; - error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh); + error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); if (error) goto out_qs; @@ -1949,7 +1949,7 @@ out_rg_gunlock: gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); out_rlist: gfs2_rlist_free(&rlist); - gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh); + gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); out_qs: gfs2_quota_unhold(dip); out: diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 2a7435b5c4dc..bee99704ea10 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -1418,7 +1418,7 @@ out: static int ea_dealloc_block(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd; struct buffer_head *dibh; int error; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 745dada4085c..4cdda1a3e12c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -264,7 +264,7 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_alloc i_alloc; + struct gfs2_alloc *i_alloc; u64 i_last_rg_alloc; spinlock_t i_spin; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 53bca9978fb5..c84764ad82b3 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -711,9 +711,10 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); int error; - gfs2_alloc_get(dip); + if (gfs2_alloc_get(dip) == NULL) + return -ENOMEM; - dip->i_alloc.al_requested = RES_DINODE; + dip->i_alloc->al_requested = RES_DINODE; error = gfs2_inplace_reserve(dip); if (error) goto out; @@ -900,7 +901,7 @@ fail_end_trans: gfs2_trans_end(sdp); fail_ipreserv: - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); fail_quota_locks: diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 653fd5a6203a..88686fcdfb1b 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -31,6 +31,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) inode_init_once(&ip->i_inode); spin_lock_init(&ip->i_spin); init_rwsem(&ip->i_rw_mutex); + ip->i_alloc = NULL; } static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 37406a379e7a..38dbe99a30ed 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -646,7 +646,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - ip->i_alloc.al_requested = 0; if (alloc_required) { al = gfs2_alloc_get(ip); @@ -823,7 +822,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_dinode *di; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; @@ -861,7 +860,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); gfs2_trans_end(sdp); failed: - if (al->al_requested) { + if (al) { gfs2_inplace_release(ip); gfs2_quota_unlock(ip); gfs2_alloc_put(ip); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 597f7ff2bc11..d7f4726ae0ce 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -364,9 +364,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); if (ret || !alloc_required) goto out_unlock; - - ip->i_alloc.al_requested = 0; + ret = -ENOMEM; al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_unlock; + ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (ret) goto out_alloc_put; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 8386ab323e33..9f71372c1757 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -61,7 +61,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); if (!IS_ERR(inode)) { gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -376,7 +376,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, } gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -452,7 +452,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -558,7 +558,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, } gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 60cc50fe15b4..a08dabd6ce90 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -453,7 +453,7 @@ static void qdsb_put(struct gfs2_quota_data *qd) int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data **qd = al->al_qd; int error; @@ -501,7 +501,7 @@ out: void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; unsigned int x; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); @@ -853,7 +853,7 @@ fail: int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; unsigned int x; int error = 0; @@ -921,7 +921,7 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; @@ -969,7 +969,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qd; s64 value; unsigned int x; @@ -1013,7 +1013,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid) { - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qd; unsigned int x; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 68c4bf363c46..3552110b2e5f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -819,11 +819,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) { - struct gfs2_alloc *al = &ip->i_alloc; - - /* FIXME: Should assert that the correct locks are held here... */ - memset(al, 0, sizeof(*al)); - return al; + BUG_ON(ip->i_alloc != NULL); + ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); + return ip->i_alloc; } /** @@ -1061,7 +1059,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) struct inode *inode = NULL; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; int flags = LM_FLAG_TRY; int skipped = 0; int loops = 0; @@ -1176,7 +1174,7 @@ out: int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct inode *inode; int error = 0; u64 last_unlinked = NO_BLOCK; @@ -1222,7 +1220,7 @@ try_again: void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) fs_warn(sdp, "al_alloced = %u, al_requested = %u " @@ -1412,7 +1410,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u64 gfs2_alloc_data(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 goal, blk; u64 block; @@ -1457,7 +1455,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) u64 gfs2_alloc_meta(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 goal, blk; u64 block; @@ -1503,7 +1501,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc *al = &dip->i_alloc; + struct gfs2_alloc *al = dip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 blk; u64 block; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4c6adfc6f2e..149bb161f4b6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -32,7 +32,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { - return; /* So we can see where ip->i_alloc is used */ + BUG_ON(ip->i_alloc == NULL); + kfree(ip->i_alloc); + ip->i_alloc = NULL; } int gfs2_inplace_reserve_i(struct gfs2_inode *ip, -- cgit v1.2.3 From 598278bd4808ed81b0e6fa445458a7d549f72a32 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 11 Jan 2008 13:31:12 -0600 Subject: [GFS2] Remove unneeded i_spin This patch removes a vestigial variable "i_spin" from the gfs2_inode structure. This not only saves us memory (>300000 of these in memory for the oom test) it also saves us time because we don't have to spend time initializing it (i.e. slightly better performance). Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - fs/gfs2/main.c | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 4cdda1a3e12c..513aaf0dc0ab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -267,7 +267,6 @@ struct gfs2_inode { struct gfs2_alloc *i_alloc; u64 i_last_rg_alloc; - spinlock_t i_spin; struct rw_semaphore i_rw_mutex; }; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 88686fcdfb1b..9c7765c12d62 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -29,7 +29,6 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) struct gfs2_inode *ip = foo; inode_init_once(&ip->i_inode); - spin_lock_init(&ip->i_spin); init_rwsem(&ip->i_rw_mutex); ip->i_alloc = NULL; } -- cgit v1.2.3 From 05220535196d413db434527a3edcba79b7187df8 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 11 Jan 2008 13:44:50 -0600 Subject: [GFS2] gfs2_alloc_required performance This is a small I/O performance enhancement to gfs2. (Actually, it is a rework of an earlier version I got wrong). The idea here is to check if the write extends past the last block in the file. If so, the function can save itself a lot of time and trouble because it knows an allocate will be required. Benchmarks like iozone should see better performance. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 73dfad70de66..4356cc2fb3f5 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1224,6 +1224,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, unsigned int shift = sdp->sd_sb.sb_bsize_shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; + if (lblock_stop > ip->i_di.di_blocks) { /* writing past the + last block */ + *alloc_required = 1; + return 0; + } } for (; lblock < lblock_stop; lblock += extlen) { -- cgit v1.2.3 From 1af535727bbf68e1da7ac232de47315da4c66ade Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 16 Jan 2008 14:24:05 +0000 Subject: [GFS2] Fix write alloc required shortcut calculation The comparison was being made against the wrong quantity. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4356cc2fb3f5..e4effc47abfc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1222,10 +1222,10 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, do_div(lblock_stop, bsize); } else { unsigned int shift = sdp->sd_sb.sb_bsize_shift; + u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; - if (lblock_stop > ip->i_di.di_blocks) { /* writing past the - last block */ + if (lblock_stop > end_of_file) { *alloc_required = 1; return 0; } -- cgit v1.2.3 From 3e5cd0877e6d2f059dc36b8206cb7e93938151db Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 16 Jan 2008 08:45:39 -0600 Subject: [GFS2] Fix typo This patch fixes a minor typo. Surprisingly, it still compiled. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3144d35a6261..85aea27b4a86 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -284,7 +284,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, return; } - bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), + bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); bd->bd_bh = bh; bd->bd_gl = gl; -- cgit v1.2.3 From b7fe2e391ee7b711d6dfd6a694d60c4f21113cbb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 17 Jan 2008 15:12:03 +0000 Subject: [GFS2] Fix page_mkwrite truncation race path There was a bug in the truncation/invalidation race path for ->page_mkwrite for gfs2. It ought to return 0 so that the effect is the same as if the page was truncated at any of the other points at which the page_lock is dropped. This will result in the restart of the whole page fault path. If it was due to a real truncation (as opposed to an invalidate because we let a glock go) then the ->fault path will pick that up when it gets called again. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index d7f4726ae0ce..f4842f2548cd 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -394,6 +394,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; if (page->index > last_index) goto out_unlock_page; + ret = 0; if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) goto out_unlock_page; if (gfs2_is_stuffed(ip)) { -- cgit v1.2.3 From 1b8177ec1e779bcc3ed89419ff7c80dbc3dcc489 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Sat, 19 Jan 2008 21:50:24 -0600 Subject: [GFS2] Lockup on error I spotted this bug while I was digging around. Looks like it could cause a lockup in some rare error condition. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c84764ad82b3..728d3169e7bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -860,7 +860,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); if (alloc_required < 0) - goto fail; + goto fail_quota_locks; if (alloc_required) { error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); if (error) -- cgit v1.2.3 From 7bc5c414fe6627ec518c82d154c796f0981f5b02 Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Fri, 18 Jan 2008 14:06:37 -0600 Subject: [GFS2] Allow journal recovery on read-only mount This patch allows gfs2 to perform journal recovery even if it is mounted read-only. Strictly speaking, a read-only mount should not be writing to the filesystem, but we do this only to perform journal recovery. A read-only mount will fail if we don't recover the dirty journal. Also, when gfs2 is used as a root filesystem, it will be mounted read-only before being mounted read-write during the boot sequence. A failed read-only mount will panic the machine during bootup. Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/recovery.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 27c994f2d1f0..b249e294a95b 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -504,13 +504,21 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) ro = 1; } else { - if (sdp->sd_vfs->s_flags & MS_RDONLY) - ro = 1; + if (sdp->sd_vfs->s_flags & MS_RDONLY) { + /* check if device itself is read-only */ + ro = bdev_read_only(sdp->sd_vfs->s_bdev); + if (!ro) { + fs_info(sdp, "recovery required on " + "read-only filesystem.\n"); + fs_info(sdp, "write access will be " + "enabled during recovery.\n"); + } + } } if (ro) { - fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n", - jd->jd_jid); + fs_warn(sdp, "jid=%u: Can't replay: read-only block " + "device\n", jd->jd_jid); error = -EROFS; goto fail_gunlock_tr; } -- cgit v1.2.3