diff options
Diffstat (limited to 'fs')
248 files changed, 8350 insertions, 3993 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 6a4ad4bb7a54..062177956239 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -50,6 +50,23 @@ config EXT2_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. +config EXT2_FS_XIP + bool "Ext2 execute in place support" + depends on EXT2_FS + help + Execute in place can be used on memory-backed block devices. If you + enable this option, you can select to mount block devices which are + capable of this feature without using the page cache. + + If you do not use a block device that is capable of using this, + or if unsure, say N. + +config FS_XIP +# execute in place + bool + depends on EXT2_FS_XIP + default y + config EXT3_FS tristate "Ext3 journalling file system support" help @@ -717,6 +734,12 @@ config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU +config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" + depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP + help + Exports the dump image of crashed kernel in ELF format. + config SYSFS bool "sysfs file system support" if EMBEDDED default y @@ -741,56 +764,6 @@ config SYSFS Designers of embedded systems may wish to say N here to conserve space. -config DEVFS_FS - bool "/dev file system support (OBSOLETE)" - depends on EXPERIMENTAL - help - This is support for devfs, a virtual file system (like /proc) which - provides the file system interface to device drivers, normally found - in /dev. Devfs does not depend on major and minor number - allocations. Device drivers register entries in /dev which then - appear automatically, which means that the system administrator does - not have to create character and block special device files in the - /dev directory using the mknod command (or MAKEDEV script) anymore. - - This is work in progress. If you want to use this, you *must* read - the material in <file:Documentation/filesystems/devfs/>, especially - the file README there. 
- - Note that devfs no longer manages /dev/pts! If you are using UNIX98 - ptys, you will also need to mount the /dev/pts filesystem (devpts). - - Note that devfs has been obsoleted by udev, - <http://www.kernel.org/pub/linux/utils/kernel/hotplug/>. - It has been stripped down to a bare minimum and is only provided for - legacy installations that use its naming scheme which is - unfortunately different from the names normal Linux installations - use. - - If unsure, say N. - -config DEVFS_MOUNT - bool "Automatically mount at boot" - depends on DEVFS_FS - help - This option appears if you have CONFIG_DEVFS_FS enabled. Setting - this to 'Y' will make the kernel automatically mount devfs onto /dev - when the system is booted, before the init thread is started. - You can override this with the "devfs=nomount" boot option. - - If unsure, say N. - -config DEVFS_DEBUG - bool "Debug devfs" - depends on DEVFS_FS - help - If you say Y here, then the /dev file system code will generate - debugging messages. See the file - <file:Documentation/filesystems/devfs/boot-options> for more - details. - - If unsure, say N. - config DEVPTS_FS_XATTR bool "/dev/pts Extended Attributes" depends on UNIX98_PTYS @@ -1318,6 +1291,7 @@ config NFS_FS depends on INET select LOCKD select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL help If you are connected to some other (usually local) Unix computer (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing @@ -1360,6 +1334,16 @@ config NFS_V3 If unsure, say Y. +config NFS_V3_ACL + bool "Provide client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists. The server should also be compiled with + the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + + If unsure, say N. 
+ config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1403,6 +1387,7 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1426,6 +1411,10 @@ config NFSD To compile the NFS server support as a module, choose M here: the module will be called nfsd. If unsure, say N. +config NFSD_V2_ACL + bool + depends on NFSD + config NFSD_V3 bool "Provide NFSv3 server support" depends on NFSD @@ -1433,10 +1422,22 @@ config NFSD_V3 If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. If unsure, say Y. +config NFSD_V3_ACL + bool "Provide server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists on exported file systems. NFS clients should + be compiled with the NFSv3 ACL protocol extension; see the + CONFIG_NFS_V3_ACL option. If unsure, say N. + config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL select NFSD_TCP + select CRYPTO_MD5 + select CRYPTO help If you would like to include the NFSv4 server as well as the NFSv2 and NFSv3 servers, say Y here. 
This feature is experimental, and @@ -1477,6 +1478,15 @@ config LOCKD_V4 config EXPORTFS tristate +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS + default y + config SUNRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 443f2bc56ccf..20edcf28bfd2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,6 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ + ioprio.o obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_COMPAT) += compat.o @@ -31,6 +32,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o +obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index 6fc88ae8ad94..7ac07d0d47b9 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -116,7 +116,7 @@ static int kafsasyncd(void *arg) remove_wait_queue(&kafsasyncd_sleepq, &myself); set_current_state(TASK_RUNNING); - try_to_freeze(PF_FREEZE); + try_to_freeze(); /* discard pending signals */ afs_discard_my_signals(); diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c index 86e710dd057e..65bc05ab8182 100644 --- a/fs/afs/kafstimod.c +++ b/fs/afs/kafstimod.c @@ -91,7 +91,7 @@ static int kafstimod(void *arg) complete_and_exit(&kafstimod_dead, 0); } - try_to_freeze(PF_FREEZE); + try_to_freeze(); /* discard pending signals */ afs_discard_my_signals(); @@ -58,6 +58,7 @@ static DEFINE_SPINLOCK(fput_lock); static LIST_HEAD(fput_head); static void aio_kick_handler(void *); +static void aio_queue_work(struct kioctx *); /* aio_setup * Creates the slab caches used by the aio routines, panic on @@ -747,6 +748,14 @@ out: * has already been kicked */ if 
(kiocbIsKicked(iocb)) { __queue_kicked_iocb(iocb); + + /* + * __queue_kicked_iocb will always return 1 here, because + * iocb->ki_run_list is empty at this point so it should + * be safe to unconditionally queue the context into the + * work queue. + */ + aio_queue_work(ctx); } } return ret; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c7b2b8890188..9c09641ce907 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -185,6 +185,19 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); +static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry) +{ + int res = 0; + + while (d_mountpoint(*dentry)) { + int followed = follow_down(mnt, dentry); + if (!followed) + break; + res = 1; + } + return res; +} + static inline int simple_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 500425e24fba..feb6ac427d05 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,9 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!follow_down(&mnt, &dentry)) + if (!autofs4_follow_mount(&mnt, &dentry)) goto done; - while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) - ; - /* This is an autofs submount, we can't expire it */ if (is_autofs4_dentry(dentry)) goto done; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3765c047f157..2a771ec66956 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -205,7 +205,11 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) struct vfsmount *fp_mnt = mntget(mnt); struct dentry *fp_dentry = dget(dentry); - while (follow_down(&fp_mnt, &fp_dentry) && d_mountpoint(fp_dentry)); + if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { + dput(fp_dentry); + 
mntput(fp_mnt); + return -ENOENT; + } fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); status = PTR_ERR(fp); @@ -302,7 +306,14 @@ static int try_to_fill_dentry(struct dentry *dentry, DPRINTK("expire done status=%d", status); - return 0; + /* + * If the directory still exists the mount request must + * continue otherwise it can't be followed at the right + * time during the walk. + */ + status = d_invalidate(dentry); + if (status != -EBUSY) + return 0; } DPRINTK("dentry=%p %.*s ino=%p", diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 5a40d36e5a51..fa2348dcd671 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -191,6 +191,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } if ( !wq ) { + /* Can't wait for an expire if there's no mount */ + if (notify == NFY_NONE && !d_mountpoint(dentry)) { + kfree(name); + up(&sbi->wq_sem); + return -ENOENT; + } + /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if ( !wq ) { diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 009b8920c1ff..dd9baabaf016 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; set_mm_counter(current->mm, rss, 0); current->mm->mmap = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8f6b6b76179..7976a238f0a3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) change some of these later */ set_mm_counter(current->mm, rss, 0); current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); if (retval < 0) { diff --git a/fs/block_dev.c 
b/fs/block_dev.c index c0cbd1bc1a02..e0df94c37b7e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); } -static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd, - unsigned long arg) +static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } @@ -803,7 +802,7 @@ struct file_operations def_blk_fops = { .aio_write = blkdev_file_aio_write, .mmap = generic_file_mmap, .fsync = block_fsync, - .ioctl = block_ioctl, + .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif diff --git a/fs/buffer.c b/fs/buffer.c index 7e9e409feaa7..561e63a14966 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -278,7 +278,7 @@ EXPORT_SYMBOL(thaw_bdev); */ static void do_sync(unsigned long wait) { - wakeup_bdflush(0); + wakeup_pdflush(0); sync_inodes(0); /* All mappings, inodes and their blockdevs */ DQUOT_SYNC(NULL); sync_supers(); /* Write the superblocks */ @@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) return ret; } -asmlinkage long sys_fsync(unsigned int fd) +static long do_fsync(unsigned int fd, int datasync) { struct file * file; struct address_space *mapping; @@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd) if (!file) goto out; - mapping = file->f_mapping; - ret = -EINVAL; if (!file->f_op || !file->f_op->fsync) { /* Why? 
We can still call filemap_fdatawrite */ goto out_putf; } + mapping = file->f_mapping; + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(mapping); @@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd) * which could cause livelocks in fsync_buffers_list */ down(&mapping->host->i_sem); - err = file->f_op->fsync(file, file->f_dentry, 0); + err = file->f_op->fsync(file, file->f_dentry, datasync); if (!ret) ret = err; up(&mapping->host->i_sem); @@ -373,39 +373,14 @@ out: return ret; } -asmlinkage long sys_fdatasync(unsigned int fd) +asmlinkage long sys_fsync(unsigned int fd) { - struct file * file; - struct address_space *mapping; - int ret, err; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; - - ret = -EINVAL; - if (!file->f_op || !file->f_op->fsync) - goto out_putf; - - mapping = file->f_mapping; - - current->flags |= PF_SYNCWRITE; - ret = filemap_fdatawrite(mapping); - down(&mapping->host->i_sem); - err = file->f_op->fsync(file, file->f_dentry, 1); - if (!ret) - ret = err; - up(&mapping->host->i_sem); - err = filemap_fdatawait(mapping); - if (!ret) - ret = err; - current->flags &= ~PF_SYNCWRITE; + return do_fsync(fd, 0); +} -out_putf: - fput(file); -out: - return ret; +asmlinkage long sys_fdatasync(unsigned int fd) +{ + return do_fsync(fd, 1); } /* @@ -522,13 +497,13 @@ static void free_more_memory(void) struct zone **zones; pg_data_t *pgdat; - wakeup_bdflush(1024); + wakeup_pdflush(1024); yield(); for_each_pgdat(pgdat) { zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones; if (*zones) - try_to_free_pages(zones, GFP_NOFS, 0); + try_to_free_pages(zones, GFP_NOFS); } } @@ -1951,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (err) break; if (buffer_new(bh)) { - clear_buffer_new(bh); unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); if (PageUptodate(page)) { @@ -1993,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_uptodate(*wait_bh)) err 
= -EIO; } - if (!err) - return err; - + if (!err) { + bh = head; + do { + if (buffer_new(bh)) + clear_buffer_new(bh); + } while ((bh = bh->b_this_page) != head); + return 0; + } /* Error case: */ /* * Zero out any newly allocated blocks to avoid exposing stale diff --git a/fs/char_dev.c b/fs/char_dev.c index c1e3537909fc..a69a5d8a406f 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -56,10 +56,21 @@ int get_chrdev_list(char *page) down(&chrdevs_lock); for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { - for (cd = chrdevs[i]; cd; cd = cd->next) + for (cd = chrdevs[i]; cd; cd = cd->next) { + /* + * if the current name, plus the 5 extra characters + * in the device line for this entry + * would run us off the page, we're done + */ + if ((len+strlen(cd->name) + 5) >= PAGE_SIZE) + goto page_full; + + len += sprintf(page+len, "%3d %s\n", cd->major, cd->name); + } } +page_full: up(&chrdevs_lock); return len; @@ -139,7 +150,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) struct char_device_struct *cd = NULL, **cp; int i = major_to_index(major); - up(&chrdevs_lock); + down(&chrdevs_lock); for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major == major && (*cp)->baseminor == baseminor && diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index ef001a9313e6..3d1cce3653b8 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -61,7 +61,7 @@ unsigned long coda_timeout = 30; /* .. 
secs, then signals will dequeue */ struct venus_comm coda_comms[MAX_CODADEVS]; -static struct class_simple *coda_psdev_class; +static struct class *coda_psdev_class; /* * Device operations @@ -363,14 +363,14 @@ static int init_coda_psdev(void) CODA_PSDEV_MAJOR); return -EIO; } - coda_psdev_class = class_simple_create(THIS_MODULE, "coda"); + coda_psdev_class = class_create(THIS_MODULE, "coda"); if (IS_ERR(coda_psdev_class)) { err = PTR_ERR(coda_psdev_class); goto out_chrdev; } devfs_mk_dir ("coda"); for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_add(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), + class_device_create(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i), S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i); @@ -382,8 +382,8 @@ static int init_coda_psdev(void) out_class: for (i = 0; i < MAX_CODADEVS; i++) - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); - class_simple_destroy(coda_psdev_class); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); + class_destroy(coda_psdev_class); out_chrdev: unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); out: @@ -425,10 +425,10 @@ static int __init init_coda(void) return 0; out: for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); } - class_simple_destroy(coda_psdev_class); + class_destroy(coda_psdev_class); devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); @@ -447,10 +447,10 @@ static void __exit exit_coda(void) printk("coda: failed to unregister filesystem\n"); } for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); } - class_simple_destroy(coda_psdev_class); + class_destroy(coda_psdev_class); devfs_remove("coda"); 
unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 548556ff2506..efc97d9b7860 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -45,44 +45,15 @@ struct file_operations debugfs_file_operations = { .open = default_open, }; -#define simple_type(type, format, temptype, strtolfn) \ -static ssize_t read_file_##type(struct file *file, char __user *user_buf, \ - size_t count, loff_t *ppos) \ -{ \ - char buf[32]; \ - type *val = file->private_data; \ - \ - snprintf(buf, sizeof(buf), format "\n", *val); \ - return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));\ -} \ -static ssize_t write_file_##type(struct file *file, const char __user *user_buf,\ - size_t count, loff_t *ppos) \ -{ \ - char *endp; \ - char buf[32]; \ - int buf_size; \ - type *val = file->private_data; \ - temptype tmp; \ - \ - memset(buf, 0x00, sizeof(buf)); \ - buf_size = min(count, (sizeof(buf)-1)); \ - if (copy_from_user(buf, user_buf, buf_size)) \ - return -EFAULT; \ - \ - tmp = strtolfn(buf, &endp, 0); \ - if ((endp == buf) || ((type)tmp != tmp)) \ - return -EINVAL; \ - *val = tmp; \ - return count; \ -} \ -static struct file_operations fops_##type = { \ - .read = read_file_##type, \ - .write = write_file_##type, \ - .open = default_open, \ -}; -simple_type(u8, "%c", unsigned long, simple_strtoul); -simple_type(u16, "%hi", unsigned long, simple_strtoul); -simple_type(u32, "%i", unsigned long, simple_strtoul); +static void debugfs_u8_set(void *data, u64 val) +{ + *(u8 *)data = val; +} +static u64 debugfs_u8_get(void *data) +{ + return *(u8 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. 
@@ -116,6 +87,16 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u8); +static void debugfs_u16_set(void *data, u64 val) +{ + *(u16 *)data = val; +} +static u64 debugfs_u16_get(void *data) +{ + return *(u16 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); + /** * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. * @@ -148,6 +129,16 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u16); +static void debugfs_u32_set(void *data, u64 val) +{ + *(u32 *)data = val; +} +static u64 debugfs_u32_get(void *data) +{ + return *(u32 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); + /** * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. * diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b529786699e7..a86ac4aeaedb 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static struct dentry * get_dentry(struct dentry *parent, const char *name) -{ - struct qstr qstr; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); - return lookup_hash(&qstr,parent); -} - static struct super_block *debug_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, *dentry = NULL; down(&parent->d_inode->i_sem); - *dentry = get_dentry (parent, name); + *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) error = debugfs_mkdir(parent->d_inode, *dentry, mode); diff --git a/fs/direct-io.c b/fs/direct-io.c index 
1d55e7e67342..0d06097bc995 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio) static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) { if (dio->end_io && dio->result) - dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); + dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); if (dio->lock_type == DIO_LOCKING) up_read(&dio->inode->i_alloc_sem); } diff --git a/fs/dquot.c b/fs/dquot.c index 3995ce7907cc..b9732335bcdc 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -409,13 +409,10 @@ out_dqlock: * for this sb+type at all. */ static void invalidate_dquots(struct super_block *sb, int type) { - struct dquot *dquot; - struct list_head *head; + struct dquot *dquot, *tmp; spin_lock(&dq_list_lock); - for (head = inuse_list.next; head != &inuse_list;) { - dquot = list_entry(head, struct dquot, dq_inuse); - head = head->next; + list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; if (dquot->dq_type != type) @@ -1519,14 +1516,22 @@ out_path: * This function is used when filesystem needs to initialize quotas * during mount time. 
*/ -int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) +int vfs_quota_on_mount(struct super_block *sb, char *qf_name, + int format_id, int type) { + struct dentry *dentry; int error; + dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + error = security_quota_on(dentry); - if (error) - return error; - return vfs_quota_on_inode(dentry->d_inode, type, format_id); + if (!error) + error = vfs_quota_on_inode(dentry->d_inode, type, format_id); + + dput(dentry); + return error; } /* Generic routine for getting common part of quota structure */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9900e333655a..6ab1dd0ca904 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -101,57 +101,6 @@ /* Maximum number of poll wake up nests we are allowing */ #define EP_MAX_POLLWAKE_NESTS 4 -/* Macro to allocate a "struct epitem" from the slab cache */ -#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL) - -/* Macro to free a "struct epitem" to the slab cache */ -#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p) - -/* Macro to allocate a "struct eppoll_entry" from the slab cache */ -#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL) - -/* Macro to free a "struct eppoll_entry" to the slab cache */ -#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p) - -/* Fast test to see if the file is an evenpoll file */ -#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops) - -/* Setup the structure that is used as key for the rb-tree */ -#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0) - -/* Compare rb-tree keys */ -#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \ - ((p1)->file < (p2)->file ? 
-1: (p1)->fd - (p2)->fd)) - -/* Special initialization for the rb-tree node to detect linkage */ -#define EP_RB_INITNODE(n) (n)->rb_parent = (n) - -/* Removes a node from the rb-tree and marks it for a fast is-linked check */ -#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0) - -/* Fast check to verify that the item is linked to the main rb-tree */ -#define EP_RB_LINKED(n) ((n)->rb_parent != (n)) - -/* - * Remove the item from the list and perform its initialization. - * This is useful for us because we can test if the item is linked - * using "EP_IS_LINKED(p)". - */ -#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0) - -/* Tells us if the item is currently linked */ -#define EP_IS_LINKED(p) (!list_empty(p)) - -/* Get the "struct epitem" from a wait queue pointer */ -#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base) - -/* Get the "struct epitem" from an epoll queue wrapper */ -#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi) - -/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ -#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL) - - struct epoll_filefd { struct file *file; int fd; @@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = { +/* Fast test to see if the file is an evenpoll file */ +static inline int is_file_epoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} + +/* Setup the structure that is used as key for the rb-tree */ +static inline void ep_set_ffd(struct epoll_filefd *ffd, + struct file *file, int fd) +{ + ffd->file = file; + ffd->fd = fd; +} + +/* Compare rb-tree keys */ +static inline int ep_cmp_ffd(struct epoll_filefd *p1, + struct epoll_filefd *p2) +{ + return (p1->file > p2->file ? +1: + (p1->file < p2->file ? 
-1 : p1->fd - p2->fd)); +} + +/* Special initialization for the rb-tree node to detect linkage */ +static inline void ep_rb_initnode(struct rb_node *n) +{ + n->rb_parent = n; +} + +/* Removes a node from the rb-tree and marks it for a fast is-linked check */ +static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) +{ + rb_erase(n, r); + n->rb_parent = n; +} + +/* Fast check to verify that the item is linked to the main rb-tree */ +static inline int ep_rb_linked(struct rb_node *n) +{ + return n->rb_parent != n; +} + +/* + * Remove the item from the list and perform its initialization. + * This is useful for us because we can test if the item is linked + * using "ep_is_linked(p)". + */ +static inline void ep_list_del(struct list_head *p) +{ + list_del(p); + INIT_LIST_HEAD(p); +} + +/* Tells us if the item is currently linked */ +static inline int ep_is_linked(struct list_head *p) +{ + return !list_empty(p); +} + +/* Get the "struct epitem" from a wait queue pointer */ +static inline struct epitem * ep_item_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait)->base; +} + +/* Get the "struct epitem" from an epoll queue wrapper */ +static inline struct epitem * ep_item_from_epqueue(poll_table *p) +{ + return container_of(p, struct ep_pqueue, pt)->epi; +} + +/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ +static inline int ep_op_hash_event(int op) +{ + return op != EPOLL_CTL_DEL; +} + /* Initialize the poll safe wake up structure */ static void ep_poll_safewake_init(struct poll_safewake *psw) { @@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file) epi = list_entry(lsthead->next, struct epitem, fllink); ep = epi->ep; - EP_LIST_DEL(&epi->fllink); + ep_list_del(&epi->fllink); down_write(&ep->sem); ep_remove(ep, epi); up_write(&ep->sem); @@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) current, epfd, op, fd, event)); error = -EFAULT; - if 
(EP_OP_HASH_EVENT(op) && + if (ep_op_hash_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) goto eexit_1; @@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) * adding an epoll file descriptor inside itself. */ error = -EINVAL; - if (file == tfile || !IS_FILE_EPOLL(file)) + if (file == tfile || !is_file_epoll(file)) goto eexit_3; /* @@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; - if (!IS_FILE_EPOLL(file)) + if (!is_file_epoll(file)) goto eexit_2; /* @@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; - EP_SET_FFD(&ffd, file, fd); + ep_set_ffd(&ffd, file, fd); read_lock_irqsave(&ep->lock, flags); for (rbp = ep->rbr.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); - kcmp = EP_CMP_FFD(&ffd, &epi->ffd); + kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) rbp = rbp->rb_right; else if (kcmp < 0) @@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi) { if (atomic_dec_and_test(&epi->usecnt)) - EPI_MEM_FREE(epi); + kmem_cache_free(epi_cache, epi); } @@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi) static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { - struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt); + struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; - if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) { + if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; @@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); - kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd); + kcmp = 
ep_cmp_ffd(&epi->ffd, &epic->ffd); if (kcmp > 0) p = &parent->rb_right; else @@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct ep_pqueue epq; error = -ENOMEM; - if (!(epi = EPI_MEM_ALLOC())) + if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL))) goto eexit_1; /* Item initialization follow here ... */ - EP_RB_INITNODE(&epi->rbn); + ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->txlink); INIT_LIST_HEAD(&epi->pwqlist); epi->ep = ep; - EP_SET_FFD(&epi->ffd, tfile, fd); + ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; atomic_set(&epi->usecnt, 1); epi->nwait = 0; @@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ep_rbtree_insert(ep, epi); /* If the file is already "ready" we drop it inside the ready list */ - if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) { + if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); /* Notify waiting tasks that events are available */ @@ -1007,11 +1032,11 @@ eexit_2: * allocated wait queue. */ write_lock_irqsave(&ep->lock, flags); - if (EP_IS_LINKED(&epi->rdllink)) - EP_LIST_DEL(&epi->rdllink); + if (ep_is_linked(&epi->rdllink)) + ep_list_del(&epi->rdllink); write_unlock_irqrestore(&ep->lock, flags); - EPI_MEM_FREE(epi); + kmem_cache_free(epi_cache, epi); eexit_1: return error; } @@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * If the item is not linked to the hash it means that it's on its * way toward the removal. Do nothing in this case. */ - if (EP_RB_LINKED(&epi->rbn)) { + if (ep_rb_linked(&epi->rbn)) { /* * If the item is "hot" and it is not registered inside the ready * list, push it inside. If the item is not "hot" and it is currently * registered inside the ready list, unlink it. 
*/ if (revents & event->events) { - if (!EP_IS_LINKED(&epi->rdllink)) { + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); /* Notify waiting tasks that events are available */ @@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) while (!list_empty(lsthead)) { pwq = list_entry(lsthead->next, struct eppoll_entry, llink); - EP_LIST_DEL(&pwq->llink); + ep_list_del(&pwq->llink); remove_wait_queue(pwq->whead, &pwq->wait); - PWQ_MEM_FREE(pwq); + kmem_cache_free(pwq_cache, pwq); } } } @@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) * The check protect us from doing a double unlink ( crash ). */ error = -ENOENT; - if (!EP_RB_LINKED(&epi->rbn)) + if (!ep_rb_linked(&epi->rbn)) goto eexit_1; /* @@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) * This operation togheter with the above check closes the door to * double unlinks. */ - EP_RB_ERASE(&epi->rbn, &ep->rbr); + ep_rb_erase(&epi->rbn, &ep->rbr); /* * If the item we are going to remove is inside the ready file descriptors * we want to remove it from this list to avoid stale events. 
*/ - if (EP_IS_LINKED(&epi->rdllink)) - EP_LIST_DEL(&epi->rdllink); + if (ep_is_linked(&epi->rdllink)) + ep_list_del(&epi->rdllink); error = 0; eexit_1: @@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_ep_lock); - if (EP_IS_LINKED(&epi->fllink)) - EP_LIST_DEL(&epi->fllink); + if (ep_is_linked(&epi->fllink)) + ep_list_del(&epi->fllink); spin_unlock(&file->f_ep_lock); /* We need to acquire the write IRQ lock before calling ep_unlink() */ @@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k { int pwake = 0; unsigned long flags; - struct epitem *epi = EP_ITEM_FROM_WAIT(wait); + struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", @@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k goto is_disabled; /* If this file is already in the ready list we exit soon */ - if (EP_IS_LINKED(&epi->rdllink)) + if (ep_is_linked(&epi->rdllink)) goto is_linked; list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist lnk = lnk->next; /* If this file is already in the ready list we exit soon */ - if (!EP_IS_LINKED(&epi->txlink)) { + if (!ep_is_linked(&epi->txlink)) { /* * This is initialized in this way so that the default * behaviour of the reinjecting code will be to push back @@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist /* * Unlink the item from the ready list. 
*/ - EP_LIST_DEL(&epi->rdllink); + ep_list_del(&epi->rdllink); } } @@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) epi = list_entry(txlist->next, struct epitem, txlink); /* Unlink the current item from the transfer list */ - EP_LIST_DEL(&epi->txlink); + ep_list_del(&epi->txlink); /* * If the item is no more linked to the interest set, we don't @@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) * item is set to have an Edge Triggered behaviour, we don't have * to push it back either. */ - if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) && - (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) { + if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) && + (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); ricnt++; } diff --git a/fs/exec.c b/fs/exec.c index 3a4b35a14c0d..48871917d363 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -58,6 +58,9 @@ int core_uses_pid; char core_pattern[65] = "core"; +int suid_dumpable = 0; + +EXPORT_SYMBOL(suid_dumpable); /* The maximal length of core_pattern is also specified in sysctl.c */ static struct linux_binfmt *formats; @@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm) if (current->euid == current->uid && current->egid == current->gid) current->mm->dumpable = 1; + else + current->mm->dumpable = suid_dumpable; + name = bprm->filename; /* Copies the binary name from after last slash */ @@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm) permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { suid_keys(current); - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; } /* An exec changes our domain. 
We are no longer part of the thread @@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct inode * inode; struct file * file; int retval = 0; + int fsuid = current->fsuid; + int flag = 0; binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) @@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) up_write(&mm->mmap_sem); goto fail; } + + /* + * We cannot trust fsuid as being the "true" uid of the + * process nor do we know its entire history. We only know it + * was tainted so we dump it as root in mode 2. + */ + if (mm->dumpable == 2) { /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + current->fsuid = 0; /* Dump root private */ + } mm->dumpable = 0; init_completion(&mm->core_done); spin_lock_irq(¤t->sighand->siglock); @@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) lock_kernel(); format_corename(corename, core_pattern, signr); unlock_kernel(); - file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); + file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(file)) goto fail_unlock; inode = file->f_dentry->d_inode; @@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) close_fail: filp_close(file, NULL); fail_unlock: + current->fsuid = fsuid; complete_all(&mm->core_done); fail: return retval; diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index ee240a14e70f..c5d02da73bc3 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o +ext2-$(CONFIG_EXT2_FS_XIP) += xip.o diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 25f4a64fd6bc..213148c36ebe 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -396,12 
+396,12 @@ static size_t ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_size) - memcpy(list, XATTR_NAME_ACL_ACCESS, size); + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); return size; } @@ -409,12 +409,12 @@ static size_t ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_size) - memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); return size; } @@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name, } struct xattr_handler ext2_xattr_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .list = ext2_xattr_list_acl_access, .get = ext2_xattr_get_acl_access, .set = ext2_xattr_set_acl_access, }; struct xattr_handler ext2_xattr_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .list = ext2_xattr_list_acl_default, .get = ext2_xattr_get_acl_default, .set = ext2_xattr_set_acl_default, diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index fed96ae81a7d..0bde85bafe38 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -4,7 +4,7 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #define EXT2_ACL_VERSION 0x0001 diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8f0fd726c3f1..eed521d22cf0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations; /* file.c */ extern struct inode_operations 
ext2_file_inode_operations; extern struct file_operations ext2_file_operations; +extern struct file_operations ext2_xip_file_operations; /* inode.c */ extern struct address_space_operations ext2_aops; +extern struct address_space_operations ext2_aops_xip; extern struct address_space_operations ext2_nobh_aops; /* namei.c */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index f5e86141ec54..a484412fc782 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -55,6 +55,20 @@ struct file_operations ext2_file_operations = { .sendfile = generic_file_sendfile, }; +#ifdef CONFIG_EXT2_FS_XIP +struct file_operations ext2_xip_file_operations = { + .llseek = generic_file_llseek, + .read = xip_file_read, + .write = xip_file_write, + .ioctl = ext2_ioctl, + .mmap = xip_file_mmap, + .open = generic_file_open, + .release = ext2_release_file, + .fsync = ext2_sync_file, + .sendfile = xip_file_sendfile, +}; +#endif + struct inode_operations ext2_file_inode_operations = { .truncate = ext2_truncate, #ifdef CONFIG_EXT2_FS_XATTR diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index a50d9db4b6e4..53dceb0c6593 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -33,6 +33,7 @@ #include <linux/mpage.h> #include "ext2.h" #include "acl.h" +#include "xip.h" MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); @@ -594,6 +595,16 @@ out: if (err) goto cleanup; + if (ext2_use_xip(inode->i_sb)) { + /* + * we need to clear the block + */ + err = ext2_clear_xip_target (inode, + le32_to_cpu(chain[depth-1].key)); + if (err) + goto cleanup; + } + if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) goto changed; @@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = { .writepages = ext2_writepages, }; +struct address_space_operations ext2_aops_xip = { + .bmap = ext2_bmap, + .get_xip_page = ext2_get_xip_page, +}; + struct address_space_operations ext2_nobh_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, @@ -910,7 +926,9 @@ void 
ext2_truncate (struct inode * inode) iblock = (inode->i_size + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); - if (test_opt(inode->i_sb, NOBH)) + if (mapping_is_xip(inode->i_mapping)) + xip_truncate_page(inode->i_mapping, inode->i_size); + else if (test_opt(inode->i_sb, NOBH)) nobh_truncate_page(inode->i_mapping, inode->i_size); else block_truncate_page(inode->i_mapping, @@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode) if (S_ISREG(inode->i_mode)) { inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - if (test_opt(inode->i_sb, NOBH)) + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; - else + inode->i_fop = &ext2_file_operations; + } else { inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; + } } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 3176b3d3ffa8..c5513953c825 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -34,6 +34,7 @@ #include "ext2.h" #include "xattr.h" #include "acl.h" +#include "xip.h" /* * Couple of helper functions - make the code slightly cleaner. 
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st int err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - if (test_opt(inode->i_sb, NOBH)) + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; - else + inode->i_fop = &ext2_file_operations; + } else { inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; + } mark_inode_dirty(inode); err = ext2_add_nondir(dentry, inode); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 661c3d98d946..876e391f2871 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -31,6 +31,7 @@ #include "ext2.h" #include "xattr.h" #include "acl.h" +#include "xip.h" static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es); @@ -257,7 +258,7 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, }; @@ -286,6 +287,7 @@ static match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_xip, "xip"}, {Opt_ignore, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, @@ -397,6 +399,13 @@ static int parse_options (char * options, printk("EXT2 (no)acl options not supported\n"); break; #endif + case Opt_xip: +#ifdef CONFIG_EXT2_FS_XIP + set_opt (sbi->s_mount_opt, XIP); +#else + printk("EXT2 xip option not supported\n"); +#endif + break; case Opt_ignore: break; default: @@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) 
((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset + EXT2_MOUNT_XIP if not */ + if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || @@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) || + (sb->s_blocksize != blocksize))) { + if (!silent) + printk("XIP: Unsupported blocksize\n"); + goto failed_mount; + } + /* If the blocksize doesn't match, re-read the thing.. */ if (sb->s_blocksize != blocksize) { brelse(bh); @@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) { struct ext2_sb_info * sbi = EXT2_SB(sb); struct ext2_super_block * es; + unsigned long old_mount_opt = sbi->s_mount_opt; /* * Allow the "check" option to be passed as a remount option. @@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); es = sbi->s_es; + if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != + (old_mount_opt & EXT2_MOUNT_XIP)) && + invalidate_inodes(sb)) + ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\ + "xip remain in cache (no functional problem)"); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (*flags & MS_RDONLY) { diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c new file mode 100644 index 000000000000..d44431d1a338 --- /dev/null +++ b/fs/ext2/xip.c @@ -0,0 +1,80 @@ +/* + * linux/fs/ext2/xip.c + * + * Copyright (C) 2005 IBM Corporation + * Author: Carsten Otte (cotte@de.ibm.com) + */ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/buffer_head.h> +#include <linux/ext2_fs_sb.h> +#include <linux/ext2_fs.h> +#include "ext2.h" +#include "xip.h" + +static inline int +__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) { + BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access); + return inode->i_sb->s_bdev->bd_disk->fops + ->direct_access(inode->i_sb->s_bdev,sector,data); +} + +int +ext2_clear_xip_target(struct inode *inode, int block) { + sector_t sector = block*(PAGE_SIZE/512); + unsigned long data; + int rc; + + rc = __inode_direct_access(inode, sector, &data); + if (rc) + return rc; + clear_page((void*)data); + return 0; +} + +void ext2_xip_verify_sb(struct super_block *sb) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + + if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) { + if ((sb->s_bdev == NULL) || + sb->s_bdev->bd_disk == NULL || + sb->s_bdev->bd_disk->fops == NULL || + sb->s_bdev->bd_disk->fops->direct_access == NULL) { + sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); + ext2_warning(sb, __FUNCTION__, + "ignoring xip option - not supported by bdev"); + } + } +} + +struct page* +ext2_get_xip_page(struct address_space *mapping, sector_t blockno, + int create) +{ + int rc; + unsigned long data; + struct buffer_head tmp; + + tmp.b_state = 0; + tmp.b_blocknr = 0; + rc = 
ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp, + create); + if (rc) + return ERR_PTR(rc); + if (tmp.b_blocknr == 0) { + /* SPARSE block */ + BUG_ON(create); + return ERR_PTR(-ENODATA); + } + + rc = __inode_direct_access + (mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data); + if (rc) + return ERR_PTR(rc); + + SetPageUptodate(virt_to_page(data)); + return virt_to_page(data); +} diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h new file mode 100644 index 000000000000..aa85331d6c56 --- /dev/null +++ b/fs/ext2/xip.h @@ -0,0 +1,25 @@ +/* + * linux/fs/ext2/xip.h + * + * Copyright (C) 2005 IBM Corporation + * Author: Carsten Otte (cotte@de.ibm.com) + */ + +#ifdef CONFIG_EXT2_FS_XIP +extern void ext2_xip_verify_sb (struct super_block *); +extern int ext2_clear_xip_target (struct inode *, int); + +static inline int ext2_use_xip (struct super_block *sb) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + return (sbi->s_mount_opt & EXT2_MOUNT_XIP); +} +struct page* ext2_get_xip_page (struct address_space *, sector_t, int); +#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page) +#else +#define mapping_is_xip(map) 0 +#define ext2_xip_verify_sb(sb) do { } while (0) +#define ext2_use_xip(sb) 0 +#define ext2_clear_xip_target(inode, chain) 0 +#define ext2_get_xip_page NULL +#endif diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 638c13a26c03..3ac38266fc9e 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode) int retries = 0; retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); + handle = ext3_journal_start(inode, + EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { error = PTR_ERR(handle); ext3_std_error(inode->i_sb, error); @@ -417,12 +418,12 @@ static size_t ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); if 
(!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_len) - memcpy(list, XATTR_NAME_ACL_ACCESS, size); + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); return size; } @@ -430,12 +431,12 @@ static size_t ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_len) - memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); return size; } @@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value, acl = NULL; retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); + handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); error = ext3_set_acl(handle, inode, type, acl); @@ -535,14 +536,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name, } struct xattr_handler ext3_xattr_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .list = ext3_xattr_list_acl_access, .get = ext3_xattr_get_acl_access, .set = ext3_xattr_set_acl_access, }; struct xattr_handler ext3_xattr_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .list = ext3_xattr_list_acl_default, .get = ext3_xattr_get_acl_default, .set = ext3_xattr_set_acl_default, diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 98af0c0d0ba9..92d50b53a933 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -4,7 +4,7 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #define EXT3_ACL_VERSION 0x0001 diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index ccd632fcc6d8..e463dca008e4 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -749,24 +749,24 @@ fail_access: * to 
find a free region that is of my size and has not * been reserved. * - * on succeed, it returns the reservation window to be appended to. - * failed, return NULL. */ -static struct ext3_reserve_window_node *find_next_reservable_window( +static int find_next_reservable_window( struct ext3_reserve_window_node *search_head, - unsigned long size, int *start_block, + struct ext3_reserve_window_node *my_rsv, + struct super_block * sb, int start_block, int last_block) { struct rb_node *next; struct ext3_reserve_window_node *rsv, *prev; int cur; + int size = my_rsv->rsv_goal_size; /* TODO: make the start of the reservation window byte-aligned */ /* cur = *start_block & ~7;*/ - cur = *start_block; + cur = start_block; rsv = search_head; if (!rsv) - return NULL; + return -1; while (1) { if (cur <= rsv->rsv_end) @@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window( * space with expected-size (or more)... */ if (cur > last_block) - return NULL; /* fail */ + return -1; /* fail */ prev = rsv; next = rb_next(&rsv->rsv_node); - rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node); /* * Reached the last reservation, we can just append to the @@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window( * return the reservation window that we could append to. * succeed. */ - *start_block = cur; - return prev; + + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) + rsv_window_remove(sb, my_rsv); + + /* + * Let's book the whole avaliable window for now. We will check the + * disk bitmap later and then, if there are free blocks then we adjust + * the window size if it's larger than requested. + * Otherwise, we will remove this node from the tree next time + * call find_next_reservable_window. 
+ */ + my_rsv->rsv_start = cur; + my_rsv->rsv_end = cur + size - 1; + my_rsv->rsv_alloc_hit = 0; + + if (prev != my_rsv) + ext3_rsv_window_add(sb, my_rsv); + + return 0; } /** @@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window( * @sb: the super block * @group: the group we are trying to allocate in * @bitmap_bh: the block group block bitmap + * */ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, int goal, struct super_block *sb, @@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, struct ext3_reserve_window_node *search_head; int group_first_block, group_end_block, start_block; int first_free_block; - int reservable_space_start; - struct ext3_reserve_window_node *prev_rsv; struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; unsigned long size; + int ret; + spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + group * EXT3_BLOCKS_PER_GROUP(sb); @@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, start_block = goal + group_first_block; size = my_rsv->rsv_goal_size; + if (!rsv_is_empty(&my_rsv->rsv_window)) { /* * if the old reservation is cross group boundary @@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, my_rsv->rsv_goal_size= size; } } + + spin_lock(rsv_lock); /* * shift the search start to the window near the goal block */ @@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, * need to check the bitmap after we found a reservable window. 
*/ retry: - prev_rsv = find_next_reservable_window(search_head, size, - &start_block, group_end_block); - if (prev_rsv == NULL) - goto failed; - reservable_space_start = start_block; + ret = find_next_reservable_window(search_head, my_rsv, sb, + start_block, group_end_block); + + if (ret == -1) { + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; + } + /* * On success, find_next_reservable_window() returns the * reservation window where there is a reservable space after it. @@ -937,8 +963,9 @@ retry: * block. Search start from the start block of the reservable space * we just found. */ + spin_unlock(rsv_lock); first_free_block = bitmap_search_next_usable_block( - reservable_space_start - group_first_block, + my_rsv->rsv_start - group_first_block, bitmap_bh, group_end_block - group_first_block + 1); if (first_free_block < 0) { @@ -946,54 +973,29 @@ retry: * no free block left on the bitmap, no point * to reserve the space. return failed. 
*/ - goto failed; + spin_lock(rsv_lock); + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; /* failed */ } + start_block = first_free_block + group_first_block; /* * check if the first free block is within the - * free space we just found + * free space we just reserved */ - if ((start_block >= reservable_space_start) && - (start_block < reservable_space_start + size)) - goto found_rsv_window; + if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) + return 0; /* success */ /* * if the first free bit we found is out of the reservable space - * this means there is no free block on the reservable space - * we should continue search for next reservable space, + * continue search for next reservable space, * start from where the free block is, * we also shift the list head to where we stopped last time */ - search_head = prev_rsv; + search_head = my_rsv; + spin_lock(rsv_lock); goto retry; - -found_rsv_window: - /* - * great! the reservable space contains some free blocks. - * if the search returns that we should add the new - * window just next to where the old window, we don't - * need to remove the old window first then add it to the - * same place, just update the new start and new end. 
- */ - if (my_rsv != prev_rsv) { - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - } - my_rsv->rsv_start = reservable_space_start; - my_rsv->rsv_end = my_rsv->rsv_start + size - 1; - my_rsv->rsv_alloc_hit = 0; - if (my_rsv != prev_rsv) { - ext3_rsv_window_add(sb, my_rsv); - } - return 0; /* succeed */ -failed: - /* - * failed to find a new reservation window in the current - * group, remove the current(stale) reservation window - * if there is any - */ - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - return -1; /* failed */ } /* @@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, int goal, struct ext3_reserve_window_node * my_rsv, int *errp) { - spinlock_t *rsv_lock; unsigned long group_first_block; int ret = 0; int fatal; @@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); goto out; } - rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; /* * goal is a group relative block number (if there is a goal) * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) @@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, * then we could go to allocate from the reservation window directly. 
*/ while (1) { - struct ext3_reserve_window rsv_copy; - - rsv_copy._rsv_start = my_rsv->rsv_start; - rsv_copy._rsv_end = my_rsv->rsv_end; - - if (rsv_is_empty(&rsv_copy) || (ret < 0) || - !goal_in_my_reservation(&rsv_copy, goal, group, sb)) { - spin_lock(rsv_lock); + if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || + !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { ret = alloc_new_reservation(my_rsv, goal, sb, group, bitmap_bh); - rsv_copy._rsv_start = my_rsv->rsv_start; - rsv_copy._rsv_end = my_rsv->rsv_end; - spin_unlock(rsv_lock); if (ret < 0) break; /* failed */ - if (!goal_in_my_reservation(&rsv_copy, goal, group, sb)) + if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) goal = -1; } - if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) - || (rsv_copy._rsv_end < group_first_block)) + if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) + || (my_rsv->rsv_end < group_first_block)) BUG(); ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, - &rsv_copy); + &my_rsv->rsv_window); if (ret >= 0) { my_rsv->rsv_alloc_hit++; break; /* succeed */ diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5ad8cf0292df..98e78345ead9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp) /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) + { + down(&EXT3_I(inode)->truncate_sem); ext3_discard_reservation(inode); + up(&EXT3_I(inode)->truncate_sem); + } if (is_dx(inode) && filp->private_data) ext3_htree_free_dir_info(filp->private_data); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0d5fa73b18dc..0b2db4f618cb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode) if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; - 
return EXT3_DATA_TRANS_BLOCKS + needed; + return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; } /* @@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ - handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3); + handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ + EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; @@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) #ifdef CONFIG_QUOTA /* We know that structure was already allocated during DQUOT_INIT so * we will be updating only the data blocks + inodes */ - ret += 2*EXT3_QUOTA_TRANS_BLOCKS; + ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); #endif return ret; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 79742d824a0a..50378d8ff84b 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, struct inode *dir = dentry->d_parent->d_inode; sb = dir->i_sb; - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) - return NULL; + /* NFS may look up ".." - look at dx_root directory block */ + if (namelen > 2 || name[0] != '.'||(name[1] != '.' 
&& name[1] != '\0')){ + if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + return NULL; + } else { + frame = frames; + frame->bh = NULL; /* for dx_release() */ + frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ + dx_set_block(frame->at, 0); /* dx_root block is 0 */ + } hash = hinfo.hash; do { block = dx_get_block(frame->at); @@ -1637,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, int err, retries = 0; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS); + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1671,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, return -EINVAL; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS); + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1707,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) return -EMLINK; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS); + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1998,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ DQUOT_INIT(dentry->d_inode); - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2057,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, 
struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ DQUOT_INIT(dentry->d_inode); - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2112,9 +2120,9 @@ static int ext3_symlink (struct inode * dir, return -ENAMETOOLONG; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT3_QUOTA_INIT_BLOCKS); + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2166,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry, return -EMLINK; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2208,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, * in separate transaction */ if (new_dentry->d_inode) DQUOT_INIT(new_dentry->d_inode); - handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + + handle = ext3_journal_start(old_dir, 2 * + EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 981ccb233ef5..a6d1779d7de4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -589,7 +589,7 @@ enum { Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, }; @@ -634,10 +634,10 @@ static match_table_t tokens = { {Opt_grpjquota, "grpjquota=%s"}, 
{Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_ignore, "grpquota"}, - {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, + {Opt_quota, "grpquota"}, + {Opt_noquota, "noquota"}, + {Opt_quota, "quota"}, + {Opt_quota, "usrquota"}, {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, @@ -876,6 +876,7 @@ set_qf_name: sbi->s_qf_names[qtype] = NULL; return 0; } + set_opt(sbi->s_mount_opt, QUOTA); break; case Opt_offusrjquota: qtype = USRQUOTA; @@ -898,6 +899,17 @@ clear_qf_name: case Opt_jqfmt_vfsv0: sbi->s_jquota_fmt = QFMT_VFS_V0; break; + case Opt_quota: + set_opt(sbi->s_mount_opt, QUOTA); + break; + case Opt_noquota: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT3-fs: Cannot change quota " + "options when quota turned on.\n"); + return 0; + } + clear_opt(sbi->s_mount_opt, QUOTA); + break; #else case Opt_usrjquota: case Opt_grpjquota: @@ -909,6 +921,9 @@ clear_qf_name: "EXT3-fs: journalled quota options not " "supported.\n"); break; + case Opt_quota: + case Opt_noquota: + break; #endif case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); @@ -929,7 +944,8 @@ clear_qf_name: "for remount\n"); return 0; } - match_int(&args[0], &option); + if (match_int(&args[0], &option) != 0) + return 0; *n_blocks_count = option; break; case Opt_nobh: @@ -2238,7 +2254,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type) int ret, err; /* We may create quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_initialize(inode, type); @@ -2254,7 +2270,7 @@ static int ext3_dquot_drop(struct inode *inode) int ret, err; /* We may delete quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + handle = ext3_journal_start(inode, 
2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_drop(inode); @@ -2272,7 +2288,7 @@ static int ext3_write_dquot(struct dquot *dquot) inode = dquot_to_inode(dquot); handle = ext3_journal_start(inode, - EXT3_QUOTA_TRANS_BLOCKS); + EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -2288,7 +2304,7 @@ static int ext3_acquire_dquot(struct dquot *dquot) handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS); + EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -2304,7 +2320,7 @@ static int ext3_release_dquot(struct dquot *dquot) handle_t *handle; handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS); + EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_release(dquot); @@ -2348,22 +2364,8 @@ static int ext3_write_info(struct super_block *sb, int type) */ static int ext3_quota_on_mount(struct super_block *sb, int type) { - int err; - struct dentry *dentry; - struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type], - .hash = 0, - .len = strlen(EXT3_SB(sb)->s_qf_names[type])}; - - dentry = lookup_hash(&name, sb->s_root); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry); - /* Now invalidate and put the dentry - quota got its own reference - * to inode and dentry has at least wrong hash so we had better - * throw it away */ - d_invalidate(dentry); - dput(dentry); - return err; + return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], + EXT3_SB(sb)->s_jquota_fmt, type); } /* @@ -2375,6 +2377,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, int err; struct nameidata nd; + if (!test_opt(sb, QUOTA)) + return -EINVAL; /* Not journalling quota? 
*/ if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 4cbc6d0212d3..3f9dfa643b19 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name, int error, retries = 0; retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); + handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { error = PTR_ERR(handle); } else { diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 7c52e465a619..77c24fcf712a 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -56,7 +56,7 @@ int __init fat_cache_init(void) return 0; } -void __exit fat_cache_destroy(void) +void fat_cache_destroy(void) { if (kmem_cache_destroy(fat_cache_cachep)) printk(KERN_INFO "fat_cache: not all structures were freed\n"); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8ccee8415488..96ae85b67eba 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1327,16 +1327,25 @@ out_fail: EXPORT_SYMBOL(fat_fill_super); int __init fat_cache_init(void); -void __exit fat_cache_destroy(void); +void fat_cache_destroy(void); static int __init init_fat_fs(void) { - int ret; + int err; - ret = fat_cache_init(); - if (ret < 0) - return ret; - return fat_init_inodecache(); + err = fat_cache_init(); + if (err) + return err; + + err = fat_init_inodecache(); + if (err) + goto failed; + + return 0; + +failed: + fat_cache_destroy(); + return err; } static void __exit exit_fat_fs(void) diff --git a/fs/file_table.c b/fs/file_table.c index 03d83cb686b1..fa7849fae134 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -63,42 +63,45 @@ static inline void file_free(struct file *f) */ struct file *get_empty_filp(void) { -static int old_max; + static int old_max; struct file * f; /* * Privileged users can go above max_files */ - if (files_stat.nr_files < files_stat.max_files || - capable(CAP_SYS_ADMIN)) { - f = 
kmem_cache_alloc(filp_cachep, GFP_KERNEL); - if (f) { - memset(f, 0, sizeof(*f)); - if (security_file_alloc(f)) { - file_free(f); - goto fail; - } - eventpoll_init_file(f); - atomic_set(&f->f_count, 1); - f->f_uid = current->fsuid; - f->f_gid = current->fsgid; - rwlock_init(&f->f_owner.lock); - /* f->f_version: 0 */ - INIT_LIST_HEAD(&f->f_list); - f->f_maxcount = INT_MAX; - return f; - } - } - + if (files_stat.nr_files >= files_stat.max_files && + !capable(CAP_SYS_ADMIN)) + goto over; + + f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); + if (f == NULL) + goto fail; + + memset(f, 0, sizeof(*f)); + if (security_file_alloc(f)) + goto fail_sec; + + eventpoll_init_file(f); + atomic_set(&f->f_count, 1); + f->f_uid = current->fsuid; + f->f_gid = current->fsgid; + rwlock_init(&f->f_owner.lock); + /* f->f_version: 0 */ + INIT_LIST_HEAD(&f->f_list); + f->f_maxcount = INT_MAX; + return f; + +over: /* Ran out of filps - report that */ - if (files_stat.max_files >= old_max) { + if (files_stat.nr_files > old_max) { printk(KERN_INFO "VFS: file-max limit %d reached\n", files_stat.max_files); - old_max = files_stat.max_files; - } else { - /* Big problems... */ - printk(KERN_WARNING "VFS: filp allocation failed\n"); + old_max = files_stat.nr_files; } + goto fail; + +fail_sec: + file_free(f); fail: return NULL; } diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index 8da0252642a4..583bd78086d8 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -37,7 +37,6 @@ * superblocks of the Veritas Filesystem. 
*/ #include <linux/types.h> -#include "vxfs_kcompat.h" /* diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c index bc4b57da306a..d3f6b2835bc8 100644 --- a/fs/freevxfs/vxfs_bmap.c +++ b/fs/freevxfs/vxfs_bmap.c @@ -101,7 +101,7 @@ vxfs_bmap_ext4(struct inode *ip, long bn) return 0; fail_size: - printk("vxfs: indirect extent to big!\n"); + printk("vxfs: indirect extent too big!\n"); fail_buf: return 0; } diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index 05b19f70bf97..6dee109aeea4 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -78,17 +78,18 @@ vxfs_getfsh(struct inode *ip, int which) struct buffer_head *bp; bp = vxfs_bread(ip, which); - if (buffer_mapped(bp)) { + if (bp) { struct vxfs_fsh *fhp; - if (!(fhp = kmalloc(sizeof(*fhp), SLAB_KERNEL))) - return NULL; + if (!(fhp = kmalloc(sizeof(*fhp), GFP_KERNEL))) + goto out; memcpy(fhp, bp->b_data, sizeof(*fhp)); - brelse(bp); + put_bh(bp); return (fhp); } - +out: + brelse(bp); return NULL; } diff --git a/fs/freevxfs/vxfs_kcompat.h b/fs/freevxfs/vxfs_kcompat.h deleted file mode 100644 index 342a4cc860f4..000000000000 --- a/fs/freevxfs/vxfs_kcompat.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _VXFS_KCOMPAT_H -#define _VXFS_KCOMPAT_H - -#include <linux/version.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#include <linux/blkdev.h> - -typedef long sector_t; - -/* From include/linux/fs.h (Linux 2.5.2-pre3) */ -static inline struct buffer_head * sb_bread(struct super_block *sb, int block) -{ - return bread(sb->s_dev, block, sb->s_blocksize); -} - -/* Dito. 
*/ -static inline void map_bh(struct buffer_head *bh, struct super_block *sb, int block) -{ - bh->b_state |= 1 << BH_Mapped; - bh->b_dev = sb->s_dev; - bh->b_blocknr = block; -} - -/* From fs/block_dev.c (Linux 2.5.2-pre2) */ -static inline int sb_set_blocksize(struct super_block *sb, int size) -{ - int bits; - if (set_blocksize(sb->s_dev, size) < 0) - return 0; - sb->s_blocksize = size; - for (bits = 9, size >>= 9; size >>= 1; bits++) - ; - sb->s_blocksize_bits = bits; - return sb->s_blocksize; -} - -/* Dito. */ -static inline int sb_min_blocksize(struct super_block *sb, int size) -{ - int minsize = get_hardsect_size(sb->s_dev); - if (size < minsize) - size = minsize; - return sb_set_blocksize(sb, size); -} - -#endif /* Kernel 2.4 */ -#endif /* _VXFS_KCOMPAT_H */ diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 506ae251d2c0..554eb455722c 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -61,13 +61,13 @@ struct file_operations vxfs_dir_operations = { }; -static __inline__ u_long +static inline u_long dir_pages(struct inode *inode) { return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; } -static __inline__ u_long +static inline u_long dir_blocks(struct inode *ip) { u_long bsize = ip->i_sb->s_blocksize; @@ -79,7 +79,7 @@ dir_blocks(struct inode *ip) * * len <= VXFS_NAMELEN and de != NULL are guaranteed by caller. 
*/ -static __inline__ int +static inline int vxfs_match(int len, const char * const name, struct vxfs_direct *de) { if (len != de->d_namelen) @@ -89,7 +89,7 @@ vxfs_match(int len, const char * const name, struct vxfs_direct *de) return !memcmp(name, de->d_name, len); } -static __inline__ struct vxfs_direct * +static inline struct vxfs_direct * vxfs_next_entry(struct vxfs_direct *de) { return ((struct vxfs_direct *)((char*)de + de->d_reclen)); diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c index 7a204e31aad9..133476201d84 100644 --- a/fs/freevxfs/vxfs_olt.c +++ b/fs/freevxfs/vxfs_olt.c @@ -38,7 +38,7 @@ #include "vxfs_olt.h" -static __inline__ void +static inline void vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) { if (infp->vsi_fshino) @@ -46,7 +46,7 @@ vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) infp->vsi_fshino = fshp->olt_fsino[0]; } -static __inline__ void +static inline void vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) { if (infp->vsi_iext) @@ -54,7 +54,7 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) infp->vsi_iext = ilistp->olt_iext[0]; } -static __inline__ u_long +static inline u_long vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize) { if (sbp->s_blocksize % bsize) @@ -104,8 +104,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize) goto fail; } - oaddr = (char *)bp->b_data + op->olt_size; - eaddr = (char *)bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize); + oaddr = bp->b_data + op->olt_size; + eaddr = bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize); while (oaddr < eaddr) { struct vxfs_oltcommon *ocp = diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index 5e305612054a..50aae77651b2 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -36,7 +36,6 @@ #include <linux/slab.h> #include <linux/pagemap.h> -#include "vxfs_kcompat.h" #include "vxfs_extern.h" diff --git 
a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 0ae2c7b8182a..27f66d3e8a04 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -155,12 +155,11 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) sbp->s_flags |= MS_RDONLY; - infp = kmalloc(sizeof(*infp), GFP_KERNEL); + infp = kcalloc(1, sizeof(*infp), GFP_KERNEL); if (!infp) { printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); return -ENOMEM; } - memset(infp, 0, sizeof(*infp)); bsize = sb_min_blocksize(sbp, BLOCK_SIZE); if (!bsize) { @@ -196,7 +195,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) #endif sbp->s_magic = rsbp->vs_magic; - sbp->s_fs_info = (void *)infp; + sbp->s_fs_info = infp; infp->vsi_raw = rsbp; infp->vsi_bp = bp; @@ -263,7 +262,7 @@ vxfs_init(void) sizeof(struct vxfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); if (vxfs_inode_cachep) - return (register_filesystem(&vxfs_fs_type)); + return register_filesystem(&vxfs_fs_type); return -ENOMEM; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e050fa58218..e94ab398b717 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -485,32 +485,6 @@ static void set_sb_syncing(int val) spin_unlock(&sb_lock); } -/* - * Find a superblock with inodes that need to be synced - */ -static struct super_block *get_super_to_sync(void) -{ - struct super_block *sb; -restart: - spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (sb->s_syncing) - continue; - sb->s_syncing = 1; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - /** * sync_inodes - writes all inodes to disk * @wait: wait for completion @@ -530,23 +504,39 @@ restart: * outstanding dirty inodes, the writeback goes block-at-a-time within the * filesystem's write_inode(). 
This is extremely slow. */ -void sync_inodes(int wait) +static void __sync_inodes(int wait) { struct super_block *sb; - set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 0); - sync_blockdev(sb->s_bdev); - drop_super(sb); + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_syncing) + continue; + sb->s_syncing = 1; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) { + sync_inodes_sb(sb, wait); + sync_blockdev(sb->s_bdev); + } + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); +} + +void sync_inodes(int wait) +{ + set_sb_syncing(0); + __sync_inodes(0); + if (wait) { set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 1); - sync_blockdev(sb->s_bdev); - drop_super(sb); - } + __sync_inodes(1); } } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2af3338f891b..3a9b6d179cbd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, start_addr = mm->free_area_cache; + if (len <= mm->cached_hole_size) + start_addr = TASK_UNMAPPED_BASE; + full_search: addr = ALIGN(start_addr, HPAGE_SIZE); diff --git a/fs/inode.c b/fs/inode.c index 801fe7f36280..1f9a3a2b89bc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -500,7 +500,7 @@ repeat: continue; if (!test(inode, data)) continue; - if (inode->i_state & (I_FREEING|I_CLEAR)) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -525,7 +525,7 @@ repeat: continue; if (inode->i_sb != sb) continue; - if (inode->i_state & (I_FREEING|I_CLEAR)) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); - 
if (!(inode->i_state & I_FREEING)) + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) __iget(inode); else /* @@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_LOCK))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; - spin_unlock(&inode_lock); - if (!sb || (sb->s_flags & MS_ACTIVE)) + if (!sb || (sb->s_flags & MS_ACTIVE)) { + spin_unlock(&inode_lock); return; + } + inode->i_state |= I_WILL_FREE; + spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); + inode->i_state &= ~I_WILL_FREE; inodes_stat.nr_unused--; hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); - inode->i_state|=I_FREEING; + inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); if (inode->i_data.nrpages) diff --git a/fs/ioprio.c b/fs/ioprio.c new file mode 100644 index 000000000000..663e420636d6 --- /dev/null +++ b/fs/ioprio.c @@ -0,0 +1,172 @@ +/* + * fs/ioprio.c + * + * Copyright (C) 2004 Jens Axboe <axboe@suse.de> + * + * Helper functions for setting/querying io priorities of processes. The + * system calls closely mimmick getpriority/setpriority, see the man page for + * those. The prio argument is a composite of prio class and prio data, where + * the data argument has meaning within that class. The standard scheduling + * classes have 8 distinct prio levels, with 0 being the highest prio and 7 + * being the lowest. 
+ * + * IOW, setting BE scheduling class with prio 2 is done ala: + * + * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; + * + * ioprio_set(PRIO_PROCESS, pid, prio); + * + * See also Documentation/block/ioprio.txt + * + */ +#include <linux/kernel.h> +#include <linux/ioprio.h> +#include <linux/blkdev.h> + +static int set_task_ioprio(struct task_struct *task, int ioprio) +{ + struct io_context *ioc; + + if (task->uid != current->euid && + task->uid != current->uid && !capable(CAP_SYS_NICE)) + return -EPERM; + + task_lock(task); + + task->ioprio = ioprio; + + ioc = task->io_context; + if (ioc && ioc->set_ioprio) + ioc->set_ioprio(ioc, ioprio); + + task_unlock(task); + return 0; +} + +asmlinkage int sys_ioprio_set(int which, int who, int ioprio) +{ + int class = IOPRIO_PRIO_CLASS(ioprio); + int data = IOPRIO_PRIO_DATA(ioprio); + struct task_struct *p, *g; + struct user_struct *user; + int ret; + + switch (class) { + case IOPRIO_CLASS_RT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* fall through, rt has prio field too */ + case IOPRIO_CLASS_BE: + if (data >= IOPRIO_BE_NR || data < 0) + return -EINVAL; + + break; + case IOPRIO_CLASS_IDLE: + break; + default: + return -EINVAL; + } + + ret = -ESRCH; + read_lock_irq(&tasklist_lock); + switch (which) { + case IOPRIO_WHO_PROCESS: + if (!who) + p = current; + else + p = find_task_by_pid(who); + if (p) + ret = set_task_ioprio(p, ioprio); + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); + do_each_task_pid(who, PIDTYPE_PGID, p) { + ret = set_task_ioprio(p, ioprio); + if (ret) + break; + } while_each_task_pid(who, PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + break; + + do_each_thread(g, p) { + if (p->uid != who) + continue; + ret = set_task_ioprio(p, ioprio); + if (ret) + break; + } while_each_thread(g, p); + + if (who) + free_uid(user); + break; + default: + ret = -EINVAL; + } + + 
read_unlock_irq(&tasklist_lock); + return ret; +} + +asmlinkage int sys_ioprio_get(int which, int who) +{ + struct task_struct *g, *p; + struct user_struct *user; + int ret = -ESRCH; + + read_lock_irq(&tasklist_lock); + switch (which) { + case IOPRIO_WHO_PROCESS: + if (!who) + p = current; + else + p = find_task_by_pid(who); + if (p) + ret = p->ioprio; + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); + do_each_task_pid(who, PIDTYPE_PGID, p) { + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); + } while_each_task_pid(who, PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + break; + + do_each_thread(g, p) { + if (p->uid != user->uid) + continue; + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); + } while_each_thread(g, p); + + if (who) + free_uid(user); + break; + default: + ret = -EINVAL; + } + + read_unlock_irq(&tasklist_lock); + return ret; +} + diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 6030956b894b..7901ac9f97ab 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -193,12 +193,17 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, /* Handle everything else. Do name translation if there is no Rock Ridge NM field. 
*/ - if (sbi->s_unhide == 'n') { - /* Do not report hidden or associated files */ - if (de->flags[-sbi->s_high_sierra] & 5) { - filp->f_pos += de_len; - continue; - } + + /* + * Do not report hidden files if so instructed, or associated + * files unless instructed to do so + */ + if ((sbi->s_hide == 'y' && + (de->flags[-sbi->s_high_sierra] & 1)) || + (sbi->s_showassoc =='n' && + (de->flags[-sbi->s_high_sierra] & 4))) { + filp->f_pos += de_len; + continue; } map = 1; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index abd7b12eeca7..1652de1b6cb9 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -28,11 +28,6 @@ #define BEQUIET -#ifdef LEAK_CHECK -static int check_malloc; -static int check_bread; -#endif - static int isofs_hashi(struct dentry *parent, struct qstr *qstr); static int isofs_hash(struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); @@ -55,11 +50,6 @@ static void isofs_put_super(struct super_block *sb) } #endif -#ifdef LEAK_CHECK - printk("Outstanding mallocs:%d, outstanding buffers: %d\n", - check_malloc, check_bread); -#endif - kfree(sbi); sb->s_fs_info = NULL; return; @@ -73,7 +63,7 @@ static kmem_cache_t *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { struct iso_inode_info *ei; - ei = (struct iso_inode_info *)kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -84,9 +74,9 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { - struct iso_inode_info *ei = (struct iso_inode_info *) foo; + struct iso_inode_info *ei = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) @@ -107,7 +97,8 @@ 
static int init_inodecache(void) static void destroy_inodecache(void) { if (kmem_cache_destroy(isofs_inode_cachep)) - printk(KERN_INFO "iso_inode_cache: not all structures were freed\n"); + printk(KERN_INFO "iso_inode_cache: not all structures were " + "freed\n"); } static int isofs_remount(struct super_block *sb, int *flags, char *data) @@ -144,7 +135,7 @@ static struct dentry_operations isofs_dentry_ops[] = { { .d_hash = isofs_hashi_ms, .d_compare = isofs_dentry_cmpi_ms, - } + }, #endif }; @@ -153,7 +144,8 @@ struct iso9660_options{ char rock; char joliet; char cruft; - char unhide; + char hide; + char showassoc; char nocompress; unsigned char check; unsigned int blocksize; @@ -219,8 +211,8 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) /* * Case insensitive compare of two isofs names. */ -static int -isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms) +static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a, + struct qstr *b, int ms) { int alen, blen; @@ -243,8 +235,8 @@ isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int /* * Case sensitive compare of two isofs names. 
*/ -static int -isofs_dentry_cmp_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms) +static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, + struct qstr *b, int ms) { int alen, blen; @@ -318,13 +310,15 @@ enum { Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, - Opt_nocompress, + Opt_nocompress, Opt_hide, Opt_showassoc, }; static match_table_t tokens = { {Opt_norock, "norock"}, {Opt_nojoliet, "nojoliet"}, {Opt_unhide, "unhide"}, + {Opt_hide, "hide"}, + {Opt_showassoc, "showassoc"}, {Opt_cruft, "cruft"}, {Opt_utf8, "utf8"}, {Opt_iocharset, "iocharset=%s"}, @@ -356,7 +350,7 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static int parse_options(char *options, struct iso9660_options * popt) +static int parse_options(char *options, struct iso9660_options *popt) { char *p; int option; @@ -365,7 +359,8 @@ static int parse_options(char *options, struct iso9660_options * popt) popt->rock = 'y'; popt->joliet = 'y'; popt->cruft = 'n'; - popt->unhide = 'n'; + popt->hide = 'n'; + popt->showassoc = 'n'; popt->check = 'u'; /* unset */ popt->nocompress = 0; popt->blocksize = 1024; @@ -398,8 +393,12 @@ static int parse_options(char *options, struct iso9660_options * popt) case Opt_nojoliet: popt->joliet = 'n'; break; + case Opt_hide: + popt->hide = 'y'; + break; case Opt_unhide: - popt->unhide = 'y'; + case Opt_showassoc: + popt->showassoc = 'y'; break; case Opt_cruft: popt->cruft = 'y'; @@ -493,7 +492,7 @@ static int parse_options(char *options, struct iso9660_options * popt) */ #define WE_OBEY_THE_WRITTEN_STANDARDS 1 -static unsigned int isofs_get_last_session(struct super_block *sb,s32 session ) +static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) { struct cdrom_multisession ms_info; unsigned int vol_desc_start; @@ -518,7 +517,8 @@ static 
unsigned int isofs_get_last_session(struct super_block *sb,s32 session ) printk(KERN_ERR "Invalid session number or type of track\n"); } i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); - if(session > 0) printk(KERN_ERR "Invalid session number\n"); + if (session > 0) + printk(KERN_ERR "Invalid session number\n"); #if 0 printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); if (i==0) { @@ -557,13 +557,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) struct iso9660_options opt; struct isofs_sb_info * sbi; - sbi = kmalloc(sizeof(struct isofs_sb_info), GFP_KERNEL); + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; s->s_fs_info = sbi; - memset(sbi, 0, sizeof(struct isofs_sb_info)); + memset(sbi, 0, sizeof(*sbi)); - if (!parse_options((char *) data, &opt)) + if (!parse_options((char *)data, &opt)) goto out_freesbi; /* @@ -792,7 +792,8 @@ root_found: sbi->s_rock = (opt.rock == 'y' ? 2 : 0); sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ sbi->s_cruft = opt.cruft; - sbi->s_unhide = opt.unhide; + sbi->s_hide = opt.hide; + sbi->s_showassoc = opt.showassoc; sbi->s_uid = opt.uid; sbi->s_gid = opt.gid; sbi->s_utf8 = opt.utf8; @@ -1002,7 +1003,6 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, rv++; } - abort: unlock_kernel(); return rv; @@ -1014,7 +1014,7 @@ abort: static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - if ( create ) { + if (create) { printk("isofs_get_block: Kernel tries to allocate a block\n"); return -EROFS; } @@ -1061,19 +1061,17 @@ static struct address_space_operations isofs_aops = { static inline void test_and_set_uid(uid_t *p, uid_t value) { - if(value) { + if (value) *p = value; - } } static inline void test_and_set_gid(gid_t *p, gid_t value) { - if(value) { + if (value) *p = value; - } } -static int isofs_read_level3_size(struct inode * inode) +static int isofs_read_level3_size(struct 
inode *inode) { unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; @@ -1136,7 +1134,7 @@ static int isofs_read_level3_size(struct inode * inode) bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; - memcpy((void *) tmpde + slop, bh->b_data, offset); + memcpy((void *)tmpde+slop, bh->b_data, offset); } de = tmpde; } @@ -1150,12 +1148,11 @@ static int isofs_read_level3_size(struct inode * inode) more_entries = de->flags[-high_sierra] & 0x80; i++; - if(i > 100) + if (i > 100) goto out_toomany; - } while(more_entries); + } while (more_entries); out: - if (tmpde) - kfree(tmpde); + kfree(tmpde); if (bh) brelse(bh); return 0; @@ -1179,7 +1176,7 @@ out_toomany: goto out; } -static void isofs_read_inode(struct inode * inode) +static void isofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1249,7 +1246,7 @@ static void isofs_read_inode(struct inode * inode) ei->i_format_parm[2] = 0; ei->i_section_size = isonum_733 (de->size); - if(de->flags[-high_sierra] & 0x80) { + if (de->flags[-high_sierra] & 0x80) { if(isofs_read_level3_size(inode)) goto fail; } else { ei->i_next_section_block = 0; @@ -1336,16 +1333,16 @@ static void isofs_read_inode(struct inode * inode) /* XXX - parse_rock_ridge_inode() had already set i_rdev. 
*/ init_special_inode(inode, inode->i_mode, inode->i_rdev); - out: +out: if (tmpde) kfree(tmpde); if (bh) brelse(bh); return; - out_badread: +out_badread: printk(KERN_WARNING "ISOFS: unable to read i-node block\n"); - fail: +fail: make_bad_inode(inode); goto out; } @@ -1394,11 +1391,8 @@ struct inode *isofs_iget(struct super_block *sb, hashval = (block << sb->s_blocksize_bits) | offset; - inode = iget5_locked(sb, - hashval, - &isofs_iget5_test, - &isofs_iget5_set, - &data); + inode = iget5_locked(sb, hashval, &isofs_iget5_test, + &isofs_iget5_set, &data); if (inode && (inode->i_state & I_NEW)) { sb->s_op->read_inode(inode); @@ -1408,36 +1402,6 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -#ifdef LEAK_CHECK -#undef malloc -#undef free_s -#undef sb_bread -#undef brelse - -void * leak_check_malloc(unsigned int size){ - void * tmp; - check_malloc++; - tmp = kmalloc(size, GFP_KERNEL); - return tmp; -} - -void leak_check_free_s(void * obj, int size){ - check_malloc--; - return kfree(obj); -} - -struct buffer_head * leak_check_bread(struct super_block *sb, int block){ - check_bread++; - return sb_bread(sb, block); -} - -void leak_check_brelse(struct buffer_head * bh){ - check_bread--; - return brelse(bh); -} - -#endif - static struct super_block *isofs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 9ce7b51fb614..38c75151fc66 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -47,6 +47,8 @@ struct isofs_sb_info { unsigned char s_nosuid; unsigned char s_nodev; unsigned char s_nocompress; + unsigned char s_hide; + unsigned char s_showassoc; mode_t s_mode; gid_t s_gid; diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 690edf37173c..e37e82b7cbf0 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -131,14 +131,16 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, } /* - * Skip hidden or associated files unless unhide is set + * Skip 
hidden or associated files unless hide or showassoc, + * respectively, is set */ match = 0; if (dlen > 0 && - (!(de->flags[-sbi->s_high_sierra] & 5) - || sbi->s_unhide == 'y')) - { - match = (isofs_cmp(dentry,dpnt,dlen) == 0); + (sbi->s_hide =='n' || + (!(de->flags[-sbi->s_high_sierra] & 1))) && + (sbi->s_showassoc =='y' || + (!(de->flags[-sbi->s_high_sierra] & 4)))) { + match = (isofs_cmp(dentry, dpnt, dlen) == 0); } if (match) { isofs_normalize_block_and_offset(de, @@ -146,11 +148,11 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, &offset_saved); *block_rv = block_saved; *offset_rv = offset_saved; - if (bh) brelse(bh); + brelse(bh); return 1; } } - if (bh) brelse(bh); + brelse(bh); return 0; } diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 089e79c65585..4326cb47f8fa 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -13,352 +13,542 @@ #include "isofs.h" #include "rock.h" -/* These functions are designed to read the system areas of a directory record +/* + * These functions are designed to read the system areas of a directory record * and extract relevant information. There are different functions provided * depending upon what information we need at the time. One function fills * out an inode structure, a second one extracts a filename, a third one * returns a symbolic link name, and a fourth one returns the extent number - * for the file. */ - -#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */ - - -/* This is a way of ensuring that we have something in the system - use fields that is compatible with Rock Ridge */ -#define CHECK_SP(FAIL) \ - if(rr->u.SP.magic[0] != 0xbe) FAIL; \ - if(rr->u.SP.magic[1] != 0xef) FAIL; \ - ISOFS_SB(inode->i_sb)->s_rock_offset=rr->u.SP.skip; -/* We define a series of macros because each function must do exactly the - same thing in certain places. 
We use the macros to ensure that everything - is done correctly */ - -#define CONTINUE_DECLS \ - int cont_extent = 0, cont_offset = 0, cont_size = 0; \ - void *buffer = NULL - -#define CHECK_CE \ - {cont_extent = isonum_733(rr->u.CE.extent); \ - cont_offset = isonum_733(rr->u.CE.offset); \ - cont_size = isonum_733(rr->u.CE.size);} - -#define SETUP_ROCK_RIDGE(DE,CHR,LEN) \ - {LEN= sizeof(struct iso_directory_record) + DE->name_len[0]; \ - if(LEN & 1) LEN++; \ - CHR = ((unsigned char *) DE) + LEN; \ - LEN = *((unsigned char *) DE) - LEN; \ - if (LEN<0) LEN=0; \ - if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1) \ - { \ - LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset; \ - CHR+=ISOFS_SB(inode->i_sb)->s_rock_offset; \ - if (LEN<0) LEN=0; \ - } \ -} - -#define MAYBE_CONTINUE(LABEL,DEV) \ - {if (buffer) { kfree(buffer); buffer = NULL; } \ - if (cont_extent){ \ - int block, offset, offset1; \ - struct buffer_head * pbh; \ - buffer = kmalloc(cont_size,GFP_KERNEL); \ - if (!buffer) goto out; \ - block = cont_extent; \ - offset = cont_offset; \ - offset1 = 0; \ - pbh = sb_bread(DEV->i_sb, block); \ - if(pbh){ \ - if (offset > pbh->b_size || offset + cont_size > pbh->b_size){ \ - brelse(pbh); \ - goto out; \ - } \ - memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \ - brelse(pbh); \ - chr = (unsigned char *) buffer; \ - len = cont_size; \ - cont_extent = 0; \ - cont_size = 0; \ - cont_offset = 0; \ - goto LABEL; \ - } \ - printk("Unable to read rock-ridge attributes\n"); \ - }} - -/* return length of name field; 0: not found, -1: to be ignored */ -int get_rock_ridge_filename(struct iso_directory_record * de, - char * retname, struct inode * inode) + * for the file. 
+ */ + +#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */ + +struct rock_state { + void *buffer; + unsigned char *chr; + int len; + int cont_size; + int cont_extent; + int cont_offset; + struct inode *inode; +}; + +/* + * This is a way of ensuring that we have something in the system + * use fields that is compatible with Rock Ridge. Return zero on success. + */ + +static int check_sp(struct rock_ridge *rr, struct inode *inode) { - int len; - unsigned char * chr; - CONTINUE_DECLS; - int retnamlen = 0, truncate=0; - - if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; - *retname = 0; - - SETUP_ROCK_RIDGE(de, chr, len); - repeat: - { - struct rock_ridge * rr; - int sig; - - while (len > 2){ /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; - if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) goto out; /* corrupted isofs */ - - switch(sig){ - case SIG('R','R'): - if((rr->u.RR.flags[0] & RR_NM) == 0) goto out; - break; - case SIG('S','P'): - CHECK_SP(goto out); - break; - case SIG('C','E'): - CHECK_CE; - break; - case SIG('N','M'): - if (truncate) break; - if (rr->len < 5) break; - /* - * If the flags are 2 or 4, this indicates '.' or '..'. - * We don't want to do anything with this, because it - * screws up the code that calls us. We don't really - * care anyways, since we can just use the non-RR - * name. 
- */ - if (rr->u.NM.flags & 6) { - break; + if (rr->u.SP.magic[0] != 0xbe) + return -1; + if (rr->u.SP.magic[1] != 0xef) + return -1; + ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip; + return 0; +} + +static void setup_rock_ridge(struct iso_directory_record *de, + struct inode *inode, struct rock_state *rs) +{ + rs->len = sizeof(struct iso_directory_record) + de->name_len[0]; + if (rs->len & 1) + (rs->len)++; + rs->chr = (unsigned char *)de + rs->len; + rs->len = *((unsigned char *)de) - rs->len; + if (rs->len < 0) + rs->len = 0; + + if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) { + rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset; + rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset; + if (rs->len < 0) + rs->len = 0; + } +} + +static void init_rock_state(struct rock_state *rs, struct inode *inode) +{ + memset(rs, 0, sizeof(*rs)); + rs->inode = inode; +} + +/* + * Returns 0 if the caller should continue scanning, 1 if the scan must end + * and -ve on error. + */ +static int rock_continue(struct rock_state *rs) +{ + int ret = 1; + int blocksize = 1 << rs->inode->i_blkbits; + const int min_de_size = offsetof(struct rock_ridge, u); + + kfree(rs->buffer); + rs->buffer = NULL; + + if ((unsigned)rs->cont_offset > blocksize - min_de_size || + (unsigned)rs->cont_size > blocksize || + (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) { + printk(KERN_NOTICE "rock: corrupted directory entry. 
" + "extent=%d, offset=%d, size=%d\n", + rs->cont_extent, rs->cont_offset, rs->cont_size); + ret = -EIO; + goto out; } - if (rr->u.NM.flags & ~1) { - printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags); - break; + if (rs->cont_extent) { + struct buffer_head *bh; + + rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL); + if (!rs->buffer) { + ret = -ENOMEM; + goto out; + } + ret = -EIO; + bh = sb_bread(rs->inode->i_sb, rs->cont_extent); + if (bh) { + memcpy(rs->buffer, bh->b_data + rs->cont_offset, + rs->cont_size); + put_bh(bh); + rs->chr = rs->buffer; + rs->len = rs->cont_size; + rs->cont_extent = 0; + rs->cont_size = 0; + rs->cont_offset = 0; + return 0; + } + printk("Unable to read rock-ridge attributes\n"); + } +out: + kfree(rs->buffer); + rs->buffer = NULL; + return ret; +} + +/* + * We think there's a record of type `sig' at rs->chr. Parse the signature + * and make sure that there's really room for a record of that type. + */ +static int rock_check_overflow(struct rock_state *rs, int sig) +{ + int len; + + switch (sig) { + case SIG('S', 'P'): + len = sizeof(struct SU_SP_s); + break; + case SIG('C', 'E'): + len = sizeof(struct SU_CE_s); + break; + case SIG('E', 'R'): + len = sizeof(struct SU_ER_s); + break; + case SIG('R', 'R'): + len = sizeof(struct RR_RR_s); + break; + case SIG('P', 'X'): + len = sizeof(struct RR_PX_s); + break; + case SIG('P', 'N'): + len = sizeof(struct RR_PN_s); + break; + case SIG('S', 'L'): + len = sizeof(struct RR_SL_s); + break; + case SIG('N', 'M'): + len = sizeof(struct RR_NM_s); + break; + case SIG('C', 'L'): + len = sizeof(struct RR_CL_s); + break; + case SIG('P', 'L'): + len = sizeof(struct RR_PL_s); + break; + case SIG('T', 'F'): + len = sizeof(struct RR_TF_s); + break; + case SIG('Z', 'F'): + len = sizeof(struct RR_ZF_s); + break; + default: + len = 0; + break; } - if((strlen(retname) + rr->len - 5) >= 254) { - truncate = 1; - break; + len += offsetof(struct rock_ridge, u); + if (len > rs->len) { + printk(KERN_NOTICE 
"rock: directory entry would overflow " + "storage\n"); + printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n", + sig, len, rs->len); + return -EIO; + } + return 0; +} + +/* + * return length of name field; 0: not found, -1: to be ignored + */ +int get_rock_ridge_filename(struct iso_directory_record *de, + char *retname, struct inode *inode) +{ + struct rock_state rs; + struct rock_ridge *rr; + int sig; + int retnamlen = 0; + int truncate = 0; + int ret = 0; + + if (!ISOFS_SB(inode->i_sb)->s_rock) + return 0; + *retname = 0; + + init_rock_state(&rs, inode); + setup_rock_ridge(de, inode, &rs); +repeat: + + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; + if (rr->len < 3) + goto out; /* Something got screwed up here */ + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto eio; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) + goto eio; /* corrupted isofs */ + + switch (sig) { + case SIG('R', 'R'): + if ((rr->u.RR.flags[0] & RR_NM) == 0) + goto out; + break; + case SIG('S', 'P'): + if (check_sp(rr, inode)) + goto out; + break; + case SIG('C', 'E'): + rs.cont_extent = isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); + break; + case SIG('N', 'M'): + if (truncate) + break; + if (rr->len < 5) + break; + /* + * If the flags are 2 or 4, this indicates '.' or '..'. + * We don't want to do anything with this, because it + * screws up the code that calls us. We don't really + * care anyways, since we can just use the non-RR + * name. 
+ */ + if (rr->u.NM.flags & 6) + break; + + if (rr->u.NM.flags & ~1) { + printk("Unsupported NM flag settings (%d)\n", + rr->u.NM.flags); + break; + } + if ((strlen(retname) + rr->len - 5) >= 254) { + truncate = 1; + break; + } + strncat(retname, rr->u.NM.name, rr->len - 5); + retnamlen += rr->len - 5; + break; + case SIG('R', 'E'): + kfree(rs.buffer); + return -1; + default: + break; + } } - strncat(retname, rr->u.NM.name, rr->len - 5); - retnamlen += rr->len - 5; - break; - case SIG('R','E'): - if (buffer) kfree(buffer); - return -1; - default: - break; - } - } - } - MAYBE_CONTINUE(repeat,inode); - if (buffer) kfree(buffer); - return retnamlen; /* If 0, this file did not have a NM field */ - out: - if(buffer) kfree(buffer); - return 0; + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret == 1) + return retnamlen; /* If 0, this file did not have a NM field */ +out: + kfree(rs.buffer); + return ret; +eio: + ret = -EIO; + goto out; } static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, struct inode *inode, int regard_xa) { - int len; - unsigned char * chr; - int symlink_len = 0; - CONTINUE_DECLS; - - if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; - - SETUP_ROCK_RIDGE(de, chr, len); - if (regard_xa) - { - chr+=14; - len-=14; - if (len<0) len=0; - } - - repeat: - { - int cnt, sig; - struct inode * reloc; - struct rock_ridge * rr; - int rootflag; - - while (len > 2){ /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; - if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) goto out; /* corrupted isofs */ - - switch(sig){ + int symlink_len = 0; + int cnt, sig; + struct inode *reloc; + struct rock_ridge *rr; + int rootflag; + struct rock_state rs; + int ret = 0; + + if (!ISOFS_SB(inode->i_sb)->s_rock) + return 0; + + init_rock_state(&rs, inode); + setup_rock_ridge(de, inode, &rs); + if (regard_xa) { + rs.chr += 14; + 
rs.len -= 14; + if (rs.len < 0) + rs.len = 0; + } + +repeat: + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; + if (rr->len < 3) + goto out; /* Something got screwed up here */ + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto eio; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) + goto eio; /* corrupted isofs */ + + switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ - case SIG('R','R'): - if((rr->u.RR.flags[0] & - (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out; - break; + case SIG('R', 'R'): + if ((rr->u.RR.flags[0] & + (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) + goto out; + break; #endif - case SIG('S','P'): - CHECK_SP(goto out); - break; - case SIG('C','E'): - CHECK_CE; - break; - case SIG('E','R'): - ISOFS_SB(inode->i_sb)->s_rock = 1; - printk(KERN_DEBUG "ISO 9660 Extensions: "); - { int p; - for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]); - } - printk("\n"); - break; - case SIG('P','X'): - inode->i_mode = isonum_733(rr->u.PX.mode); - inode->i_nlink = isonum_733(rr->u.PX.n_links); - inode->i_uid = isonum_733(rr->u.PX.uid); - inode->i_gid = isonum_733(rr->u.PX.gid); - break; - case SIG('P','N'): - { int high, low; - high = isonum_733(rr->u.PN.dev_high); - low = isonum_733(rr->u.PN.dev_low); - /* - * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4, - * then the high field is unused, and the device number is completely - * stored in the low field. Some writers may ignore this subtlety, - * and as a result we test to see if the entire device number is - * stored in the low field, and use that. - */ - if((low & ~0xff) && high == 0) { - inode->i_rdev = MKDEV(low >> 8, low & 0xff); - } else { - inode->i_rdev = MKDEV(high, low); - } - } - break; - case SIG('T','F'): - /* Some RRIP writers incorrectly place ctime in the TF_CREATE field. - Try to handle this correctly for either case. 
*/ - cnt = 0; /* Rock ridge never appears on a High Sierra disk */ - if(rr->u.TF.flags & TF_CREATE) { - inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_ctime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_MODIFY) { - inode->i_mtime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_mtime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_ACCESS) { - inode->i_atime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_atime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_ATTRIBUTES) { - inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_ctime.tv_nsec = 0; - } - break; - case SIG('S','L'): - {int slen; - struct SL_component * slp; - struct SL_component * oldslp; - slen = rr->len - 5; - slp = &rr->u.SL.link; - inode->i_size = symlink_len; - while (slen > 1){ - rootflag = 0; - switch(slp->flags &~1){ - case 0: - inode->i_size += slp->len; - break; - case 2: - inode->i_size += 1; - break; - case 4: - inode->i_size += 2; - break; - case 8: - rootflag = 1; - inode->i_size += 1; - break; - default: - printk("Symlink component flag not implemented\n"); - } - slen -= slp->len + 2; - oldslp = slp; - slp = (struct SL_component *) (((char *) slp) + slp->len + 2); - - if(slen < 2) { - if( ((rr->u.SL.flags & 1) != 0) - && ((oldslp->flags & 1) == 0) ) inode->i_size += 1; - break; - } - - /* - * If this component record isn't continued, then append a '/'. 
- */ - if (!rootflag && (oldslp->flags & 1) == 0) - inode->i_size += 1; - } - } - symlink_len = inode->i_size; - break; - case SIG('R','E'): - printk(KERN_WARNING "Attempt to read inode for relocated directory\n"); - goto out; - case SIG('C','L'): - ISOFS_I(inode)->i_first_extent = isonum_733(rr->u.CL.location); - reloc = isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0); - if (!reloc) - goto out; - inode->i_mode = reloc->i_mode; - inode->i_nlink = reloc->i_nlink; - inode->i_uid = reloc->i_uid; - inode->i_gid = reloc->i_gid; - inode->i_rdev = reloc->i_rdev; - inode->i_size = reloc->i_size; - inode->i_blocks = reloc->i_blocks; - inode->i_atime = reloc->i_atime; - inode->i_ctime = reloc->i_ctime; - inode->i_mtime = reloc->i_mtime; - iput(reloc); - break; + case SIG('S', 'P'): + if (check_sp(rr, inode)) + goto out; + break; + case SIG('C', 'E'): + rs.cont_extent = isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); + break; + case SIG('E', 'R'): + ISOFS_SB(inode->i_sb)->s_rock = 1; + printk(KERN_DEBUG "ISO 9660 Extensions: "); + { + int p; + for (p = 0; p < rr->u.ER.len_id; p++) + printk("%c", rr->u.ER.data[p]); + } + printk("\n"); + break; + case SIG('P', 'X'): + inode->i_mode = isonum_733(rr->u.PX.mode); + inode->i_nlink = isonum_733(rr->u.PX.n_links); + inode->i_uid = isonum_733(rr->u.PX.uid); + inode->i_gid = isonum_733(rr->u.PX.gid); + break; + case SIG('P', 'N'): + { + int high, low; + high = isonum_733(rr->u.PN.dev_high); + low = isonum_733(rr->u.PN.dev_low); + /* + * The Rock Ridge standard specifies that if + * sizeof(dev_t) <= 4, then the high field is + * unused, and the device number is completely + * stored in the low field. Some writers may + * ignore this subtlety, + * and as a result we test to see if the entire + * device number is + * stored in the low field, and use that. 
+ */ + if ((low & ~0xff) && high == 0) { + inode->i_rdev = + MKDEV(low >> 8, low & 0xff); + } else { + inode->i_rdev = + MKDEV(high, low); + } + } + break; + case SIG('T', 'F'): + /* + * Some RRIP writers incorrectly place ctime in the + * TF_CREATE field. Try to handle this correctly for + * either case. + */ + /* Rock ridge never appears on a High Sierra disk */ + cnt = 0; + if (rr->u.TF.flags & TF_CREATE) { + inode->i_ctime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_ctime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_MODIFY) { + inode->i_mtime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_mtime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_ACCESS) { + inode->i_atime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_atime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_ATTRIBUTES) { + inode->i_ctime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_ctime.tv_nsec = 0; + } + break; + case SIG('S', 'L'): + { + int slen; + struct SL_component *slp; + struct SL_component *oldslp; + slen = rr->len - 5; + slp = &rr->u.SL.link; + inode->i_size = symlink_len; + while (slen > 1) { + rootflag = 0; + switch (slp->flags & ~1) { + case 0: + inode->i_size += + slp->len; + break; + case 2: + inode->i_size += 1; + break; + case 4: + inode->i_size += 2; + break; + case 8: + rootflag = 1; + inode->i_size += 1; + break; + default: + printk("Symlink component flag " + "not implemented\n"); + } + slen -= slp->len + 2; + oldslp = slp; + slp = (struct SL_component *) + (((char *)slp) + slp->len + 2); + + if (slen < 2) { + if (((rr->u.SL. + flags & 1) != 0) + && + ((oldslp-> + flags & 1) == 0)) + inode->i_size += + 1; + break; + } + + /* + * If this component record isn't + * continued, then append a '/'. 
+ */ + if (!rootflag + && (oldslp->flags & 1) == 0) + inode->i_size += 1; + } + } + symlink_len = inode->i_size; + break; + case SIG('R', 'E'): + printk(KERN_WARNING "Attempt to read inode for " + "relocated directory\n"); + goto out; + case SIG('C', 'L'): + ISOFS_I(inode)->i_first_extent = + isonum_733(rr->u.CL.location); + reloc = + isofs_iget(inode->i_sb, + ISOFS_I(inode)->i_first_extent, + 0); + if (!reloc) + goto out; + inode->i_mode = reloc->i_mode; + inode->i_nlink = reloc->i_nlink; + inode->i_uid = reloc->i_uid; + inode->i_gid = reloc->i_gid; + inode->i_rdev = reloc->i_rdev; + inode->i_size = reloc->i_size; + inode->i_blocks = reloc->i_blocks; + inode->i_atime = reloc->i_atime; + inode->i_ctime = reloc->i_ctime; + inode->i_mtime = reloc->i_mtime; + iput(reloc); + break; #ifdef CONFIG_ZISOFS - case SIG('Z','F'): - if ( !ISOFS_SB(inode->i_sb)->s_nocompress ) { - int algo; - algo = isonum_721(rr->u.ZF.algorithm); - if ( algo == SIG('p','z') ) { - int block_shift = isonum_711(&rr->u.ZF.parms[1]); - if ( block_shift < PAGE_CACHE_SHIFT || block_shift > 17 ) { - printk(KERN_WARNING "isofs: Can't handle ZF block size of 2^%d\n", block_shift); - } else { - /* Note: we don't change i_blocks here */ - ISOFS_I(inode)->i_file_format = isofs_file_compressed; - /* Parameters to compression algorithm (header size, block size) */ - ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]); - ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]); - inode->i_size = isonum_733(rr->u.ZF.real_size); - } - } else { - printk(KERN_WARNING "isofs: Unknown ZF compression algorithm: %c%c\n", - rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]); - } - } - break; + case SIG('Z', 'F'): { + int algo; + + if (ISOFS_SB(inode->i_sb)->s_nocompress) + break; + algo = isonum_721(rr->u.ZF.algorithm); + if (algo == SIG('p', 'z')) { + int block_shift = + isonum_711(&rr->u.ZF.parms[1]); + if (block_shift < PAGE_CACHE_SHIFT + || block_shift > 17) { + printk(KERN_WARNING "isofs: " + 
"Can't handle ZF block " + "size of 2^%d\n", + block_shift); + } else { + /* + * Note: we don't change + * i_blocks here + */ + ISOFS_I(inode)->i_file_format = + isofs_file_compressed; + /* + * Parameters to compression + * algorithm (header size, + * block size) + */ + ISOFS_I(inode)->i_format_parm[0] = + isonum_711(&rr->u.ZF.parms[0]); + ISOFS_I(inode)->i_format_parm[1] = + isonum_711(&rr->u.ZF.parms[1]); + inode->i_size = + isonum_733(rr->u.ZF. + real_size); + } + } else { + printk(KERN_WARNING + "isofs: Unknown ZF compression " + "algorithm: %c%c\n", + rr->u.ZF.algorithm[0], + rr->u.ZF.algorithm[1]); + } + break; + } #endif - default: - break; - } - } - } - MAYBE_CONTINUE(repeat,inode); - out: - if(buffer) kfree(buffer); - return 0; + default: + break; + } + } + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret == 1) + ret = 0; +out: + kfree(rs.buffer); + return ret; +eio: + ret = -EIO; + goto out; } static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) @@ -376,32 +566,32 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) if (slp->len > plimit - rpnt) return NULL; memcpy(rpnt, slp->text, slp->len); - rpnt+=slp->len; + rpnt += slp->len; break; case 2: if (rpnt >= plimit) return NULL; - *rpnt++='.'; + *rpnt++ = '.'; break; case 4: if (2 > plimit - rpnt) return NULL; - *rpnt++='.'; - *rpnt++='.'; + *rpnt++ = '.'; + *rpnt++ = '.'; break; case 8: if (rpnt >= plimit) return NULL; rootflag = 1; - *rpnt++='/'; + *rpnt++ = '/'; break; default: printk("Symlink component flag not implemented (%d)\n", - slp->flags); + slp->flags); } slen -= slp->len + 2; oldslp = slp; - slp = (struct SL_component *) ((char *) slp + slp->len + 2); + slp = (struct SL_component *)((char *)slp + slp->len + 2); if (slen < 2) { /* @@ -412,7 +602,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; - *rpnt++='/'; + *rpnt++ = '/'; } 
break; } @@ -423,59 +613,61 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) if (!rootflag && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; - *rpnt++='/'; + *rpnt++ = '/'; } } return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record * de, - struct inode * inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) { - int result=parse_rock_ridge_inode_internal(de,inode,0); - /* if rockridge flag was reset and we didn't look for attributes - * behind eventual XA attributes, have a look there */ - if ((ISOFS_SB(inode->i_sb)->s_rock_offset==-1) - &&(ISOFS_SB(inode->i_sb)->s_rock==2)) - { - result=parse_rock_ridge_inode_internal(de,inode,14); - } - return result; -} + int result = parse_rock_ridge_inode_internal(de, inode, 0); -/* readpage() for symlinks: reads symlink contents into the page and either - makes it uptodate and returns 0 or returns error (-EIO) */ + /* + * if rockridge flag was reset and we didn't look for attributes + * behind eventual XA attributes, have a look there + */ + if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) + && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { + result = parse_rock_ridge_inode_internal(de, inode, 14); + } + return result; +} +/* + * readpage() for symlinks: reads symlink contents into the page and either + * makes it uptodate and returns 0 or returns error (-EIO) + */ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - struct iso_inode_info *ei = ISOFS_I(inode); + struct iso_inode_info *ei = ISOFS_I(inode); char *link = kmap(page); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); struct buffer_head *bh; char *rpnt = link; unsigned char *pnt; - struct iso_directory_record *raw_inode; - CONTINUE_DECLS; + struct iso_directory_record *raw_de; unsigned long block, offset; int sig; - int len; - unsigned char *chr; struct rock_ridge *rr; + struct rock_state rs; + int ret; if 
(!ISOFS_SB(inode->i_sb)->s_rock) goto error; + init_rock_state(&rs, inode); block = ei->i_iget5_block; lock_kernel(); bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; - offset = ei->i_iget5_offset; - pnt = (unsigned char *) bh->b_data + offset; + offset = ei->i_iget5_offset; + pnt = (unsigned char *)bh->b_data + offset; - raw_inode = (struct iso_directory_record *) pnt; + raw_de = (struct iso_directory_record *)pnt; /* * If we go past the end of the buffer, there is some sort of error. @@ -483,20 +675,24 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) if (offset + *pnt > bufsize) goto out_bad_span; - /* Now test for possible Rock Ridge extensions which will override - some of these numbers in the inode structure. */ + /* + * Now test for possible Rock Ridge extensions which will override + * some of these numbers in the inode structure. + */ - SETUP_ROCK_RIDGE(raw_inode, chr, len); + setup_rock_ridge(raw_de, inode, &rs); - repeat: - while (len > 2) { /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; +repeat: + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto out; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) goto out; /* corrupted isofs */ switch (sig) { @@ -505,7 +701,8 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) goto out; break; case SIG('S', 'P'): - CHECK_SP(goto out); + if (check_sp(rr, inode)) + goto out; break; case SIG('S', 'L'): rpnt = get_symlink_chunk(rpnt, rr, @@ -515,14 +712,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) break; case SIG('C', 'E'): /* This tells is if there is a continuation record */ - CHECK_CE; + rs.cont_extent 
= isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); default: break; } } - MAYBE_CONTINUE(repeat, inode); - if (buffer) - kfree(buffer); + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret < 0) + goto fail; if (rpnt == link) goto fail; @@ -535,19 +736,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) return 0; /* error exit from macro */ - out: - if (buffer) - kfree(buffer); +out: + kfree(rs.buffer); goto fail; - out_noread: +out_noread: printk("unable to read i-node block"); goto fail; - out_bad_span: +out_bad_span: printk("symlink spans iso9660 blocks\n"); - fail: +fail: brelse(bh); unlock_kernel(); - error: +error: SetPageError(page); kunmap(page); unlock_page(page); @@ -555,5 +755,5 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) } struct address_space_operations isofs_symlink_aops = { - .readpage = rock_ridge_symlink_readpage + .readpage = rock_ridge_symlink_readpage }; diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h index deaf5c8e8b4a..ed09e2b08637 100644 --- a/fs/isofs/rock.h +++ b/fs/isofs/rock.h @@ -1,85 +1,88 @@ -/* These structs are used by the system-use-sharing protocol, in which the - Rock Ridge extensions are embedded. It is quite possible that other - extensions are present on the disk, and this is fine as long as they - all use SUSP */ - -struct SU_SP{ - unsigned char magic[2]; - unsigned char skip; -} __attribute__((packed)); - -struct SU_CE{ - char extent[8]; - char offset[8]; - char size[8]; +/* + * These structs are used by the system-use-sharing protocol, in which the + * Rock Ridge extensions are embedded. 
It is quite possible that other + * extensions are present on the disk, and this is fine as long as they + * all use SUSP + */ + +struct SU_SP_s { + unsigned char magic[2]; + unsigned char skip; +} __attribute__ ((packed)); + +struct SU_CE_s { + char extent[8]; + char offset[8]; + char size[8]; }; -struct SU_ER{ - unsigned char len_id; - unsigned char len_des; - unsigned char len_src; - unsigned char ext_ver; - char data[0]; -} __attribute__((packed)); - -struct RR_RR{ - char flags[1]; -} __attribute__((packed)); - -struct RR_PX{ - char mode[8]; - char n_links[8]; - char uid[8]; - char gid[8]; +struct SU_ER_s { + unsigned char len_id; + unsigned char len_des; + unsigned char len_src; + unsigned char ext_ver; + char data[0]; +} __attribute__ ((packed)); + +struct RR_RR_s { + char flags[1]; +} __attribute__ ((packed)); + +struct RR_PX_s { + char mode[8]; + char n_links[8]; + char uid[8]; + char gid[8]; }; -struct RR_PN{ - char dev_high[8]; - char dev_low[8]; +struct RR_PN_s { + char dev_high[8]; + char dev_low[8]; }; +struct SL_component { + unsigned char flags; + unsigned char len; + char text[0]; +} __attribute__ ((packed)); -struct SL_component{ - unsigned char flags; - unsigned char len; - char text[0]; -} __attribute__((packed)); +struct RR_SL_s { + unsigned char flags; + struct SL_component link; +} __attribute__ ((packed)); -struct RR_SL{ - unsigned char flags; - struct SL_component link; -} __attribute__((packed)); +struct RR_NM_s { + unsigned char flags; + char name[0]; +} __attribute__ ((packed)); -struct RR_NM{ - unsigned char flags; - char name[0]; -} __attribute__((packed)); - -struct RR_CL{ - char location[8]; +struct RR_CL_s { + char location[8]; }; -struct RR_PL{ - char location[8]; +struct RR_PL_s { + char location[8]; }; -struct stamp{ - char time[7]; -} __attribute__((packed)); +struct stamp { + char time[7]; +} __attribute__ ((packed)); -struct RR_TF{ - char flags; - struct stamp times[0]; /* Variable number of these beasts */ -} 
__attribute__((packed)); +struct RR_TF_s { + char flags; + struct stamp times[0]; /* Variable number of these beasts */ +} __attribute__ ((packed)); /* Linux-specific extension for transparent decompression */ -struct RR_ZF{ - char algorithm[2]; - char parms[2]; - char real_size[8]; +struct RR_ZF_s { + char algorithm[2]; + char parms[2]; + char real_size[8]; }; -/* These are the bits and their meanings for flags in the TF structure. */ +/* + * These are the bits and their meanings for flags in the TF structure. + */ #define TF_CREATE 1 #define TF_MODIFY 2 #define TF_ACCESS 4 @@ -89,31 +92,31 @@ struct RR_ZF{ #define TF_EFFECTIVE 64 #define TF_LONG_FORM 128 -struct rock_ridge{ - char signature[2]; - unsigned char len; - unsigned char version; - union{ - struct SU_SP SP; - struct SU_CE CE; - struct SU_ER ER; - struct RR_RR RR; - struct RR_PX PX; - struct RR_PN PN; - struct RR_SL SL; - struct RR_NM NM; - struct RR_CL CL; - struct RR_PL PL; - struct RR_TF TF; - struct RR_ZF ZF; - } u; +struct rock_ridge { + char signature[2]; + unsigned char len; + unsigned char version; + union { + struct SU_SP_s SP; + struct SU_CE_s CE; + struct SU_ER_s ER; + struct RR_RR_s RR; + struct RR_PX_s PX; + struct RR_PN_s PN; + struct RR_SL_s SL; + struct RR_NM_s NM; + struct RR_CL_s CL; + struct RR_PL_s PL; + struct RR_TF_s TF; + struct RR_ZF_s ZF; + } u; }; -#define RR_PX 1 /* POSIX attributes */ -#define RR_PN 2 /* POSIX devices */ -#define RR_SL 4 /* Symbolic link */ -#define RR_NM 8 /* Alternate Name */ -#define RR_CL 16 /* Child link */ -#define RR_PL 32 /* Parent link */ -#define RR_RE 64 /* Relocation directory */ -#define RR_TF 128 /* Timestamps */ +#define RR_PX 1 /* POSIX attributes */ +#define RR_PN 2 /* POSIX devices */ +#define RR_SL 4 /* Symbolic link */ +#define RR_NM 8 /* Alternate Name */ +#define RR_CL 16 /* Child link */ +#define RR_PL 32 /* Parent link */ +#define RR_RE 64 /* Relocation directory */ +#define RR_TF 128 /* Timestamps */ diff --git a/fs/jbd/journal.c 
b/fs/jbd/journal.c index 1e6f2e2ad4a3..5e7b43949517 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -167,7 +167,7 @@ loop: } wake_up(&journal->j_wait_done_commit); - if (current->flags & PF_FREEZE) { + if (freezing(current)) { /* * The simpler the better. Flushing journal isn't a * good idea, because that depends on threads that may @@ -175,7 +175,7 @@ loop: */ jbd_debug(1, "Now suspending kjournald\n"); spin_unlock(&journal->j_state_lock); - refrigerator(PF_FREEZE); + refrigerator(); spin_lock(&journal->j_state_lock); } else { /* diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 8cc6893fc56c..fc589ddd0762 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -175,8 +175,64 @@ jffs_hexdump(struct mtd_info *mtd, loff_t pos, int size) } } +/* Print the contents of a node. */ +static void +jffs_print_node(struct jffs_node *n) +{ + D(printk("jffs_node: 0x%p\n", n)); + D(printk("{\n")); + D(printk(" 0x%08x, /* version */\n", n->version)); + D(printk(" 0x%08x, /* data_offset */\n", n->data_offset)); + D(printk(" 0x%08x, /* data_size */\n", n->data_size)); + D(printk(" 0x%08x, /* removed_size */\n", n->removed_size)); + D(printk(" 0x%08x, /* fm_offset */\n", n->fm_offset)); + D(printk(" 0x%02x, /* name_size */\n", n->name_size)); + D(printk(" 0x%p, /* fm, fm->offset: %u */\n", + n->fm, (n->fm ? n->fm->offset : 0))); + D(printk(" 0x%p, /* version_prev */\n", n->version_prev)); + D(printk(" 0x%p, /* version_next */\n", n->version_next)); + D(printk(" 0x%p, /* range_prev */\n", n->range_prev)); + D(printk(" 0x%p, /* range_next */\n", n->range_next)); + D(printk("}\n")); +} + #endif +/* Print the contents of a raw inode. 
*/ +static void +jffs_print_raw_inode(struct jffs_raw_inode *raw_inode) +{ + D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino)); + D(printk("{\n")); + D(printk(" 0x%08x, /* magic */\n", raw_inode->magic)); + D(printk(" 0x%08x, /* ino */\n", raw_inode->ino)); + D(printk(" 0x%08x, /* pino */\n", raw_inode->pino)); + D(printk(" 0x%08x, /* version */\n", raw_inode->version)); + D(printk(" 0x%08x, /* mode */\n", raw_inode->mode)); + D(printk(" 0x%04x, /* uid */\n", raw_inode->uid)); + D(printk(" 0x%04x, /* gid */\n", raw_inode->gid)); + D(printk(" 0x%08x, /* atime */\n", raw_inode->atime)); + D(printk(" 0x%08x, /* mtime */\n", raw_inode->mtime)); + D(printk(" 0x%08x, /* ctime */\n", raw_inode->ctime)); + D(printk(" 0x%08x, /* offset */\n", raw_inode->offset)); + D(printk(" 0x%08x, /* dsize */\n", raw_inode->dsize)); + D(printk(" 0x%08x, /* rsize */\n", raw_inode->rsize)); + D(printk(" 0x%02x, /* nsize */\n", raw_inode->nsize)); + D(printk(" 0x%02x, /* nlink */\n", raw_inode->nlink)); + D(printk(" 0x%02x, /* spare */\n", + raw_inode->spare)); + D(printk(" %u, /* rename */\n", + raw_inode->rename)); + D(printk(" %u, /* deleted */\n", + raw_inode->deleted)); + D(printk(" 0x%02x, /* accurate */\n", + raw_inode->accurate)); + D(printk(" 0x%08x, /* dchksum */\n", raw_inode->dchksum)); + D(printk(" 0x%04x, /* nchksum */\n", raw_inode->nchksum)); + D(printk(" 0x%04x, /* chksum */\n", raw_inode->chksum)); + D(printk("}\n")); +} + #define flash_safe_acquire(arg) #define flash_safe_release(arg) @@ -2507,64 +2563,6 @@ jffs_update_file(struct jffs_file *f, struct jffs_node *node) return 0; } -/* Print the contents of a node. 
*/ -void -jffs_print_node(struct jffs_node *n) -{ - D(printk("jffs_node: 0x%p\n", n)); - D(printk("{\n")); - D(printk(" 0x%08x, /* version */\n", n->version)); - D(printk(" 0x%08x, /* data_offset */\n", n->data_offset)); - D(printk(" 0x%08x, /* data_size */\n", n->data_size)); - D(printk(" 0x%08x, /* removed_size */\n", n->removed_size)); - D(printk(" 0x%08x, /* fm_offset */\n", n->fm_offset)); - D(printk(" 0x%02x, /* name_size */\n", n->name_size)); - D(printk(" 0x%p, /* fm, fm->offset: %u */\n", - n->fm, (n->fm ? n->fm->offset : 0))); - D(printk(" 0x%p, /* version_prev */\n", n->version_prev)); - D(printk(" 0x%p, /* version_next */\n", n->version_next)); - D(printk(" 0x%p, /* range_prev */\n", n->range_prev)); - D(printk(" 0x%p, /* range_next */\n", n->range_next)); - D(printk("}\n")); -} - - -/* Print the contents of a raw inode. */ -void -jffs_print_raw_inode(struct jffs_raw_inode *raw_inode) -{ - D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino)); - D(printk("{\n")); - D(printk(" 0x%08x, /* magic */\n", raw_inode->magic)); - D(printk(" 0x%08x, /* ino */\n", raw_inode->ino)); - D(printk(" 0x%08x, /* pino */\n", raw_inode->pino)); - D(printk(" 0x%08x, /* version */\n", raw_inode->version)); - D(printk(" 0x%08x, /* mode */\n", raw_inode->mode)); - D(printk(" 0x%04x, /* uid */\n", raw_inode->uid)); - D(printk(" 0x%04x, /* gid */\n", raw_inode->gid)); - D(printk(" 0x%08x, /* atime */\n", raw_inode->atime)); - D(printk(" 0x%08x, /* mtime */\n", raw_inode->mtime)); - D(printk(" 0x%08x, /* ctime */\n", raw_inode->ctime)); - D(printk(" 0x%08x, /* offset */\n", raw_inode->offset)); - D(printk(" 0x%08x, /* dsize */\n", raw_inode->dsize)); - D(printk(" 0x%08x, /* rsize */\n", raw_inode->rsize)); - D(printk(" 0x%02x, /* nsize */\n", raw_inode->nsize)); - D(printk(" 0x%02x, /* nlink */\n", raw_inode->nlink)); - D(printk(" 0x%02x, /* spare */\n", - raw_inode->spare)); - D(printk(" %u, /* rename */\n", - raw_inode->rename)); - D(printk(" %u, /* deleted */\n", - 
raw_inode->deleted)); - D(printk(" 0x%02x, /* accurate */\n", - raw_inode->accurate)); - D(printk(" 0x%08x, /* dchksum */\n", raw_inode->dchksum)); - D(printk(" 0x%04x, /* nchksum */\n", raw_inode->nchksum)); - D(printk(" 0x%04x, /* chksum */\n", raw_inode->chksum)); - D(printk("}\n")); -} - - /* Print the contents of a file. */ #if 0 int diff --git a/fs/jffs/intrep.h b/fs/jffs/intrep.h index 4ae97b17911c..5c7abe0e2695 100644 --- a/fs/jffs/intrep.h +++ b/fs/jffs/intrep.h @@ -49,8 +49,6 @@ int jffs_garbage_collect_thread(void *c); void jffs_garbage_collect_trigger(struct jffs_control *c); /* For debugging purposes. */ -void jffs_print_node(struct jffs_node *n); -void jffs_print_raw_inode(struct jffs_raw_inode *raw_inode); #if 0 int jffs_print_file(struct jffs_file *f); #endif /* 0 */ diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c index 0cab8da49d3c..053e3a98a276 100644 --- a/fs/jffs/jffs_fm.c +++ b/fs/jffs/jffs_fm.c @@ -31,6 +31,60 @@ static void jffs_free_fm(struct jffs_fm *n); extern kmem_cache_t *fm_cache; extern kmem_cache_t *node_cache; +#if CONFIG_JFFS_FS_VERBOSE > 0 +void +jffs_print_fmcontrol(struct jffs_fmcontrol *fmc) +{ + D(printk("struct jffs_fmcontrol: 0x%p\n", fmc)); + D(printk("{\n")); + D(printk(" %u, /* flash_size */\n", fmc->flash_size)); + D(printk(" %u, /* used_size */\n", fmc->used_size)); + D(printk(" %u, /* dirty_size */\n", fmc->dirty_size)); + D(printk(" %u, /* free_size */\n", fmc->free_size)); + D(printk(" %u, /* sector_size */\n", fmc->sector_size)); + D(printk(" %u, /* min_free_size */\n", fmc->min_free_size)); + D(printk(" %u, /* max_chunk_size */\n", fmc->max_chunk_size)); + D(printk(" 0x%p, /* mtd */\n", fmc->mtd)); + D(printk(" 0x%p, /* head */ " + "(head->offset = 0x%08x)\n", + fmc->head, (fmc->head ? fmc->head->offset : 0))); + D(printk(" 0x%p, /* tail */ " + "(tail->offset + tail->size = 0x%08x)\n", + fmc->tail, + (fmc->tail ? 
fmc->tail->offset + fmc->tail->size : 0))); + D(printk(" 0x%p, /* head_extra */\n", fmc->head_extra)); + D(printk(" 0x%p, /* tail_extra */\n", fmc->tail_extra)); + D(printk("}\n")); +} +#endif /* CONFIG_JFFS_FS_VERBOSE > 0 */ + +#if CONFIG_JFFS_FS_VERBOSE > 2 +static void +jffs_print_fm(struct jffs_fm *fm) +{ + D(printk("struct jffs_fm: 0x%p\n", fm)); + D(printk("{\n")); + D(printk(" 0x%08x, /* offset */\n", fm->offset)); + D(printk(" %u, /* size */\n", fm->size)); + D(printk(" 0x%p, /* prev */\n", fm->prev)); + D(printk(" 0x%p, /* next */\n", fm->next)); + D(printk(" 0x%p, /* nodes */\n", fm->nodes)); + D(printk("}\n")); +} +#endif /* CONFIG_JFFS_FS_VERBOSE > 2 */ + +#if 0 +void +jffs_print_node_ref(struct jffs_node_ref *ref) +{ + D(printk("struct jffs_node_ref: 0x%p\n", ref)); + D(printk("{\n")); + D(printk(" 0x%p, /* node */\n", ref->node)); + D(printk(" 0x%p, /* next */\n", ref->next)); + D(printk("}\n")); +} +#endif /* 0 */ + /* This function creates a new shiny flash memory control structure. */ struct jffs_fmcontrol * jffs_build_begin(struct jffs_control *c, int unit) @@ -742,54 +796,3 @@ int jffs_get_node_inuse(void) { return no_jffs_node; } - -void -jffs_print_fmcontrol(struct jffs_fmcontrol *fmc) -{ - D(printk("struct jffs_fmcontrol: 0x%p\n", fmc)); - D(printk("{\n")); - D(printk(" %u, /* flash_size */\n", fmc->flash_size)); - D(printk(" %u, /* used_size */\n", fmc->used_size)); - D(printk(" %u, /* dirty_size */\n", fmc->dirty_size)); - D(printk(" %u, /* free_size */\n", fmc->free_size)); - D(printk(" %u, /* sector_size */\n", fmc->sector_size)); - D(printk(" %u, /* min_free_size */\n", fmc->min_free_size)); - D(printk(" %u, /* max_chunk_size */\n", fmc->max_chunk_size)); - D(printk(" 0x%p, /* mtd */\n", fmc->mtd)); - D(printk(" 0x%p, /* head */ " - "(head->offset = 0x%08x)\n", - fmc->head, (fmc->head ? fmc->head->offset : 0))); - D(printk(" 0x%p, /* tail */ " - "(tail->offset + tail->size = 0x%08x)\n", - fmc->tail, - (fmc->tail ? 
fmc->tail->offset + fmc->tail->size : 0))); - D(printk(" 0x%p, /* head_extra */\n", fmc->head_extra)); - D(printk(" 0x%p, /* tail_extra */\n", fmc->tail_extra)); - D(printk("}\n")); -} - -void -jffs_print_fm(struct jffs_fm *fm) -{ - D(printk("struct jffs_fm: 0x%p\n", fm)); - D(printk("{\n")); - D(printk(" 0x%08x, /* offset */\n", fm->offset)); - D(printk(" %u, /* size */\n", fm->size)); - D(printk(" 0x%p, /* prev */\n", fm->prev)); - D(printk(" 0x%p, /* next */\n", fm->next)); - D(printk(" 0x%p, /* nodes */\n", fm->nodes)); - D(printk("}\n")); -} - -#if 0 -void -jffs_print_node_ref(struct jffs_node_ref *ref) -{ - D(printk("struct jffs_node_ref: 0x%p\n", ref)); - D(printk("{\n")); - D(printk(" 0x%p, /* node */\n", ref->node)); - D(printk(" 0x%p, /* next */\n", ref->next)); - D(printk("}\n")); -} -#endif /* 0 */ - diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h index bc291c431822..f64151e74122 100644 --- a/fs/jffs/jffs_fm.h +++ b/fs/jffs/jffs_fm.h @@ -139,8 +139,9 @@ int jffs_add_node(struct jffs_node *node); void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm, __u32 size); +#if CONFIG_JFFS_FS_VERBOSE > 0 void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc); -void jffs_print_fm(struct jffs_fm *fm); +#endif #if 0 void jffs_print_node_ref(struct jffs_node_ref *ref); #endif /* 0 */ diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 1be6de27dd81..638836b277d4 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -92,7 +92,7 @@ static int jffs2_garbage_collect_thread(void *_c) schedule(); } - if (try_to_freeze(0)) + if (try_to_freeze()) continue; cond_resched(); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 8d2a9ab981d4..e892dab40c26 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/fs.h> #include <linux/quotaops.h> +#include <linux/posix_acl_xattr.h> #include "jfs_incore.h" #include "jfs_xattr.h" #include "jfs_acl.h" @@ -36,11 +37,11 @@ static struct posix_acl 
*jfs_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - ea_name = XATTR_NAME_ACL_ACCESS; + ea_name = POSIX_ACL_XATTR_ACCESS; p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: - ea_name = XATTR_NAME_ACL_DEFAULT; + ea_name = POSIX_ACL_XATTR_DEFAULT; p_acl = &ji->i_default_acl; break; default: @@ -70,8 +71,7 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) if (!IS_ERR(acl)) *p_acl = posix_acl_dup(acl); } - if (value) - kfree(value); + kfree(value); return acl; } @@ -89,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch(type) { case ACL_TYPE_ACCESS: - ea_name = XATTR_NAME_ACL_ACCESS; + ea_name = POSIX_ACL_XATTR_ACCESS; p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: - ea_name = XATTR_NAME_ACL_DEFAULT; + ea_name = POSIX_ACL_XATTR_DEFAULT; p_acl = &ji->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; @@ -102,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return -EINVAL; } if (acl) { - size = xattr_acl_size(acl->a_count); + size = posix_acl_xattr_size(acl->a_count); value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; @@ -112,8 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } rc = __jfs_setxattr(inode, ea_name, value, size, 0); out: - if (value) - kfree(value); + kfree(value); if (!rc) { if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index a87b06fa8ff8..c2c19c9ed9a4 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,16 
+19,13 @@ #include <linux/fs.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_dmap.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" #include "jfs_debug.h" - -extern int jfs_commit_inode(struct inode *, int); -extern void jfs_truncate(struct inode *); - int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 24a689179af2..2137138c59b0 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -23,6 +23,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_extent.h" @@ -30,14 +31,6 @@ #include "jfs_debug.h" -extern struct inode_operations jfs_dir_inode_operations; -extern struct inode_operations jfs_file_inode_operations; -extern struct inode_operations jfs_symlink_inode_operations; -extern struct file_operations jfs_dir_operations; -extern struct file_operations jfs_file_operations; -struct address_space_operations jfs_aops; -extern int freeZeroLink(struct inode *); - void jfs_read_inode(struct inode *inode) { if (diRead(inode)) { @@ -136,7 +129,7 @@ void jfs_delete_inode(struct inode *inode) jfs_info("In jfs_delete_inode, inode = 0x%p", inode); if (test_cflag(COMMIT_Freewmap, inode)) - freeZeroLink(inode); + jfs_free_zero_link(inode); diFree(inode); diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index d2ae430adecf..a3acd3eec059 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,8 +20,6 @@ #ifdef CONFIG_JFS_POSIX_ACL -#include <linux/xattr_acl.h> - int jfs_permission(struct inode *, int, struct nameidata *); int jfs_init_acl(struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 91a0a889ebc5..4caea6b43b92 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -58,8 +58,6 @@ void dump_mem(char *label, void *data, int 
length) static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG -extern read_proc_t jfs_txanchor_read; - static int loglevel_read(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -97,14 +95,6 @@ static int loglevel_write(struct file *file, const char __user *buffer, } #endif - -#ifdef CONFIG_JFS_STATISTICS -extern read_proc_t jfs_lmstats_read; -extern read_proc_t jfs_txstats_read; -extern read_proc_t jfs_xtstat_read; -extern read_proc_t jfs_mpstat_read; -#endif - static struct { const char *name; read_proc_t *read_fn; diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index a38079ae1e00..ddffbbd4d955 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,7 +31,9 @@ * CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined */ #if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS)) - #define PROC_FS_JFS +#define PROC_FS_JFS +extern void jfs_proc_init(void); +extern void jfs_proc_clean(void); #endif /* @@ -65,8 +67,8 @@ extern int jfsloglevel; -/* dump memory contents */ extern void dump_mem(char *label, void *data, int length); +extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) 
do { \ @@ -110,6 +112,11 @@ extern void dump_mem(char *label, void *data, int length); * ---------- */ #ifdef CONFIG_JFS_STATISTICS +extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); +extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); +extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); +extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); + #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) #define HIGHWATERMARK(x,y) ((x) = max((x), (y))) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 69007fd546ef..cced2fed9d0f 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -272,7 +272,6 @@ int dbMount(struct inode *ipbmap) int dbUnmount(struct inode *ipbmap, int mounterror) { struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - int i; if (!(mounterror || isReadOnly(ipbmap))) dbSync(ipbmap); @@ -282,14 +281,6 @@ int dbUnmount(struct inode *ipbmap, int mounterror) */ truncate_inode_pages(ipbmap->i_mapping, 0); - /* - * Sanity Check - */ - for (i = 0; i < bmp->db_numag; i++) - if (atomic_read(&bmp->db_active[i])) - printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n", - i, atomic_read(&bmp->db_active[i])); - /* free the memory for the in-memory bmap. 
*/ kfree(bmp); diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index ac41f72d6d50..8676aee3ae48 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -2931,6 +2931,9 @@ static void add_missing_indices(struct inode *inode, s64 bn) ASSERT(p->header.flag & BT_LEAF); tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); + if (BT_IS_ROOT(mp)) + tlck->type |= tlckBTROOT; + dtlck = (struct dt_lock *) &tlck->lock; stbl = DT_GETSTBL(p); diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 1953acb79266..4879603daa1c 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_superblock.h" #include "jfs_dmap.h" #include "jfs_extent.h" @@ -33,12 +34,6 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); #endif static s64 extRoundDown(s64 nb); -/* - * external references - */ -extern int jfs_commit_inode(struct inode *, int); - - #define DPD(a) (printk("(a): %d\n",(a))) #define DPC(a) (printk("(a): %c\n",(a))) #define DPL1(a) \ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 7acff2ce3c80..971af2977eff 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -47,6 +47,7 @@ #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_dinode.h" #include "jfs_dmap.h" @@ -69,11 +70,6 @@ #define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) /* - * external references - */ -extern struct address_space_operations jfs_aops; - -/* * forward references */ static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 84f2459b2191..2af5efbfd06f 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_dinode.h" diff --git 
a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 3df91fbfe781..b54bac576cb3 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2001 + * Copyright (C) International Business Machines Corp., 2000-2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,5 +19,22 @@ #define _H_JFS_INODE extern struct inode *ialloc(struct inode *, umode_t); +extern int jfs_fsync(struct file *, struct dentry *, int); +extern void jfs_read_inode(struct inode *); +extern int jfs_commit_inode(struct inode *, int); +extern int jfs_write_inode(struct inode*, int); +extern void jfs_delete_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *); +extern void jfs_truncate(struct inode *); +extern void jfs_truncate_nolock(struct inode *, loff_t); +extern void jfs_free_zero_link(struct inode *); +extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern struct address_space_operations jfs_aops; +extern struct inode_operations jfs_dir_inode_operations; +extern struct file_operations jfs_dir_operations; +extern struct inode_operations jfs_file_inode_operations; +extern struct file_operations jfs_file_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index dfa1200daa61..79d07624bfe1 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -71,6 +71,7 @@ #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" +#include "jfs_superblock.h" #include "jfs_txnmgr.h" #include "jfs_debug.h" @@ -167,14 +168,6 @@ static struct jfs_log *dummy_log = NULL; static DECLARE_MUTEX(jfs_log_sem); /* - * external references - */ -extern void txLazyUnlock(struct tblock * tblk); -extern int jfs_stop_threads; -extern struct completion 
jfsIOwait; -extern int jfs_tlocks_low; - -/* * forward references */ static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, @@ -1624,6 +1617,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait) } } assert(list_empty(&log->cqueue)); + +#ifdef CONFIG_JFS_DEBUG if (!list_empty(&log->synclist)) { struct logsyncblk *lp; @@ -1638,9 +1633,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait) dump_mem("orphan tblock", lp, sizeof(struct tblock)); } -// current->state = TASK_INTERRUPTIBLE; -// schedule(); } +#endif //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } @@ -2365,9 +2359,9 @@ int jfsIOWait(void *arg) lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } - if (current->flags & PF_FREEZE) { + if (freezing(current)) { spin_unlock_irq(&log_redrive_lock); - refrigerator(PF_FREEZE); + refrigerator(); } else { add_wait_queue(&jfs_IO_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 51291fbc420c..747114cd38b8 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -507,6 +507,8 @@ extern int lmLogClose(struct super_block *sb); extern int lmLogShutdown(struct jfs_log * log); extern int lmLogInit(struct jfs_log * log); extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); +extern int lmGroupCommit(struct jfs_log *, struct tblock *); +extern int jfsIOWait(void *); extern void jfs_flush_journal(struct jfs_log * log, int wait); extern void jfs_syncpt(struct jfs_log *log); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 41bf078dce05..6c5485d16c39 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) } } -static inline struct metapage *alloc_metapage(int gfp_mask) +static inline struct metapage *alloc_metapage(unsigned int gfp_mask) { return mempool_alloc(metapage_mempool, gfp_mask); } @@ -726,12 +726,12 @@ void 
force_metapage(struct metapage *mp) page_cache_release(page); } -extern void hold_metapage(struct metapage *mp) +void hold_metapage(struct metapage *mp) { lock_page(mp->page); } -extern void put_metapage(struct metapage *mp) +void put_metapage(struct metapage *mp) { if (mp->count || mp->nohomeok) { /* Someone else will release this */ diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 991e9fb84c75..f0b7d3282b07 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -58,6 +58,8 @@ struct metapage { #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) /* function prototypes */ +extern int metapage_init(void); +extern void metapage_exit(void); extern struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, unsigned int size, int absolute, unsigned long new); diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index ab0566f70cfa..fcf781bf31cb 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -109,5 +109,16 @@ struct jfs_superblock { extern int readSuper(struct super_block *, struct buffer_head **); extern int updateSuper(struct super_block *, uint); extern void jfs_error(struct super_block *, const char *, ...); +extern int jfs_mount(struct super_block *); +extern int jfs_mount_rw(struct super_block *, int); +extern int jfs_umount(struct super_block *); +extern int jfs_umount_rw(struct super_block *); + +extern int jfs_stop_threads; +extern struct completion jfsIOwait; +extern wait_queue_head_t jfs_IO_thread_wait; +extern wait_queue_head_t jfs_commit_thread_wait; +extern 
wait_queue_head_t jfs_sync_thread_wait; +extern int jfs_extendfs(struct super_block *, s64, int); #endif /*_H_JFS_SUPERBLOCK */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index e93d01aa12c4..121c981ff453 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -42,7 +42,6 @@ * hold on to mp+lock thru update of maps */ - #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/smp_lock.h> @@ -51,6 +50,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_metapage.h" #include "jfs_dinode.h" @@ -109,7 +109,6 @@ static int TxLockHWM; /* High water mark for number of txLocks used */ static int TxLockVHWM; /* Very High water mark */ struct tlock *TxLock; /* transaction lock table */ - /* * transaction management lock */ @@ -149,7 +148,6 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) #define TXN_WAKEUP(event) wake_up_all(event) - /* * statistics */ @@ -161,16 +159,6 @@ static struct { int waitlock; /* 4: # of tlock wait */ } stattx; - -/* - * external references - */ -extern int lmGroupCommit(struct jfs_log *, struct tblock *); -extern int jfs_commit_inode(struct inode *, int); -extern int jfs_stop_threads; - -extern struct completion jfsIOwait; - /* * forward references */ @@ -358,7 +346,6 @@ void txExit(void) TxBlock = NULL; } - /* * NAME: txBegin() * @@ -460,7 +447,6 @@ tid_t txBegin(struct super_block *sb, int flag) return t; } - /* * NAME: txBeginAnon() * @@ -503,7 +489,6 @@ void txBeginAnon(struct super_block *sb) TXN_UNLOCK(); } - /* * txEnd() * @@ -592,7 +577,6 @@ wakeup: TXN_WAKEUP(&TxAnchor.freewait); } - /* * txLock() * @@ -868,7 +852,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, return NULL; } - /* * NAME: txRelease() * @@ -908,7 +891,6 @@ static void txRelease(struct tblock * tblk) TXN_UNLOCK(); } - /* * NAME: txUnlock() * @@ -996,7 +978,6 @@ static void txUnlock(struct tblock * tblk) } } - 
/* * txMaplock() * @@ -1069,7 +1050,6 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) return tlck; } - /* * txLinelock() * @@ -1103,8 +1083,6 @@ struct linelock *txLinelock(struct linelock * tlock) return linelock; } - - /* * transaction commit management * ----------------------------- @@ -1373,7 +1351,6 @@ int txCommit(tid_t tid, /* transaction identifier */ return rc; } - /* * NAME: txLog() * @@ -1437,7 +1414,6 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) return rc; } - /* * diLog() * @@ -1465,7 +1441,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *pxd = mp->cm_pxd; PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -1552,7 +1527,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return rc; } - /* * dataLog() * @@ -1599,7 +1573,6 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return 0; } - /* * dtLog() * @@ -1639,7 +1612,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); else lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); -// *pxd = mp->cm_pxd; PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -1704,7 +1676,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return; } - /* * xtLog() * @@ -1760,7 +1731,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. 
*/ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *page_pxd = mp->cm_pxd; PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -2093,7 +2063,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return; } - /* * mapLog() * @@ -2180,7 +2149,6 @@ void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } } - /* * txEA() * @@ -2233,7 +2201,6 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) } } - /* * txForce() * @@ -2300,7 +2267,6 @@ void txForce(struct tblock * tblk) } } - /* * txUpdateMap() * @@ -2437,7 +2403,6 @@ static void txUpdateMap(struct tblock * tblk) } } - /* * txAllocPMap() * @@ -2509,7 +2474,6 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, } } - /* * txFreeMap() * @@ -2611,7 +2575,6 @@ void txFreeMap(struct inode *ip, } } - /* * txFreelock() * @@ -2652,7 +2615,6 @@ void txFreelock(struct inode *ip) TXN_UNLOCK(); } - /* * txAbort() * @@ -2826,9 +2788,9 @@ int jfs_lazycommit(void *arg) /* In case a wakeup came while all threads were active */ jfs_commit_thread_waking = 0; - if (current->flags & PF_FREEZE) { + if (freezing(current)) { LAZY_UNLOCK(flags); - refrigerator(PF_FREEZE); + refrigerator(); } else { DECLARE_WAITQUEUE(wq, current); @@ -3025,9 +2987,9 @@ int jfs_sync(void *arg) /* Add anon_list2 back to anon_list */ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); - if (current->flags & PF_FREEZE) { + if (freezing(current)) { TXN_UNLOCK(); - refrigerator(PF_FREEZE); + refrigerator(); } else { DECLARE_WAITQUEUE(wq, current); diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index b71b82c2df04..59ad0f6b7231 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -285,34 +285,26 @@ struct commit { /* * external declarations */ -extern struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage *mp, - int flag); - -extern struct tlock *txMaplock(tid_t tid, struct inode *ip, int flag); - 
-extern int txCommit(tid_t tid, int nip, struct inode **iplist, int flag); - -extern tid_t txBegin(struct super_block *sb, int flag); - -extern void txBeginAnon(struct super_block *sb); - -extern void txEnd(tid_t tid); - -extern void txAbort(tid_t tid, int dirty); - -extern struct linelock *txLinelock(struct linelock * tlock); - -extern void txFreeMap(struct inode *ip, struct maplock * maplock, - struct tblock * tblk, int maptype); - -extern void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea); - -extern void txFreelock(struct inode *ip); - -extern int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); - -extern void txQuiesce(struct super_block *sb); - -extern void txResume(struct super_block *sb); +extern int jfs_tlocks_low; + +extern int txInit(void); +extern void txExit(void); +extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int); +extern struct tlock *txMaplock(tid_t, struct inode *, int); +extern int txCommit(tid_t, int, struct inode **, int); +extern tid_t txBegin(struct super_block *, int); +extern void txBeginAnon(struct super_block *); +extern void txEnd(tid_t); +extern void txAbort(tid_t, int); +extern struct linelock *txLinelock(struct linelock *); +extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int); +extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *); +extern void txFreelock(struct inode *); +extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *, + struct tlock *); +extern void txQuiesce(struct super_block *); +extern void txResume(struct super_block *); +extern void txLazyUnlock(struct tblock *); +extern int jfs_lazycommit(void *); +extern int jfs_sync(void *); #endif /* _H_JFS_TXNMGR */ diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8413a368f449..1cae14e741eb 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -31,20 +31,9 @@ #include "jfs_acl.h" #include "jfs_debug.h" -extern struct inode_operations 
jfs_file_inode_operations; -extern struct inode_operations jfs_symlink_inode_operations; -extern struct file_operations jfs_file_operations; -extern struct address_space_operations jfs_aops; - -extern int jfs_fsync(struct file *, struct dentry *, int); -extern void jfs_truncate_nolock(struct inode *, loff_t); -extern int jfs_init_acl(struct inode *, struct inode *); - /* * forward references */ -struct inode_operations jfs_dir_inode_operations; -struct file_operations jfs_dir_operations; struct dentry_operations jfs_ci_dentry_operations; static s64 commitZeroLink(tid_t, struct inode *); @@ -655,7 +644,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) /* - * NAME: freeZeroLink() + * NAME: jfs_free_zero_link() * * FUNCTION: for non-directory, called by iClose(), * free resources of a file from cache and WORKING map @@ -663,15 +652,12 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) * while associated with a pager object, * * PARAMETER: ip - pointer to inode of file. - * - * RETURN: 0 -ok */ -int freeZeroLink(struct inode *ip) +void jfs_free_zero_link(struct inode *ip) { - int rc = 0; int type; - jfs_info("freeZeroLink: ip = 0x%p", ip); + jfs_info("jfs_free_zero_link: ip = 0x%p", ip); /* return if not reg or symbolic link or if size is * already ok. 
@@ -684,10 +670,10 @@ int freeZeroLink(struct inode *ip) case S_IFLNK: /* if its contained in inode nothing to do */ if (ip->i_size < IDATASIZE) - return 0; + return; break; default: - return 0; + return; } /* @@ -737,9 +723,7 @@ int freeZeroLink(struct inode *ip) * free xtree/data blocks from working block map; */ if (ip->i_size) - rc = xtTruncate(0, ip, 0, COMMIT_WMAP); - - return rc; + xtTruncate(0, ip, 0, COMMIT_WMAP); } /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5e774ed7fb64..ee32211288ce 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -24,10 +24,12 @@ #include <linux/completion.h> #include <linux/vfs.h> #include <linux/moduleparam.h> +#include <linux/posix_acl.h> #include <asm/uaccess.h> #include "jfs_incore.h" #include "jfs_filsys.h" +#include "jfs_inode.h" #include "jfs_metapage.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -62,37 +64,6 @@ module_param(jfsloglevel, int, 0644); MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); #endif -/* - * External declarations - */ -extern int jfs_mount(struct super_block *); -extern int jfs_mount_rw(struct super_block *, int); -extern int jfs_umount(struct super_block *); -extern int jfs_umount_rw(struct super_block *); - -extern int jfsIOWait(void *); -extern int jfs_lazycommit(void *); -extern int jfs_sync(void *); - -extern void jfs_read_inode(struct inode *inode); -extern void jfs_dirty_inode(struct inode *inode); -extern void jfs_delete_inode(struct inode *inode); -extern int jfs_write_inode(struct inode *inode, int wait); - -extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern int jfs_extendfs(struct super_block *, s64, int); - -extern struct dentry_operations jfs_ci_dentry_operations; - -#ifdef PROC_FS_JFS /* see jfs_debug.h */ -extern void jfs_proc_init(void); -extern void jfs_proc_clean(void); -#endif - -extern wait_queue_head_t jfs_IO_thread_wait; -extern wait_queue_head_t jfs_commit_thread_wait; -extern wait_queue_head_t jfs_sync_thread_wait; - static 
void jfs_handle_error(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); @@ -593,11 +564,6 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -extern int metapage_init(void); -extern int txInit(void); -extern void txExit(void); -extern void metapage_exit(void); - static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index ef4c07ee92b2..287d8d6c3cfd 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/namei.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_xattr.h" static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 7a9ffd5d03dc..ee438d429d45 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_superblock.h" @@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return -EPERM; /* - * XATTR_NAME_ACL_ACCESS is tied to i_mode + * POSIX_ACL_XATTR_ACCESS is tied to i_mode */ - if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) { + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { acl = posix_acl_from_xattr(value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); @@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; return 0; - } else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { + } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) 
== 0) { acl = posix_acl_from_xattr(value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); @@ -946,8 +947,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value, out: up_write(&JFS_IP(inode)->xattr_sem); - if (os2name) - kfree(os2name); + kfree(os2name); return rc; } @@ -1042,8 +1042,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, out: up_read(&JFS_IP(inode)->xattr_sem); - if (os2name) - kfree(os2name); + kfree(os2name); return size; } diff --git a/fs/libfs.c b/fs/libfs.c index f90b29595927..58101dff2c66 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -183,6 +183,7 @@ struct file_operations simple_dir_operations = { .llseek = dcache_dir_lseek, .read = generic_read_dir, .readdir = dcache_readdir, + .fsync = simple_sync_file, }; struct inode_operations simple_dir_inode_operations = { @@ -519,6 +520,102 @@ int simple_transaction_release(struct inode *inode, struct file *file) return 0; } +/* Simple attribute files */ + +struct simple_attr { + u64 (*get)(void *); + void (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct semaphore sem; /* protects access to these buffers */ +}; + +/* simple_attr_open is called by an actual attribute open file operation + * to set the attribute specific access operations. 
*/ +int simple_attr_open(struct inode *inode, struct file *file, + u64 (*get)(void *), void (*set)(void *, u64), + const char *fmt) +{ + struct simple_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->u.generic_ip; + attr->fmt = fmt; + init_MUTEX(&attr->sem); + + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +int simple_attr_close(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +/* read from the buffer that is filled with the get function */ +ssize_t simple_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct simple_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + + if (!attr->get) + return -EACCES; + + down(&attr->sem); + if (*ppos) /* continued read */ + size = strlen(attr->get_buf); + else /* first read */ + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, + (unsigned long long)attr->get(attr->data)); + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); + up(&attr->sem); + return ret; +} + +/* interpret the buffer as a number to call the set function with */ +ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct simple_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + + if (!attr->set) + return -EACCES; + + down(&attr->sem); + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + up(&attr->sem); + return ret; +} + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); @@ -547,3 +644,7 @@ EXPORT_SYMBOL(simple_read_from_buffer); 
EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); +EXPORT_SYMBOL_GPL(simple_attr_open); +EXPORT_SYMBOL_GPL(simple_attr_close); +EXPORT_SYMBOL_GPL(simple_attr_read); +EXPORT_SYMBOL_GPL(simple_attr_write); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ef7103b8c5bd..006bb9e14579 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -31,7 +31,7 @@ static int reclaimer(void *ptr); * This is the representation of a blocked client lock. */ struct nlm_wait { - struct nlm_wait * b_next; /* linked list */ + struct list_head b_list; /* linked list */ wait_queue_head_t b_wait; /* where to wait on */ struct nlm_host * b_host; struct file_lock * b_lock; /* local file lock */ @@ -39,27 +39,54 @@ struct nlm_wait { u32 b_status; /* grant callback status */ }; -static struct nlm_wait * nlm_blocked; +static LIST_HEAD(nlm_blocked); /* - * Block on a lock + * Queue up a lock for blocking so that the GRANTED request can see it */ -int -nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) +int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_wait *block; + + BUG_ON(req->a_block != NULL); + block = kmalloc(sizeof(*block), GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = NLM_LCK_BLOCKED; + + list_add(&block->b_list, &nlm_blocked); + req->a_block = block; + + return 0; +} + +void nlmclnt_finish_block(struct nlm_rqst *req) { - struct nlm_wait block, **head; - int err; - u32 pstate; + struct nlm_wait *block = req->a_block; - block.b_host = host; - block.b_lock = fl; - init_waitqueue_head(&block.b_wait); - block.b_status = NLM_LCK_BLOCKED; - block.b_next = nlm_blocked; - nlm_blocked = █ + if (block == NULL) + return; + req->a_block = NULL; + list_del(&block->b_list); + kfree(block); +} + +/* + * Block on a lock + */ +long 
nlmclnt_block(struct nlm_rqst *req, long timeout) +{ + struct nlm_wait *block = req->a_block; + long ret; - /* Remember pseudo nsm state */ - pstate = host->h_state; + /* A borken server might ask us to block even if we didn't + * request it. Just say no! + */ + if (!req->a_args.block) + return -EAGAIN; /* Go to sleep waiting for GRANT callback. Some servers seem * to lose callbacks, however, so we're going to poll from @@ -69,28 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) * a 1 minute timeout would do. See the comment before * nlmclnt_lock for an explanation. */ - sleep_on_timeout(&block.b_wait, 30*HZ); - - for (head = &nlm_blocked; *head; head = &(*head)->b_next) { - if (*head == &block) { - *head = block.b_next; - break; - } - } + ret = wait_event_interruptible_timeout(block->b_wait, + block->b_status != NLM_LCK_BLOCKED, + timeout); - if (!signalled()) { - *statp = block.b_status; - return 0; + if (block->b_status != NLM_LCK_BLOCKED) { + req->a_res.status = block->b_status; + block->b_status = NLM_LCK_BLOCKED; } - /* Okay, we were interrupted. Cancel the pending request - * unless the server has rebooted. - */ - if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) - printk(KERN_NOTICE - "lockd: CANCEL call failed (errno %d)\n", -err); - - return -ERESTARTSYS; + return ret; } /* @@ -100,27 +115,23 @@ u32 nlmclnt_grant(struct nlm_lock *lock) { struct nlm_wait *block; + u32 res = nlm_lck_denied; /* * Look up blocked request based on arguments. * Warning: must not use cookie to match it! */ - for (block = nlm_blocked; block; block = block->b_next) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) - break; + list_for_each_entry(block, &nlm_blocked, b_list) { + if (nlm_compare_locks(block->b_lock, &lock->fl)) { + /* Alright, we found a lock. 
Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; + } } - - /* Ooops, no blocked request found. */ - if (block == NULL) - return nlm_lck_denied; - - /* Alright, we found the lock. Set the return status and - * wake up the caller. - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - - return nlm_granted; + return res; } /* @@ -230,7 +241,7 @@ restart: host->h_reclaiming = 0; /* Now, wake up all processes that sleep on a blocked lock */ - for (block = nlm_blocked; block; block = block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (block->b_host == host) { block->b_status = NLM_LCK_DENIED_GRACE_PERIOD; wake_up(&block->b_wait); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a4407619b1f1..14b3ce87fa29 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -21,6 +21,7 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) +#define NLMCLNT_POLL_TIMEOUT (30*HZ) static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); @@ -312,7 +313,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue) prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE); if (!signalled ()) { schedule_timeout(NLMCLNT_GRACE_WAIT); - try_to_freeze(PF_FREEZE); + try_to_freeze(); if (!signalled ()) status = 0; } @@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + long timeout; + int status; if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", @@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) goto out; } - do { - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { - if (resp->status != NLM_LCK_BLOCKED) - break; - status = nlmclnt_block(host, fl, &resp->status); - } + if 
(req->a_args.block) { + status = nlmclnt_prepare_block(req, host, fl); if (status < 0) goto out; - } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); + } + for(;;) { + status = nlmclnt_call(req, NLMPROC_LOCK); + if (status < 0) + goto out_unblock; + if (resp->status != NLM_LCK_BLOCKED) + break; + /* Wait on an NLM blocking lock */ + timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); + /* Did a reclaimer thread notify us of a server reboot? */ + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) + continue; + if (resp->status != NLM_LCK_BLOCKED) + break; + if (timeout >= 0) + continue; + /* We were interrupted. Send a CANCEL request to the server + * and exit + */ + status = (int)timeout; + goto out_unblock; + } if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; @@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); +out_unblock: + nlmclnt_finish_block(req); + /* Cancel the blocked request if it is still pending */ + if (resp->status == NLM_LCK_BLOCKED) + nlmclnt_cancel(host, fl); out: nlmclnt_release_lockargs(req); return status; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 52707c5ad6ea..82c77df81c5f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + xprt->nocong = 1; /* No congestion control for NLM */ + xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); + if (IS_ERR(clnt)) goto forgetit; - } clnt->cl_autobind = 1; /* turn on pmap queries */ - xprt->nocong = 1; /* No congestion control for NLM */ - xprt->resvport = 1; /* NLM requires a reserved port */ host->h_rpcclnt = clnt; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c 
index 6fc1bebeec1d..2d144abe84ad 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -115,20 +115,19 @@ nsm_create(void) xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + xprt->resvport = 1; /* NSM requires a reserved port */ clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, RPC_AUTH_NULL); if (IS_ERR(clnt)) - goto out_destroy; + goto out_err; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; - xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; -out_destroy: - xprt_destroy(xprt); +out_err: return clnt; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b82e470912e8..6e242556b903 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp) printk(KERN_DEBUG "lockd: new process, skipping host shutdown\n"); wake_up(&lockd_exit); - + + flush_signals(current); + /* Exit the RPC thread */ svc_exit_thread(rqstp); diff --git a/fs/locks.c b/fs/locks.c index 3fa6a7ce57a7..a0bc03495bd4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp) .fl_end = OFFSET_MAX, }; filp->f_op->flock(filp, F_SETLKW, &fl); + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); } lock_kernel(); diff --git a/fs/namei.c b/fs/namei.c index 
6e888dd10461..56e9f0f7e761 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1577,19 +1577,35 @@ do_link: * * Simple function to lookup and return a dentry and create it * if it doesn't exist. Is SMP-safe. + * + * Returns with nd->dentry->d_inode->i_sem locked. */ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { - struct dentry *dentry; + struct dentry *dentry = ERR_PTR(-EEXIST); down(&nd->dentry->d_inode->i_sem); - dentry = ERR_PTR(-EEXIST); + /* + * Yucky last component or no last component at all? + * (foo/., foo/.., /////) + */ if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; + + /* + * Do the final lookup. + */ dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) goto fail; + + /* + * Special case - lookup gave negative, but... we had foo/bar/ + * From the vfs_mknod() POV we just have a negative dentry - + * all is fine. Let's be bastards - you had / on the end, you've + * been asking for (non-existent) directory. -ENOENT for you. + */ if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) goto enoent; return dentry; diff --git a/fs/namespace.c b/fs/namespace.c index 3b93e5d750eb..208c079e9fdb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -void umount_tree(struct vfsmount *mnt) +static void umount_tree(struct vfsmount *mnt) { struct vfsmount *p; LIST_HEAD(kill); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 2dc2d8693968..a9f7a8ab1d59 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -705,18 +705,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, DPRINTK("ncp_do_readdir: init failed, err=%d\n", err); return; } -#ifdef USE_OLD_SLOW_DIRECTORY_LISTING - for (;;) { - err = ncp_search_for_file_or_subdir(server, &seq, &entry.i); - if (err) { - DPRINTK("ncp_do_readdir: search failed, err=%d\n", err); - break; - } - entry.volume = entry.i.volNumber; - if (!ncp_fill_cache(filp, dirent, filldir, ctl, 
&entry)) - break; - } -#else /* We MUST NOT use server->buffer_size handshaked with server if we are using UDP, as for UDP server uses max. buffer size determined by MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes). @@ -754,7 +742,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, } } while (more); vfree(buf); -#endif return; } diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index e4eb5ed4bee4..c755e1848a42 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -845,46 +845,6 @@ out: return result; } -/* Search for everything */ -int ncp_search_for_file_or_subdir(struct ncp_server *server, - struct nw_search_sequence *seq, - struct nw_info_struct *target) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 3); /* subfunction */ - ncp_add_byte(server, server->name_space[seq->volNumber]); - ncp_add_byte(server, 0); /* data stream (???) */ - ncp_add_word(server, cpu_to_le16(0x8006)); /* Search attribs */ - ncp_add_dword(server, RIM_ALL); /* return info mask */ - ncp_add_mem(server, seq, 9); -#ifdef CONFIG_NCPFS_NFS_NS - if (server->name_space[seq->volNumber] == NW_NS_NFS) { - ncp_add_byte(server, 0); /* 0 byte pattern */ - } else -#endif - { - ncp_add_byte(server, 2); /* 2 byte pattern */ - ncp_add_byte(server, 0xff); /* following is a wildcard */ - ncp_add_byte(server, '*'); - } - - if ((result = ncp_request(server, 87)) != 0) - goto out; - memcpy(seq, ncp_reply_data(server, 0), sizeof(*seq)); - ncp_extract_file_info(ncp_reply_data(server, 10), target); - - ncp_unlock_server(server); - - result = ncp_obtain_nfs_info(server, target); - return result; - -out: - ncp_unlock_server(server); - return result; -} - int ncp_search_for_fileset(struct ncp_server *server, struct nw_search_sequence *seq, int* more, diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 05ec2e9d90c6..9e4dc30c2435 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -87,9 
+87,6 @@ int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, char *, int ncp_initialize_search(struct ncp_server *, struct inode *, struct nw_search_sequence *target); -int ncp_search_for_file_or_subdir(struct ncp_server *server, - struct nw_search_sequence *seq, - struct nw_info_struct *target); int ncp_search_for_fileset(struct ncp_server *server, struct nw_search_sequence *seq, int* more, int* cnt, diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b4baa031edf4..8b3bb715d177 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,6 +8,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ proc.o read.o symlink.o unlink.o write.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 560d6175dd58..f2ca782aba33 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ece27e42b93b..65f1e19e4d19 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include <linux/config.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d271df9df2b2..7c33b9a81a94 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) @@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void 
*argp, void *resp xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5b9c60f97791..d7f7eb669d03 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_xdr.h> +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ff6155f5e8d9..b38a57e78a63 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -32,6 +32,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -50,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -74,6 +77,27 @@ struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct 
dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); @@ -90,6 +114,9 @@ struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -116,7 +143,8 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -164,12 +192,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: @@ -202,22 +228,22 @@ void dir_page_release(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. 
*/ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -229,8 +255,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. 
*/ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -253,7 +315,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -268,7 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. + * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -276,7 +342,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -313,7 +388,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -333,10 
+408,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -349,7 +425,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; } @@ -375,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -391,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -412,8 +488,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. 
+/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. */ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -435,15 +512,15 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -455,9 +532,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -490,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory 
operations under NFS are synchronous, so fsync() * is a dummy operation. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 68df803f27ca..6537f2c4ae44 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,7 +517,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; @@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, retval = -EFAULT; if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) goto out; - if (file->f_error) { - retval = file->f_error; - file->f_error = 0; - goto out; - } retval = -EFBIG; if (limit != RLIM_INFINITY) { if (pos >= limit) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 55c907592490..5621ba9885f4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -116,6 +128,21 @@ nfs_file_release(struct inode *inode, struct file *filp) } /** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + +/** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct * @file - pointer to struct file @@ -137,7 
+164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - return nfs_revalidate_inode(server, inode); + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; force_reval: return __nfs_revalidate_inode(server, inode); } @@ -198,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -216,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -232,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -321,9 +349,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. 
+ */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 87f4f9aeac86..ffb8df91dc34 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -50,6 +50,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f2317f3e29f9..4845911f1c63 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include <asm/system.h> #include <asm/uaccess.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -63,6 +64,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -106,6 +108,21 @@ static struct rpc_program nfs_program = { .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -118,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync) int flags = sync ? 
FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; @@ -140,10 +157,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -152,6 +165,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -161,11 +175,13 @@ nfs_clear_inode(struct inode *inode) void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); - struct rpc_clnt *rpc; + struct rpc_clnt *rpc = NFS_SB(sb)->client; /* -EIO all pending I/O */ - if ((rpc = server->client) != NULL) + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = NFS_SB(sb)->client_acl; + if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } @@ -366,13 +382,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. 
Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); goto out_fail; } @@ -383,7 +401,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -427,21 +444,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -455,17 +467,34 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! 
Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -549,6 +578,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -590,9 +620,19 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -689,7 +729,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. 
*/ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -792,7 +832,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -851,7 +891,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); + ctx->dir_cookie = 0; } return ctx; } @@ -1015,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). 
@@ -1022,21 +1063,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1074,6 +1103,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } /** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + +/** * nfs_begin_data_update * @inode - pointer to inode * Declare that a set of operations 
will update file data on the server @@ -1106,27 +1163,6 @@ void nfs_end_data_update(struct inode *inode) } /** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. - */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - -/** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes @@ -1152,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1176,18 +1215,22 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + 
nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1215,10 +1258,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1251,61 +1292,56 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? 
*/ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != 
(fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1385,74 +1421,95 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* 
Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! 
Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { @@ -1462,6 +1519,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1470,10 +1532,12 @@ static void nfs_kill_super(struct super_block *s) kill_anon_super(s); - if (server->client != NULL && !IS_ERR(server->client)) + if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + if (!IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1594,15 +1658,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, 
sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1610,21 +1678,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } down_write(&clp->cl_sem); - if (clp->cl_rpcclient == NULL) { + if (IS_ERR(clp->cl_rpcclient)) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1656,21 +1725,26 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. 
Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - return -ENOMEM; + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); } } @@ -1730,8 +1804,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1741,11 +1819,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? 
"older" : "newer"); - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1773,11 +1847,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1785,13 +1868,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { up_write(&s->s_umount); @@ -1875,6 +1951,13 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 9d3ddad96d9e..0e82617f2de0 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c new file mode 100644 index 000000000000..1b7a3ef2f813 --- /dev/null +++ b/fs/nfs/nfs3acl.c @@ -0,0 +1,403 @@ +#include <linux/fs.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/posix_acl_xattr.h> +#include <linux/nfsacl.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return 
-ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: 
nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. 
*/ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. 
*/ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. 
*/ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, 
ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3878494dfc2c..7851569b31c6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include <linux/nfs_page.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> +#include <linux/nfs_mount.h> #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -369,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: dprintk("NFS reply create: %d\n", status); return status; @@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr 
*sattr) .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } @@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -856,4 +882,5 @@ 
struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a3593d47e5ab..db4a904810a4 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> +#include <linux/nfsacl.h> #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? 
args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? 
&res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -1021,3 +1143,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h new file mode 100644 index 000000000000..ec1a22d7b876 --- /dev/null +++ b/fs/nfs/nfs4_fs.h @@ -0,0 +1,253 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. 
+ * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. + */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. 
+ */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_sema is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. + */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct semaphore so_sema; + u32 so_seqid; /* protected by so_sema */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. 
+ * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + struct semaphore lock_sema; /* Serializes file locking operations */ + spinlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); 
+extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); 
+extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(inode, owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d5cb3e80c3e..1b76f80aedb9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,6 +48,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) { @@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -124,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, BUG_ON(readdir->count < 80); if (cookie > 2) { - readdir->cookie = (cookie > 2) ? 
cookie : 0; + readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); return; } @@ -270,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta int err; do { err = _nfs4_open_reclaim(sp, state); - switch (err) { - case 0: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - return err; - } - err = nfs4_handle_exception(server, err, &exception); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -509,6 +503,20 @@ out_stale: goto out_nodeleg; } +static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_open_expired(sp, state, dentry); + if (err == -NFS4ERR_DELAY) + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -521,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta continue; get_nfs_open_context(ctx); spin_unlock(&state->inode->i_lock); - status = _nfs4_open_expired(sp, state, ctx->dentry); + status = nfs4_do_open_expired(sp, state, ctx->dentry); put_nfs_open_context(ctx); return status; } @@ -748,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, fattr->valid = 0; - if (state != NULL) + if (state != NULL) { msg.rpc_cred = state->owner->so_cred; - if (sattr->ia_valid & ATTR_SIZE) - nfs4_copy_stateid(&arg.stateid, state, NULL); - else + nfs4_copy_stateid(&arg.stateid, state, current->files); + } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); return rpc_call_sync(server->client, &msg, 0); @@ -1116,47 +1123,31 
@@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode * inode = dentry->d_inode; - int size_change = sattr->ia_valid & ATTR_SIZE; - struct nfs4_state *state = NULL; - int need_iput = 0; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + struct nfs4_state *state; int status; fattr->valid = 0; - if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + /* Search for an existing WRITE delegation first */ + state = nfs4_open_delegated(inode, FMODE_WRITE, cred); + if (!IS_ERR(state)) { + /* NB: nfs4_open_delegated() bumps the inode->i_count */ + iput(inode); + } else { + /* Search for an existing open(O_WRITE) stateid */ state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (state == NULL) { - state = nfs4_open_delegated(dentry->d_inode, - FMODE_WRITE, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dentry->d_parent->d_inode, - dentry, FMODE_WRITE, - NULL, cred); - need_iput = 1; - } - put_rpccred(cred); - if (IS_ERR(state)) - return PTR_ERR(state); - - if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); - status = -EIO; - goto out; - } } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); -out: - if (state) { - inode = state->inode; + if (state != NULL) nfs4_close_state(state, FMODE_WRITE); - if (need_iput) - iput(inode); - } + put_rpccred(cred); return status; } @@ -1731,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); lock_kernel(); nfs4_setup_readdir(cookie, 
NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; @@ -1738,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2163,6 +2159,193 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp) return 0; } +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +struct nfs4_cached_acl { + int cached; + size_t len; + char data[0]; +}; + +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) 
man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. 
*/ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret) + goto out_free; + if (resp_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, resp_len); + else + nfs4_write_cached_acl(inode, resp_buf, resp_len); + if (buf) { + ret = -ERANGE; + if (resp_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, resp_len); + } + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2448,14 +2631,11 @@ static int _nfs4_proc_getlk(struct 
nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (lsp) - nlo.id = lsp->ls_id; - else { - spin_lock(&clp->cl_lock); - nlo.id = nfs4_alloc_lockowner_id(clp); - spin_unlock(&clp->cl_lock); - } + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + nlo.id = lsp->ls_id; arg.u.lockt = &nlo; status = rpc_call_sync(server->client, &msg, 0); if (!status) { @@ -2476,8 +2656,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock request->fl_pid = 0; status = 0; } - if (lsp) - nfs4_put_lock_state(lsp); +out: up(&state->lock_sema); up_read(&clp->cl_sem); return status; @@ -2537,28 +2716,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock }; struct nfs4_lock_state *lsp; struct nfs_locku_opargs luargs; - int status = 0; + int status; down_read(&clp->cl_sem); down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (!lsp) + status = nfs4_set_lock_state(state, request); + if (status != 0) goto out; + lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! 
*/ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); - } + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_lock_seqid(status, lsp); - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(state, request, lsp); - } - nfs4_put_lock_state(lsp); out: up(&state->lock_sema); if (status == 0) @@ -2584,7 +2761,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2606,9 +2783,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }; int status; - lsp = nfs4_get_lock_state(state, request->fl_owner); - if (lsp == NULL) - return -ENOMEM; if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { @@ -2630,38 +2804,57 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r * seqid mutating errors */ nfs4_increment_seqid(status, owner); up(&owner->so_sema); + if (status == 0) { + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + lsp->ls_seqid++; + } } else { struct nfs_exist_lock el = { .seqid = lsp->ls_seqid, }; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = ⪙ - largs.new_lock_owner = 0; arg.u.lock = &largs; status = 
rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); } - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); /* save the returned stateid. */ - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - if (!reclaim) - nfs4_notify_setlk(state, request, lsp); - } else if (status == -NFS4ERR_DENIED) + else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs4_put_lock_state(lsp); return status; } static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 1); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 1); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 0); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 0); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -2671,7 +2864,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); down(&state->lock_sema); - status = _nfs4_do_setlk(state, cmd, request, 0); + status = nfs4_set_lock_state(state, request); + if (status == 0) + status = _nfs4_do_setlk(state, cmd, request, 0); up(&state->lock_sema); if (status == 0) { /* Note: we always want 
to sleep here! */ @@ -2729,10 +2924,53 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (signalled()) break; } while(status < 0); - return status; } + +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, @@ -2743,10 +2981,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { .recover_lock = nfs4_lock_expired, }; +static struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops 
= &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -2777,6 +3025,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, }; /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 667e06f1c647..a3001628ad32 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 231cebce3c87..afe587d82f1e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include <linux/workqueue.h> #include <linux/bitops.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) @@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_OK; return clp; @@ -137,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp) if (clp->cl_cred) put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); - if (clp->cl_rpcclient) + if (!IS_ERR(clp->cl_rpcclient)) 
rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); nfs_callback_down(); @@ -365,7 +360,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); init_MUTEX(&state->lock_sema); - rwlock_init(&state->state_lock); + spin_lock_init(&state->state_lock); return state; } @@ -547,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) -{ - struct nfs4_lock_state *lsp; - read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); - read_unlock(&state->state_lock); - return lsp; -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -573,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f return NULL; lsp->ls_flags = 0; lsp->ls_seqid = 0; /* arbitrary */ - lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; - INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } @@ -590,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * * The caller must be holding state->lock_sema and clp->cl_sem */ -struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { - struct nfs4_lock_state * lsp; + struct nfs4_lock_state *lsp, *new = NULL; - lsp = nfs4_find_lock_state(state, owner); - if (lsp == NULL) - lsp = nfs4_alloc_lock_state(state, owner); + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + new->ls_state = state; + 
list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + kfree(new); return lsp; } /* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. + * Release reference to lock_state, and free it if we see that + * it is no longer in use */ -void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { - if (test_bit(LK_STATE_IN_USE, &state->flags)) { - struct nfs4_lock_state *lsp; + struct nfs4_state *state; - lsp = nfs4_find_lock_state(state, fl_owner); - if (lsp) { - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); - nfs4_put_lock_state(lsp); - return; - } - } - memcpy(dst, &state->stateid, sizeof(*dst)); + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + kfree(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; -} + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; -/* -* Check to see if the request lock (type FL_UNLK) effects the fl lock. 
-* -* fl and request must have the same posix owner -* -* return: -* 0 -> fl not effected by request -* 1 -> fl consumed by request -*/ + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} -static int -nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +static void nfs4_fl_release_lock(struct file_lock *fl) { - if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) - return 1; - return 0; + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -/* - * Post an initialized lock_state on the state->lock_states list. - */ -void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +static struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; + +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) { - if (!list_empty(&lsp->ls_locks)) - return; - atomic_inc(&lsp->ls_count); - write_lock(&state->state_lock); - list_add(&lsp->ls_locks, &state->lock_states); - set_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; + return 0; } -/* - * to decide to 'reap' lock state: - * 1) search i_flock for file_locks with fl.lock_state = to ls. - * 2) determine if unlock will consume found lock. - * if so, reap - * - * else, don't reap. - * +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. 
*/ -void -nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { - struct inode *inode = state->inode; - struct file_lock *fl; + struct nfs4_lock_state *lsp; - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != lsp->ls_owner) - continue; - /* Exit if we find at least one lock which is not consumed */ - if (nfs4_check_unlock(fl,request) == 0) - return; - } + memcpy(dst, &state->stateid, sizeof(*dst)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + return; - write_lock(&state->state_lock); - list_del_init(&lsp->ls_locks); - if (list_empty(&state->lock_states)) - clear_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); } /* - * Release reference to lock_state, and free it if we see that - * it is no longer in use - */ -void -nfs4_put_lock_state(struct nfs4_lock_state *lsp) +* Called with state->lock_sema and clp->cl_sem held. 
+*/ +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { - if (!atomic_dec_and_test(&lsp->ls_count)) - return; - BUG_ON (!list_empty(&lsp->ls_locks)); - kfree(lsp); + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f4de05763c9..6c564ef9489e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - 
nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +365,20 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. 
*/ len = 16; @@ -660,8 +679,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +686,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +984,6 @@ static int encode_putrootfh(struct xdr_stream *xdr) static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1000,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { struct rpc_auth *auth = req->rq_task->tk_auth; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; int replen; uint32_t *p; @@ -1010,13 +1028,20 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { - WRITE32(0); - WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); - } else { - WRITE32(FATTR4_WORD0_FILEID); - WRITE32(0); - } + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + WRITE32(attrs[0] & readdir->bitmask[0]); + WRITE32(attrs[1] & readdir->bitmask[1]); + dprintk("%s: cookie = %Lu, verifier = 0x%x%x, 
bitmap = 0x%x%x\n", + __FUNCTION__, + (unsigned long long)readdir->cookie, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], + attrs[0] & readdir->bitmask[0], + attrs[1] & readdir->bitmask[1]); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1025,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, readdir->pgbase, readdir->count); + dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", + __FUNCTION__, replen, readdir->pages, + readdir->pgbase, readdir->count); return 0; } @@ -1089,6 +1117,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client } static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + +static int encode_savefh(struct xdr_stream *xdr) { uint32_t *p; @@ -1632,6 +1679,34 @@ out: } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + 
args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1697,7 +1772,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1718,7 +1792,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -3003,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n return status; READ_BUF(8); COPYMEM(readdir->verifier.data, 8); + dprintk("%s: verifier = 0x%x%x\n", + __FUNCTION__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + hdrlen = (char *) p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -3017,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n for (nr = 0; *p++; nr++) { if (p + 3 > end) goto short_pkt; + dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } + dprintk("filename = %*s\n", len, (char *)p); p += XDR_QUADLEN(len); if (p + 1 > end) goto short_pkt; @@ -3042,6 +3122,7 @@ out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; /* truncate listing ? 
*/ if (!nr) { @@ -3127,6 +3208,47 @@ static int decode_renew(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + *acl_len = 0; + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3418,6 +3540,71 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto 
out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE response @@ -3895,6 +4082,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) } len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { + if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + /* Ignore the return value of rdattr_error for now */ + p++; + len--; + } if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) xdr_decode_hyper(p, &entry->ino); else if (bitmap[0] == FATTR4_WORD0_FILEID) @@ -3934,6 +4127,8 @@ static struct { { NFS4ERR_DQUOT, EDQUOT }, { NFS4ERR_STALE, ESTALE }, { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BADOWNER, EINVAL }, + { NFS4ERR_BADNAME, EINVAL }, { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, { NFS4ERR_NOTSUPP, ENOTSUPP }, { NFS4ERR_TOOSMALL, ETOOSMALL }, @@ -4019,6 +4214,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index fd5bc596fe8a..1b272a135a31 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -124,6 +124,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, 
Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, /* Error token */ Opt_err }; @@ -158,6 +159,8 @@ static match_table_t __initdata tokens = { {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_tcp: nfs_data.flags |= NFS_MOUNT_TCP; break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; default : return 0; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4f1ba723848d..d53857b148e2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -107,11 +107,38 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_clear_bit(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); - wake_up_all(&req->wb_context->waitq); + wake_up_bit(&req->wb_flags, PG_BUSY); nfs_release_request(req); } /** + * nfs_set_page_writeback_locked - Lock a request for writeback + * @req: + */ +int nfs_set_page_writeback_locked(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + return 1; +} + +/** + * nfs_clear_page_writeback - Unlock request and wake up sleepers + */ +void nfs_clear_page_writeback(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * @@ -150,34 +177,15 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the 
request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. - */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) +static int nfs_wait_bit_interruptible(void *word) { - struct list_head *pos; + int ret = 0; -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; + if (signal_pending(current)) + ret = -ERESTARTSYS; + else + schedule(); + return ret; } /** @@ -190,12 +198,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + sigset_t oldmask; + int ret = 0; + + if (!test_bit(PG_BUSY, &req->wb_flags)) + goto out; + /* + * Note: the call to rpc_clnt_sigmask() suffices to ensure that we + * are not interrupted if intr flag is not set + */ + rpc_clnt_sigmask(clnt, &oldmask); + ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); +out: + return ret; } /** @@ -243,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to 
scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists @@ -280,7 +354,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, if (req->wb_index > idx_end) break; - if (!nfs_lock_request(req)) + if (!nfs_set_page_writeback_locked(req)) continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d31b4d6e5a5e..cedf636bcf3c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -622,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr 
= nfs_proc_setattr, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a0042fb58634..6f866b8aa2d5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; @@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_clear_request(req); nfs_release_request(req); - nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page) } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6f7a4af3bc46..5130eda231d7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ClearPageError(page); io_error: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return written ? 
written : result; } @@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); @@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -503,13 +505,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_lock(&nfsi->req_lock); next = idx_start; - while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; - if (!NFS_WBACK_BUSY(req)) - continue; + BUG_ON(!NFS_WBACK_BUSY(req)); atomic_inc(&req->wb_count); spin_unlock(&nfsi->req_lock); @@ -538,12 +539,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if 
((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -562,11 +566,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -750,7 +757,7 @@ int nfs_updatepage(struct file *file, struct page *page, * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. */ - if (PageUptodate(page) && inode->i_flock == NULL) { + if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { loff_t end_offs = i_size_read(inode) - 1; unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; @@ -821,7 +828,7 @@ out: #else nfs_inode_remove_request(req); #endif - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } static inline int flush_task_priority(int how) @@ -952,7 +959,7 @@ out_bad: nfs_writedata_free(data); } nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); return -ENOMEM; } @@ -1002,7 +1009,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1029,7 +1036,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); 
nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return error; } @@ -1121,7 +1128,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) nfs_inode_remove_request(req); #endif next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } } @@ -1210,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... - */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1278,7 +1273,7 @@ nfs_commit_list(struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1324,7 +1319,7 @@ nfs_commit_done(struct rpc_task *task) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + 
nfs_clear_page_writeback(req); res++; } sub_page_state(nr_unstable,res); @@ -1342,16 +1337,23 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); - if (res) - error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (res) { + struct nfs_server *server = NFS_SERVER(inode); + + /* For single writes, FLUSH_STABLE is more efficient */ + if (res == nfsi->npages && nfsi->npages <= server->wpages) { + if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) + how |= FLUSH_STABLE; + } + error = nfs_flush_list(&head, server->wpages, how); + } if (error < 0) return error; return res; } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1359,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start, error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1389,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile new file mode 100644 index 000000000000..f689ed82af3a 
--- /dev/null +++ b/fs/nfs_common/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for Linux filesystem routines that are shared by client and server. +# + +obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o + +nfs_acl-objs := nfsacl.o diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c new file mode 100644 index 000000000000..18c58c32e326 --- /dev/null +++ b/fs/nfs_common/nfsacl.c @@ -0,0 +1,257 @@ +/* + * fs/nfs_common/nfsacl.c + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +/* + * The Solaris nfsacl protocol represents some ACLs slightly differently + * than POSIX 1003.1e draft 17 does (and we do): + * + * - Minimal ACLs always have an ACL_MASK entry, so they have + * four instead of three entries. + * - The ACL_MASK entry in such minimal ACLs always has the same + * permissions as the ACL_GROUP_OBJ entry. (In extended ACLs + * the ACL_MASK and ACL_GROUP_OBJ entries may differ.) + * - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ + * entries contain the identifiers of the owner and owning group. + * (In POSIX ACLs we always set them to ACL_UNDEFINED_ID). + * - ACL entries in the kernel are kept sorted in ascending order + * of (e_tag, e_id). Solaris ACLs are unsorted. 
+ */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sunrpc/xdr.h> +#include <linux/nfsacl.h> +#include <linux/nfs3.h> +#include <linux/sort.h> + +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(nfsacl_encode); +EXPORT_SYMBOL(nfsacl_decode); + +struct nfsacl_encode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; + int typeflag; + uid_t uid; + gid_t gid; +}; + +static int +xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_encode_desc *nfsacl_desc = + (struct nfsacl_encode_desc *) desc; + u32 *p = (u32 *) elem; + + if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; + } + *p++ = htonl(entry->e_perm & S_IRWXO); + } else { + const struct posix_acl_entry *pa, *pe; + int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; + + FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { + if (pa->e_tag == ACL_GROUP_OBJ) { + group_obj_perm = pa->e_perm & S_IRWXO; + break; + } + } + /* fake up ACL_MASK entry */ + *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); + *p++ = htonl(0); + *p++ = htonl(group_obj_perm); + } + + return 0; +} + +unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) +{ + int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .array_len = encode_entries ? 
entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES || + xdr_encode_word(buf, base, entries)) + return -EINVAL; + err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (!err) + err = 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; + return err; +} + +struct nfsacl_decode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; +}; + +static int +xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_decode_desc *nfsacl_desc = + (struct nfsacl_decode_desc *) desc; + u32 *p = (u32 *) elem; + struct posix_acl_entry *entry; + + if (!nfsacl_desc->acl) { + if (desc->array_len > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL); + if (!nfsacl_desc->acl) + return -ENOMEM; + nfsacl_desc->count = 0; + } + + entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; + entry->e_id = ntohl(*p++); + entry->e_perm = ntohl(*p++); + + switch(entry->e_tag) { + case ACL_USER_OBJ: + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + case ACL_OTHER: + if (entry->e_perm & ~S_IRWXO) + return -EINVAL; + break; + case ACL_MASK: + /* Solaris sometimes sets additonal bits in the mask */ + entry->e_perm &= S_IRWXO; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +cmp_acl_entry(const void *x, const void *y) +{ + const struct posix_acl_entry *a = x, *b = y; + + if (a->e_tag != b->e_tag) + return a->e_tag - b->e_tag; + else if (a->e_id > b->e_id) + return 1; + else if (a->e_id < b->e_id) + return -1; + else + return 0; +} + +/* + * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL. 
+ */ +static int +posix_acl_from_nfsacl(struct posix_acl *acl) +{ + struct posix_acl_entry *pa, *pe, + *group_obj = NULL, *mask = NULL; + + if (!acl) + return 0; + + sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), + cmp_acl_entry, NULL); + + /* Clear undefined identifier fields and find the ACL_GROUP_OBJ + and ACL_MASK entries. */ + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + break; + case ACL_GROUP_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + group_obj = pa; + break; + case ACL_MASK: + mask = pa; + /* fall through */ + case ACL_OTHER: + pa->e_id = ACL_UNDEFINED_ID; + break; + } + } + if (acl->a_count == 4 && group_obj && mask && + mask->e_perm == group_obj->e_perm) { + /* remove bogus ACL_MASK entry */ + memmove(mask, mask+1, (3 - (mask - acl->a_entries)) * + sizeof(struct posix_acl_entry)); + acl->a_count = 3; + } + return 0; +} + +unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .xcode = pacl ? 
xdr_nfsace_decode : NULL, + }, + }; + u32 entries; + int err; + + if (xdr_decode_word(buf, base, &entries) || + entries > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); + if (err) + return err; + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return -EINVAL; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; +} diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index b8680a247f8b..ce341dc76d5e 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ - nfs4acl.o nfs4callback.o + nfs4acl.o nfs4callback.o nfs4recover.o nfsd-objs := $(nfsd-y) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c new file mode 100644 index 000000000000..7cbf0682b2f0 --- /dev/null +++ b/fs/nfsd/nfs2acl.c @@ -0,0 +1,336 @@ +/* + * linux/fs/nfsd/nfsacl.c + * + * Process version 2 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include <linux/sunrpc/svc.h> +#include <linux/nfs.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/xdr3.h> +#include <linux/posix_acl.h> +#include <linux/nfsacl.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ */ +static int nfsacld_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfssvc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. 
+ */ +static int nfsacld_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfssvc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + return nfserr; +} + +/* + * Check file attributes + */ +static int nfsacld_proc_getattr(struct svc_rqst * rqstp, + struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); +} + +/* + * Check file access + */ +static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + int nfserr; + + dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return nfserr; +} + +/* + * XDR decode functions + */ +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *argp) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p++); + if (argp->mask & 
~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); +} + +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + if (dentry == NULL || dentry->d_inode == NULL) + return 0; + inode = dentry->d_inode; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + return 1; +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} + +#define nfsaclsvc_decode_voidargs NULL +#define nfsaclsvc_encode_voidres NULL +#define nfsaclsvc_release_void NULL +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_attrstatres nfsd_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsacld_proc_##name, \ + (kxdrproc_t) 
nfsaclsvc_decode_##argt##args, \ + (kxdrproc_t) nfsaclsvc_encode_##rest##res, \ + (kxdrproc_t) nfsaclsvc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures2[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+AT+1), +}; + +struct svc_version nfsd_acl_version2 = { + .vs_vers = 2, + .vs_nproc = 5, + .vs_proc = nfsd_acl_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c new file mode 100644 index 000000000000..64ba40572fea --- /dev/null +++ b/fs/nfsd/nfs3acl.c @@ -0,0 +1,267 @@ +/* + * linux/fs/nfsd/nfs3acl.c + * + * Process version 3 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include <linux/sunrpc/svc.h> +#include <linux/nfs3.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr3.h> +#include <linux/posix_acl.h> +#include <linux/nfsacl.h> + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ */ +static int nfsd3_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. 
+ */ +static int nfsd3_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd3_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfs3svc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * XDR decode functions + */ +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *args) +{ + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *args) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? 
+ &args->acl_default : NULL); + return (n > 0); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; +} + +/* SETACL */ +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfsd3_setaclres nfsd3_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + 
(kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures3[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +}; + +struct svc_version nfsd_acl_version3 = { + .vs_vers = 3, + .vs_nproc = 3, + .vs_proc = nfsd_acl_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; + diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 11f806835c5a..e0e134d6baba 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +/* Helper function for NFSv3 ACL code */ +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper for NFSv3 ACLs */ +u32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} + /* * Enocde weak cache consistency data */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 11ebf6c4aa54..4a2105552ac4 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *); static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); -int nfs4_acl_split(struct 
nfs4_acl *, struct nfs4_acl *); +static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); struct nfs4_acl * nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, @@ -775,7 +775,7 @@ out_err: return pacl; } -int +static int nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) { struct list_head *h, *n; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1a55dfcb74bc..583c0710e45e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -54,7 +54,6 @@ /* declarations */ static void nfs4_cb_null(struct rpc_task *task); -extern spinlock_t recall_lock; /* Index of predefined Linux callback client operations */ @@ -329,12 +328,12 @@ out: .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ } -struct rpc_procinfo nfs4_cb_procedures[] = { +static struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), }; -struct rpc_version nfs_cb_version4 = { +static struct rpc_version nfs_cb_version4 = { .number = 1, .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), .procs = nfs4_cb_procedures @@ -348,7 +347,7 @@ static struct rpc_version * nfs_cb_version[] = { /* * Use the SETCLIENTID credential */ -struct rpc_cred * +static struct rpc_cred * nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) { struct auth_cred acred; @@ -387,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) char hostname[32]; int status; - dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n", - cb->cb_parsed, atomic_read(&cb->cb_set)); - if (!cb->cb_parsed || atomic_read(&cb->cb_set)) + if (atomic_read(&cb->cb_set)) return; /* Initialize address */ @@ -427,10 +424,10 @@ nfsd4_probe_callback(struct nfs4_client *clp) * XXX AUTH_UNIX only - need AUTH_GSS.... 
*/ sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); - clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); + clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { dprintk("NFSD: couldn't create callback client\n"); - goto out_xprt; + goto out_err; } clnt->cl_intr = 0; clnt->cl_softrtry = 1; @@ -465,8 +462,6 @@ out_rpciod: out_clnt: rpc_shutdown_client(clnt); goto out_err; -out_xprt: - xprt_destroy(xprt); out_err: dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 4ba540841cf6..5605a26efc57 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm) ent_init(new, itm); } -void +static void ent_put(struct cache_head *ch, struct cache_detail *cd) { if (cache_put(ch, cd)) { @@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail) static int idtoname_parse(struct cache_detail *, char *, int); static struct ent *idtoname_lookup(struct ent *, int); -struct cache_detail idtoname_cache = { +static struct cache_detail idtoname_cache = { .hash_size = ENT_HASHMAX, .hash_table = idtoname_table, .name = "nfs4.idtoname", @@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent) return hash_str(ent->name, ENT_HASHBITS); } -void +static void nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen) { @@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static struct ent *nametoid_lookup(struct ent *, int); -int nametoid_parse(struct cache_detail *, char *, int); +static int nametoid_parse(struct cache_detail *, char *, int); -struct cache_detail nametoid_cache = { +static struct cache_detail nametoid_cache = { .hash_size = ENT_HASHMAX, .hash_table = nametoid_table, .name = "nfs4.nametoid", @@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = { .warn_no_listener = 
warn_no_idmapd, }; -int +static int nametoid_parse(struct cache_detail *cd, char *buf, int buflen) { struct ent ent, *res; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e8158741e8b5..d71f14517b9c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -45,6 +45,7 @@ #include <linux/param.h> #include <linux/major.h> #include <linux/slab.h> +#include <linux/file.h> #include <linux/sunrpc/svc.h> #include <linux/nfsd/nfsd.h> @@ -198,6 +199,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open if (status) goto out; switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + status = nfserr_inval; + if (open->op_create) + goto out; + /* fall through */ case NFS4_OPEN_CLAIM_NULL: /* * (1) set CURRENT_FH to the file being opened, @@ -220,7 +226,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open if (status) goto out; break; - case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEGATE_PREV: printk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); @@ -473,26 +478,27 @@ static inline int nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) { int status; - struct file *filp = NULL; /* no need to check permission - this will be done in nfsd_read() */ + read->rd_filp = NULL; if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, - CHECK_FH | RD_STATE, &filp))) { + CHECK_FH | RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } + if (read->rd_filp) + get_file(read->rd_filp); status = nfs_ok; out: nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = current_fh; - read->rd_filp = filp; return status; } @@ -532,6 +538,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem { int status; + if (nfs4_in_grace()) + return nfserr_grace; status = 
nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); if (status == nfserr_symlink) return nfserr_notdir; @@ -550,6 +558,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, if (!save_fh->fh_dentry) return status; + if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags + & NFSEXP_NOSUBTREECHECK)) + return nfserr_grace; status = nfsd_rename(rqstp, save_fh, rename->rn_sname, rename->rn_snamelen, current_fh, rename->rn_tname, rename->rn_tnamelen); @@ -624,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); goto out; } + if (filp) + get_file(filp); nfs4_unlock_state(); write->wr_bytes_written = write->wr_buflen; @@ -635,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, write->wr_vec, write->wr_vlen, write->wr_buflen, &write->wr_how_written); + if (filp) + fput(filp); if (status == nfserr_symlink) status = nfserr_inval; @@ -923,6 +938,9 @@ encode_op: nfs4_put_stateowner(replay_owner); replay_owner = NULL; } + /* XXX Ugh, we need to get rid of this kind of special case: */ + if (op->opnum == OP_READ && op->u.read.rd_filp) + fput(op->u.read.rd_filp); } out: diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c new file mode 100644 index 000000000000..095f1740f3ae --- /dev/null +++ b/fs/nfsd/nfs4recover.c @@ -0,0 +1,431 @@ +/* +* linux/fs/nfsd/nfs4recover.c +* +* Copyright (c) 2004 The Regents of the University of Michigan. +* All rights reserved. +* +* Andy Adamson <andros@citi.umich.edu> +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. 
Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +*/ + + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfs4.h> +#include <linux/nfsd/state.h> +#include <linux/nfsd/xdr4.h> +#include <linux/param.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <asm/uaccess.h> +#include <asm/scatterlist.h> +#include <linux/crypto.h> + + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* Globals */ +static struct nameidata rec_dir; +static int rec_dir_init = 0; + +static void +nfs4_save_user(uid_t *saveuid, gid_t *savegid) +{ + *saveuid = current->fsuid; + *savegid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; +} + +static void +nfs4_reset_user(uid_t saveuid, gid_t savegid) +{ + current->fsuid = saveuid; + current->fsgid = savegid; +} + +static void +md5_to_hex(char *out, char *md5) +{ + int i; + + for (i=0; i<16; i++) { + unsigned char c = md5[i]; + + *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1); + *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1); + } + *out = '\0'; +} + +int +nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +{ + struct xdr_netobj cksum; + struct crypto_tfm *tfm; + struct scatterlist sg[1]; + int status = nfserr_resource; + + dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", + clname->len, clname->data); + tfm = crypto_alloc_tfm("md5", 0); + if (tfm == NULL) + goto out; + cksum.len = crypto_tfm_alg_digestsize(tfm); + cksum.data = kmalloc(cksum.len, GFP_KERNEL); + if (cksum.data == NULL) + goto out; + crypto_digest_init(tfm); + + sg[0].page = virt_to_page(clname->data); + sg[0].offset = offset_in_page(clname->data); + sg[0].length = clname->len; + + crypto_digest_update(tfm, sg, 1); + crypto_digest_final(tfm, cksum.data); + + md5_to_hex(dname, cksum.data); + + kfree(cksum.data); + status = nfs_ok; +out: + if (tfm) + crypto_free_tfm(tfm); + return status; +} + +static int +nfsd4_rec_fsync(struct dentry *dentry) +{ + struct file *filp; + int status = nfs_ok; + + dprintk("NFSD: nfs4_fsync_rec_dir\n"); + filp = 
dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR); + if (IS_ERR(filp)) { + status = PTR_ERR(filp); + goto out; + } + if (filp->f_op && filp->f_op->fsync) + status = filp->f_op->fsync(filp, filp->f_dentry, 0); + fput(filp); +out: + if (status) + printk("nfsd4: unable to sync recovery directory\n"); + return status; +} + +int +nfsd4_create_clid_dir(struct nfs4_client *clp) +{ + char *dname = clp->cl_recdir; + struct dentry *dentry; + uid_t uid; + gid_t gid; + int status; + + dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); + + if (!rec_dir_init || clp->cl_firststate) + return 0; + + nfs4_save_user(&uid, &gid); + + /* lock the parent */ + down(&rec_dir.dentry->d_inode->i_sem); + + dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + goto out_unlock; + } + status = -EEXIST; + if (dentry->d_inode) { + dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); + goto out_put; + } + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); +out_put: + dput(dentry); +out_unlock: + up(&rec_dir.dentry->d_inode->i_sem); + if (status == 0) { + clp->cl_firststate = 1; + status = nfsd4_rec_fsync(rec_dir.dentry); + } + nfs4_reset_user(uid, gid); + dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); + return status; +} + +typedef int (recdir_func)(struct dentry *, struct dentry *); + +struct dentry_list { + struct dentry *dentry; + struct list_head list; +}; + +struct dentry_list_arg { + struct list_head dentries; + struct dentry *parent; +}; + +static int +nfsd4_build_dentrylist(void *arg, const char *name, int namlen, + loff_t offset, ino_t ino, unsigned int d_type) +{ + struct dentry_list_arg *dla = arg; + struct list_head *dentries = &dla->dentries; + struct dentry *parent = dla->parent; + struct dentry *dentry; + struct dentry_list *child; + + if (name && isdotent(name, namlen)) + return nfs_ok; + dentry = lookup_one_len(name, parent, namlen); + if (IS_ERR(dentry)) + return 
PTR_ERR(dentry); + child = kmalloc(sizeof(*child), GFP_KERNEL); + if (child == NULL) + return -ENOMEM; + child->dentry = dentry; + list_add(&child->list, dentries); + return 0; +} + +static int +nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) +{ + struct file *filp; + struct dentry_list_arg dla = { + .parent = dir, + }; + struct list_head *dentries = &dla.dentries; + struct dentry_list *child; + uid_t uid; + gid_t gid; + int status; + + if (!rec_dir_init) + return 0; + + nfs4_save_user(&uid, &gid); + + filp = dentry_open(dget(dir), mntget(rec_dir.mnt), + O_RDWR); + status = PTR_ERR(filp); + if (IS_ERR(filp)) + goto out; + INIT_LIST_HEAD(dentries); + status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); + fput(filp); + while (!list_empty(dentries)) { + child = list_entry(dentries->next, struct dentry_list, list); + status = f(dir, child->dentry); + if (status) + goto out; + list_del(&child->list); + dput(child->dentry); + kfree(child); + } +out: + while (!list_empty(dentries)) { + child = list_entry(dentries->next, struct dentry_list, list); + list_del(&child->list); + dput(child->dentry); + kfree(child); + } + nfs4_reset_user(uid, gid); + return status; +} + +static int +nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) +{ + int status; + + if (!S_ISREG(dir->d_inode->i_mode)) { + printk("nfsd4: non-file found in client recovery directory\n"); + return -EINVAL; + } + down(&dir->d_inode->i_sem); + status = vfs_unlink(dir->d_inode, dentry); + up(&dir->d_inode->i_sem); + return status; +} + +static int +nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) +{ + int status; + + /* For now this directory should already be empty, but we empty it of + * any regular files anyway, just in case the directory was created by + * a kernel from the future.... 
*/ + nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); + down(&dir->d_inode->i_sem); + status = vfs_rmdir(dir->d_inode, dentry); + up(&dir->d_inode->i_sem); + return status; +} + +static int +nfsd4_unlink_clid_dir(char *name, int namlen) +{ + struct dentry *dentry; + int status; + + dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); + + dentry = lookup_one_len(name, rec_dir.dentry, namlen); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + return status; + } + status = -ENOENT; + if (!dentry->d_inode) + goto out; + + status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); +out: + dput(dentry); + return status; +} + +void +nfsd4_remove_clid_dir(struct nfs4_client *clp) +{ + uid_t uid; + gid_t gid; + int status; + + if (!rec_dir_init || !clp->cl_firststate) + return; + + nfs4_save_user(&uid, &gid); + status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); + nfs4_reset_user(uid, gid); + if (status == 0) + status = nfsd4_rec_fsync(rec_dir.dentry); + if (status) + printk("NFSD: Failed to remove expired client state directory" + " %.*s\n", HEXDIR_LEN, clp->cl_recdir); + return; +} + +static int +purge_old(struct dentry *parent, struct dentry *child) +{ + int status; + + if (nfs4_has_reclaimed_state(child->d_name.name)) + return nfs_ok; + + status = nfsd4_clear_clid_dir(parent, child); + if (status) + printk("failed to remove client recovery directory %s\n", + child->d_name.name); + /* Keep trying, success or failure: */ + return nfs_ok; +} + +void +nfsd4_recdir_purge_old(void) { + int status; + + if (!rec_dir_init) + return; + status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); + if (status == 0) + status = nfsd4_rec_fsync(rec_dir.dentry); + if (status) + printk("nfsd4: failed to purge old clients from recovery" + " directory %s\n", rec_dir.dentry->d_name.name); + return; +} + +static int +load_recdir(struct dentry *parent, struct dentry *child) +{ + if (child->d_name.len != HEXDIR_LEN - 1) { + printk("nfsd4: illegal name %s in recovery 
directory\n", + child->d_name.name); + /* Keep trying; maybe the others are OK: */ + return nfs_ok; + } + nfs4_client_to_reclaim(child->d_name.name); + return nfs_ok; +} + +int +nfsd4_recdir_load(void) { + int status; + + status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); + if (status) + printk("nfsd4: failed loading clients from recovery" + " directory %s\n", rec_dir.dentry->d_name.name); + return status; +} + +/* + * Hold reference to the recovery directory. + */ + +void +nfsd4_init_recdir(char *rec_dirname) +{ + uid_t uid = 0; + gid_t gid = 0; + int status; + + printk("NFSD: Using %s as the NFSv4 state recovery directory\n", + rec_dirname); + + BUG_ON(rec_dir_init); + + nfs4_save_user(&uid, &gid); + + status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir); + if (status == -ENOENT) + printk("NFSD: recovery directory %s doesn't exist\n", + rec_dirname); + + if (!status) + rec_dir_init = 1; + nfs4_reset_user(uid, gid); +} + +void +nfsd4_shutdown_recdir(void) +{ + if (!rec_dir_init) + return; + rec_dir_init = 0; + path_release(&rec_dir); +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 75e8b137580c..89e36526d7f2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -48,39 +48,32 @@ #include <linux/nfs4.h> #include <linux/nfsd/state.h> #include <linux/nfsd/xdr4.h> +#include <linux/namei.h> #define NFSDDBG_FACILITY NFSDDBG_PROC /* Globals */ static time_t lease_time = 90; /* default lease time */ -static time_t old_lease_time = 90; /* past incarnation lease time */ -static u32 nfs4_reclaim_init = 0; -time_t boot_time; -static time_t grace_end = 0; +static time_t user_lease_time = 90; +static time_t boot_time; +static int in_grace = 1; static u32 current_clientid = 1; static u32 current_ownerid = 1; static u32 current_fileid = 1; static u32 current_delegid = 1; static u32 nfs4_init; -stateid_t zerostateid; /* bits all 0 */ -stateid_t onestateid; /* bits all 1 */ - -/* debug counters */ -u32 list_add_perfile = 0; -u32 list_del_perfile = 
0; -u32 add_perclient = 0; -u32 del_perclient = 0; -u32 alloc_file = 0; -u32 free_file = 0; -u32 vfsopen = 0; -u32 vfsclose = 0; -u32 alloc_delegation= 0; -u32 free_delegation= 0; +static stateid_t zerostateid; /* bits all 0 */ +static stateid_t onestateid; /* bits all 1 */ + +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) /* forward declarations */ -struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); +static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); static void release_stateid_lockowners(struct nfs4_stateid *open_stp); +static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; +static void nfs4_set_recdir(char *recdir); /* Locking: * @@ -90,6 +83,11 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp); */ static DECLARE_MUTEX(client_sema); +static kmem_cache_t *stateowner_slab = NULL; +static kmem_cache_t *file_slab = NULL; +static kmem_cache_t *stateid_slab = NULL; +static kmem_cache_t *deleg_slab = NULL; + void nfs4_lock_state(void) { @@ -118,16 +116,36 @@ opaque_hashval(const void *ptr, int nbytes) /* forward declarations */ static void release_stateowner(struct nfs4_stateowner *sop); static void release_stateid(struct nfs4_stateid *stp, int flags); -static void release_file(struct nfs4_file *fp); /* * Delegation state */ /* recall_lock protects the del_recall_lru */ -spinlock_t recall_lock; +static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED; static struct list_head del_recall_lru; +static void +free_nfs4_file(struct kref *kref) +{ + struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref); + list_del(&fp->fi_hash); + iput(fp->fi_inode); + kmem_cache_free(file_slab, fp); +} + +static inline void +put_nfs4_file(struct nfs4_file *fi) +{ + kref_put(&fi->fi_ref, 
free_nfs4_file); +} + +static inline void +get_nfs4_file(struct nfs4_file *fi) +{ + kref_get(&fi->fi_ref); +} + static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) { @@ -136,13 +154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; dprintk("NFSD alloc_init_deleg\n"); - if ((dp = kmalloc(sizeof(struct nfs4_delegation), - GFP_KERNEL)) == NULL) + dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); + if (dp == NULL) return dp; - INIT_LIST_HEAD(&dp->dl_del_perfile); - INIT_LIST_HEAD(&dp->dl_del_perclnt); + INIT_LIST_HEAD(&dp->dl_perfile); + INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_client = clp; + get_nfs4_file(fp); dp->dl_file = fp; dp->dl_flock = NULL; get_file(stp->st_vfs_file); @@ -160,9 +179,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f current_fh->fh_handle.fh_size); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - list_add(&dp->dl_del_perfile, &fp->fi_del_perfile); - list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt); - alloc_delegation++; + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); return dp; } @@ -171,8 +189,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); - kfree(dp); - free_delegation++; + put_nfs4_file(dp->dl_file); + kmem_cache_free(deleg_slab, dp); } } @@ -193,15 +211,14 @@ nfs4_close_delegation(struct nfs4_delegation *dp) if (dp->dl_flock) setlease(filp, F_UNLCK, &dp->dl_flock); nfsd_close(filp); - vfsclose++; } /* Called under the state lock. 
*/ static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_del_perfile); - list_del_init(&dp->dl_del_perclnt); + list_del_init(&dp->dl_perfile); + list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); @@ -220,8 +237,8 @@ unhash_delegation(struct nfs4_delegation *dp) #define clientid_hashval(id) \ ((id) & CLIENT_HASH_MASK) -#define clientstr_hashval(name, namelen) \ - (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) +#define clientstr_hashval(name) \ + (opaque_hashval((name), 8) & CLIENT_HASH_MASK) /* * reclaim_str_hashtbl[] holds known client info from previous reset/reboot * used in reboot/reset lease grace period processing @@ -331,11 +348,11 @@ expire_client(struct nfs4_client *clp) INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); - while (!list_empty(&clp->cl_del_perclnt)) { - dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt); + while (!list_empty(&clp->cl_delegations)) { + dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); dprintk("NFSD: expire client. 
dp %p, fp %p\n", dp, dp->dl_flock); - list_del_init(&dp->dl_del_perclnt); + list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&recall_lock); @@ -347,26 +364,26 @@ expire_client(struct nfs4_client *clp) list_del(&clp->cl_idhash); list_del(&clp->cl_strhash); list_del(&clp->cl_lru); - while (!list_empty(&clp->cl_perclient)) { - sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); + while (!list_empty(&clp->cl_openowners)) { + sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); release_stateowner(sop); } put_nfs4_client(clp); } static struct nfs4_client * -create_client(struct xdr_netobj name) { +create_client(struct xdr_netobj name, char *recdir) { struct nfs4_client *clp; if (!(clp = alloc_client(name))) goto out; + memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_count, 1); atomic_set(&clp->cl_callback.cb_set, 0); - clp->cl_callback.cb_parsed = 0; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); - INIT_LIST_HEAD(&clp->cl_perclient); - INIT_LIST_HEAD(&clp->cl_del_perclnt); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); out: return clp; @@ -392,11 +409,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) { get_group_info(target->cr_group_info); } -static int -cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) { - if (!n1 || !n2) - return 0; - return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len)); +static inline int +same_name(const char *n1, const char *n2) { + return 0 == memcmp(n1, n2, HEXDIR_LEN); } static int @@ -446,7 +461,7 @@ check_name(struct xdr_netobj name) { return 1; } -void +static void add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) { unsigned int idhashval; @@ -458,7 +473,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) clp->cl_time = get_seconds(); } -void +static void 
move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); @@ -468,8 +483,7 @@ move_to_confirmed(struct nfs4_client *clp) list_del_init(&clp->cl_strhash); list_del_init(&clp->cl_idhash); list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); - strhashval = clientstr_hashval(clp->cl_name.data, - clp->cl_name.len); + strhashval = clientstr_hashval(clp->cl_recdir); list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); renew_client(clp); } @@ -500,6 +514,30 @@ find_unconfirmed_client(clientid_t *clid) return NULL; } +static struct nfs4_client * +find_confirmed_client_by_str(const char *dname, unsigned int hashval) +{ + struct nfs4_client *clp; + + list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { + if (same_name(clp->cl_recdir, dname)) + return clp; + } + return NULL; +} + +static struct nfs4_client * +find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) +{ + struct nfs4_client *clp; + + list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { + if (same_name(clp->cl_recdir, dname)) + return clp; + } + return NULL; +} + /* a helper function for parse_callback */ static int parse_octet(unsigned int *lenp, char **addrp) @@ -534,7 +572,7 @@ parse_octet(unsigned int *lenp, char **addrp) } /* parse and set the setclientid ipv4 callback address */ -int +static int parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) { int temp = 0; @@ -570,7 +608,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne return 1; } -void +static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { struct nfs4_callback *cb = &clp->cl_callback; @@ -584,14 +622,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) goto out_err; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; - cb->cb_parsed = 1; return; out_err: printk(KERN_INFO "NFSD: this client 
(clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - cb->cb_parsed = 0; return; } @@ -638,59 +674,43 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) }; nfs4_verifier clverifier = setclid->se_verf; unsigned int strhashval; - struct nfs4_client * conf, * unconf, * new, * clp; + struct nfs4_client *conf, *unconf, *new; int status; + char dname[HEXDIR_LEN]; status = nfserr_inval; if (!check_name(clname)) goto out; + status = nfs4_make_rec_clidname(dname, &clname); + if (status) + goto out; + /* * XXX The Duplicate Request Cache (DRC) has been checked (??) * We get here on a DRC miss. */ - strhashval = clientstr_hashval(clname.data, clname.len); + strhashval = clientstr_hashval(dname); - conf = NULL; nfs4_lock_state(); - list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) { - if (!cmp_name(&clp->cl_name, &clname)) - continue; + conf = find_confirmed_client_by_str(dname, strhashval); + if (conf) { /* * CASE 0: * clname match, confirmed, different principal * or different ip_address */ status = nfserr_clid_inuse; - if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) { - printk("NFSD: setclientid: string in use by client" - "(clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - goto out; - } - if (clp->cl_addr != ip_addr) { + if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred) + || conf->cl_addr != ip_addr) { printk("NFSD: setclientid: string in use by client" "(clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id); goto out; } - - /* - * cl_name match from a previous SETCLIENTID operation - * XXX check for additional matches? 
- */ - conf = clp; - break; - } - unconf = NULL; - list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) { - if (!cmp_name(&clp->cl_name, &clname)) - continue; - /* cl_name match from a previous SETCLIENTID operation */ - unconf = clp; - break; } + unconf = find_unconfirmed_client_by_str(dname, strhashval); status = nfserr_resource; if (!conf) { /* @@ -699,7 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) */ if (unconf) expire_client(unconf); - if (!(new = create_client(clname))) + new = create_client(clname, dname); + if (new == NULL) goto out; copy_verf(new, &clverifier); new->cl_addr = ip_addr; @@ -722,12 +743,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) * nfs4_client, but with the new callback info and a * new cl_confirm */ - if ((unconf) && - cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) && - cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) { - expire_client(unconf); + if (unconf) { + /* Note this is removing unconfirmed {*x***}, + * which is stronger than RFC recommended {vxc**}. + * This has the advantage that there is at most + * one {*x***} in either list at any time. + */ + expire_client(unconf); } - if (!(new = create_client(clname))) + new = create_client(clname, dname); + if (new == NULL) goto out; copy_verf(new,&conf->cl_verifier); new->cl_addr = ip_addr; @@ -745,7 +770,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) * using input clverifier, clname, and callback info * and generate a new cl_clientid and cl_confirm. 
*/ - if (!(new = create_client(clname))) + new = create_client(clname, dname); + if (new == NULL) goto out; copy_verf(new,&clverifier); new->cl_addr = ip_addr; @@ -771,7 +797,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) * new cl_verifier and a new cl_confirm */ expire_client(unconf); - if (!(new = create_client(clname))) + new = create_client(clname, dname); + if (new == NULL) goto out; copy_verf(new,&clverifier); new->cl_addr = ip_addr; @@ -807,7 +834,7 @@ int nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; - struct nfs4_client *clp, *conf = NULL, *unconf = NULL; + struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; int status; @@ -820,102 +847,90 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi */ nfs4_lock_state(); - clp = find_confirmed_client(clid); - if (clp) { - status = nfserr_inval; - /* - * Found a record for this clientid. If the IP addresses - * don't match, return ERR_INVAL just as if the record had - * not been found. - */ - if (clp->cl_addr != ip_addr) { - printk("NFSD: setclientid: string in use by client" - "(clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - goto out; - } - conf = clp; - } - clp = find_unconfirmed_client(clid); - if (clp) { - status = nfserr_inval; - if (clp->cl_addr != ip_addr) { - printk("NFSD: setclientid: string in use by client" - "(clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - goto out; - } - unconf = clp; - } - /* CASE 1: - * unconf record that matches input clientid and input confirm. - * conf record that matches input clientid. 
- * conf and unconf records match names, verifiers - */ + + conf = find_confirmed_client(clid); + unconf = find_unconfirmed_client(clid); + + status = nfserr_clid_inuse; + if (conf && conf->cl_addr != ip_addr) + goto out; + if (unconf && unconf->cl_addr != ip_addr) + goto out; + if ((conf && unconf) && (cmp_verf(&unconf->cl_confirm, &confirm)) && (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) && - (cmp_name(&conf->cl_name,&unconf->cl_name)) && + (same_name(conf->cl_recdir,unconf->cl_recdir)) && (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) { + /* CASE 1: + * unconf record that matches input clientid and input confirm. + * conf record that matches input clientid. + * conf and unconf records match names, verifiers + */ if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) status = nfserr_clid_inuse; else { - expire_client(conf); - clp = unconf; - move_to_confirmed(unconf); + /* XXX: We just turn off callbacks until we can handle + * change request correctly. */ + atomic_set(&conf->cl_callback.cb_set, 0); + gen_confirm(conf); + expire_client(unconf); status = nfs_ok; + } - goto out; - } - /* CASE 2: - * conf record that matches input clientid. - * if unconf record that matches input clientid, then unconf->cl_name - * or unconf->cl_verifier don't match the conf record. - */ - if ((conf && !unconf) || + } else if ((conf && !unconf) || ((conf && unconf) && (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) || - !cmp_name(&conf->cl_name, &unconf->cl_name)))) { - if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { + !same_name(conf->cl_recdir, unconf->cl_recdir)))) { + /* CASE 2: + * conf record that matches input clientid. + * if unconf record matches input clientid, then + * unconf->cl_name or unconf->cl_verifier don't match the + * conf record. + */ + if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) status = nfserr_clid_inuse; - } else { - clp = conf; + else status = nfs_ok; - } - goto out; - } - /* CASE 3: - * conf record not found. - * unconf record found. 
- * unconf->cl_confirm matches input confirm - */ - if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) { + } else if (!conf && unconf + && cmp_verf(&unconf->cl_confirm, &confirm)) { + /* CASE 3: + * conf record not found. + * unconf record found. + * unconf->cl_confirm matches input confirm + */ if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { status = nfserr_clid_inuse; } else { - status = nfs_ok; - clp = unconf; + unsigned int hash = + clientstr_hashval(unconf->cl_recdir); + conf = find_confirmed_client_by_str(unconf->cl_recdir, + hash); + if (conf) { + nfsd4_remove_clid_dir(conf); + expire_client(conf); + } move_to_confirmed(unconf); + conf = unconf; + status = nfs_ok; } - goto out; - } - /* CASE 4: - * conf record not found, or if conf, then conf->cl_confirm does not - * match input confirm. - * unconf record not found, or if unconf, then unconf->cl_confirm - * does not match input confirm. - */ - if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) && - (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) { + } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) + && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, + &confirm)))) { + /* CASE 4: + * conf record not found, or if conf, conf->cl_confirm does not + * match input confirm. + * unconf record not found, or if unconf, unconf->cl_confirm + * does not match input confirm. 
+ */ status = nfserr_stale_clientid; - goto out; + } else { + /* check that we have hit one of the cases...*/ + status = nfserr_clid_inuse; } - /* check that we have hit one of the cases...*/ - status = nfserr_inval; - goto out; out: if (!status) - nfsd4_probe_callback(clp); + nfsd4_probe_callback(conf); nfs4_unlock_state(); return status; } @@ -961,60 +976,65 @@ alloc_init_file(struct inode *ino) struct nfs4_file *fp; unsigned int hashval = file_hashval(ino); - if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) { + fp = kmem_cache_alloc(file_slab, GFP_KERNEL); + if (fp) { + kref_init(&fp->fi_ref); INIT_LIST_HEAD(&fp->fi_hash); - INIT_LIST_HEAD(&fp->fi_perfile); - INIT_LIST_HEAD(&fp->fi_del_perfile); + INIT_LIST_HEAD(&fp->fi_stateids); + INIT_LIST_HEAD(&fp->fi_delegations); list_add(&fp->fi_hash, &file_hashtbl[hashval]); fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; - alloc_file++; return fp; } return NULL; } static void -release_all_files(void) +nfsd4_free_slab(kmem_cache_t **slab) { - int i; - struct nfs4_file *fp; + int status; - for (i=0;i<FILE_HASH_SIZE;i++) { - while (!list_empty(&file_hashtbl[i])) { - fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash); - /* this should never be more than once... 
*/ - if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) { - printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp); - } - release_file(fp); - } - } + if (*slab == NULL) + return; + status = kmem_cache_destroy(*slab); + *slab = NULL; + WARN_ON(status); } -kmem_cache_t *stateowner_slab = NULL; +static void +nfsd4_free_slabs(void) +{ + nfsd4_free_slab(&stateowner_slab); + nfsd4_free_slab(&file_slab); + nfsd4_free_slab(&stateid_slab); + nfsd4_free_slab(&deleg_slab); +} static int nfsd4_init_slabs(void) { stateowner_slab = kmem_cache_create("nfsd4_stateowners", sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); - if (stateowner_slab == NULL) { - dprintk("nfsd4: out of memory while initializing nfsv4\n"); - return -ENOMEM; - } + if (stateowner_slab == NULL) + goto out_nomem; + file_slab = kmem_cache_create("nfsd4_files", + sizeof(struct nfs4_file), 0, 0, NULL, NULL); + if (file_slab == NULL) + goto out_nomem; + stateid_slab = kmem_cache_create("nfsd4_stateids", + sizeof(struct nfs4_stateid), 0, 0, NULL, NULL); + if (stateid_slab == NULL) + goto out_nomem; + deleg_slab = kmem_cache_create("nfsd4_delegations", + sizeof(struct nfs4_delegation), 0, 0, NULL, NULL); + if (deleg_slab == NULL) + goto out_nomem; return 0; -} - -static void -nfsd4_free_slabs(void) -{ - int status = 0; - - if (stateowner_slab) - status = kmem_cache_destroy(stateowner_slab); - stateowner_slab = NULL; - BUG_ON(status); +out_nomem: + nfsd4_free_slabs(); + dprintk("nfsd4: out of memory while initializing nfsv4\n"); + return -ENOMEM; } void @@ -1055,14 +1075,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str INIT_LIST_HEAD(&sop->so_idhash); INIT_LIST_HEAD(&sop->so_strhash); INIT_LIST_HEAD(&sop->so_perclient); - INIT_LIST_HEAD(&sop->so_perfilestate); - INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ + INIT_LIST_HEAD(&sop->so_stateids); + INIT_LIST_HEAD(&sop->so_perstateid); /* not used */ 
INIT_LIST_HEAD(&sop->so_close_lru); sop->so_time = 0; list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perclient, &clp->cl_perclient); - add_perclient++; + list_add(&sop->so_perclient, &clp->cl_openowners); sop->so_is_open_owner = 1; sop->so_id = current_ownerid++; sop->so_client = clp; @@ -1080,10 +1099,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp) { struct nfs4_stateowner *lock_sop; - while (!list_empty(&open_stp->st_perlockowner)) { - lock_sop = list_entry(open_stp->st_perlockowner.next, - struct nfs4_stateowner, so_perlockowner); - /* list_del(&open_stp->st_perlockowner); */ + while (!list_empty(&open_stp->st_lockowners)) { + lock_sop = list_entry(open_stp->st_lockowners.next, + struct nfs4_stateowner, so_perstateid); + /* list_del(&open_stp->st_lockowners); */ BUG_ON(lock_sop->so_is_open_owner); release_stateowner(lock_sop); } @@ -1096,14 +1115,12 @@ unhash_stateowner(struct nfs4_stateowner *sop) list_del(&sop->so_idhash); list_del(&sop->so_strhash); - if (sop->so_is_open_owner) { + if (sop->so_is_open_owner) list_del(&sop->so_perclient); - del_perclient++; - } - list_del(&sop->so_perlockowner); - while (!list_empty(&sop->so_perfilestate)) { - stp = list_entry(sop->so_perfilestate.next, - struct nfs4_stateid, st_perfilestate); + list_del(&sop->so_perstateid); + while (!list_empty(&sop->so_stateids)) { + stp = list_entry(sop->so_stateids.next, + struct nfs4_stateid, st_perstateowner); if (sop->so_is_open_owner) release_stateid(stp, OPEN_STATE); else @@ -1125,14 +1142,14 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); INIT_LIST_HEAD(&stp->st_hash); - INIT_LIST_HEAD(&stp->st_perfilestate); - INIT_LIST_HEAD(&stp->st_perlockowner); + INIT_LIST_HEAD(&stp->st_perstateowner); + INIT_LIST_HEAD(&stp->st_lockowners); INIT_LIST_HEAD(&stp->st_perfile); 
list_add(&stp->st_hash, &stateid_hashtbl[hashval]); - list_add(&stp->st_perfilestate, &sop->so_perfilestate); - list_add_perfile++; - list_add(&stp->st_perfile, &fp->fi_perfile); + list_add(&stp->st_perstateowner, &sop->so_stateids); + list_add(&stp->st_perfile, &fp->fi_stateids); stp->st_stateowner = sop; + get_nfs4_file(fp); stp->st_file = fp; stp->st_stateid.si_boot = boot_time; stp->st_stateid.si_stateownerid = sop->so_id; @@ -1150,30 +1167,20 @@ release_stateid(struct nfs4_stateid *stp, int flags) struct file *filp = stp->st_vfs_file; list_del(&stp->st_hash); - list_del_perfile++; list_del(&stp->st_perfile); - list_del(&stp->st_perfilestate); + list_del(&stp->st_perstateowner); if (flags & OPEN_STATE) { release_stateid_lockowners(stp); stp->st_vfs_file = NULL; nfsd_close(filp); - vfsclose++; } else if (flags & LOCK_STATE) locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); - kfree(stp); + put_nfs4_file(stp->st_file); + kmem_cache_free(stateid_slab, stp); stp = NULL; } static void -release_file(struct nfs4_file *fp) -{ - free_file++; - list_del(&fp->fi_hash); - iput(fp->fi_inode); - kfree(fp); -} - -void move_to_close_lru(struct nfs4_stateowner *sop) { dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); @@ -1183,11 +1190,10 @@ move_to_close_lru(struct nfs4_stateowner *sop) sop->so_time = get_seconds(); } -void +static void release_state_owner(struct nfs4_stateid *stp, int flag) { struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_file *fp = stp->st_file; dprintk("NFSD: release_state_owner\n"); release_stateid(stp, flag); @@ -1196,12 +1202,8 @@ release_state_owner(struct nfs4_stateid *stp, int flag) * released by the laundromat service after the lease period * to enable us to handle CLOSE replay */ - if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) + if (sop->so_confirmed && list_empty(&sop->so_stateids)) move_to_close_lru(sop); - /* unused nfs4_file's are releseed. XXX slab cache? 
*/ - if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) { - release_file(fp); - } } static int @@ -1231,8 +1233,10 @@ find_file(struct inode *ino) struct nfs4_file *fp; list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { - if (fp->fi_inode == ino) + if (fp->fi_inode == ino) { + get_nfs4_file(fp); return fp; + } } return NULL; } @@ -1240,7 +1244,7 @@ find_file(struct inode *ino) #define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) #define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) -void +static void set_access(unsigned int *access, unsigned long bmap) { int i; @@ -1251,7 +1255,7 @@ set_access(unsigned int *access, unsigned long bmap) { } } -void +static void set_deny(unsigned int *deny, unsigned long bmap) { int i; @@ -1277,25 +1281,30 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid */ -int +static int nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; struct nfs4_stateid *stp; + int ret; dprintk("NFSD: nfs4_share_conflict\n"); fp = find_file(ino); - if (fp) { + if (!fp) + return nfs_ok; + ret = nfserr_share_denied; /* Search for conflicting share reservations */ - list_for_each_entry(stp, &fp->fi_perfile, st_perfile) { - if (test_bit(deny_type, &stp->st_deny_bmap) || - test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) - return nfserr_share_denied; - } + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { + if (test_bit(deny_type, &stp->st_deny_bmap) || + test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + goto out; } - return nfs_ok; + ret = nfs_ok; +out: + put_nfs4_file(fp); + return ret; } static inline void @@ -1427,7 +1436,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) return -EAGAIN; } -struct lock_manager_operations nfsd_lease_mng_ops = { +static struct lock_manager_operations nfsd_lease_mng_ops = { .fl_break = 
nfsd_break_deleg_cb, .fl_release_private = nfsd_release_deleg_cb, .fl_copy_lock = nfsd_copy_lock_deleg_cb, @@ -1526,6 +1535,51 @@ out: return status; } +static inline int +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) +{ + if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + return nfserr_openmode; + else + return nfs_ok; +} + +static struct nfs4_delegation * +find_delegation_file(struct nfs4_file *fp, stateid_t *stid) +{ + struct nfs4_delegation *dp; + + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { + if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) + return dp; + } + return NULL; +} + +static int +nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, + struct nfs4_delegation **dp) +{ + int flags; + int status = nfserr_bad_stateid; + + *dp = find_delegation_file(fp, &open->op_delegate_stateid); + if (*dp == NULL) + goto out; + flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ? + RD_STATE : WR_STATE; + status = nfs4_check_delegmode(*dp, flags); + if (status) + *dp = NULL; +out: + if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR) + return nfs_ok; + if (status) + return status; + open->op_stateowner->so_confirmed = 1; + return nfs_ok; +} + static int nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) { @@ -1533,7 +1587,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state int status = nfserr_share_denied; struct nfs4_stateowner *sop = open->op_stateowner; - list_for_each_entry(local, &fp->fi_perfile, st_perfile) { + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; @@ -1549,25 +1603,37 @@ out: return status; } +static inline struct nfs4_stateid * +nfs4_alloc_stateid(void) +{ + return kmem_cache_alloc(stateid_slab, GFP_KERNEL); +} + static int nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, + struct 
nfs4_delegation *dp, struct svc_fh *cur_fh, int flags) { struct nfs4_stateid *stp; - int status; - stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL); + stp = nfs4_alloc_stateid(); if (stp == NULL) return nfserr_resource; - status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file); - if (status) { - if (status == nfserr_dropit) - status = nfserr_jukebox; - kfree(stp); - return status; + if (dp) { + get_file(dp->dl_vfs_file); + stp->st_vfs_file = dp->dl_vfs_file; + } else { + int status; + status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, + &stp->st_vfs_file); + if (status) { + if (status == nfserr_dropit) + status = nfserr_jukebox; + kmem_cache_free(stateid_slab, stp); + return status; + } } - vfsopen++; *stpp = stp; return 0; } @@ -1628,6 +1694,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status) *status = nfserr_reclaim_bad; else { open->op_stateowner->so_confirmed = 1; + open->op_stateowner->so_client->cl_firststate = 1; open->op_stateowner->so_seqid--; } } @@ -1646,14 +1713,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta int status, flag = 0; flag = NFS4_OPEN_DELEGATE_NONE; - if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL - || !atomic_read(&cb->cb_set) || !sop->so_confirmed) - goto out; - - if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + open->op_recall = 0; + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_PREVIOUS: + if (!atomic_read(&cb->cb_set)) + open->op_recall = 1; + flag = open->op_delegate_type; + if (flag == NFS4_OPEN_DELEGATE_NONE) + goto out; + break; + case NFS4_OPEN_CLAIM_NULL: + /* Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... 
*/ + if (nfs4_in_grace()) + goto out; + if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) + goto out; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + flag = NFS4_OPEN_DELEGATE_WRITE; + else + flag = NFS4_OPEN_DELEGATE_READ; + break; + default: + goto out; + } dp = alloc_init_deleg(sop->so_client, stp, fh, flag); if (dp == NULL) { @@ -1687,6 +1770,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta dp->dl_stateid.si_fileid, dp->dl_stateid.si_generation); out: + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS + && flag == NFS4_OPEN_DELEGATE_NONE + && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) + printk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_delegate_type = flag; } @@ -1699,6 +1786,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_file *fp = NULL; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_stateid *stp = NULL; + struct nfs4_delegation *dp = NULL; int status; status = nfserr_inval; @@ -1713,7 +1801,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; + status = nfs4_check_deleg(fp, open, &dp); + if (status) + goto out; } else { + status = nfserr_bad_stateid; + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) + goto out; status = nfserr_resource; fp = alloc_init_file(ino); if (fp == NULL) @@ -1736,7 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf flags = MAY_WRITE; else flags = MAY_READ; - if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags))) + status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); + if (status) goto out; init_stateid(stp, fp, open); status = nfsd4_truncate(rqstp, current_fh, open); @@ -1759,10 +1854,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 
stp->st_stateid.si_fileid, stp->st_stateid.si_generation); out: - /* take the opportunity to clean up unused state */ - if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) - release_file(fp); - + if (fp) + put_nfs4_file(fp); /* CLAIM_PREVIOUS has different error returns */ nfs4_set_claim_prev(open, &status); /* @@ -1775,6 +1868,7 @@ out: return status; } +static struct workqueue_struct *laundry_wq; static struct work_struct laundromat_work; static void laundromat_main(void *); static DECLARE_WORK(laundromat_work, laundromat_main, NULL); @@ -1800,7 +1894,7 @@ nfsd4_renew(clientid_t *clid) } renew_client(clp); status = nfserr_cb_path_down; - if (!list_empty(&clp->cl_del_perclnt) + if (!list_empty(&clp->cl_delegations) && !atomic_read(&clp->cl_callback.cb_set)) goto out; status = nfs_ok; @@ -1809,7 +1903,15 @@ out: return status; } -time_t +static void +end_grace(void) +{ + dprintk("NFSD: end of grace period\n"); + nfsd4_recdir_purge_old(); + in_grace = 0; +} + +static time_t nfs4_laundromat(void) { struct nfs4_client *clp; @@ -1823,6 +1925,8 @@ nfs4_laundromat(void) nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); + if (in_grace) + end_grace(); list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -1833,6 +1937,7 @@ nfs4_laundromat(void) } dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); + nfsd4_remove_clid_dir(clp); expire_client(clp); } INIT_LIST_HEAD(&reaplist); @@ -1882,13 +1987,13 @@ laundromat_main(void *not_used) t = nfs4_laundromat(); dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); - schedule_delayed_work(&laundromat_work, t*HZ); + queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); } /* search ownerid_hashtbl[] and close_lru for stateid owner * (stateid->si_stateownerid) */ -struct nfs4_stateowner * +static struct nfs4_stateowner * 
find_openstateowner_id(u32 st_id, int flags) { struct nfs4_stateowner *local = NULL; @@ -1949,15 +2054,6 @@ out: } static inline int -nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) -{ - if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) - return nfserr_openmode; - else - return nfs_ok; -} - -static inline int check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) { /* Trying to call delegreturn with a special stateid? Yuch: */ @@ -2071,7 +2167,7 @@ out: /* * Checks for sequence id mutating operations. */ -int +static int nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) { int status; @@ -2230,6 +2326,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs stp->st_stateid.si_stateownerid, stp->st_stateid.si_fileid, stp->st_stateid.si_generation); + + nfsd4_create_clid_dir(sop->so_client); out: if (oc->oc_stateowner) nfs4_get_stateowner(oc->oc_stateowner); @@ -2387,7 +2485,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; -struct nfs4_stateid * +static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags) { struct nfs4_stateid *local = NULL; @@ -2419,25 +2517,19 @@ find_stateid(stateid_t *stid, int flags) static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid) { - struct nfs4_delegation *dp = NULL; - struct nfs4_file *fp = NULL; - u32 st_id; + struct nfs4_file *fp; + struct nfs4_delegation *dl; dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", stid->si_boot, stid->si_stateownerid, stid->si_fileid, stid->si_generation); - st_id = stid->si_stateownerid; fp = find_file(ino); - if (fp) { - list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { - 
if(dp->dl_stateid.si_stateownerid == st_id) { - dprintk("NFSD: find_delegation dp %p\n",dp); - return dp; - } - } - } - return NULL; + if (!fp) + return NULL; + dl = find_delegation_file(fp, stid); + put_nfs4_file(fp); + return dl; } /* @@ -2457,7 +2549,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) lock->fl_end = OFFSET_MAX; } -int +static int nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) { struct nfs4_stateowner *local = NULL; @@ -2498,22 +2590,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) } static struct nfs4_stateowner * -find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid) -{ - struct nfs4_stateowner *local = NULL; - int i; - - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) { - if (!cmp_owner_str(local, owner, clid)) - continue; - return local; - } - } - return NULL; -} - -static struct nfs4_stateowner * find_lockstateowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { @@ -2548,13 +2624,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str INIT_LIST_HEAD(&sop->so_idhash); INIT_LIST_HEAD(&sop->so_strhash); INIT_LIST_HEAD(&sop->so_perclient); - INIT_LIST_HEAD(&sop->so_perfilestate); - INIT_LIST_HEAD(&sop->so_perlockowner); + INIT_LIST_HEAD(&sop->so_stateids); + INIT_LIST_HEAD(&sop->so_perstateid); INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ sop->so_time = 0; list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); + list_add(&sop->so_perstateid, &open_stp->st_lockowners); sop->so_is_open_owner = 0; sop->so_id = current_ownerid++; sop->so_client = clp; @@ -2567,24 +2643,24 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str return sop; } -struct nfs4_stateid * +static struct nfs4_stateid * 
alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) { struct nfs4_stateid *stp; unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); - if ((stp = kmalloc(sizeof(struct nfs4_stateid), - GFP_KERNEL)) == NULL) + stp = nfs4_alloc_stateid(); + if (stp == NULL) goto out; INIT_LIST_HEAD(&stp->st_hash); INIT_LIST_HEAD(&stp->st_perfile); - INIT_LIST_HEAD(&stp->st_perfilestate); - INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ + INIT_LIST_HEAD(&stp->st_perstateowner); + INIT_LIST_HEAD(&stp->st_lockowners); /* not used */ list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); - list_add(&stp->st_perfile, &fp->fi_perfile); - list_add_perfile++; - list_add(&stp->st_perfilestate, &sop->so_perfilestate); + list_add(&stp->st_perfile, &fp->fi_stateids); + list_add(&stp->st_perstateowner, &sop->so_stateids); stp->st_stateowner = sop; + get_nfs4_file(fp); stp->st_file = fp; stp->st_stateid.si_boot = boot_time; stp->st_stateid.si_stateownerid = sop->so_id; @@ -2598,7 +2674,7 @@ out: return stp; } -int +static int check_lock_length(u64 offset, u64 length) { return ((length == 0) || ((length != ~(u64)0) && @@ -2611,7 +2687,7 @@ check_lock_length(u64 offset, u64 length) int nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) { - struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL; + struct nfs4_stateowner *open_sop = NULL; struct nfs4_stateid *lock_stp; struct file *filp; struct file_lock file_lock; @@ -2670,16 +2746,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock strhashval = lock_ownerstr_hashval(fp->fi_inode, open_sop->so_client->cl_clientid.cl_id, &lock->v.new.owner); - /* - * If we already have this lock owner, the client is in - * error (or our bookeeping is wrong!) - * for asking for a 'new lock'. 
- */ - status = nfserr_bad_stateid; - lock_sop = find_lockstateowner(&lock->v.new.owner, - &lock->v.new.clientid); - if (lock_sop) - goto out; + /* XXX: Do we need to check for duplicate stateowners on + * the same file, or should they just be allowed (and + * create new stateids)? */ status = nfserr_resource; if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) goto out; @@ -2970,8 +3039,11 @@ int nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) { clientid_t *clid = &rlockowner->rl_clientid; - struct nfs4_stateowner *local = NULL; + struct nfs4_stateowner *sop; + struct nfs4_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; + struct list_head matches; + int i; int status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -2987,22 +3059,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner * nfs4_lock_state(); - status = nfs_ok; - local = find_lockstateowner(owner, clid); - if (local) { - struct nfs4_stateid *stp; - - /* check for any locks held by any stateid - * associated with the (lock) stateowner */ - status = nfserr_locks_held; - list_for_each_entry(stp, &local->so_perfilestate, - st_perfilestate) { - if (check_for_locks(stp->st_vfs_file, local)) - goto out; + status = nfserr_locks_held; + /* XXX: we're doing a linear search through all the lockowners. + * Yipes! For now we'll just hope clients aren't really using + * release_lockowner much, but eventually we have to fix these + * data structures. */ + INIT_LIST_HEAD(&matches); + for (i = 0; i < LOCK_HASH_SIZE; i++) { + list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { + if (!cmp_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + if (check_for_locks(stp->st_vfs_file, sop)) + goto out; + /* Note: so_perclient unused for lockowners, + * so it's OK to fool with here. 
*/ + list_add(&sop->so_perclient, &matches); + } } - /* no locks held by (lock) stateowner */ - status = nfs_ok; - release_stateowner(local); + } + /* Clients probably won't expect us to return with some (but not all) + * of the lockowner state released; so don't release any until all + * have been checked. */ + status = nfs_ok; + list_for_each_entry(sop, &matches, so_perclient) { + release_stateowner(sop); } out: nfs4_unlock_state(); @@ -3010,39 +3092,38 @@ out: } static inline struct nfs4_client_reclaim * -alloc_reclaim(int namelen) +alloc_reclaim(void) { - struct nfs4_client_reclaim *crp = NULL; + return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); +} - crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); - if (!crp) - return NULL; - crp->cr_name.data = kmalloc(namelen, GFP_KERNEL); - if (!crp->cr_name.data) { - kfree(crp); - return NULL; - } - return crp; +int +nfs4_has_reclaimed_state(const char *name) +{ + unsigned int strhashval = clientstr_hashval(name); + struct nfs4_client *clp; + + clp = find_confirmed_client_by_str(name, strhashval); + return clp ? 1 : 0; } /* * failure => all reset bets are off, nfserr_no_grace... 
*/ -static int -nfs4_client_to_reclaim(char *name, int namlen) +int +nfs4_client_to_reclaim(const char *name) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name); - crp = alloc_reclaim(namlen); + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); + crp = alloc_reclaim(); if (!crp) return 0; - strhashval = clientstr_hashval(name, namlen); + strhashval = clientstr_hashval(name); INIT_LIST_HEAD(&crp->cr_strhash); list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_name.data, name, namlen); - crp->cr_name.len = namlen; + memcpy(crp->cr_recdir, name, HEXDIR_LEN); reclaim_str_hashtbl_size++; return 1; } @@ -3053,13 +3134,11 @@ nfs4_release_reclaim(void) struct nfs4_client_reclaim *crp = NULL; int i; - BUG_ON(!nfs4_reclaim_init); for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&reclaim_str_hashtbl[i])) { crp = list_entry(reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); list_del(&crp->cr_strhash); - kfree(crp->cr_name.data); kfree(crp); reclaim_str_hashtbl_size--; } @@ -3069,7 +3148,7 @@ nfs4_release_reclaim(void) /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. 
*/ -struct nfs4_client_reclaim * +static struct nfs4_client_reclaim * nfs4_find_reclaim_client(clientid_t *clid) { unsigned int strhashval; @@ -3082,13 +3161,14 @@ nfs4_find_reclaim_client(clientid_t *clid) if (clp == NULL) return NULL; - dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n", - clp->cl_name.len, clp->cl_name.data); + dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", + clp->cl_name.len, clp->cl_name.data, + clp->cl_recdir); /* find clp->cl_name in reclaim_str_hashtbl */ - strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); + strhashval = clientstr_hashval(clp->cl_recdir); list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { - if (cmp_name(&crp->cr_name, &clp->cl_name)) { + if (same_name(crp->cr_recdir, clp->cl_recdir)) { return crp; } } @@ -3101,30 +3181,16 @@ nfs4_find_reclaim_client(clientid_t *clid) int nfs4_check_open_reclaim(clientid_t *clid) { - struct nfs4_client_reclaim *crp; - - if ((crp = nfs4_find_reclaim_client(clid)) == NULL) - return nfserr_reclaim_bad; - return nfs_ok; + return nfs4_find_reclaim_client(clid) ? 
nfs_ok : nfserr_reclaim_bad; } +/* initialization to perform at module load time: */ -/* - * Start and stop routines - */ - -static void -__nfs4_state_init(void) +void +nfs4_state_init(void) { int i; - time_t grace_time; - if (!nfs4_reclaim_init) { - for (i = 0; i < CLIENT_HASH_SIZE; i++) - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); - reclaim_str_hashtbl_size = 0; - nfs4_reclaim_init = 1; - } for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -3146,26 +3212,46 @@ __nfs4_state_init(void) INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); } - memset(&zerostateid, 0, sizeof(stateid_t)); memset(&onestateid, ~0, sizeof(stateid_t)); - INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); - spin_lock_init(&recall_lock); + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); + reclaim_str_hashtbl_size = 0; +} + +static void +nfsd4_load_reboot_recovery_data(void) +{ + int status; + + nfs4_lock_state(); + nfsd4_init_recdir(user_recovery_dirname); + status = nfsd4_recdir_load(); + nfs4_unlock_state(); + if (status) + printk("NFSD: Failure reading reboot recovery data\n"); +} + +/* initialization to perform when the nfsd service is started: */ + +static void +__nfs4_state_start(void) +{ + time_t grace_time; + boot_time = get_seconds(); - grace_time = max(old_lease_time, lease_time); - if (reclaim_str_hashtbl_size == 0) - grace_time = 0; - if (grace_time) - printk("NFSD: starting %ld-second grace period\n", grace_time); - grace_end = boot_time + grace_time; - INIT_WORK(&laundromat_work,laundromat_main, NULL); - schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ); + grace_time = max(user_lease_time, lease_time); + lease_time = user_lease_time; + in_grace = 1; + printk("NFSD: starting %ld-second grace period\n", grace_time); + laundry_wq = create_singlethread_workqueue("nfsd4"); + 
queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ); } int -nfs4_state_init(void) +nfs4_state_start(void) { int status; @@ -3174,7 +3260,8 @@ nfs4_state_init(void) status = nfsd4_init_slabs(); if (status) return status; - __nfs4_state_init(); + nfsd4_load_reboot_recovery_data(); + __nfs4_state_start(); nfs4_init = 1; return 0; } @@ -3182,14 +3269,7 @@ nfs4_state_init(void) int nfs4_in_grace(void) { - return get_seconds() < grace_end; -} - -void -set_no_grace(void) -{ - printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n"); - grace_end = get_seconds(); + return in_grace; } time_t @@ -3236,21 +3316,11 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - release_all_files(); cancel_delayed_work(&laundromat_work); - flush_scheduled_work(); + flush_workqueue(laundry_wq); + destroy_workqueue(laundry_wq); + nfsd4_shutdown_recdir(); nfs4_init = 0; - dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n", - list_add_perfile, list_del_perfile); - dprintk("NFSD: add_perclient %d del_perclient %d\n", - add_perclient, del_perclient); - dprintk("NFSD: alloc_file %d free_file %d\n", - alloc_file, free_file); - dprintk("NFSD: vfsopen %d vfsclose %d\n", - vfsopen, vfsclose); - dprintk("NFSD: alloc_delegation %d free_delegation %d\n", - alloc_delegation, free_delegation); - } void @@ -3263,56 +3333,48 @@ nfs4_state_shutdown(void) nfs4_unlock_state(); } +static void +nfs4_set_recdir(char *recdir) +{ + nfs4_lock_state(); + strcpy(user_recovery_dirname, recdir); + nfs4_unlock_state(); +} + +/* + * Change the NFSv4 recovery directory to recdir. + */ +int +nfs4_reset_recoverydir(char *recdir) +{ + int status; + struct nameidata nd; + + status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); + if (status) + return status; + status = -ENOTDIR; + if (S_ISDIR(nd.dentry->d_inode->i_mode)) { + nfs4_set_recdir(recdir); + status = 0; + } + path_release(&nd); + return status; +} + /* * Called when leasetime is changed. 
* - * if nfsd is not started, simply set the global lease. - * - * if nfsd(s) are running, lease change requires nfsv4 state to be reset. - * e.g: boot_time is reset, existing nfs4_client structs are - * used to fill reclaim_str_hashtbl, then all state (except for the - * reclaim_str_hashtbl) is re-initialized. - * - * if the old lease time is greater than the new lease time, the grace - * period needs to be set to the old lease time to allow clients to reclaim - * their state. XXX - we may want to set the grace period == lease time - * after an initial grace period == old lease time - * - * if an error occurs in this process, the new lease is set, but the server - * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace - * which means OPEN/LOCK/READ/WRITE will fail during grace period. - * - * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and - * OPEN and LOCK reclaims. + * The only way the protocol gives us to handle on-the-fly lease changes is to + * simulate a reboot. Instead of doing that, we just wait till the next time + * we start to register any changes in lease time. If the administrator + * really wants to change the lease time *now*, they can go ahead and bring + * nfsd down and then back up again after changing the lease time. 
*/ void nfs4_reset_lease(time_t leasetime) { - struct nfs4_client *clp; - int i; - - printk("NFSD: New leasetime %ld\n",leasetime); - if (!nfs4_init) - return; - nfs4_lock_state(); - old_lease_time = lease_time; - lease_time = leasetime; - - nfs4_release_reclaim(); - - /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */ - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) { - if (!nfs4_client_to_reclaim(clp->cl_name.data, - clp->cl_name.len)) { - nfs4_release_reclaim(); - goto init_state; - } - } - } -init_state: - __nfs4_state_shutdown(); - __nfs4_state_init(); - nfs4_unlock_state(); + lock_kernel(); + user_lease_time = leasetime; + unlock_kernel(); } - diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 36a058a112d5..91fb171d2ace 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -136,7 +136,7 @@ xdr_error: \ } \ } while (0) -u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) +static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) { /* We want more bytes than seem to be available. 
* Maybe we need a new page, maybe we have just run out @@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp, return 0; } -char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) { void *new = NULL; if (p == argp->tmp) { @@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME ); + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) + WRITE32(NFS4_FH_VOLATILE_ANY); + else + WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { /* @@ -1969,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open case NFS4_OPEN_DELEGATE_READ: RESERVE_SPACE(20 + sizeof(stateid_t)); WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); - WRITE32(0); + WRITE32(open->op_recall); /* * TODO: ACE's in delegations diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 161afdcb8f7d..841c562991e8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -51,6 +51,7 @@ enum { NFSD_Fh, NFSD_Threads, NFSD_Leasetime, + NFSD_RecoveryDir, }; /* @@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size); static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_threads(struct file *file, char *buf, size_t size); static ssize_t write_leasetime(struct file *file, char *buf, size_t size); +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Svc] = write_svc, @@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Fh] = write_filehandle, [NFSD_Threads] = write_threads, [NFSD_Leasetime] = write_leasetime, + [NFSD_RecoveryDir] = write_recoverydir, }; static ssize_t 
nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) @@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return strlen(buf); } +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + char *recdir; + int len, status; + + if (size > PATH_MAX || buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; + + recdir = mesg; + len = qword_get(&mesg, recdir, size); + if (len <= 0) + return -EINVAL; + + status = nfs4_reset_recoverydir(recdir); + return strlen(buf); +} + /*----------------------------------------------------------------------------*/ /* * populating the filesystem. @@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, #endif /* last one */ {""} }; @@ -397,9 +420,8 @@ static int __init init_nfsd(void) nfsd_cache_init(); /* RPC reply cache */ nfsd_export_init(); /* Exports table */ nfsd_lockd_init(); /* lockd->nfsd callbacks */ -#ifdef CONFIG_NFSD_V4 + nfs4_state_init(); /* NFSv4 locking state */ nfsd_idmap_init(); /* Name to ID mapping */ -#endif /* CONFIG_NFSD_V4 */ if (proc_mkdir("fs/nfs", NULL)) { struct proc_dir_entry *entry; entry = create_proc_entry("fs/nfs/exports", 0, NULL); @@ -426,9 +448,7 @@ static void __exit exit_nfsd(void) remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); -#ifdef CONFIG_NFSD_V4 nfsd_idmap_shutdown(); -#endif /* CONFIG_NFSD_V4 */ unregister_filesystem(&nfsd_fs_type); } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 757f9d208034..0aa1b9603d7f 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -591,6 +591,7 @@ nfserrno (int errno) { nfserr_dropit, -ENOMEM }, { nfserr_badname, 
-ESRCH }, { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, { -1, -EIO } }; int i; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 02ded7cfbdcf..07b9a065e9da 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,7 @@ #include <linux/nfsd/stats.h> #include <linux/nfsd/cache.h> #include <linux/lockd/bind.h> +#include <linux/nfsacl.h> #define NFSDDBG_FACILITY NFSDDBG_SVC @@ -94,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs) error = nfsd_racache_init(2*nrservs); if (error<0) goto out; - error = nfs4_state_init(); + error = nfs4_state_start(); if (error<0) goto out; if (!nfsd_serv) { @@ -362,6 +363,32 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) return 1; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static struct svc_stat nfsd_acl_svcstats; +static struct svc_version * nfsd_acl_version[] = { + [2] = &nfsd_acl_version2, + [3] = &nfsd_acl_version3, +}; + +#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0])) +static struct svc_program nfsd_acl_program = { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsd", + .pg_class = "nfsd", + .pg_stats = &nfsd_acl_svcstats, +}; + +static struct svc_stat nfsd_acl_svcstats = { + .program = &nfsd_acl_program, +}; + +#define nfsd_acl_program_p &nfsd_acl_program +#else +#define nfsd_acl_program_p NULL +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; static struct svc_version * nfsd_version[] = { @@ -376,6 +403,7 @@ static struct svc_version * nfsd_version[] = { #define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) struct svc_program nfsd_program = { + .pg_next = nfsd_acl_program_p, .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c 
index 948b08287c99..b45999ff33e6 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + (NFS_FHSIZE >> 2); } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_fattr(rqstp, p, fhp); +} /* * XDR decode functions diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3e9d217236e..be24ead89d94 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -46,10 +46,9 @@ #include <linux/nfsd/nfsfh.h> #include <linux/quotaops.h> #include <linux/dnotify.h> -#ifdef CONFIG_NFSD_V4 #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> -#include <linux/xattr_acl.h> +#ifdef CONFIG_NFSD_V4 #include <linux/xattr.h> #include <linux/nfs4.h> #include <linux/nfs4_acl.h> @@ -424,13 +423,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; if (pacl) { - error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); + error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); if (error < 0) goto out_nfserr; } if (dpacl) { - error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); + error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); if (error < 0) goto out_nfserr; } @@ -497,7 +496,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); + pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(pacl)) { @@ -507,7 +506,7 @@ 
nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac } if (S_ISDIR(inode->i_mode)) { - dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); + dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) dpacl = NULL; else if (IS_ERR(dpacl)) { @@ -1857,3 +1856,107 @@ nfsd_racache_init(int cache_size) nfsdstats.ra_size = cache_size; return 0; } + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +struct posix_acl * +nfsd_get_posix_acl(struct svc_fh *fhp, int type) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + ssize_t size; + struct posix_acl *acl; + + if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + return ERR_PTR(-EOPNOTSUPP); + switch(type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } else if (size > 0) { + value = kmalloc(size, GFP_KERNEL); + if (!value) { + acl = ERR_PTR(-ENOMEM); + goto getout; + } + size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } + } + acl = posix_acl_from_xattr(value, size); + +getout: + kfree(value); + return acl; +} + +int +nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + size_t size; + int error; + + if (!IS_POSIXACL(inode) || !inode->i_op || + !inode->i_op->setxattr || !inode->i_op->removexattr) + return -EOPNOTSUPP; + switch(type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return -EOPNOTSUPP; + } + + if (acl && acl->a_count) { + size = 
posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_KERNEL); + if (!value) + return -ENOMEM; + size = posix_acl_to_xattr(acl, value, size); + if (size < 0) { + error = size; + goto getout; + } + } else + size = 0; + + if (!fhp->fh_locked) + fh_lock(fhp); /* unlocking is done automatically */ + if (size) + error = inode->i_op->setxattr(fhp->fh_dentry, name, + value, size, 0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { + error = inode->i_op->removexattr(fhp->fh_dentry, name); + if (error == -ENODATA) + error = 0; + } + } + +getout: + kfree(value); + return error; +} +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/open.c b/fs/open.c index 963bd81a44c8..3f4a4286fdc4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -21,6 +21,7 @@ #include <linux/vfs.h> #include <asm/uaccess.h> #include <linux/fs.h> +#include <linux/personality.h> #include <linux/pagemap.h> #include <linux/syscalls.h> @@ -807,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) /* NB: we're sure to have correct a_ops only after f_op->open */ if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) { + if (!f->f_mapping->a_ops || + ((!f->f_mapping->a_ops->direct_IO) && + (!f->f_mapping->a_ops->get_xip_page))) { fput(f); f = ERR_PTR(-EINVAL); } @@ -933,31 +936,27 @@ EXPORT_SYMBOL(fd_install); asmlinkage long sys_open(const char __user * filename, int flags, int mode) { char * tmp; - int fd, error; + int fd; + + if (force_o_largefile()) + flags |= O_LARGEFILE; -#if BITS_PER_LONG != 32 - flags |= O_LARGEFILE; -#endif tmp = getname(filename); fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + } else { + fd_install(fd, f); + } } -out: 
putname(tmp); } return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; } EXPORT_SYMBOL_GPL(sys_open); @@ -980,23 +979,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode) */ int filp_close(struct file *filp, fl_owner_t id) { - int retval; - - /* Report and clear outstanding errors */ - retval = filp->f_error; - if (retval) - filp->f_error = 0; + int retval = 0; if (!file_count(filp)) { printk(KERN_ERR "VFS: Close: file count is 0\n"); - return retval; + return 0; } - if (filp->f_op && filp->f_op->flush) { - int err = filp->f_op->flush(filp); - if (!retval) - retval = err; - } + if (filp->f_op && filp->f_op->flush) + retval = filp->f_op->flush(filp); dnotify_flush(filp, id); locks_remove_posix(filp, id); diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index 4c83c17969e1..66d5cc26fafb 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -17,4 +17,3 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o obj-$(CONFIG_IBM_PARTITION) += ibm.o obj-$(CONFIG_EFI_PARTITION) += efi.o -obj-$(CONFIG_NEC98_PARTITION) += nec98.o msdos.o diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 2cab98a9a621..77e178f13162 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -79,9 +79,6 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = #ifdef CONFIG_LDM_PARTITION ldm_partition, /* this must come before msdos */ #endif -#ifdef CONFIG_NEC98_PARTITION - nec98_partition, /* must be come before `msdos_partition' */ -#endif #ifdef CONFIG_MSDOS_PARTITION msdos_partition, #endif diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 43adcc68e471..17ae8ecd9e8b 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -30,7 +30,3 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) extern int warn_no_part; -extern void parse_bsd(struct parsed_partitions *state, - struct block_device *bdev, u32 offset, 
u32 size, - int origin, char *flavour, int max_partitions); - diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 584a27b2bbd5..9935d254186e 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -202,12 +202,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, #endif } -#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION) +#if defined(CONFIG_BSD_DISKLABEL) /* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ -void +static void parse_bsd(struct parsed_partitions *state, struct block_device *bdev, u32 offset, u32 size, int origin, char *flavour, int max_partitions) diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 738b9b602932..7431d7ba2d09 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -11,4 +11,5 @@ proc-y += inode.o root.o base.o generic.o array.o \ kmsg.o proc_tty.o proc_misc.o proc-$(CONFIG_PROC_KCORE) += kcore.o +proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff --git a/fs/proc/base.c b/fs/proc/base.c index e31903aadd96..ace151fa4878 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task) (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) goto out; rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) goto out; if (security_ptrace(current, task)) goto out; @@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task) if (mm) dumpable = mm->dumpable; task_unlock(task); - return dumpable; + if(dumpable == 1) + return 1; + return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index a60a3b3d8a7b..a3453555a94e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -44,6 +44,7 @@ #include <linux/jiffies.h> #include <linux/sysrq.h> #include <linux/vmalloc.h> +#include <linux/crash_dump.h> 
#include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -219,6 +220,19 @@ static struct file_operations fragmentation_file_operations = { .release = seq_release, }; +extern struct seq_operations zoneinfo_op; +static int zoneinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &zoneinfo_op); +} + +static struct file_operations proc_zoneinfo_file_operations = { + .open = zoneinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int version_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -438,7 +452,7 @@ static int devices_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = get_chrdev_list(page); - len += get_blkdev_list(page+len); + len += get_blkdev_list(page+len, len); return proc_calc_metrics(page, start, off, count, eof, len); } @@ -589,6 +603,7 @@ void __init proc_misc_init(void) create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); + create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); create_seq_entry("diskstats", 0, &proc_diskstats_operations); #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); @@ -604,6 +619,11 @@ void __init proc_misc_init(void) (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; } #endif +#ifdef CONFIG_PROC_VMCORE + proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); + if (proc_vmcore) + proc_vmcore->proc_fops = &proc_vmcore_operations; +#endif #ifdef CONFIG_MAGIC_SYSRQ entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); if (entry) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c new file mode 100644 index 000000000000..3b2e7b69e63a --- /dev/null +++ b/fs/proc/vmcore.c @@ -0,0 +1,669 @@ +/* + * fs/proc/vmcore.c Interface for accessing the crash + * dump from the system's previous 
life. + * Heavily borrowed from fs/proc/kcore.c + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM Corporation, 2004. All rights reserved + * + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/elf.h> +#include <linux/elfcore.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/crash_dump.h> +#include <linux/list.h> +#include <asm/uaccess.h> +#include <asm/io.h> + +/* List representing chunks of contiguous memory areas and their offsets in + * vmcore file. + */ +static LIST_HEAD(vmcore_list); + +/* Stores the pointer to the buffer containing kernel elf core headers. */ +static char *elfcorebuf; +static size_t elfcorebuf_sz; + +/* Total size of vmcore file. */ +static u64 vmcore_size; + +struct proc_dir_entry *proc_vmcore = NULL; + +/* Reads a page from the oldmem device from given offset. */ +static ssize_t read_from_oldmem(char *buf, size_t count, + loff_t *ppos, int userbuf) +{ + unsigned long pfn, offset; + size_t nr_bytes; + ssize_t read = 0, tmp; + + if (!count) + return 0; + + offset = (unsigned long)(*ppos % PAGE_SIZE); + pfn = (unsigned long)(*ppos / PAGE_SIZE); + if (pfn > saved_max_pfn) + return -EINVAL; + + do { + if (count > (PAGE_SIZE - offset)) + nr_bytes = PAGE_SIZE - offset; + else + nr_bytes = count; + + tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); + if (tmp < 0) + return tmp; + *ppos += nr_bytes; + count -= nr_bytes; + buf += nr_bytes; + read += nr_bytes; + ++pfn; + offset = 0; + } while (count); + + return read; +} + +/* Maps vmcore file offset to respective physical address in memory.
*/ +static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, + struct vmcore **m_ptr) +{ + struct vmcore *m; + u64 paddr; + + list_for_each_entry(m, vc_list, list) { + u64 start, end; + start = m->offset; + end = m->offset + m->size - 1; + if (offset >= start && offset <= end) { + paddr = m->paddr + offset - start; + *m_ptr = m; + return paddr; + } + } + *m_ptr = NULL; + return 0; +} + +/* Read from the ELF header and then the crash dump. On error, negative value is + * returned otherwise number of bytes read are returned. + */ +static ssize_t read_vmcore(struct file *file, char __user *buffer, + size_t buflen, loff_t *fpos) +{ + ssize_t acc = 0, tmp; + size_t tsz, nr_bytes; + u64 start; + struct vmcore *curr_m = NULL; + + if (buflen == 0 || *fpos >= vmcore_size) + return 0; + + /* trim buflen to not go beyond EOF */ + if (buflen > vmcore_size - *fpos) + buflen = vmcore_size - *fpos; + + /* Read ELF core header */ + if (*fpos < elfcorebuf_sz) { + tsz = elfcorebuf_sz - *fpos; + if (buflen < tsz) + tsz = buflen; + if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) + return -EFAULT; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; + } + + start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); + if (!curr_m) + return -EINVAL; + if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) + tsz = buflen; + + /* Calculate left bytes in current memory segment. 
*/ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + + while (buflen) { + tmp = read_from_oldmem(buffer, tsz, &start, 1); + if (tmp < 0) + return tmp; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + if (start >= (curr_m->paddr + curr_m->size)) { + if (curr_m->list.next == &vmcore_list) + return acc; /*EOF*/ + curr_m = list_entry(curr_m->list.next, + struct vmcore, list); + start = curr_m->paddr; + } + if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) + tsz = buflen; + /* Calculate left bytes in current memory segment. */ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + } + return acc; +} + +static int open_vmcore(struct inode *inode, struct file *filp) +{ + return 0; +} + +struct file_operations proc_vmcore_operations = { + .read = read_vmcore, + .open = open_vmcore, +}; + +static struct vmcore* __init get_new_element(void) +{ + struct vmcore *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) + memset(p, 0, sizeof(*p)); + return p; +} + +static u64 __init get_vmcore_size_elf64(char *elfptr) +{ + int i; + u64 size; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr *phdr_ptr; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++) { + size += phdr_ptr->p_memsz; + phdr_ptr++; + } + return size; +} + +static u64 __init get_vmcore_size_elf32(char *elfptr) +{ + int i; + u64 size; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr *phdr_ptr; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++) { + size += phdr_ptr->p_memsz; + phdr_ptr++; + } + return size; +} + +/* Merges all the PT_NOTE headers into one. 
*/ +static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, + struct list_head *vc_list) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr phdr, *phdr_ptr; + Elf64_Nhdr *nhdr_ptr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + int j; + void *notes_section; + struct vmcore *new; + u64 offset, max_sz, sz, real_sz = 0; + if (phdr_ptr->p_type != PT_NOTE) + continue; + nr_ptnote++; + max_sz = phdr_ptr->p_memsz; + offset = phdr_ptr->p_offset; + notes_section = kmalloc(max_sz, GFP_KERNEL); + if (!notes_section) + return -ENOMEM; + rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + if (rc < 0) { + kfree(notes_section); + return rc; + } + nhdr_ptr = notes_section; + for (j = 0; j < max_sz; j += sz) { + if (nhdr_ptr->n_namesz == 0) + break; + sz = sizeof(Elf64_Nhdr) + + ((nhdr_ptr->n_namesz + 3) & ~3) + + ((nhdr_ptr->n_descsz + 3) & ~3); + real_sz += sz; + nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); + } + + /* Add this contiguous chunk of notes section to vmcore list.*/ + new = get_new_element(); + if (!new) { + kfree(notes_section); + return -ENOMEM; + } + new->paddr = phdr_ptr->p_offset; + new->size = real_sz; + list_add_tail(&new->list, vc_list); + phdr_sz += real_sz; + kfree(notes_section); + } + + /* Prepare merged PT_NOTE program header. */ + phdr.p_type = PT_NOTE; + phdr.p_flags = 0; + note_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); + phdr.p_offset = note_off; + phdr.p_vaddr = phdr.p_paddr = 0; + phdr.p_filesz = phdr.p_memsz = phdr_sz; + phdr.p_align = 0; + + /* Add merged PT_NOTE program header*/ + tmp = elfptr + sizeof(Elf64_Ehdr); + memcpy(tmp, &phdr, sizeof(phdr)); + tmp += sizeof(phdr); + + /* Remove unwanted PT_NOTE program headers. 
*/ + i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); + *elfsz = *elfsz - i; + memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); + + /* Modify e_phnum to reflect merged headers. */ + ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + + return 0; +} + +/* Merges all the PT_NOTE headers into one. */ +static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, + struct list_head *vc_list) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr phdr, *phdr_ptr; + Elf32_Nhdr *nhdr_ptr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + int j; + void *notes_section; + struct vmcore *new; + u64 offset, max_sz, sz, real_sz = 0; + if (phdr_ptr->p_type != PT_NOTE) + continue; + nr_ptnote++; + max_sz = phdr_ptr->p_memsz; + offset = phdr_ptr->p_offset; + notes_section = kmalloc(max_sz, GFP_KERNEL); + if (!notes_section) + return -ENOMEM; + rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + if (rc < 0) { + kfree(notes_section); + return rc; + } + nhdr_ptr = notes_section; + for (j = 0; j < max_sz; j += sz) { + if (nhdr_ptr->n_namesz == 0) + break; + sz = sizeof(Elf32_Nhdr) + + ((nhdr_ptr->n_namesz + 3) & ~3) + + ((nhdr_ptr->n_descsz + 3) & ~3); + real_sz += sz; + nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); + } + + /* Add this contiguous chunk of notes section to vmcore list.*/ + new = get_new_element(); + if (!new) { + kfree(notes_section); + return -ENOMEM; + } + new->paddr = phdr_ptr->p_offset; + new->size = real_sz; + list_add_tail(&new->list, vc_list); + phdr_sz += real_sz; + kfree(notes_section); + } + + /* Prepare merged PT_NOTE program header. 
*/ + phdr.p_type = PT_NOTE; + phdr.p_flags = 0; + note_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); + phdr.p_offset = note_off; + phdr.p_vaddr = phdr.p_paddr = 0; + phdr.p_filesz = phdr.p_memsz = phdr_sz; + phdr.p_align = 0; + + /* Add merged PT_NOTE program header*/ + tmp = elfptr + sizeof(Elf32_Ehdr); + memcpy(tmp, &phdr, sizeof(phdr)); + tmp += sizeof(phdr); + + /* Remove unwanted PT_NOTE program headers. */ + i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); + *elfsz = *elfsz - i; + memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); + + /* Modify e_phnum to reflect merged headers. */ + ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + + return 0; +} + +/* Add memory chunks represented by program headers to vmcore list. Also update + * the new offset fields of exported program headers. */ +static int __init process_ptload_program_headers_elf64(char *elfptr, + size_t elfsz, + struct list_head *vc_list) +{ + int i; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr *phdr_ptr; + loff_t vmcore_off; + struct vmcore *new; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ + + /* First program header is PT_NOTE header. */ + vmcore_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + + phdr_ptr->p_memsz; /* Note sections */ + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_LOAD) + continue; + + /* Add this contiguous chunk of memory to vmcore list.*/ + new = get_new_element(); + if (!new) + return -ENOMEM; + new->paddr = phdr_ptr->p_offset; + new->size = phdr_ptr->p_memsz; + list_add_tail(&new->list, vc_list); + + /* Update the program header offset. 
*/ + phdr_ptr->p_offset = vmcore_off; + vmcore_off = vmcore_off + phdr_ptr->p_memsz; + } + return 0; +} + +static int __init process_ptload_program_headers_elf32(char *elfptr, + size_t elfsz, + struct list_head *vc_list) +{ + int i; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr *phdr_ptr; + loff_t vmcore_off; + struct vmcore *new; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ + + /* First program header is PT_NOTE header. */ + vmcore_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + + phdr_ptr->p_memsz; /* Note sections */ + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_LOAD) + continue; + + /* Add this contiguous chunk of memory to vmcore list.*/ + new = get_new_element(); + if (!new) + return -ENOMEM; + new->paddr = phdr_ptr->p_offset; + new->size = phdr_ptr->p_memsz; + list_add_tail(&new->list, vc_list); + + /* Update the program header offset */ + phdr_ptr->p_offset = vmcore_off; + vmcore_off = vmcore_off + phdr_ptr->p_memsz; + } + return 0; +} + +/* Sets offset fields of vmcore elements. */ +static void __init set_vmcore_list_offsets_elf64(char *elfptr, + struct list_head *vc_list) +{ + loff_t vmcore_off; + Elf64_Ehdr *ehdr_ptr; + struct vmcore *m; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + + /* Skip Elf header and program headers. */ + vmcore_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); + + list_for_each_entry(m, vc_list, list) { + m->offset = vmcore_off; + vmcore_off += m->size; + } +} + +/* Sets offset fields of vmcore elements. */ +static void __init set_vmcore_list_offsets_elf32(char *elfptr, + struct list_head *vc_list) +{ + loff_t vmcore_off; + Elf32_Ehdr *ehdr_ptr; + struct vmcore *m; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + + /* Skip Elf header and program headers. 
*/ + vmcore_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); + + list_for_each_entry(m, vc_list, list) { + m->offset = vmcore_off; + vmcore_off += m->size; + } +} + +static int __init parse_crash_elf64_headers(void) +{ + int rc=0; + Elf64_Ehdr ehdr; + u64 addr; + + addr = elfcorehdr_addr; + + /* Read Elf header */ + rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); + if (rc < 0) + return rc; + + /* Do some basic Verification. */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + !elf_check_arch(&ehdr) || + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf64_Ehdr) || + ehdr.e_phentsize != sizeof(Elf64_Phdr) || + ehdr.e_phnum == 0) { + printk(KERN_WARNING "Warning: Core image elf header is not" + " sane\n"); + return -EINVAL; + } + + /* Read in all elf headers. */ + elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); + elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + if (!elfcorebuf) + return -ENOMEM; + addr = elfcorehdr_addr; + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); + if (rc < 0) { + kfree(elfcorebuf); + return rc; + } + + /* Merge all PT_NOTE headers into one. */ + rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); + if (rc) { + kfree(elfcorebuf); + return rc; + } + rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, + &vmcore_list); + if (rc) { + kfree(elfcorebuf); + return rc; + } + set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); + return 0; +} + +static int __init parse_crash_elf32_headers(void) +{ + int rc=0; + Elf32_Ehdr ehdr; + u64 addr; + + addr = elfcorehdr_addr; + + /* Read Elf header */ + rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); + if (rc < 0) + return rc; + + /* Do some basic Verification.
*/ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + !elf_check_arch(&ehdr) || + ehdr.e_ident[EI_CLASS] != ELFCLASS32|| + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf32_Ehdr) || + ehdr.e_phentsize != sizeof(Elf32_Phdr) || + ehdr.e_phnum == 0) { + printk(KERN_WARNING "Warning: Core image elf header is not" + " sane\n"); + return -EINVAL; + } + + /* Read in all elf headers. */ + elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); + elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + if (!elfcorebuf) + return -ENOMEM; + addr = elfcorehdr_addr; + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); + if (rc < 0) { + kfree(elfcorebuf); + return rc; + } + + /* Merge all PT_NOTE headers into one. */ + rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); + if (rc) { + kfree(elfcorebuf); + return rc; + } + rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, + &vmcore_list); + if (rc) { + kfree(elfcorebuf); + return rc; + } + set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); + return 0; +} + +static int __init parse_crash_elf_headers(void) +{ + unsigned char e_ident[EI_NIDENT]; + u64 addr; + int rc=0; + + addr = elfcorehdr_addr; + rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); + if (rc < 0) + return rc; + if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { + printk(KERN_WARNING "Warning: Core image elf header" + " not found\n"); + return -EINVAL; + } + + if (e_ident[EI_CLASS] == ELFCLASS64) { + rc = parse_crash_elf64_headers(); + if (rc) + return rc; + + /* Determine vmcore size. */ + vmcore_size = get_vmcore_size_elf64(elfcorebuf); + } else if (e_ident[EI_CLASS] == ELFCLASS32) { + rc = parse_crash_elf32_headers(); + if (rc) + return rc; + + /* Determine vmcore size.
*/ + vmcore_size = get_vmcore_size_elf32(elfcorebuf); + } else { + printk(KERN_WARNING "Warning: Core image elf header is not" + " sane\n"); + return -EINVAL; + } + return 0; +} + +/* Init function for vmcore module. */ +static int __init vmcore_init(void) +{ + int rc = 0; + + /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ + if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX)) + return rc; + rc = parse_crash_elf_headers(); + if (rc) { + printk(KERN_WARNING "Kdump: vmcore not initialized\n"); + return rc; + } + + /* Initialize /proc/vmcore size if proc is already up. */ + if (proc_vmcore) + proc_vmcore->size = vmcore_size; + return 0; +} +module_init(vmcore_init) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index cd66147cca04..7a8f5595c26f 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; else { le = (struct qnx4_link_info*)de; - ino = ( le->dl_inode_blk - 1 ) * + ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + le->dl_inode_ndx; } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index aa92d6b76a9a..b79162a35478 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock ) struct buffer_head *bh = NULL; struct qnx4_xblk *xblk = NULL; struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); - qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); + u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { // iblock is in the first extent. This is easy. 
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) printk("qnx4: unable to read the superblock\n"); goto outnobh; } - if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) { + if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) { if (!silent) printk("qnx4: wrong fsid in superblock.\n"); goto out; diff --git a/fs/quota.c b/fs/quota.c index 3f0333a51a23..f5d1cff55196 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return error; } -static struct super_block *get_super_to_sync(int type) -{ - struct list_head *head; - int cnt, dirty; - -restart: - spin_lock(&sb_lock); - list_for_each(head, &super_blocks) { - struct super_block *sb = list_entry(head, struct super_block, s_list); - - /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - static void quota_sync_sb(struct super_block *sb, int type) { int cnt; @@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type) { + int cnt, dirty; + if (sb) { if (sb->s_qcop->quota_sync) quota_sync_sb(sb, type); + return; } - else { - while ((sb = get_super_to_sync(type)) != NULL) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - drop_super(sb); - } + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + /* This test just improves performance so it needn't be reliable... 
*/ + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_any_dirty(&sb_dqopt(sb)->info[cnt])) + dirty = 1; + if (!dirty) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root && sb->s_qcop->quota_sync) + quota_sync_sb(sb, type); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); } /* Copy parameters and call proper function */ diff --git a/fs/read_write.c b/fs/read_write.c index c4c2bee373ed..9292f5fa4d62 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -203,6 +203,16 @@ Einval: return -EINVAL; } +static void wait_on_retry_sync_kiocb(struct kiocb *iocb) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + if (!kiocbIsKicked(iocb)) + schedule(); + else + kiocbClearKicked(iocb); + __set_current_state(TASK_RUNNING); +} + ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct kiocb kiocb; @@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos); + while (-EIOCBRETRY == + (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + wait_on_retry_sync_kiocb(&kiocb); + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; @@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos); + while (-EIOCBRETRY == + (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + wait_on_retry_sync_kiocb(&kiocb); + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 2230afff1870..12e91209544e 100644 --- 
a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region( /* If we came here, it means we absolutely need to open a transaction, since we need to allocate some blocks */ reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. - res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough + res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough if (res) goto error_exit; reiserfs_update_inode_transaction(inode) ; @@ -576,7 +576,7 @@ error_exit: int err; // update any changes we made to blk count reiserfs_update_sd(th, inode); - err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); + err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); if (err) res = err; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2711dff1b7b4..289d864fe731 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page, void reiserfs_delete_inode (struct inode * inode) { /* We need blocks for transaction + (user+group) quota update (possibly delete) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); struct reiserfs_transaction_handle th ; reiserfs_write_lock(inode->i_sb); @@ -254,6 +254,7 @@ static int _get_block_create_0 (struct inode * inode, long block, char * p = NULL; int chars; int ret ; + int result ; int done = 0 ; unsigned long offset ; @@ -262,10 +263,13 @@ static int _get_block_create_0 (struct inode * inode, long block, (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); research: - if 
(search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { + result = search_for_position_by_key (inode->i_sb, &key, &path) ; + if (result != POSITION_FOUND) { pathrelse (&path); if (p) kunmap(bh_result->b_page) ; + if (result == IO_ERROR) + return -EIO; // We do not return -ENOENT if there is a hole but page is uptodate, because it means // That there is some MMAPED data associated with it that is yet to be written to disk. if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { @@ -382,8 +386,9 @@ research: // update key to look for the next piece set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); - if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) - // we read something from tail, even if now we got IO_ERROR + result = search_for_position_by_key (inode->i_sb, &key, &path); + if (result != POSITION_FOUND) + // i/o error most likely break; bh = get_last_bh (&path); ih = get_ih (&path); @@ -394,6 +399,10 @@ research: finished: pathrelse (&path); + + if (result == IO_ERROR) + return -EIO; + /* this buffer has valid data, but isn't valid for io. mapping it to * block #0 tells the rest of reiserfs it just has a tail in it */ @@ -591,7 +600,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, XXX in practically impossible worst case direct2indirect() can incur (much) more than 3 balancings. 
quota update for user, group */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); int version; int dangle = 1; loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; @@ -2796,12 +2805,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { if (!error) { struct reiserfs_transaction_handle th; + int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2; /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ - journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); + error = journal_begin(&th, inode->i_sb, jbegin_count); + if (error) + goto out; error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; if (error) { - journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); + journal_end(&th, inode->i_sb, jbegin_count); goto out; } /* Update corresponding info in inode so that everything is in @@ -2811,7 +2823,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; mark_inode_dirty(inode); - journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); + error = journal_end(&th, inode->i_sb, jbegin_count); } } if (!error) diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 94dc42475a04..76caedf737f2 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -36,10 +36,16 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, /* following two cases are taken from fs/ext2/ioctl.c by Remy Card (card@masi.ibp.fr) */ case REISERFS_IOC_GETFLAGS: + if (!reiserfs_attrs (inode->i_sb)) + return -ENOTTY; + flags = REISERFS_I(inode) -> i_attrs; i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); return put_user(flags, (int __user *) arg); case REISERFS_IOC_SETFLAGS: { + if (!reiserfs_attrs (inode->i_sb)) + 
return -ENOTTY; + if (IS_RDONLY(inode)) return -EROFS; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 3072cfdee959..d1bcf0da6728 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -645,18 +645,22 @@ struct buffer_chunk { static void write_chunk(struct buffer_chunk *chunk) { int i; + get_fs_excl(); for (i = 0; i < chunk->nr ; i++) { submit_logged_buffer(chunk->bh[i]) ; } chunk->nr = 0; + put_fs_excl(); } static void write_ordered_chunk(struct buffer_chunk *chunk) { int i; + get_fs_excl(); for (i = 0; i < chunk->nr ; i++) { submit_ordered_buffer(chunk->bh[i]) ; } chunk->nr = 0; + put_fs_excl(); } static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, @@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list return 0 ; } + get_fs_excl(); + /* before we can put our commit blocks on disk, we have to make sure everyone older than ** us is on disk too */ @@ -1055,6 +1061,7 @@ put_jl: if (retval) reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); + put_fs_excl(); return retval; } @@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s, return 0 ; } + get_fs_excl(); + /* if all the work is already done, get out of here */ if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { @@ -1450,6 +1459,7 @@ flush_older_and_return: put_journal_list(s, jl); if (flushall) up(&journal->j_flush_sem); + put_fs_excl(); return err ; } @@ -2631,6 +2641,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup int retval; reiserfs_check_lock_depth(p_s_sb, "journal_begin") ; + if (nblocks > journal->j_trans_max) + BUG(); PROC_INFO_INC( p_s_sb, journal.journal_being ); /* set here for journal_join */ @@ -2717,6 +2729,7 @@ relock: th->t_trans_id = journal->j_trans_id ; unlock_journal(p_s_sb) ; INIT_LIST_HEAD (&th->t_list); + get_fs_excl(); return 0 ; out_fail: @@ -3524,6 +3537,7 @@ static int 
do_journal_end(struct reiserfs_transaction_handle *th, struct super_b BUG_ON (th->t_refcount > 1); BUG_ON (!th->t_trans_id); + put_fs_excl(); current->journal_info = th->t_handle_save; reiserfs_check_lock_depth(p_s_sb, "journal end"); if (journal->j_len == 0) { diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 7d4dc5f5aa8b..4a333255f27a 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, int retval; struct inode * inode; /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); struct reiserfs_transaction_handle th ; int locked; @@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, struct inode * inode; struct reiserfs_transaction_handle th ; /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); int locked; if (!new_valid_dev(rdev)) @@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) struct inode * inode; struct reiserfs_transaction_handle th ; /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * 
(REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); int locked; #ifdef DISPLACE_NEW_PACKING_LOCALITIES @@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) /* we will be doing 2 balancings and update 2 stat data, we change quotas - * of the owner of the directory and of the owner of the parent directory */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + * of the owner of the directory and of the owner of the parent directory. + * The quota structure is possibly deleted only on last iput => outside + * of this transaction */ + jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count) ; @@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry) inode = dentry->d_inode; /* in this transaction we can be doing at max two balancings and update - two stat datas, we change quotas of the owner of the directory and of - the owner of the parent directory */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + * two stat datas, we change quotas of the owner of the directory and of + * the owner of the parent directory. 
The quota structure is possibly + * deleted only on iput => outside of this transaction */ + jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count) ; @@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir, struct reiserfs_transaction_handle th ; int mode = S_IFLNK | S_IRWXUGO; /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); if (!(inode = new_inode(parent_dir->i_sb))) { return -ENOMEM ; @@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct struct inode *inode = old_dentry->d_inode; struct reiserfs_transaction_handle th ; /* We need blocks for transaction + update of quotas for the owners of the directory */ - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); reiserfs_write_lock(dir->i_sb); if (inode->i_nlink >= REISERFS_LINK_MAX) { @@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, pointed initially and (5) maybe block containing ".." 
of renamed directory quota updates: two parent directories */ - jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS; + jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); old_inode = old_dentry->d_inode; new_dentry_inode = new_dentry->d_inode; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index c47f8fd31a2d..63158491e152 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb; const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; /* Maximal possible key. It is never in the tree. */ -const struct reiserfs_key MAX_KEY = { +static const struct reiserfs_key MAX_KEY = { __constant_cpu_to_le32(0xffffffff), __constant_cpu_to_le32(0xffffffff), {{__constant_cpu_to_le32(0xffffffff), diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b35b87744983..4b80ab95d338 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st {"jdev", .arg_required = 'j', .values = NULL}, {"nolargeio", .arg_required = 'w', .values = NULL}, {"commit", .arg_required = 'c', .values = NULL}, - {"usrquota",}, - {"grpquota",}, + {"usrquota", .setmask = 1<<REISERFS_QUOTA}, + {"grpquota", .setmask = 1<<REISERFS_QUOTA}, + {"noquota", .clrmask = 1<<REISERFS_QUOTA}, {"errors", .arg_required = 'e', .values = error_actions}, {"usrjquota", .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, {"grpjquota", .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, @@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st return 0; } strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); + *mount_options |= 1<<REISERFS_QUOTA; } else { if (REISERFS_SB(s)->s_qf_names[qtype]) { @@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st reiserfs_warning(s, 
"reiserfs_parse_options: journalled quota format not specified."); return 0; } + /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ + if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) { + reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on."); + return 0; + } #endif + return 1; } @@ -1045,10 +1053,9 @@ static void handle_barrier_mode(struct super_block *s, unsigned long bits) { static void handle_attrs( struct super_block *s ) { - struct reiserfs_super_block * rs; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); if( reiserfs_attrs( s ) ) { - rs = SB_DISK_SUPER_BLOCK (s); if( old_format_only(s) ) { reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" ); REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); @@ -1058,6 +1065,8 @@ static void handle_attrs( struct super_block *s ) reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" ); REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); } + } else if (le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared) { + REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS; } } @@ -1105,6 +1114,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a safe_mask |= 1 << REISERFS_ERROR_RO; safe_mask |= 1 << REISERFS_ERROR_CONTINUE; safe_mask |= 1 << REISERFS_ERROR_PANIC; + safe_mask |= 1 << REISERFS_QUOTA; /* Update the bitmask, taking care to keep * the bits we're not allowed to change here */ @@ -1841,13 +1851,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf) static int reiserfs_dquot_initialize(struct inode *inode, int type) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; /* We may create quota structure so we need to reserve enough blocks */ reiserfs_write_lock(inode->i_sb); - journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); + ret = 
journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (ret) + goto out; ret = dquot_initialize(inode, type); - journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); + err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(inode->i_sb); return ret; } @@ -1855,13 +1870,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type) static int reiserfs_dquot_drop(struct inode *inode) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; /* We may delete quota structure so we need to reserve enough blocks */ reiserfs_write_lock(inode->i_sb); - journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); + ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (ret) + goto out; ret = dquot_drop(inode); - journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); + err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(inode->i_sb); return ret; } @@ -1869,12 +1889,17 @@ static int reiserfs_dquot_drop(struct inode *inode) static int reiserfs_write_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; reiserfs_write_lock(dquot->dq_sb); - journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); + ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; ret = dquot_commit(dquot); - journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); + err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -1882,12 +1907,17 @@ static int reiserfs_write_dquot(struct dquot *dquot) static int reiserfs_acquire_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; 
reiserfs_write_lock(dquot->dq_sb); - journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); + ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; ret = dquot_acquire(dquot); - journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); + err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -1895,12 +1925,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) static int reiserfs_release_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; reiserfs_write_lock(dquot->dq_sb); - journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); + ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (ret) + goto out; ret = dquot_release(dquot); - journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); + err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -1920,39 +1955,29 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot) static int reiserfs_write_info(struct super_block *sb, int type) { struct reiserfs_transaction_handle th; - int ret; + int ret, err; /* Data block + inode block */ reiserfs_write_lock(sb); - journal_begin(&th, sb, 2); + ret = journal_begin(&th, sb, 2); + if (ret) + goto out; ret = dquot_commit_info(sb, type); - journal_end(&th, sb, 2); + err = journal_end(&th, sb, 2); + if (!ret && err) + ret = err; +out: reiserfs_write_unlock(sb); return ret; } /* - * Turn on quotas during mount time - we need to find - * the quota file and such... + * Turn on quotas during mount time - we need to find the quota file and such... 
*/ static int reiserfs_quota_on_mount(struct super_block *sb, int type) { - int err; - struct dentry *dentry; - struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type], - .hash = 0, - .len = strlen(REISERFS_SB(sb)->s_qf_names[type])}; - - dentry = lookup_hash(&name, sb->s_root); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry); - /* Now invalidate and put the dentry - quota got its own reference - * to inode and dentry has at least wrong hash so we had better - * throw it away */ - d_invalidate(dentry); - dput(dentry); - return err; + return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + REISERFS_SB(sb)->s_jquota_fmt, type); } /* @@ -1963,6 +1988,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch int err; struct nameidata nd; + if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA))) + return -EINVAL; err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index e302071903a1..c312881c5f53 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -4,7 +4,7 @@ #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #include <linux/reiserfs_xattr.h> #include <linux/reiserfs_acl.h> #include <asm/uaccess.h> @@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type) switch (type) { case ACL_TYPE_ACCESS: - name = XATTR_NAME_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = &reiserfs_i->i_acl_access; break; case ACL_TYPE_DEFAULT: - name = XATTR_NAME_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &reiserfs_i->i_acl_default; break; default: @@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch (type) { case ACL_TYPE_ACCESS: - name = XATTR_NAME_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = 
&reiserfs_i->i_acl_access; if (acl) { mode_t mode = inode->i_mode; @@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } break; case ACL_TYPE_DEFAULT: - name = XATTR_NAME_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &reiserfs_i->i_acl_default; if (!S_ISDIR (inode->i_mode)) return acl ? -EACCES : 0; @@ -468,7 +468,7 @@ static int posix_acl_access_get(struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); } @@ -477,7 +477,7 @@ static int posix_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } @@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name) { struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); struct posix_acl **acl = &reiserfs_i->i_acl_access; - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; if (!IS_ERR (*acl) && *acl) { posix_acl_release (*acl); @@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char } struct reiserfs_xattr_handler posix_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .get = posix_acl_access_get, .set = posix_acl_access_set, .del = posix_acl_access_del, @@ -521,7 +521,7 @@ static int posix_acl_default_get (struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); } @@ 
-530,7 +530,7 @@ static int posix_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } @@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name) { struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); struct posix_acl **acl = &reiserfs_i->i_acl_default; - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; if (!IS_ERR (*acl) && *acl) { posix_acl_release (*acl); @@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char } struct reiserfs_xattr_handler posix_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .get = posix_acl_default_get, .set = posix_acl_default_set, .del = posix_acl_default_del, diff --git a/fs/super.c b/fs/super.c index 3a1b8ca04ba6..25bc1ec6bc5d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb) */ void sync_supers(void) { - struct super_block * sb; -restart: + struct super_block *sb; + spin_lock(&sb_lock); - sb = sb_entry(super_blocks.next); - while (sb != sb_entry(&super_blocks)) +restart: + list_for_each_entry(sb, &super_blocks, s_list) { if (sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); write_super(sb); - drop_super(sb); - goto restart; - } else - sb = sb_entry(sb->s_list.next); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + } spin_unlock(&sb_lock); } @@ -381,20 +383,16 @@ void sync_filesystems(int wait) down(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = 
sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_op->sync_fs) continue; if (sb->s_flags & MS_RDONLY) continue; sb->s_need_sync_fs = 1; } - spin_unlock(&sb_lock); restart: - spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_need_sync_fs) continue; sb->s_need_sync_fs = 0; @@ -405,8 +403,11 @@ restart: down_read(&sb->s_umount); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); - drop_super(sb); - goto restart; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } spin_unlock(&sb_lock); up(&mutex); @@ -422,21 +423,25 @@ restart: struct super_block * get_super(struct block_device *bdev) { - struct list_head *p; + struct super_block *sb; + if (!bdev) return NULL; -rescan: + spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_bdev == bdev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev == bdev) { + sb->s_count++; spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); @@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super); struct super_block * user_get_super(dev_t dev) { - struct list_head *p; + struct super_block *sb; -rescan: spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_dev == dev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_dev == dev) { + sb->s_count++; 
spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); @@ -835,6 +842,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); + free_secdata(secdata); put_filesystem(type); return mnt; out_sb: diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index d4aaa88d0214..78899eeab974 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -25,7 +25,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) struct kobject * kobj = to_kobj(dentry->d_parent); if (!attr->read) - return -EINVAL; + return -EIO; return attr->read(kobj, buffer, off, count); } @@ -71,7 +71,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) struct kobject *kobj = to_kobj(dentry->d_parent); if (!attr->write) - return -EINVAL; + return -EIO; return attr->write(kobj, buffer, offset, count); } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index fe198210bc2d..59734ba1ee60 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -8,6 +8,7 @@ #include <linux/mount.h> #include <linux/module.h> #include <linux/kobject.h> +#include <linux/namei.h> #include "sysfs.h" DECLARE_RWSEM(sysfs_rename_sem); @@ -99,20 +100,21 @@ static int create_dir(struct kobject * k, struct dentry * p, umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; down(&p->d_inode->i_sem); - *d = sysfs_get_dentry(p,n); + *d = lookup_one_len(n, p, strlen(n)); if (!IS_ERR(*d)) { - error = sysfs_create(*d, mode, init_dir); + error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); if (!error) { - error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, - SYSFS_DIR); + error = sysfs_create(*d, mode, 
init_dir); if (!error) { p->d_inode->i_nlink++; (*d)->d_op = &sysfs_dentry_ops; d_rehash(*d); } } - if (error && (error != -EEXIST)) + if (error && (error != -EEXIST)) { + sysfs_put((*d)->d_fsdata); d_drop(*d); + } dput(*d); } else error = PTR_ERR(*d); @@ -171,17 +173,19 @@ static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) init = init_file; } + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); - if (error) + if (error) { + sysfs_put(sd); return error; + } if (bin_attr) { dentry->d_inode->i_size = bin_attr->size; dentry->d_inode->i_fop = &bin_fops; } dentry->d_op = &sysfs_dentry_ops; - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; d_rehash(dentry); return 0; @@ -191,13 +195,15 @@ static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) { int err = 0; + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); if (!err) { dentry->d_op = &sysfs_dentry_ops; - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; d_rehash(dentry); - } + } else + sysfs_put(sd); + return err; } @@ -228,6 +234,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, struct inode_operations sysfs_dir_inode_operations = { .lookup = sysfs_lookup, + .setattr = sysfs_setattr, }; static void remove_dir(struct dentry * d) @@ -309,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) down(&parent->d_inode->i_sem); - new_dentry = sysfs_get_dentry(parent, new_name); + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { if (!new_dentry->d_inode) { error = kobject_set_name(kobj, "%s", new_name); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 364208071e17..d72c1ce48559 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/dnotify.h> #include 
<linux/kobject.h> +#include <linux/namei.h> #include <asm/uaccess.h> #include <asm/semaphore.h> @@ -13,7 +14,7 @@ #define to_subsys(k) container_of(k,struct subsystem,kset.kobj) #define to_sattr(a) container_of(a,struct subsys_attribute,attr) -/** +/* * Subsystem file operations. * These operations allow subsystems to have files that can be * read/written. @@ -23,7 +24,7 @@ subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) { struct subsystem * s = to_subsys(kobj); struct subsys_attribute * sattr = to_sattr(attr); - ssize_t ret = 0; + ssize_t ret = -EIO; if (sattr->show) ret = sattr->show(s,page); @@ -36,7 +37,7 @@ subsys_attr_store(struct kobject * kobj, struct attribute * attr, { struct subsystem * s = to_subsys(kobj); struct subsys_attribute * sattr = to_sattr(attr); - ssize_t ret = 0; + ssize_t ret = -EIO; if (sattr->store) ret = sattr->store(s,page,count); @@ -182,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t return -ENOMEM; if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; + count = PAGE_SIZE; error = copy_from_user(buffer->page,buf,count); buffer->needs_read_fill = 1; return error ? -EFAULT : count; @@ -191,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t /** * flush_write_buffer - push buffer to kobject. - * @file: file pointer. + * @dentry: dentry to the attribute * @buffer: data buffer for file. 
+ * @count: number of bytes * * Get the correct pointers for the kobject and the attribute we're * dealing with, then call the store() method for the attribute, @@ -400,7 +402,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) int res = -ENOENT; down(&dir->d_inode->i_sem); - victim = sysfs_get_dentry(dir, attr->name); + victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { /* make sure dentry is really there */ if (victim->d_inode && @@ -443,7 +445,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) int res = -ENOENT; down(&dir->d_inode->i_sem); - victim = sysfs_get_dentry(dir, attr->name); + victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { if (victim->d_inode && (victim->d_parent->d_inode == dir->d_inode)) { diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index f11ac5ea7021..122145b0895c 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -11,6 +11,7 @@ #include <linux/kobject.h> #include <linux/module.h> #include <linux/dcache.h> +#include <linux/namei.h> #include <linux/err.h> #include "sysfs.h" @@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj, struct dentry * dir; if (grp->name) - dir = sysfs_get_dentry(kobj->dentry,grp->name); + dir = lookup_one_len(grp->name, kobj->dentry, + strlen(grp->name)); else dir = dget(kobj->dentry); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index aff7b2dfa8ee..8de13bafaa76 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -26,18 +26,107 @@ static struct backing_dev_info sysfs_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -struct inode * sysfs_new_inode(mode_t mode) +static struct inode_operations sysfs_inode_operations ={ + .setattr = sysfs_setattr, +}; + +int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) +{ + struct inode * inode = dentry->d_inode; + struct sysfs_dirent * sd = dentry->d_fsdata; + struct iattr * 
sd_iattr; + unsigned int ia_valid = iattr->ia_valid; + int error; + + if (!sd) + return -EINVAL; + + sd_iattr = sd->s_iattr; + + error = inode_change_ok(inode, iattr); + if (error) + return error; + + error = inode_setattr(inode, iattr); + if (error) + return error; + + if (!sd_iattr) { + /* setting attributes for the first time, allocate now */ + sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL); + if (!sd_iattr) + return -ENOMEM; + /* assign default attributes */ + memset(sd_iattr, 0, sizeof(struct iattr)); + sd_iattr->ia_mode = sd->s_mode; + sd_iattr->ia_uid = 0; + sd_iattr->ia_gid = 0; + sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; + sd->s_iattr = sd_iattr; + } + + /* attributes were changed atleast once in past */ + + if (ia_valid & ATTR_UID) + sd_iattr->ia_uid = iattr->ia_uid; + if (ia_valid & ATTR_GID) + sd_iattr->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) + sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + sd_iattr->ia_mode = mode; + } + + return error; +} + +static inline void set_default_inode_attr(struct inode * inode, mode_t mode) +{ + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +} + +static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) +{ + inode->i_mode = iattr->ia_mode; + inode->i_uid = iattr->ia_uid; + inode->i_gid = iattr->ia_gid; + inode->i_atime = iattr->ia_atime; + inode->i_mtime = iattr->ia_mtime; + inode->i_ctime = iattr->ia_ctime; +} + +struct inode * sysfs_new_inode(mode_t mode, struct 
sysfs_dirent * sd) { struct inode * inode = new_inode(sysfs_sb); if (inode) { - inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; + inode->i_op = &sysfs_inode_operations; + + if (sd->s_iattr) { + /* sysfs_dirent has non-default attributes + * get them for the new inode from persistent copy + * in sysfs_dirent + */ + set_inode_attr(inode, sd->s_iattr); + } else + set_default_inode_attr(inode, mode); } return inode; } @@ -48,7 +137,8 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) struct inode * inode = NULL; if (dentry) { if (!dentry->d_inode) { - if ((inode = sysfs_new_inode(mode))) { + struct sysfs_dirent * sd = dentry->d_fsdata; + if ((inode = sysfs_new_inode(mode, sd))) { if (dentry->d_parent && dentry->d_parent->d_inode) { struct inode *p_inode = dentry->d_parent->d_inode; p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; @@ -76,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) return error; } -struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name) -{ - struct qstr qstr; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); - return lookup_hash(&qstr,parent); -} - /* * Get the name for corresponding element represented by the given sysfs_dirent */ diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 5c805bb1a4b7..f1117e885bd6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -28,6 +28,7 @@ static struct sysfs_dirent sysfs_root = { .s_children = LIST_HEAD_INIT(sysfs_root.s_children), .s_element = NULL, .s_type = SYSFS_ROOT, + .s_iattr = NULL, }; static int sysfs_fill_super(struct super_block *sb, void *data, int silent) @@ -42,7 +43,8 @@ static int sysfs_fill_super(struct super_block 
*sb, void *data, int silent) sb->s_time_gran = 1; sysfs_sb = sb; - inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); + inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + &sysfs_root); if (inode) { inode->i_op = &sysfs_dir_inode_operations; inode->i_fop = &sysfs_dir_operations; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index dfdf70174354..fae57c83a722 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -43,7 +43,7 @@ static void fill_object_path(struct kobject * kobj, char * buffer, int length) } } -static int sysfs_add_link(struct dentry * parent, char * name, struct kobject * target) +static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target) { struct sysfs_dirent * parent_sd = parent->d_fsdata; struct sysfs_symlink * sl; @@ -79,7 +79,7 @@ exit1: * @target: object we're pointing to. * @name: name of the symlink. */ -int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name) +int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { struct dentry * dentry = kobj->dentry; int error = 0; @@ -99,13 +99,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * nam * @name: name of the symlink to remove. 
*/ -void sysfs_remove_link(struct kobject * kobj, char * name) +void sysfs_remove_link(struct kobject * kobj, const char * name) { sysfs_hash_and_remove(kobj->dentry,name); } static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, - char *path) + char *path) { char * s; int depth, size; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a8a24a0c0b3b..3f8953e0e5d0 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -2,12 +2,11 @@ extern struct vfsmount * sysfs_mount; extern kmem_cache_t *sysfs_dir_cachep; -extern struct inode * sysfs_new_inode(mode_t mode); +extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, umode_t, int); -extern struct dentry * sysfs_get_dentry(struct dentry *, const char *); extern int sysfs_add_file(struct dentry *, const struct attribute *, int); extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); @@ -17,6 +16,7 @@ extern void sysfs_remove_subdir(struct dentry *); extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); +extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); extern struct rw_semaphore sysfs_rename_sem; extern struct super_block * sysfs_sb; @@ -75,6 +75,7 @@ static inline void release_sysfs_dirent(struct sysfs_dirent * sd) kobject_put(sl->target_kobj); kfree(sl); } + kfree(sd->s_iattr); kmem_cache_free(sysfs_dir_cachep, sd); } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 3f6dc7112bc6..ac191ed7df0a 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -159,14 +159,12 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, char *nameptr; uint8_t lfi; uint16_t liu; - loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; + loff_t size; kernel_lb_addr bloc, eloc; uint32_t extoffset, 
elen, offset; struct buffer_head *bh = NULL; - if (!dir) - return NULL; - + size = (udf_ext0_offset(dir) + dir->i_size) >> 2; f_pos = (udf_ext0_offset(dir) >> 2); fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 93ce257cd149..a3a4b5aaf5d9 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -149,11 +149,12 @@ linvfs_unwritten_convert( */ STATIC void linvfs_unwritten_convert_direct( - struct inode *inode, + struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { + struct inode *inode = iocb->ki_filp->f_dentry->d_inode; ASSERT(!private || inode == (struct inode *)private); /* private indicates an unwritten extent lay beneath this IO */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 997963e53622..df0cba239dd5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -61,12 +61,13 @@ * File wide globals */ -STATIC kmem_cache_t *pagebuf_cache; +STATIC kmem_cache_t *pagebuf_zone; STATIC kmem_shaker_t pagebuf_shake; -STATIC int pagebuf_daemon_wakeup(int, unsigned int); +STATIC int xfsbufd_wakeup(int, unsigned int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); -STATIC struct workqueue_struct *pagebuf_logio_workqueue; -STATIC struct workqueue_struct *pagebuf_dataio_workqueue; + +STATIC struct workqueue_struct *xfslogd_workqueue; +STATIC struct workqueue_struct *xfsdatad_workqueue; /* * Pagebuf debugging @@ -123,9 +124,9 @@ ktrace_t *pagebuf_trace_buf; #define pagebuf_allocate(flags) \ - kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) + kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) #define pagebuf_deallocate(pb) \ - kmem_zone_free(pagebuf_cache, (pb)); + kmem_zone_free(pagebuf_zone, (pb)); /* * Page Region interfaces. 
@@ -425,7 +426,7 @@ _pagebuf_lookup_pages( __FUNCTION__, gfp_mask); XFS_STATS_INC(pb_page_retries); - pagebuf_daemon_wakeup(0, gfp_mask); + xfsbufd_wakeup(0, gfp_mask); blk_congestion_wait(WRITE, HZ/50); goto retry; } @@ -1136,8 +1137,8 @@ pagebuf_iodone( if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { if (schedule) { INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); - queue_work(dataio ? pagebuf_dataio_workqueue : - pagebuf_logio_workqueue, &pb->pb_iodone_work); + queue_work(dataio ? xfsdatad_workqueue : + xfslogd_workqueue, &pb->pb_iodone_work); } else { pagebuf_iodone_work(pb); } @@ -1562,16 +1563,6 @@ xfs_free_buftarg( kmem_free(btp, sizeof(*btp)); } -void -xfs_incore_relse( - xfs_buftarg_t *btp, - int delwri_only, - int wait) -{ - invalidate_bdev(btp->pbr_bdev, 1); - truncate_inode_pages(btp->pbr_mapping, 0LL); -} - STATIC int xfs_setsize_buftarg_flags( xfs_buftarg_t *btp, @@ -1742,27 +1733,27 @@ pagebuf_runall_queues( } /* Defines for pagebuf daemon */ -STATIC DECLARE_COMPLETION(pagebuf_daemon_done); -STATIC struct task_struct *pagebuf_daemon_task; -STATIC int pagebuf_daemon_active; -STATIC int force_flush; -STATIC int force_sleep; +STATIC DECLARE_COMPLETION(xfsbufd_done); +STATIC struct task_struct *xfsbufd_task; +STATIC int xfsbufd_active; +STATIC int xfsbufd_force_flush; +STATIC int xfsbufd_force_sleep; STATIC int -pagebuf_daemon_wakeup( +xfsbufd_wakeup( int priority, unsigned int mask) { - if (force_sleep) + if (xfsbufd_force_sleep) return 0; - force_flush = 1; + xfsbufd_force_flush = 1; barrier(); - wake_up_process(pagebuf_daemon_task); + wake_up_process(xfsbufd_task); return 0; } STATIC int -pagebuf_daemon( +xfsbufd( void *data) { struct list_head tmp; @@ -1774,17 +1765,17 @@ pagebuf_daemon( daemonize("xfsbufd"); current->flags |= PF_MEMALLOC; - pagebuf_daemon_task = current; - pagebuf_daemon_active = 1; + xfsbufd_task = current; + xfsbufd_active = 1; barrier(); INIT_LIST_HEAD(&tmp); do { - if (unlikely(current->flags & PF_FREEZE)) { - 
force_sleep = 1; - refrigerator(PF_FREEZE); + if (unlikely(freezing(current))) { + xfsbufd_force_sleep = 1; + refrigerator(); } else { - force_sleep = 0; + xfsbufd_force_sleep = 0; } set_current_state(TASK_INTERRUPTIBLE); @@ -1797,7 +1788,7 @@ pagebuf_daemon( ASSERT(pb->pb_flags & PBF_DELWRI); if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { - if (!force_flush && + if (!xfsbufd_force_flush && time_before(jiffies, pb->pb_queuetime + age)) { pagebuf_unlock(pb); @@ -1824,10 +1815,10 @@ pagebuf_daemon( if (as_list_len > 0) purge_addresses(); - force_flush = 0; - } while (pagebuf_daemon_active); + xfsbufd_force_flush = 0; + } while (xfsbufd_active); - complete_and_exit(&pagebuf_daemon_done, 0); + complete_and_exit(&xfsbufd_done, 0); } /* @@ -1844,8 +1835,8 @@ xfs_flush_buftarg( xfs_buf_t *pb, *n; int pincount = 0; - pagebuf_runall_queues(pagebuf_dataio_workqueue); - pagebuf_runall_queues(pagebuf_logio_workqueue); + pagebuf_runall_queues(xfsdatad_workqueue); + pagebuf_runall_queues(xfslogd_workqueue); INIT_LIST_HEAD(&tmp); spin_lock(&pbd_delwrite_lock); @@ -1898,43 +1889,43 @@ xfs_flush_buftarg( } STATIC int -pagebuf_daemon_start(void) +xfs_buf_daemons_start(void) { - int rval; + int error = -ENOMEM; - pagebuf_logio_workqueue = create_workqueue("xfslogd"); - if (!pagebuf_logio_workqueue) - return -ENOMEM; + xfslogd_workqueue = create_workqueue("xfslogd"); + if (!xfslogd_workqueue) + goto out; - pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); - if (!pagebuf_dataio_workqueue) { - destroy_workqueue(pagebuf_logio_workqueue); - return -ENOMEM; - } + xfsdatad_workqueue = create_workqueue("xfsdatad"); + if (!xfsdatad_workqueue) + goto out_destroy_xfslogd_workqueue; - rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); - if (rval < 0) { - destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); - } + error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); + if (error < 0) + goto out_destroy_xfsdatad_workqueue; + 
return 0; - return rval; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); + out_destroy_xfslogd_workqueue: + destroy_workqueue(xfslogd_workqueue); + out: + return error; } /* - * pagebuf_daemon_stop - * * Note: do not mark as __exit, it is called from pagebuf_terminate. */ STATIC void -pagebuf_daemon_stop(void) +xfs_buf_daemons_stop(void) { - pagebuf_daemon_active = 0; + xfsbufd_active = 0; barrier(); - wait_for_completion(&pagebuf_daemon_done); + wait_for_completion(&xfsbufd_done); - destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); + destroy_workqueue(xfslogd_workqueue); + destroy_workqueue(xfsdatad_workqueue); } /* @@ -1944,27 +1935,37 @@ pagebuf_daemon_stop(void) int __init pagebuf_init(void) { - pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (pagebuf_cache == NULL) { - printk("XFS: couldn't init xfs_buf_t cache\n"); - pagebuf_terminate(); - return -ENOMEM; - } + int error = -ENOMEM; + + pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); + if (!pagebuf_zone) + goto out; #ifdef PAGEBUF_TRACE pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); #endif - pagebuf_daemon_start(); + error = xfs_buf_daemons_start(); + if (error) + goto out_free_buf_zone; - pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup); - if (pagebuf_shake == NULL) { - pagebuf_terminate(); - return -ENOMEM; + pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); + if (!pagebuf_shake) { + error = -ENOMEM; + goto out_stop_daemons; } return 0; + + out_stop_daemons: + xfs_buf_daemons_stop(); + out_free_buf_zone: +#ifdef PAGEBUF_TRACE + ktrace_free(pagebuf_trace_buf); +#endif + kmem_zone_destroy(pagebuf_zone); + out: + return error; } @@ -1976,12 +1977,12 @@ pagebuf_init(void) void pagebuf_terminate(void) { - pagebuf_daemon_stop(); + xfs_buf_daemons_stop(); #ifdef PAGEBUF_TRACE ktrace_free(pagebuf_trace_buf); #endif - 
kmem_zone_destroy(pagebuf_cache); + kmem_zone_destroy(pagebuf_zone); kmem_shake_deregister(pagebuf_shake); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 74deed8e6d90..3f8f69a66aea 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -576,7 +576,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); extern void xfs_free_buftarg(xfs_buftarg_t *, int); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); -extern void xfs_incore_relse(xfs_buftarg_t *, int, int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); #define xfs_getsize_buftarg(buftarg) \ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 24fa3b101b93..f1ce4323f56e 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -57,7 +57,9 @@ #include <linux/smp_lock.h> static struct vm_operations_struct linvfs_file_vm_ops; - +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct linvfs_dmapi_file_vm_ops; +#endif STATIC inline ssize_t __linvfs_read( @@ -388,6 +390,14 @@ done: return -error; } +#ifdef CONFIG_XFS_DMAPI +STATIC void +linvfs_mmap_close( + struct vm_area_struct *vma) +{ + xfs_dm_mm_put(vma); +} +#endif /* CONFIG_XFS_DMAPI */ STATIC int linvfs_file_mmap( @@ -399,16 +409,19 @@ linvfs_file_mmap( vattr_t va = { .va_mask = XFS_AT_UPDATIME }; int error; + vma->vm_ops = &linvfs_file_vm_ops; + if (vp->v_vfsp->vfs_flag & VFS_DMI) { xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); error = -XFS_SEND_MMAP(mp, vma, 0); if (error) return error; +#ifdef CONFIG_XFS_DMAPI + vma->vm_ops = &linvfs_dmapi_file_vm_ops; +#endif } - vma->vm_ops = &linvfs_file_vm_ops; - VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); if (!error) vn_revalidate(vp); /* update Linux inode flags */ @@ -609,7 +622,15 @@ struct file_operations linvfs_dir_operations = { static struct vm_operations_struct linvfs_file_vm_ops = { .nopage = filemap_nopage, 
.populate = filemap_populate, +}; + +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { + .close = linvfs_mmap_close, + .nopage = filemap_nopage, + .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT .mprotect = linvfs_mprotect, #endif }; +#endif /* CONFIG_XFS_DMAPI */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 69809eef8a54..05a447e51cc0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1174,7 +1174,8 @@ xfs_ioc_xattr( switch (cmd) { case XFS_IOC_FSGETXATTR: { - va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_NEXTENTS | XFS_AT_PROJID; VOP_GETATTR(vp, &va, 0, NULL, error); if (error) return -error; @@ -1182,6 +1183,7 @@ xfs_ioc_xattr( fa.fsx_xflags = va.va_xflags; fa.fsx_extsize = va.va_extsize; fa.fsx_nextents = va.va_nextents; + fa.fsx_projid = va.va_projid; if (copy_to_user(arg, &fa, sizeof(fa))) return -XFS_ERROR(EFAULT); @@ -1196,9 +1198,10 @@ xfs_ioc_xattr( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; - va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; va.va_xflags = fa.fsx_xflags; va.va_extsize = fa.fsx_extsize; + va.va_projid = fa.fsx_projid; VOP_SETATTR(vp, &va, attr_flags, NULL, error); if (!error) @@ -1207,7 +1210,8 @@ xfs_ioc_xattr( } case XFS_IOC_FSGETXATTRA: { - va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_ANEXTENTS | XFS_AT_PROJID; VOP_GETATTR(vp, &va, 0, NULL, error); if (error) return -error; @@ -1215,6 +1219,7 @@ xfs_ioc_xattr( fa.fsx_xflags = va.va_xflags; fa.fsx_extsize = va.va_extsize; fa.fsx_nextents = va.va_anextents; + fa.fsx_projid = va.va_projid; if (copy_to_user(arg, &fa, sizeof(fa))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 
71bb41019a12..42dc5e4662ed 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -145,10 +145,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_rotorstep xfs_params.rotorstep.val -#ifndef __smp_processor_id -#define __smp_processor_id() smp_processor_id() +#ifndef raw_smp_processor_id +#define raw_smp_processor_id() smp_processor_id() #endif -#define current_cpu() __smp_processor_id() +#define current_cpu() raw_smp_processor_id() #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) @@ -230,8 +230,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) * field (see the QCMD macro in quota.h). These macros help keep the * code portable - they are not visible from the syscall interface. */ -#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ -#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ +#define Q_XSETGQLIM XQM_CMD(8) /* set groups disk limits */ +#define Q_XGETGQUOTA XQM_CMD(9) /* get groups disk limits */ +#define Q_XSETPQLIM XQM_CMD(10) /* set projects disk limits */ +#define Q_XGETPQUOTA XQM_CMD(11) /* get projects disk limits */ /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ /* we may well need to fine-tune this if it ever becomes an issue. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index aa9daaea6c34..acab58c48043 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -209,30 +209,6 @@ unlock: return (-status); } -/* - * xfs_inval_cached_pages - * - * This routine is responsible for keeping direct I/O and buffered I/O - * somewhat coherent. From here we make sure that we're at least - * temporarily holding the inode I/O lock exclusively and then call - * the page cache to flush and invalidate any cached pages. 
If there - * are no cached pages this routine will be very quick. - */ -void -xfs_inval_cached_pages( - vnode_t *vp, - xfs_iocore_t *io, - xfs_off_t offset, - int write, - int relock) -{ - if (VN_CACHED(vp)) { - xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); - } - -} - ssize_t /* bytes read, or (-) error */ xfs_read( bhv_desc_t *bdp, @@ -304,10 +280,11 @@ xfs_read( if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { vrwlock_t locktype = VRWLOCK_READ; + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, size, - FILP_DELAY_FLAG(file), &locktype); + dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); goto unlock_isem; @@ -867,11 +844,15 @@ retry: !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); + if (need_isem) + up(&inode->i_sem); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) - goto out_unlock_isem; + goto out_nounlocks; + if (need_isem) + down(&inode->i_sem); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; ret = 0; @@ -986,6 +967,7 @@ retry: out_unlock_isem: if (need_isem) up(&inode->i_sem); + out_nounlocks: return -error; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index d723e35254a0..f197a720e394 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -94,8 +94,6 @@ extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, xfs_fsize_t, xfs_fsize_t); -extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *, - xfs_off_t, int, int); extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); diff --git a/fs/xfs/linux-2.6/xfs_super.c 
b/fs/xfs/linux-2.6/xfs_super.c index 455e2b2fb964..f6dd7de25927 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -483,7 +483,7 @@ xfssyncd( set_current_state(TASK_INTERRUPTIBLE); timeleft = schedule_timeout(timeleft); /* swsusp */ - try_to_freeze(PF_FREEZE); + try_to_freeze(); if (vfsp->vfs_flag & VFS_UMOUNT) break; @@ -590,8 +590,10 @@ linvfs_sync_super( int error; int flags = SYNC_FSDATA; - if (wait) - flags |= SYNC_WAIT; + if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) + flags = SYNC_QUIESCE; + else + flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0); VFS_SYNC(vfsp, flags, NULL, error); sb->s_dirt = 0; @@ -701,7 +703,8 @@ linvfs_getxquota( struct vfs *vfsp = LINVFS_GET_VFS(sb); int error, getmode; - getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA; + getmode = (type == USRQUOTA) ? Q_XGETQUOTA : + ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA); VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); return -error; } @@ -716,7 +719,8 @@ linvfs_setxquota( struct vfs *vfsp = LINVFS_GET_VFS(sb); int error, setmode; - setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM; + setmode = (type == USRQUOTA) ? Q_XSETQLIM : + ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM); VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 76493991578f..7ee1f714e9ba 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -107,6 +107,7 @@ typedef enum { #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. 
superblocks) */ #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ +#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */ typedef int (*vfs_mount_t)(bhv_desc_t *, struct xfs_mount_args *, struct cred *); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index a832d165f24f..250cad54e892 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -411,13 +411,13 @@ vn_remove( /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(vn_count(vp)), \ +/* 3 */ (void *)(__psint_t)(vn_count(vp)), \ /* 4 */ (void *)(ra), \ /* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ /* 7 */ (void *)(__psint_t)current_pid(), \ /* 8 */ (void *)__return_address, \ -/* 9 */ 0, 0, 0, 0, 0, 0, 0) +/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) /* * Vnode tracing code. diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 00466c3194ac..a6e57c647be4 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -426,7 +426,7 @@ typedef struct vattr { u_long va_extsize; /* file extent size */ u_long va_nextents; /* number of extents in file */ u_long va_anextents; /* number of attr extents in file */ - int va_projid; /* project id */ + prid_t va_projid; /* project id */ } vattr_t; /* diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 740d20d33187..46ce1e3ce1d6 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,7 +101,7 @@ int xfs_dqerror_mod = 33; * is the d_id field. The idea is to fill in the entire q_core * when we read in the on disk dquot. */ -xfs_dquot_t * +STATIC xfs_dquot_t * xfs_qm_dqinit( xfs_mount_t *mp, xfs_dqid_t id, @@ -286,7 +286,9 @@ xfs_qm_adjust_dqlimits( * We also return 0 as the values of the timers in Q_GETQUOTA calls, when * enforcement's off. 
* In contrast, warnings are a little different in that they don't - * 'automatically' get started when limits get exceeded. + * 'automatically' get started when limits get exceeded. They do + * get reset to zero, however, when we find the count to be under + * the soft limit (they are only ever set non-zero via userspace). */ void xfs_qm_adjust_dqtimers( @@ -315,6 +317,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_btimer, ARCH_CONVERT, get_seconds() + XFS_QI_BTIMELIMIT(mp)); + } else { + d->d_bwarns = 0; } } else { if ((!d->d_blk_softlimit || @@ -336,6 +340,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_itimer, ARCH_CONVERT, get_seconds() + XFS_QI_ITIMELIMIT(mp)); + } else { + d->d_iwarns = 0; } } else { if ((!d->d_ino_softlimit || @@ -357,6 +363,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_rtbtimer, ARCH_CONVERT, get_seconds() + XFS_QI_RTBTIMELIMIT(mp)); + } else { + d->d_rtbwarns = 0; } } else { if ((!d->d_rtb_softlimit || @@ -371,68 +379,6 @@ xfs_qm_adjust_dqtimers( } /* - * Increment or reset warnings of a given dquot. - */ -int -xfs_qm_dqwarn( - xfs_disk_dquot_t *d, - uint flags) -{ - int warned; - - /* - * root's limits are not real limits. 
- */ - if (!d->d_id) - return (0); - - warned = 0; - if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) && - (INT_GET(d->d_bcount, ARCH_CONVERT) >= - INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) { - if (flags & XFS_QMOPT_DOWARN) { - INT_MOD(d->d_bwarns, ARCH_CONVERT, +1); - warned++; - } - } else { - if (!d->d_blk_softlimit || - (INT_GET(d->d_bcount, ARCH_CONVERT) < - INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) { - d->d_bwarns = 0; - } - } - - if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 && - (INT_GET(d->d_icount, ARCH_CONVERT) >= - INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) { - if (flags & XFS_QMOPT_DOWARN) { - INT_MOD(d->d_iwarns, ARCH_CONVERT, +1); - warned++; - } - } else { - if (!d->d_ino_softlimit || - (INT_GET(d->d_icount, ARCH_CONVERT) < - INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) { - d->d_iwarns = 0; - } - } -#ifdef QUOTADEBUG - if (INT_GET(d->d_iwarns, ARCH_CONVERT)) - cmn_err(CE_DEBUG, - "--------@@Inode warnings running : %Lu >= %Lu", - INT_GET(d->d_icount, ARCH_CONVERT), - INT_GET(d->d_ino_softlimit, ARCH_CONVERT)); - if (INT_GET(d->d_bwarns, ARCH_CONVERT)) - cmn_err(CE_DEBUG, - "--------@@Blks warnings running : %Lu >= %Lu", - INT_GET(d->d_bcount, ARCH_CONVERT), - INT_GET(d->d_blk_softlimit, ARCH_CONVERT)); -#endif - return (warned); -} - - -/* * initialize a buffer full of dquots and log the whole thing */ STATIC void @@ -461,9 +407,9 @@ xfs_qm_init_dquot_blk( for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) xfs_qm_dqinit_core(curid, type, d); xfs_trans_dquot_buf(tp, bp, - type & XFS_DQ_USER ? - XFS_BLI_UDQUOT_BUF : - XFS_BLI_GDQUOT_BUF); + (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : + ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : + XFS_BLI_GDQUOT_BUF))); xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); } @@ -544,8 +490,7 @@ xfs_qm_dqalloc( * the entire thing. 
*/ xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), - dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP), - bp); + dqp->dq_flags & XFS_DQ_ALLTYPES, bp); if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { goto error1; @@ -675,8 +620,7 @@ xfs_qm_dqtobp( /* * A simple sanity check in case we got a corrupted dquot... */ - if (xfs_qm_dqcheck(ddq, id, - dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP), + if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), "dqtobp")) { if (!(flags & XFS_QMOPT_DQREPAIR)) { @@ -953,8 +897,8 @@ int xfs_qm_dqget( xfs_mount_t *mp, xfs_inode_t *ip, /* locked inode (optional) */ - xfs_dqid_t id, /* gid or uid, depending on type */ - uint type, /* UDQUOT or GDQUOT */ + xfs_dqid_t id, /* uid/projid/gid depending on type */ + uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ { @@ -965,6 +909,7 @@ xfs_qm_dqget( ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || + (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || (! 
XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { return (ESRCH); } @@ -983,7 +928,9 @@ xfs_qm_dqget( again: #ifdef DEBUG - ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP); + ASSERT(type == XFS_DQ_USER || + type == XFS_DQ_PROJ || + type == XFS_DQ_GROUP); if (ip) { ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); if (type == XFS_DQ_USER) @@ -1306,8 +1253,8 @@ xfs_qm_dqflush( return (error); } - if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN, - "dqflush (incore copy)")) { + if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), + 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE); return XFS_ERROR(EIO); } @@ -1459,7 +1406,8 @@ xfs_dqlock2( { if (d1 && d2) { ASSERT(d1 != d2); - if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) { + if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > + INT_GET(d2->q_core.d_id, ARCH_CONVERT)) { xfs_dqlock(d2); xfs_dqlock(d1); } else { @@ -1582,8 +1530,7 @@ xfs_qm_dqprint(xfs_dquot_t *dqp) cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); cmn_err(CE_DEBUG, "---- dquotID = %d", (int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT)); - cmn_err(CE_DEBUG, "---- type = %s", - XFS_QM_ISUDQ(dqp) ? 
"USR" : "GRP"); + cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 0c3fe3175baa..39175103c8e0 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -114,25 +114,18 @@ typedef struct xfs_dquot { #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) /* - * Quota Accounting flags + * Quota Accounting/Enforcement flags */ -#define XFS_ALL_QUOTA_ACCT (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD) -#define XFS_ALL_QUOTA_ACTV (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) -#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD) +#define XFS_ALL_QUOTA_ACCT \ + (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) +#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) - -/* - * Quota Limit Enforcement flags - */ +#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) #define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) -#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) -#define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) +#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) #ifdef DEBUG static inline int @@ -167,6 +160,8 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t 
*dqp) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) +#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) +#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) #define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) #define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ @@ -174,7 +169,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) #define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ - (XFS_IS_GQUOTA_ON((d)->q_mount)))) + (XFS_IS_OQUOTA_ON((d)->q_mount)))) #ifdef XFS_DQUOT_TRACE /* @@ -211,7 +206,6 @@ extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, xfs_disk_dquot_t *); extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, xfs_disk_dquot_t *); -extern int xfs_qm_dqwarn(xfs_disk_dquot_t *, uint); extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index a5425ee6e7bd..f5271b7b1e84 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -428,7 +428,7 @@ xfs_qm_dquot_logitem_committing( /* * This is the ops vector for dquots */ -struct xfs_item_ops xfs_dquot_item_ops = { +STATIC struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_dquot_logitem_format, @@ -646,7 +646,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) return; } -struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { +STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_qoff_logitem_format, @@ -669,7 +669,7 @@ struct xfs_item_ops 
xfs_qm_qoffend_logitem_ops = { /* * This is the ops vector shared by all quotaoff-start log items. */ -struct xfs_item_ops xfs_qm_qoff_logitem_ops = { +STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_qoff_logitem_format, diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 89f2cd656ebf..f665ca8f9e96 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -81,12 +81,18 @@ struct xfs_qm *xfs_Gqm; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -kmem_shaker_t xfs_qm_shaker; +STATIC kmem_shaker_t xfs_qm_shaker; STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); +STATIC void xfs_qm_freelist_init(xfs_frlist_t *); +STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); +STATIC int xfs_qm_mplist_nowait(xfs_mount_t *); +STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); + STATIC int xfs_qm_init_quotainos(xfs_mount_t *); +STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); STATIC int xfs_qm_shake(int, unsigned int); #ifdef DEBUG @@ -184,7 +190,7 @@ xfs_Gqm_init(void) /* * Destroy the global quota manager when its reference count goes to zero. */ -void +STATIC void xfs_qm_destroy( struct xfs_qm *xqm) { @@ -304,9 +310,9 @@ xfs_qm_mount_quotainit( uint flags) { /* - * User or group quotas has to be on. + * User, projects or group quotas has to be on. */ - ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA)); + ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)); /* * Initialize the flags in the mount structure. 
From this point @@ -324,7 +330,11 @@ xfs_qm_mount_quotainit( if (flags & XFSMNT_GQUOTA) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); if (flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_GQUOTA_ENFD; + mp->m_qflags |= XFS_OQUOTA_ENFD; + } else if (flags & XFSMNT_PQUOTA) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + if (flags & XFSMNT_PQUOTAENF) + mp->m_qflags |= XFS_OQUOTA_ENFD; } } @@ -357,11 +367,11 @@ xfs_qm_mount_quotas( /* * If a file system had quotas running earlier, but decided to - * mount without -o quota/uquota/gquota options, revoke the + * mount without -o uquota/pquota/gquota options, revoke the * quotachecked license, and bail out. */ if (! XFS_IS_QUOTA_ON(mp) && - (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) { + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { mp->m_qflags = 0; goto write_changes; } @@ -509,7 +519,7 @@ out: * Flush all dquots of the given file system to disk. The dquots are * _not_ purged from memory here, just their data written to disk. */ -int +STATIC int xfs_qm_dqflush_all( xfs_mount_t *mp, int flags) @@ -613,7 +623,7 @@ xfs_qm_detach_gdquots( STATIC int xfs_qm_dqpurge_int( xfs_mount_t *mp, - uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */ + uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ { xfs_dquot_t *dqp; uint dqtype; @@ -625,6 +635,7 @@ xfs_qm_dqpurge_int( return (0); dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; xfs_qm_mplist_lock(mp); @@ -734,11 +745,11 @@ xfs_qm_dqattach_one( /* * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is XFS_DQ_GROUP. Its purpose is to save a + * when the type arg is group/project. Its purpose is to save a * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside * the user dquot. 
*/ - ASSERT(!udqhint || type == XFS_DQ_GROUP); + ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); if (udqhint && !dolock) xfs_dqlock(udqhint); @@ -897,8 +908,8 @@ xfs_qm_dqattach_grouphint( /* - * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON - * in to account. + * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON + * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty * much made this code a complete mess, but it has been pretty useful. @@ -937,8 +948,13 @@ xfs_qm_dqattach( nquotas++; } ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); - if (XFS_IS_GQUOTA_ON(mp)) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + if (XFS_IS_OQUOTA_ON(mp)) { + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + flags & XFS_QMOPT_DQALLOC, + flags & XFS_QMOPT_DQLOCK, + ip->i_udquot, &ip->i_gdquot) : + xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, flags & XFS_QMOPT_DQLOCK, ip->i_udquot, &ip->i_gdquot); @@ -989,7 +1005,7 @@ xfs_qm_dqattach( } if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); - if (XFS_IS_GQUOTA_ON(mp)) + if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } #endif @@ -1018,13 +1034,13 @@ xfs_qm_dqdetach( ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); - if (ip->i_udquot) - xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); if (ip->i_udquot) { + xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } if (ip->i_gdquot) { + xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } @@ -1149,7 +1165,7 @@ xfs_qm_sync( * This initializes all the quota information that's kept in the * mount structure */ -int +STATIC int xfs_qm_init_quotainfo( xfs_mount_t *mp) { @@ -1202,8 +1218,9 @@ 
xfs_qm_init_quotainfo( * and group quotas, at least not at this point. */ error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, - (XFS_IS_UQUOTA_RUNNING(mp)) ? - XFS_DQ_USER : XFS_DQ_GROUP, + XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : + (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : + XFS_DQ_PROJ), XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, &dqp); if (! error) { @@ -1234,6 +1251,10 @@ xfs_qm_init_quotainfo( INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ? INT_GET(ddqp->d_iwarns, ARCH_CONVERT) : XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = + INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ? + INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) : + XFS_QM_RTBWARNLIMIT; qinf->qi_bhardlimit = INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT); qinf->qi_bsoftlimit = @@ -1259,6 +1280,7 @@ xfs_qm_init_quotainfo( qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } return (0); @@ -1366,13 +1388,20 @@ xfs_qm_dqget_noattach( ASSERT(udqp); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); if (udqp) xfs_dqunlock(udqp); - if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp))) { + error = XFS_IS_GQUOTA_ON(mp) ? 
+ xfs_qm_dqget(mp, ip, + ip->i_d.di_gid, XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp) : + xfs_qm_dqget(mp, ip, + ip->i_d.di_projid, XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp); + if (error) { if (udqp) xfs_qm_dqrele(udqp); ASSERT(error != ESRCH); @@ -1521,8 +1550,10 @@ xfs_qm_reset_dqcounts( INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL); INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0); INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0); + INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0); INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL); INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL); + INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL); ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); } @@ -1541,11 +1572,14 @@ xfs_qm_dqiter_bufs( int error; int notcommitted; int incr; + int type; ASSERT(blkcnt > 0); notcommitted = 0; incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; + type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : + (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; /* @@ -1564,9 +1598,7 @@ xfs_qm_dqiter_bufs( if (error) break; - (void) xfs_qm_reset_dqcounts(mp, bp, firstid, - flags & XFS_QMOPT_UQUOTA ? - XFS_DQ_USER : XFS_DQ_GROUP); + (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_bdwrite(mp, bp); /* * goto the next block. @@ -1578,7 +1610,7 @@ xfs_qm_dqiter_bufs( } /* - * Iterate over all allocated USR/GRP dquots in the system, calling a + * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a * caller supplied function for every chunk of dquots that we find. 
*/ STATIC int @@ -1849,7 +1881,7 @@ xfs_qm_dqusage_adjust( xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); xfs_qm_dqput(udqp); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(gdqp); xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); xfs_qm_dqput(gdqp); @@ -1898,7 +1930,7 @@ xfs_qm_quotacheck( cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); /* - * First we go thru all the dquots on disk, USR and GRP, and reset + * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset * their counters to zero. We need a clean slate. * We don't log our changes till later. */ @@ -1909,9 +1941,10 @@ xfs_qm_quotacheck( } if ((gip = XFS_QI_GQIP(mp))) { - if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA))) + if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) goto error_return; - flags |= XFS_GQUOTA_CHKD; + flags |= XFS_OQUOTA_CHKD; } do { @@ -1938,7 +1971,7 @@ xfs_qm_quotacheck( if (error) { xfs_qm_dqpurge_all(mp, XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA| - XFS_QMOPT_QUOTAOFF); + XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF); goto error_return; } /* @@ -1961,7 +1994,7 @@ xfs_qm_quotacheck( * quotachecked status, since we won't be doing accounting for * that type anymore. 
*/ - mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD); + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); @@ -2013,7 +2046,7 @@ xfs_qm_init_quotainos( 0, 0, &uip, 0))) return XFS_ERROR(error); } - if (XFS_IS_GQUOTA_ON(mp) && + if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, @@ -2043,10 +2076,12 @@ xfs_qm_init_quotainos( flags &= ~XFS_QMOPT_SBVERSION; } - if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { - if ((error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, - flags | XFS_QMOPT_GQUOTA))) { + if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { + flags |= (XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + error = xfs_qm_qino_alloc(mp, &gip, + sbflags | XFS_SB_GQUOTINO, flags); + if (error) { if (uip) VN_RELE(XFS_ITOV(uip)); @@ -2452,6 +2487,7 @@ xfs_qm_vop_dqalloc( xfs_inode_t *ip, uid_t uid, gid_t gid, + prid_t prid, uint flags, xfs_dquot_t **O_udqpp, xfs_dquot_t **O_gdqpp) @@ -2483,8 +2519,7 @@ xfs_qm_vop_dqalloc( } uq = gq = NULL; - if ((flags & XFS_QMOPT_UQUOTA) && - XFS_IS_UQUOTA_ON(mp)) { + if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { if (ip->i_d.di_uid != uid) { /* * What we need is the dquot that has this uid, and @@ -2522,8 +2557,7 @@ xfs_qm_vop_dqalloc( xfs_dqunlock(uq); } } - if ((flags & XFS_QMOPT_GQUOTA) && - XFS_IS_GQUOTA_ON(mp)) { + if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { if (ip->i_d.di_gid != gid) { xfs_iunlock(ip, lockflags); if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, @@ -2546,6 +2580,29 @@ xfs_qm_vop_dqalloc( XFS_DQHOLD(gq); xfs_dqunlock(gq); } + } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + if (ip->i_d.di_projid != prid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + 
&gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } } if (uq) xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); @@ -2574,6 +2631,9 @@ xfs_qm_vop_chown( xfs_dquot_t *newdq) { xfs_dquot_t *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2582,20 +2642,12 @@ xfs_qm_vop_chown( ASSERT(prevdq); ASSERT(prevdq != newdq); - xfs_trans_mod_dquot(tp, prevdq, - XFS_TRANS_DQ_BCOUNT, - -(ip->i_d.di_nblocks)); - xfs_trans_mod_dquot(tp, prevdq, - XFS_TRANS_DQ_ICOUNT, - -1); + xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); + xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); /* the sparkling new dquot */ - xfs_trans_mod_dquot(tp, newdq, - XFS_TRANS_DQ_BCOUNT, - ip->i_d.di_nblocks); - xfs_trans_mod_dquot(tp, newdq, - XFS_TRANS_DQ_ICOUNT, - 1); + xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); /* * Take an extra reference, because the inode @@ -2611,7 +2663,7 @@ xfs_qm_vop_chown( } /* - * Quota reservations for setattr(AT_UID|AT_GID). + * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). */ int xfs_qm_vop_chown_reserve( @@ -2623,7 +2675,7 @@ xfs_qm_vop_chown_reserve( { int error; xfs_mount_t *mp; - uint delblks; + uint delblks, blkflags; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; ASSERT(XFS_ISLOCKED_INODE(ip)); @@ -2632,6 +2684,8 @@ xfs_qm_vop_chown_reserve( delblks = ip->i_delayed_blks; delblksudq = delblksgdq = unresudq = unresgdq = NULL; + blkflags = XFS_IS_REALTIME_INODE(ip) ? 
+ XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; if (XFS_IS_UQUOTA_ON(mp) && udqp && ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) { @@ -2646,18 +2700,22 @@ xfs_qm_vop_chown_reserve( unresudq = ip->i_udquot; } } - if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && - ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) { - delblksgdq = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - unresgdq = ip->i_gdquot; + if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { + if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid != + INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) || + (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid != + INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) { + delblksgdq = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + unresgdq = ip->i_gdquot; + } } } if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | XFS_QMOPT_RES_REGBLKS))) + flags | blkflags))) return (error); /* @@ -2674,11 +2732,11 @@ xfs_qm_vop_chown_reserve( ASSERT(unresudq || unresgdq); if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | XFS_QMOPT_RES_REGBLKS))) + flags | blkflags))) return (error); xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, - XFS_QMOPT_RES_REGBLKS); + blkflags); } return (0); @@ -2751,7 +2809,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( } /* ------------- list stuff -----------------*/ -void +STATIC void xfs_qm_freelist_init(xfs_frlist_t *ql) { ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; @@ -2760,7 +2818,7 @@ xfs_qm_freelist_init(xfs_frlist_t *ql) ql->qh_nelems = 0; } -void +STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *ql) { xfs_dquot_t *dqp, *nextdqp; @@ -2786,7 +2844,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql) ASSERT(ql->qh_nelems == 0); } -void +STATIC void xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq) { dq->dq_flnext = ql->qh_next; @@ -2816,7 
+2874,7 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq) xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); } -int +STATIC int xfs_qm_dqhashlock_nowait( xfs_dquot_t *dqp) { @@ -2836,7 +2894,7 @@ xfs_qm_freelist_lock_nowait( return (locked); } -int +STATIC int xfs_qm_mplist_nowait( xfs_mount_t *mp) { diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index dcf1a7a831d8..b03eecf3b6cb 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -133,8 +133,9 @@ typedef struct xfs_quotainfo { time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ time_t qi_rtbtimelimit;/* limit for rt blks timer */ - xfs_qwarncnt_t qi_bwarnlimit; /* limit for num warnings */ - xfs_qwarncnt_t qi_iwarnlimit; /* limit for num warnings */ + xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ + xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ + xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ mutex_t qi_quotaofflock;/* to serialize quotaoff */ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ uint qi_dqperchunk; /* # ondisk dqs in above chunk */ @@ -176,6 +177,7 @@ typedef struct xfs_dquot_acct { #define XFS_QM_BWARNLIMIT 5 #define XFS_QM_IWARNLIMIT 5 +#define XFS_QM_RTBWARNLIMIT 5 #define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock, PINOD)) #define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) @@ -184,7 +186,6 @@ typedef struct xfs_dquot_acct { extern void xfs_mount_reset_sbqflags(xfs_mount_t *); -extern int xfs_qm_init_quotainfo(xfs_mount_t *); extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern int xfs_qm_mount_quotas(xfs_mount_t *, int); extern void 
xfs_qm_mount_quotainit(xfs_mount_t *, uint); @@ -203,7 +204,7 @@ extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); /* vop stuff */ extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, - uid_t, gid_t, uint, + uid_t, gid_t, prid_t, uint, xfs_dquot_t **, xfs_dquot_t **); extern void xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_trans_t *, xfs_inode_t *, @@ -215,14 +216,9 @@ extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, xfs_dquot_t *, xfs_dquot_t *, uint); /* list stuff */ -extern void xfs_qm_freelist_init(xfs_frlist_t *); -extern void xfs_qm_freelist_destroy(xfs_frlist_t *); -extern void xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *); -extern int xfs_qm_mplist_nowait(xfs_mount_t *); -extern int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); /* system call interface */ extern int xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index be67d9c265f8..dc3c37a1e158 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -71,10 +71,13 @@ #define MNTOPT_NOQUOTA "noquota" /* no quotas */ #define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ #define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ #define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ #define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ #define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ STATIC int @@ -109,6 +112,14 @@ 
xfs_qm_parseargs( args->flags |= XFSMNT_UQUOTA; args->flags &= ~XFSMNT_UQUOTAENF; referenced = 1; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + referenced = 1; + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + args->flags |= XFSMNT_PQUOTA; + args->flags &= ~XFSMNT_PQUOTAENF; + referenced = 1; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; @@ -127,6 +138,12 @@ xfs_qm_parseargs( *this_char++ = ','; } + if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + cmn_err(CE_WARN, + "XFS: cannot mount with both project and group quota"); + return XFS_ERROR(EINVAL); + } + PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error); if (!error && !referenced) bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM); @@ -148,13 +165,19 @@ xfs_qm_showargs( seq_puts(m, "," MNTOPT_UQUOTANOENF); } + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + (mp->m_qflags & XFS_OQUOTA_ENFD) ? + seq_puts(m, "," MNTOPT_PRJQUOTA) : + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } + if (mp->m_qflags & XFS_GQUOTA_ACCT) { - (mp->m_qflags & XFS_GQUOTA_ENFD) ? + (mp->m_qflags & XFS_OQUOTA_ENFD) ? 
seq_puts(m, "," MNTOPT_GRPQUOTA) : seq_puts(m, "," MNTOPT_GQUOTANOENF); } - if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, "," MNTOPT_NOQUOTA); PVFS_SHOWARGS(BHV_NEXT(bhv), m, error); @@ -171,7 +194,7 @@ xfs_qm_mount( struct xfs_mount *mp = XFS_VFSTOM(vfsp); int error; - if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA)) + if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA)) xfs_qm_mount_quotainit(mp, args->flags); PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error); return error; @@ -255,16 +278,17 @@ xfs_qm_newmount( uint *quotaflags) { uint quotaondisk; - uint uquotaondisk = 0, gquotaondisk = 0; + uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; *quotaflags = 0; *needquotamount = B_FALSE; quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && - mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT); + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); if (quotaondisk) { uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; + pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; } @@ -277,13 +301,16 @@ xfs_qm_newmount( if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_GQUOTA_ON(mp))) && + (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && xfs_dev_is_read_only(mp, "changing quota state")) { cmn_err(CE_WARN, - "XFS: please mount with%s%s%s.", + "XFS: please mount with%s%s%s%s.", (!quotaondisk ? "out quota" : ""), (uquotaondisk ? " usrquota" : ""), + (pquotaondisk ? " prjquota" : ""), (gquotaondisk ? 
" grpquota" : "")); return XFS_ERROR(EPERM); } @@ -359,7 +386,7 @@ xfs_qm_dqrele_null( } -struct xfs_qmops xfs_qmcore_xfs = { +STATIC struct xfs_qmops xfs_qmcore_xfs = { .xfs_qminit = xfs_qm_newmount, .xfs_qmdone = xfs_qm_unmount_quotadestroy, .xfs_qmmount = xfs_qm_endmount, diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 229f5b5a2d25..68e98962dbef 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -118,40 +118,41 @@ xfs_qm_quotactl( * The following commands are valid even when quotaoff. */ switch (cmd) { + case Q_XQUOTARM: /* - * truncate quota files. quota must be off. + * Truncate quota files. quota must be off. */ - case Q_XQUOTARM: if (XFS_IS_QUOTA_ON(mp) || addr == NULL) return XFS_ERROR(EINVAL); if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); return (xfs_qm_scall_trunc_qfiles(mp, xfs_qm_import_qtype_flags(*(uint *)addr))); + + case Q_XGETQSTAT: /* * Get quota status information. */ - case Q_XGETQSTAT: return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr)); + case Q_XQUOTAON: /* - * QUOTAON for root f/s and quota enforcement on others.. - * Quota accounting for non-root f/s's must be turned on - * at mount time. + * QUOTAON - enabling quota enforcement. + * Quota accounting must be turned on at mount time. */ - case Q_XQUOTAON: if (addr == NULL) return XFS_ERROR(EINVAL); if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); return (xfs_qm_scall_quotaon(mp, xfs_qm_import_flags(*(uint *)addr))); - case Q_XQUOTAOFF: + + case Q_XQUOTAOFF: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); break; - default: + default: break; } @@ -159,7 +160,7 @@ xfs_qm_quotactl( return XFS_ERROR(ESRCH); switch (cmd) { - case Q_XQUOTAOFF: + case Q_XQUOTAOFF: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_quotaoff(mp, @@ -167,42 +168,39 @@ xfs_qm_quotactl( B_FALSE); break; - /* - * Defaults to XFS_GETUQUOTA. 
- */ - case Q_XGETQUOTA: + case Q_XGETQUOTA: error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER, (fs_disk_quota_t *)addr); break; - /* - * Set limits, both hard and soft. Defaults to Q_SETUQLIM. - */ - case Q_XSETQLIM: + case Q_XGETGQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, + (fs_disk_quota_t *)addr); + break; + case Q_XGETPQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); + break; + + case Q_XSETQLIM: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER, (fs_disk_quota_t *)addr); break; - - case Q_XSETGQLIM: + case Q_XSETGQLIM: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, (fs_disk_quota_t *)addr); break; - - - case Q_XGETGQUOTA: - error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, - (fs_disk_quota_t *)addr); + case Q_XSETPQLIM: + if (vfsp->vfs_flag & VFS_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); break; - /* - * Quotas are entirely undefined after quotaoff in XFS quotas. - * For instance, there's no way to set limits when quotaoff. - */ - - default: + default: error = XFS_ERROR(EINVAL); break; } @@ -286,8 +284,12 @@ xfs_qm_scall_quotaoff( } if (flags & XFS_GQUOTA_ACCT) { dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); inactivate_flags |= XFS_GQUOTA_ACTIVE; + } else if (flags & XFS_PQUOTA_ACCT) { + dqtype |= XFS_QMOPT_PQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_PQUOTA_ACTIVE; } /* @@ -364,7 +366,8 @@ xfs_qm_scall_quotaoff( /* * If quotas is completely disabled, close shop. 
*/ - if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) { + if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || + ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); xfs_qm_destroy_quotainfo(mp); return (0); @@ -378,7 +381,7 @@ xfs_qm_scall_quotaoff( XFS_PURGE_INODE(XFS_QI_UQIP(mp)); XFS_QI_UQIP(mp) = NULL; } - if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) { + if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { XFS_PURGE_INODE(XFS_QI_GQIP(mp)); XFS_QI_GQIP(mp) = NULL; } @@ -411,7 +414,8 @@ xfs_qm_scall_trunc_qfiles( } } - if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) { + if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && + mp->m_sb.sb_gquotino != NULLFSINO) { error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); if (! error) { (void) xfs_truncate_file(mp, qip); @@ -434,7 +438,7 @@ xfs_qm_scall_quotaon( uint flags) { int error; - unsigned long s; + unsigned long s; uint qf; uint accflags; __int64_t sbflags; @@ -468,9 +472,13 @@ xfs_qm_scall_quotaon( (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && (flags & XFS_UQUOTA_ENFD)) || + ((flags & XFS_PQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_OQUOTA_ENFD)) + || ((flags & XFS_GQUOTA_ACCT) == 0 && (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && - (flags & XFS_GQUOTA_ENFD))) { + (flags & XFS_OQUOTA_ENFD))) { qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", flags, mp->m_sb.sb_qflags); return XFS_ERROR(EINVAL); @@ -504,6 +512,10 @@ xfs_qm_scall_quotaon( */ if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != (mp->m_qflags & XFS_UQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != + (mp->m_qflags & XFS_PQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != + (mp->m_qflags & XFS_GQUOTA_ACCT)) || (flags & XFS_ALL_QUOTA_ENFD) == 0) return (0); @@ -521,7 +533,6 @@ xfs_qm_scall_quotaon( } - /* * Return quota status information, such as uquota-off, enforcements, 
etc. */ @@ -606,7 +617,8 @@ xfs_qm_scall_setqlim( if (!capable(CAP_SYS_ADMIN)) return XFS_ERROR(EPERM); - if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0) + if ((newlim->d_fieldmask & + (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) return (0); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); @@ -691,12 +703,23 @@ xfs_qm_scall_setqlim( qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); } + /* + * Update warnings counter(s) if requested + */ + if (newlim->d_fieldmask & FS_DQ_BWARNS) + INT_SET(ddq->d_bwarns, ARCH_CONVERT, newlim->d_bwarns); + if (newlim->d_fieldmask & FS_DQ_IWARNS) + INT_SET(ddq->d_iwarns, ARCH_CONVERT, newlim->d_iwarns); + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, newlim->d_rtbwarns); + if (id == 0) { /* * Timelimits for the super user set the relative time * the other users can be over quota for this file system. * If it is zero a default is used. Ditto for the default - * soft and hard limit values (already done, above). + * soft and hard limit values (already done, above), and + * for warnings. */ if (newlim->d_fieldmask & FS_DQ_BTIMER) { mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; @@ -710,7 +733,13 @@ xfs_qm_scall_setqlim( mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer); } - } else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ { + if (newlim->d_fieldmask & FS_DQ_BWARNS) + mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; + if (newlim->d_fieldmask & FS_DQ_IWARNS) + mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; + } else { /* * If the user is now over quota, start the timelimit. * The user will not be 'warned'. 
@@ -776,9 +805,9 @@ xfs_qm_log_quotaoff_end( xfs_qoff_logitem_t *startqoff, uint flags) { - xfs_trans_t *tp; + xfs_trans_t *tp; int error; - xfs_qoff_logitem_t *qoffi; + xfs_qoff_logitem_t *qoffi; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); @@ -928,18 +957,26 @@ xfs_qm_export_dquot( STATIC uint xfs_qm_import_qtype_flags( - uint uflags) + uint uflags) { + uint oflags = 0; + /* - * Can't be both at the same time. + * Can't be more than one, or none. */ if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == - (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || - ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0)) + (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0)) return (0); - return (uflags & XFS_USER_QUOTA) ? - XFS_DQ_USER : XFS_DQ_GROUP; + oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0; + oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0; + oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0; + return oflags; } STATIC uint @@ -947,14 +984,19 @@ xfs_qm_export_qtype_flags( uint flags) { /* - * Can't be both at the same time. + * Can't be more than one, or none. */ - ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != - (XFS_GROUP_QUOTA | XFS_USER_QUOTA)); - ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0); + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); return (flags & XFS_DQ_USER) ? - XFS_USER_QUOTA : XFS_GROUP_QUOTA; + XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? 
+ XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; } STATIC uint @@ -965,12 +1007,14 @@ xfs_qm_import_flags( if (uflags & XFS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; + if (uflags & XFS_QUOTA_PDQ_ACCT) + flags |= XFS_PQUOTA_ACCT; if (uflags & XFS_QUOTA_GDQ_ACCT) flags |= XFS_GQUOTA_ACCT; if (uflags & XFS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; - if (uflags & XFS_QUOTA_GDQ_ENFD) - flags |= XFS_GQUOTA_ENFD; + if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) + flags |= XFS_OQUOTA_ENFD; return (flags); } @@ -984,12 +1028,16 @@ xfs_qm_export_flags( uflags = 0; if (flags & XFS_UQUOTA_ACCT) uflags |= XFS_QUOTA_UDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= XFS_QUOTA_PDQ_ACCT; if (flags & XFS_GQUOTA_ACCT) uflags |= XFS_QUOTA_GDQ_ACCT; if (flags & XFS_UQUOTA_ENFD) uflags |= XFS_QUOTA_UDQ_ENFD; - if (flags & XFS_GQUOTA_ENFD) - uflags |= XFS_QUOTA_GDQ_ENFD; + if (flags & (XFS_OQUOTA_ENFD)) { + uflags |= (flags & XFS_GQUOTA_ACCT) ? + XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; + } return (uflags); } @@ -1070,7 +1118,7 @@ again: xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } - if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) { + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } @@ -1160,7 +1208,6 @@ xfs_qm_dqtest_print( { cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); - cmn_err(CE_DEBUG, "---- type = %s", XFS_QM_ISUDQ(d)? "USR" : "GRP"); cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", d->d_bcount, (int)d->d_bcount); @@ -1231,7 +1278,7 @@ xfs_dqtest_cmp2( #ifdef QUOTADEBUG if (!err) { cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", - d->d_id, XFS_QM_ISUDQ(d) ? 
"USR" : "GRP", d->q_mount); + d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); } #endif return (err); @@ -1287,6 +1334,7 @@ STATIC void xfs_qm_internalqcheck_get_dquots( xfs_mount_t *mp, xfs_dqid_t uid, + xfs_dqid_t projid, xfs_dqid_t gid, xfs_dqtest_t **ud, xfs_dqtest_t **gd) @@ -1295,6 +1343,8 @@ xfs_qm_internalqcheck_get_dquots( xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); if (XFS_IS_GQUOTA_ON(mp)) xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); + else if (XFS_IS_PQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); } @@ -1362,13 +1412,14 @@ xfs_qm_internalqcheck_adjust( } xfs_qm_internalqcheck_get_dquots(mp, (xfs_dqid_t) ip->i_d.di_uid, + (xfs_dqid_t) ip->i_d.di_projid, (xfs_dqid_t) ip->i_d.di_gid, &ud, &gd); if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(ud); xfs_qm_internalqcheck_dqadjust(ip, ud); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(gd); xfs_qm_internalqcheck_dqadjust(ip, gd); } diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 414b6004af21..bf413e70ec07 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -56,6 +56,7 @@ #define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit) #define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit) #define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit) +#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit) #define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit) #define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) @@ -102,7 +103,8 @@ static inline int XQMISLCKD(struct xfs_dqhash *h) (xfs_Gqm->qm_grp_dqhtable + \ XFS_DQ_HASHVAL(mp, id))) #define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? 
\ - XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp)) + XFS_IS_UQUOTA_ON(mp) : \ + XFS_IS_OQUOTA_ON(mp)) #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ !dqp->q_core.d_blk_hardlimit && \ !dqp->q_core.d_blk_softlimit && \ @@ -177,16 +179,11 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ (!((dqp)->q_core.d_id)) #define XFS_PURGE_INODE(ip) \ - { \ - vmap_t dqvmap; \ - vnode_t *dqvp; \ - dqvp = XFS_ITOV(ip); \ - VMAP(dqvp, dqvmap); \ - VN_RELE(dqvp); \ - } + IRELE(ip); #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???")) + (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ + (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) #define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY") #endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 149b2a1fd949..3b99daf8a640 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -187,7 +187,7 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -void +STATIC void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, @@ -207,12 +207,10 @@ xfs_trans_mod_dquot_byino( if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) { + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); - } - if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) { + if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); - } } STATIC xfs_dqtrx_t * @@ -368,7 +366,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -void +STATIC void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -499,7 +497,7 @@ xfs_trans_apply_dquot_deltas( * Adjust the RT reservation. 
*/ if (qtrx->qt_rtblk_res != 0) { - if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { + if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { if (qtrx->qt_rtblk_res > qtrx->qt_rtblk_res_used) dqp->q_res_rtbcount -= (xfs_qcnt_t) @@ -532,12 +530,6 @@ xfs_trans_apply_dquot_deltas( (xfs_qcnt_t)qtrx->qt_icount_delta; } - -#ifdef QUOTADEBUG - if (qtrx->qt_rtblk_res != 0) - cmn_err(CE_DEBUG, "RT res %d for 0x%p\n", - (int) qtrx->qt_rtblk_res, dqp); -#endif ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT)); ASSERT(dqp->q_res_icount >= @@ -638,7 +630,10 @@ xfs_trans_dqresv( int error; xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; - time_t btimer; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t count; xfs_qcnt_t *resbcountp; xfs_quotainfo_t *q = mp->m_quotainfo; @@ -653,7 +648,9 @@ xfs_trans_dqresv( softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT); if (!softlimit) softlimit = q->qi_bsoftlimit; - btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT); + warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_bcount; } else { ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); @@ -663,7 +660,9 @@ xfs_trans_dqresv( softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT); if (!softlimit) softlimit = q->qi_rtbsoftlimit; - btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_rtbwarns, ARCH_CONVERT); + warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_rtbcount; } error = 0; @@ -693,37 +692,36 @@ xfs_trans_dqresv( * If timer or warnings has expired, * return EDQUOT */ - if ((btimer != 0 && get_seconds() > btimer) || - (dqp->q_core.d_bwarns && - INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >= - XFS_QI_BWARNLIMIT(dqp->q_mount))) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && 
warns >= warnlimit)) { error = EDQUOT; goto error_return; } } } if (ninos > 0) { - hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT); + count = INT_GET(dqp->q_core.d_icount, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT); + warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); + hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, + ARCH_CONVERT); if (!hardlimit) hardlimit = q->qi_ihardlimit; - softlimit = INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT); + softlimit = INT_GET(dqp->q_core.d_ino_softlimit, + ARCH_CONVERT); if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && - INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= hardlimit) { + if (hardlimit > 0ULL && count >= hardlimit) { error = EDQUOT; goto error_return; - } else if (softlimit > 0ULL && - INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= softlimit) { + } else if (softlimit > 0ULL && count >= softlimit) { /* * If timer or warnings has expired, * return EDQUOT */ - if ((dqp->q_core.d_itimer && - get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) || - (dqp->q_core.d_iwarns && - INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >= - XFS_QI_IWARNLIMIT(dqp->q_mount))) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { error = EDQUOT; goto error_return; } diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 7d6e1f37df10..4ed7b6928cd7 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -36,7 +36,6 @@ #include <linux/sched.h> #include <linux/kernel.h> -int doass = 1; static char message[256]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 40b0f4c54d9e..c5b9365a7e2a 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -50,16 +50,11 @@ extern void cmn_err(int, char *, ...); #endif #ifdef DEBUG -# ifdef lint -# define ASSERT(EX) ((void)0) /* avoid 
"constant in conditional" babble */ -# else -# define ASSERT(EX) ((!doass||(EX))?((void)0):assfail(#EX, __FILE__, __LINE__)) -# endif /* lint */ +# define ASSERT(EX) ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__)) #else # define ASSERT(x) ((void)0) #endif -extern int doass; /* dynamically turn off asserts */ extern void assfail(char *, char *, int); #ifdef DEBUG extern unsigned long random(void); diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 36603db10fe9..dcfe19703620 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -59,7 +59,7 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -int +STATIC int xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agnumber_t agno, xfs_agblock_t bno, @@ -2562,7 +2562,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, /* * returns non-zero if any of (agno,bno):len is in a busy list */ -int +STATIC int xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agnumber_t agno, xfs_agblock_t bno, diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index ee8b5904ec7c..a41ad3a5e554 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -71,6 +71,11 @@ * Provide the external interfaces to manage attribute lists. */ +#define ATTR_SYSCOUNT 2 +STATIC struct attrnames posix_acl_access; +STATIC struct attrnames posix_acl_default; +STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT]; + /*======================================================================== * Function prototypes for the kernel. *========================================================================*/ @@ -83,6 +88,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); /* * Internal routines when attribute list is one block. 
*/ +STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context); @@ -90,6 +96,7 @@ STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context); /* * Internal routines when attribute list is more than one block. */ +STATIC int xfs_attr_node_get(xfs_da_args_t *args); STATIC int xfs_attr_node_addname(xfs_da_args_t *args); STATIC int xfs_attr_node_removename(xfs_da_args_t *args); STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context); @@ -1102,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). */ -int +STATIC int xfs_attr_leaf_get(xfs_da_args_t *args) { xfs_dabuf_t *bp; @@ -1707,7 +1714,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) * block, ie: both true Btree attr lists and for single-leaf-blocks with * "remote" values taking up more blocks. 
*/ -int +STATIC int xfs_attr_node_get(xfs_da_args_t *args) { xfs_da_state_t *state; @@ -2398,7 +2405,7 @@ posix_acl_default_exists( return xfs_acl_vhasacl_default(vp); } -struct attrnames posix_acl_access = { +STATIC struct attrnames posix_acl_access = { .attr_name = "posix_acl_access", .attr_namelen = sizeof("posix_acl_access") - 1, .attr_get = posix_acl_access_get, @@ -2407,7 +2414,7 @@ struct attrnames posix_acl_access = { .attr_exists = posix_acl_access_exists, }; -struct attrnames posix_acl_default = { +STATIC struct attrnames posix_acl_default = { .attr_name = "posix_acl_default", .attr_namelen = sizeof("posix_acl_default") - 1, .attr_get = posix_acl_default_get, @@ -2416,7 +2423,7 @@ struct attrnames posix_acl_default = { .attr_exists = posix_acl_default_exists, }; -struct attrnames *attr_system_names[] = +STATIC struct attrnames *attr_system_names[] = { &posix_acl_access, &posix_acl_default }; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 67cd0f5ac1a7..45ab1c542baf 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -76,11 +76,6 @@ extern struct attrnames attr_system; extern struct attrnames attr_trusted; extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; -#define ATTR_SYSCOUNT 2 -extern struct attrnames posix_acl_access; -extern struct attrnames posix_acl_default; -extern struct attrnames *attr_system_names[ATTR_SYSCOUNT]; - extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int); extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *); @@ -184,8 +179,6 @@ int xfs_attr_list(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *); int xfs_attr_inactive(struct xfs_inode *dp); -int xfs_attr_node_get(struct xfs_da_args *); -int xfs_attr_leaf_get(struct xfs_da_args *); int xfs_attr_shortform_getvalue(struct xfs_da_args *); int xfs_attr_fetch(struct xfs_inode *, char *, int, char *, int *, int, struct cred *); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 
b11256e58bf4..1cdd574c63a9 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -79,6 +79,8 @@ /* * Routines used for growing the Btree. */ +STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, + xfs_dabuf_t **bpp); STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args, int freemap_index); STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer); @@ -92,6 +94,16 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state, int *number_usedbytes_in_blk1); /* + * Routines used for shrinking the Btree. + */ +STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dabuf_t *bp, int level); +STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dabuf_t *bp); +STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dablk_t blkno, int blkcnt); + +/* * Utility routines. */ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, @@ -99,6 +111,10 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, xfs_attr_leafblock_t *dst_leaf, int dst_start, int move_count, xfs_mount_t *mp); +STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, + attrnames_t *, char *name, int namelen, + int valuelen); /*======================================================================== @@ -774,7 +790,7 @@ out: * Create the initial contents of a leaf attribute list * or a leaf in a node attribute list. */ -int +STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) { xfs_attr_leafblock_t *leaf; @@ -2209,7 +2225,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) * Calculate the number of bytes used to store the indicated attribute * (whether local or remote only calculate bytes in this block). 
*/ -int +STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) { xfs_attr_leaf_name_local_t *name_loc; @@ -2380,7 +2396,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) * we may be reading them directly out of a user buffer. */ /*ARGSUSED*/ -int +STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, char *name, int namelen, int valuelen) { @@ -2740,7 +2756,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * Recurse (gasp!) through the attribute nodes until we find leaves. * We're doing a depth-first traversal in order to invalidate everything. */ -int +STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, int level) { @@ -2849,7 +2865,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Note that we must release the lock on the buffer so that we are not * caught holding something that the logging code wants to flush to disk. */ -int +STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) { xfs_attr_leafblock_t *leaf; @@ -2934,7 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) * Look at all the extents for this logical region, * invalidate any buffers that are incore/in transactions. */ -int +STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dablk_t blkno, int blkcnt) { diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index b1480e0b3349..0a4cfad6df91 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -261,8 +261,6 @@ int xfs_attr_leaf_flipflags(xfs_da_args_t *args); /* * Routines used for growing the Btree. 
*/ -int xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, - struct xfs_dabuf **bpp); int xfs_attr_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); @@ -284,12 +282,6 @@ void xfs_attr_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, struct xfs_da_state_blk *save_blk); int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); -int xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp, - struct xfs_dabuf *bp, int level); -int xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp, - struct xfs_dabuf *bp); -int xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp, - xfs_dablk_t blkno, int blkcnt); /* * Utility routines. @@ -299,10 +291,6 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, struct xfs_dabuf *leaf2_bp); int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize, int *local); -int xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index); -int xfs_attr_put_listent(struct xfs_attr_list_context *context, - struct attrnames *, char *name, int namelen, - int valuelen); int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index a20a6c3dc13e..76c9ad3875ef 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -45,7 +45,7 @@ /* * Index of high bit number in byte, -1 for none set, 0..7 otherwise. */ -const char xfs_highbit[256] = { +STATIC const char xfs_highbit[256] = { -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 
17 */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index de3162418663..6f5d283888aa 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -301,6 +301,19 @@ xfs_bmap_search_extents( xfs_bmbt_irec_t *gotp, /* out: extent entry found */ xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */ +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + */ +STATIC int /* error */ +xfs_bmap_isaeof( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t off, /* file offset in fsblocks */ + int whichfork, /* data or attribute fork */ + char *aeof); /* return value */ + #ifdef XFS_BMAP_TRACE /* * Add a bmap trace buffer entry. Base routine for the others. @@ -4532,18 +4545,17 @@ xfs_bmapi( xfs_extlen_t alen; /* allocated extent length */ xfs_fileoff_t aoff; /* allocated file offset */ xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ - char contig; /* allocation must be one extent */ xfs_btree_cur_t *cur; /* bmap btree cursor */ - char delay; /* this request is for delayed alloc */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* we've hit the end of extent list */ + char contig; /* allocation must be one extent */ + char delay; /* this request is for delayed alloc */ + char exact; /* don't do all of wasdelayed extent */ xfs_bmbt_rec_t *ep; /* extent list entry pointer */ int error; /* error return */ - char exact; /* don't do all of wasdelayed extent */ xfs_bmbt_irec_t got; /* current extent list record */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extlen_t indlen; /* indirect blocks length */ - char inhole; /* current location is hole in file */ xfs_extnum_t lastx; /* last useful extent number */ int logflags; /* flags for transaction logging */ xfs_extlen_t minleft; /* min blocks 
left after allocation */ @@ -4554,13 +4566,15 @@ xfs_bmapi( xfs_extnum_t nextents; /* number of extents in file */ xfs_fileoff_t obno; /* old block number (offset) */ xfs_bmbt_irec_t prev; /* previous extent list record */ - char stateless; /* ignore state flag set */ int tmp_logflags; /* temp flags holder */ + int whichfork; /* data or attr fork */ + char inhole; /* current location is hole in file */ + char stateless; /* ignore state flag set */ char trim; /* output trimmed to match range */ char userdata; /* allocating non-metadata */ char wasdelay; /* old extent was delayed */ - int whichfork; /* data or attr fork */ char wr; /* this is a write request */ + char rt; /* this is a realtime file */ char rsvd; /* OK to allocate reserved blocks */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4590,6 +4604,7 @@ xfs_bmapi( } if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + rt = XFS_IS_REALTIME_INODE(ip); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); @@ -4694,9 +4709,16 @@ xfs_bmapi( } minlen = contig ? alen : 1; if (delay) { - indlen = (xfs_extlen_t) - xfs_bmap_worst_indlen(ip, alen); - ASSERT(indlen > 0); + xfs_extlen_t extsz = 0; + + /* Figure out the extent size, adjust alen */ + if (rt) { + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + alen = roundup(alen, extsz); + extsz = alen / mp->m_sb.sb_rextsize; + } + /* * Make a transaction-less quota reservation for * delayed allocation blocks. This number gets @@ -4704,8 +4726,10 @@ xfs_bmapi( * We return EDQUOT if we haven't allocated * blks already inside this loop; */ - if (XFS_TRANS_RESERVE_BLKQUOTA( - mp, NULL, ip, (long)alen)) { + if (XFS_TRANS_RESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, + rt ? 
XFS_QMOPT_RES_RTBLKS : + XFS_QMOPT_RES_REGBLKS)) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -4718,40 +4742,34 @@ xfs_bmapi( * Split changing sb for alen and indlen since * they could be coming from different places. */ - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { - xfs_extlen_t extsz; - xfs_extlen_t ralen; - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - ralen = roundup(alen, extsz); - ralen = ralen / mp->m_sb.sb_rextsize; - if (xfs_mod_incore_sb(mp, - XFS_SBS_FREXTENTS, - -(ralen), rsvd)) { - if (XFS_IS_QUOTA_ON(ip->i_mount)) - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, - (long)alen); - break; - } - } else { - if (xfs_mod_incore_sb(mp, - XFS_SBS_FDBLOCKS, - -(alen), rsvd)) { - if (XFS_IS_QUOTA_ON(ip->i_mount)) - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, - (long)alen); - break; - } - } + indlen = (xfs_extlen_t) + xfs_bmap_worst_indlen(ip, alen); + ASSERT(indlen > 0); - if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - -(indlen), rsvd)) { - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, (long)alen); + if (rt) + error = xfs_mod_incore_sb(mp, + XFS_SBS_FREXTENTS, + -(extsz), rsvd); + else + error = xfs_mod_incore_sb(mp, + XFS_SBS_FDBLOCKS, + -(alen), rsvd); + if (!error) + error = xfs_mod_incore_sb(mp, + XFS_SBS_FDBLOCKS, + -(indlen), rsvd); + + if (error) { + if (XFS_IS_QUOTA_ON(ip->i_mount)) + /* unreserve the blocks now */ + XFS_TRANS_UNRESERVE_QUOTA_NBLKS( + mp, NULL, ip, + (long)alen, 0, rt ? + XFS_QMOPT_RES_RTBLKS : + XFS_QMOPT_RES_REGBLKS); break; } + ip->i_delayed_blks += alen; abno = NULLSTARTBLOCK(indlen); } else { @@ -5376,13 +5394,24 @@ xfs_bunmapi( } if (wasdel) { ASSERT(STARTBLOCKVAL(del.br_startblock) > 0); - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - (int)del.br_blockcount, rsvd); - /* Unreserve our quota space */ - XFS_TRANS_RESERVE_QUOTA_NBLKS( - mp, NULL, ip, -((long)del.br_blockcount), 0, - isrt ? 
XFS_QMOPT_RES_RTBLKS : + /* Update realtim/data freespace, unreserve quota */ + if (isrt) { + xfs_filblks_t rtexts; + + rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + (int)rtexts, rsvd); + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, + -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_RTBLKS); + } else { + xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, + (int)del.br_blockcount, rsvd); + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, + -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); + } ip->i_delayed_blks -= del.br_blockcount; if (cur) cur->bc_private.b.flags |= @@ -5714,7 +5743,7 @@ unlock_and_return: * blocks at the end of the file which do not start at the previous data block, * we will try to align the new blocks at stripe unit boundaries. */ -int /* error */ +STATIC int /* error */ xfs_bmap_isaeof( xfs_inode_t *ip, /* incore inode pointer */ xfs_fileoff_t off, /* file offset in fsblocks */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index f1bc22fb26ae..e6d22ec9b2e4 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -332,19 +332,6 @@ xfs_getbmap( int iflags); /* interface flags */ /* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. - */ -int -xfs_bmap_isaeof( - struct xfs_inode *ip, - xfs_fileoff_t off, - int whichfork, - char *aeof); - -/* * Check if the endoff is outside the last extent. 
If so the caller will grow * the allocation to a stripe unit boundary */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 163305a79fcc..09c413576ba8 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2331,20 +2331,6 @@ xfs_bmbt_lookup_ge( return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat); } -int /* error */ -xfs_bmbt_lookup_le( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat); -} - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 843ff12b4bf2..0a40cf126c28 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -580,14 +580,6 @@ xfs_bmbt_lookup_ge( xfs_filblks_t, int *); -int -xfs_bmbt_lookup_le( - struct xfs_btree_cur *, - xfs_fileoff_t, - xfs_fsblock_t, - xfs_filblks_t, - int *); - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 9dd22dd95487..0cc63d657a14 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -90,6 +90,16 @@ xfs_btree_maxrecs( */ /* + * Retrieve the block pointer from the cursor at the given level. + * This may be a bmap btree root or from a buffer. + */ +STATIC xfs_btree_block_t * /* generic btree block pointer */ +xfs_btree_get_block( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree */ + struct xfs_buf **bpp); /* buffer containing the block */ + +/* * Checking routine: return maxrecs for the block. 
*/ STATIC int /* number of records fitting in block */ @@ -497,7 +507,7 @@ xfs_btree_firstrec( * Retrieve the block pointer from the cursor at the given level. * This may be a bmap btree root or from a buffer. */ -xfs_btree_block_t * /* generic btree block pointer */ +STATIC xfs_btree_block_t * /* generic btree block pointer */ xfs_btree_get_block( xfs_btree_cur_t *cur, /* btree cursor */ int level, /* level in btree */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 93872bba41f5..09b4e1532a35 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -325,16 +325,6 @@ xfs_btree_firstrec( int level); /* level to change */ /* - * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. - */ -xfs_btree_block_t * /* generic btree block pointer */ -xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree */ - struct xfs_buf **bpp); /* buffer containing the block */ - -/* * Get a buffer for the block, return it with no data read. * Long-form addressing. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 9ab0039f07df..30b8285ad476 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -172,7 +172,7 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); * * If the XFS_BLI_STALE flag has been set, then log nothing. */ -uint +STATIC uint xfs_buf_item_size( xfs_buf_log_item_t *bip) { @@ -240,7 +240,7 @@ xfs_buf_item_size( * format structure, and the rest point to contiguous chunks * within the buffer. */ -void +STATIC void xfs_buf_item_format( xfs_buf_log_item_t *bip, xfs_log_iovec_t *log_vector) @@ -365,7 +365,7 @@ xfs_buf_item_format( * item in memory so it cannot be written out. Simply call bpin() * on the buffer to do this. 
*/ -void +STATIC void xfs_buf_item_pin( xfs_buf_log_item_t *bip) { @@ -391,7 +391,7 @@ xfs_buf_item_pin( * If the XFS_BLI_STALE flag is set and we are the last reference, * then free up the buf log item and unlock the buffer. */ -void +STATIC void xfs_buf_item_unpin( xfs_buf_log_item_t *bip, int stale) @@ -446,7 +446,7 @@ xfs_buf_item_unpin( * so we need to free the item's descriptor (that points to the item) * in the transaction. */ -void +STATIC void xfs_buf_item_unpin_remove( xfs_buf_log_item_t *bip, xfs_trans_t *tp) @@ -493,7 +493,7 @@ xfs_buf_item_unpin_remove( * the lock right away, return 0. If we can get the lock, pull the * buffer from the free list, mark it busy, and return 1. */ -uint +STATIC uint xfs_buf_item_trylock( xfs_buf_log_item_t *bip) { @@ -537,7 +537,7 @@ xfs_buf_item_trylock( * This is for support of xfs_trans_bhold(). Make sure the * XFS_BLI_HOLD field is cleared if we don't free the item. */ -void +STATIC void xfs_buf_item_unlock( xfs_buf_log_item_t *bip) { @@ -635,7 +635,7 @@ xfs_buf_item_unlock( * by returning the original lsn of that transaction here rather than * the current one. */ -xfs_lsn_t +STATIC xfs_lsn_t xfs_buf_item_committed( xfs_buf_log_item_t *bip, xfs_lsn_t lsn) @@ -654,7 +654,7 @@ xfs_buf_item_committed( * and have aborted this transaction, we'll trap this buffer when it tries to * get written out. */ -void +STATIC void xfs_buf_item_abort( xfs_buf_log_item_t *bip) { @@ -674,7 +674,7 @@ xfs_buf_item_abort( * B_DELWRI set, then get it going out to disk with a call to bawrite(). * If not, then just release the buffer. */ -void +STATIC void xfs_buf_item_push( xfs_buf_log_item_t *bip) { @@ -693,7 +693,7 @@ xfs_buf_item_push( } /* ARGSUSED */ -void +STATIC void xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) { } @@ -701,7 +701,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) /* * This is the ops vector shared by all buf log items. 
*/ -struct xfs_item_ops xfs_buf_item_ops = { +STATIC struct xfs_item_ops xfs_buf_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_buf_item_format, diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 5f1b0c9308f6..01aed5f2d579 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -80,7 +80,7 @@ typedef struct xfs_buf_log_format_t { * user or group dquots and may require special recovery handling. */ #define XFS_BLI_UDQUOT_BUF 0x4 -/* #define XFS_BLI_PDQUOT_BUF 0x8 */ +#define XFS_BLI_PDQUOT_BUF 0x8 #define XFS_BLI_GDQUOT_BUF 0x10 #define XFS_BLI_CHUNK 128 diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index d7fe28866764..8b792ddf2164 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -113,7 +113,10 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); - +STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, + xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk); +STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); /*======================================================================== * Routines used for growing the Btree. @@ -1424,7 +1427,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) /* * Unlink a block from a doubly linked list of blocks. */ -int /* error */ +STATIC int /* error */ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk) { @@ -2381,7 +2384,7 @@ xfs_da_state_alloc(void) /* * Kill the altpath contents of a da-state structure. 
*/ -void +STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state) { int i; diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 9fc699d96995..3a9b9e809c60 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -296,8 +296,6 @@ int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, /* * Utility routines. */ -int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, - xfs_da_state_blk_t *save_blk); int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_state_blk_t *new_blk); @@ -320,7 +318,6 @@ uint xfs_da_hashname(uchar_t *name_string, int name_length); uint xfs_da_log2_roundup(uint i); xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); -void xfs_da_state_kill_altpath(xfs_da_state_t *state); void xfs_da_buf_done(xfs_dabuf_t *dabuf); void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first, diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 63abdc2ac7f4..681be5c93af5 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -180,9 +180,10 @@ xfs_swapext( goto error0; } - if (VN_CACHED(tvp) != 0) - xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore), - (xfs_off_t)0, 0, 0); + if (VN_CACHED(tvp) != 0) { + xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); + VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED); + } /* Verify O_DIRECT for ftmp */ if (VN_CACHED(tvp) != 0) { diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index db9887a107de..a0aa0e44ff9d 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -304,7 +304,7 @@ xfs_dir2_data_freeinsert( /* * Remove a bestfree entry from the table. 
*/ -void +STATIC void xfs_dir2_data_freeremove( xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 3f02294ccff0..476cac920bf5 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -193,10 +193,6 @@ extern xfs_dir2_data_free_t * xfs_dir2_data_unused_t *dup, int *loghead); extern void - xfs_dir2_data_freeremove(xfs_dir2_data_t *d, - xfs_dir2_data_free_t *dfp, int *loghead); - -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, int *loghead, char *aendp); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 262d1e86df30..056f5283904b 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -77,6 +77,10 @@ static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp); #endif static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp, int *indexp, xfs_dabuf_t **dbpp); +static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); +static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); + /* * Convert a block form directory to a leaf form directory. @@ -1214,7 +1218,7 @@ xfs_dir2_leaf_init( /* * Log the bests entries indicated from a leaf1 block. */ -void +static void xfs_dir2_leaf_log_bests( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp, /* leaf buffer */ @@ -1278,7 +1282,7 @@ xfs_dir2_leaf_log_header( /* * Log the tail of the leaf1 block. 
*/ -void +STATIC void xfs_dir2_leaf_log_tail( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp) /* leaf buffer */ diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index 7f20eee56a52..3303cd6f4c00 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -330,15 +330,8 @@ extern void int first, int last); extern void - xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); - -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp); -extern void - xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); - extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index 617018d6bbdc..c2ea6171fb0e 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -91,6 +91,10 @@ STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state, int *number_entries_in_blk1, int *number_namebytes_in_blk1); +STATIC int xfs_dir_leaf_create(struct xfs_da_args *args, + xfs_dablk_t which_block, + struct xfs_dabuf **bpp); + /* * Utility routines. */ @@ -781,7 +785,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args) * Create the initial contents of a leaf directory * or a leaf in a node directory. */ -int +STATIC int xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) { xfs_dir_leafblock_t *leaf; diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h index 00d68d33cc7a..dd423ce1bc8d 100644 --- a/fs/xfs/xfs_dir_leaf.h +++ b/fs/xfs/xfs_dir_leaf.h @@ -202,8 +202,6 @@ int xfs_dir_leaf_to_shortform(struct xfs_da_args *args); /* * Routines used for growing the Btree. 
*/ -int xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, - struct xfs_dabuf **bpp); int xfs_dir_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55ae3e67d245..55c17adaaa37 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -166,27 +166,32 @@ typedef enum { #define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ #define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ #define DM_FLAGS_ISEM 0x004 /* thread holds i_sem */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21) -/* i_alloc_sem was added in 2.4.22-pre1 */ #define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ #define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ -#endif -#endif /* * Based on IO_ISDIRECT, decide which i_ flag is set. */ -#ifdef DM_FLAGS_IALLOCSEM_RD +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) +#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ + DM_FLAGS_ISEM : 0) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM) #define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) -#else +#endif + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ 0 : DM_FLAGS_ISEM) #define DM_SEM_FLAG_WR (DM_FLAGS_ISEM) #endif + /* * Macros to turn caller specified delay/block flags into * dm_send_xxxx_event flag DM_FLAGS_NDELAY. 
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index bbe1dea11c08..dcd3fdd5c1f7 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -280,7 +280,7 @@ xfs_error_report( } } -void +STATIC void xfs_hex_dump(void *p, int length) { __uint8_t *uip = (__uint8_t*)p; diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 6bc0535c0a65..52ee2b90b5ed 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -73,9 +73,6 @@ xfs_corruption_error( int linenum, inst_t *ra); -extern void -xfs_hex_dump(void *p, int length); - #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) #define XFS_CORRUPTION_ERROR(e, lvl, mp, mem) \ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 5eafd5b63211..db7cbd1bc857 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -59,6 +59,18 @@ STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *); STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *); +void +xfs_efi_item_free(xfs_efi_log_item_t *efip) +{ + int nexts = efip->efi_format.efi_nextents; + + if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { + kmem_free(efip, sizeof(xfs_efi_log_item_t) + + (nexts - 1) * sizeof(xfs_extent_t)); + } else { + kmem_zone_free(xfs_efi_zone, efip); + } +} /* * This returns the number of iovecs needed to log the given efi item. @@ -120,8 +132,6 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - int nexts; - int size; xfs_mount_t *mp; SPLDECL(s); @@ -132,21 +142,11 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) * xfs_trans_delete_ail() drops the AIL lock. 
*/ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; AIL_UNLOCK(mp, s); } - - return; } /* @@ -159,8 +159,6 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - int nexts; - int size; xfs_mount_t *mp; xfs_log_item_desc_t *lidp; SPLDECL(s); @@ -178,23 +176,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - /* - * now free the item itself - */ - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; AIL_UNLOCK(mp, s); } - - return; } /* @@ -245,18 +231,7 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *efip) { - int nexts; - int size; - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } - return; + xfs_efi_item_free(efip); } /* @@ -288,7 +263,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) /* * This is the ops vector shared by all efi log items. 
*/ -struct xfs_item_ops xfs_efi_item_ops = { +STATIC struct xfs_item_ops xfs_efi_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_efi_item_format, @@ -355,8 +330,6 @@ xfs_efi_release(xfs_efi_log_item_t *efip, { xfs_mount_t *mp; int extents_left; - uint size; - int nexts; SPLDECL(s); mp = efip->efi_item.li_mountp; @@ -372,20 +345,10 @@ xfs_efi_release(xfs_efi_log_item_t *efip, * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_efi_item_free(efip); } else { AIL_UNLOCK(mp, s); } - - if (extents_left == 0) { - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } - } } /* @@ -398,8 +361,6 @@ STATIC void xfs_efi_cancel( xfs_efi_log_item_t *efip) { - int nexts; - int size; xfs_mount_t *mp; SPLDECL(s); @@ -410,26 +371,25 @@ xfs_efi_cancel( * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_CANCELED; AIL_UNLOCK(mp, s); } - - return; } +STATIC void +xfs_efd_item_free(xfs_efd_log_item_t *efdp) +{ + int nexts = efdp->efd_format.efd_nextents; - - + if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { + kmem_free(efdp, sizeof(xfs_efd_log_item_t) + + (nexts - 1) * sizeof(xfs_extent_t)); + } else { + kmem_zone_free(xfs_efd_zone, efdp); + } +} /* * This returns the number of iovecs needed to log the given efd item. 
@@ -533,9 +493,6 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) STATIC xfs_lsn_t xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) { - uint size; - int nexts; - /* * If we got a log I/O error, it's always the case that the LR with the * EFI got unpinned and freed before the EFD got aborted. @@ -543,15 +500,7 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); - nexts = efdp->efd_format.efd_nextents; - if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efd_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efdp, size); - } else { - kmem_zone_free(xfs_efd_zone, efdp); - } - + xfs_efd_item_free(efdp); return (xfs_lsn_t)-1; } @@ -565,9 +514,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *efdp) { - int nexts; - int size; - /* * If we got a log I/O error, it's always the case that the LR with the * EFI got unpinned and freed before the EFD got aborted. So don't @@ -576,15 +522,7 @@ xfs_efd_item_abort(xfs_efd_log_item_t *efdp) if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) xfs_efi_cancel(efdp->efd_efip); - nexts = efdp->efd_format.efd_nextents; - if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efd_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efdp, size); - } else { - kmem_zone_free(xfs_efd_zone, efdp); - } - return; + xfs_efd_item_free(efdp); } /* @@ -615,7 +553,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) /* * This is the ops vector shared by all efd log items. 
*/ -struct xfs_item_ops xfs_efd_item_ops = { +STATIC struct xfs_item_ops xfs_efd_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_efd_item_format, diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 7122d6101d15..d433bac9f59d 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -118,6 +118,8 @@ xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, uint); +void xfs_efi_item_free(xfs_efi_log_item_t *); + #endif /* __KERNEL__ */ #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 6ee8443bf9d3..095af0a5cff3 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -60,7 +60,8 @@ struct fsxattr { __u32 fsx_xflags; /* xflags field value (get/set) */ __u32 fsx_extsize; /* extsize field value (get/set)*/ __u32 fsx_nextents; /* nextents field value (get) */ - unsigned char fsx_pad[16]; + __u32 fsx_projid; /* project identifier (get/set) */ + unsigned char fsx_pad[12]; }; #endif diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 21213057c27f..ca535d613190 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -559,32 +559,6 @@ xfs_reserve_blocks( return(0); } -void -xfs_fs_log_dummy(xfs_mount_t *mp) -{ - xfs_trans_t *tp; - xfs_inode_t *ip; - - - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - atomic_inc(&mp->m_active_trans); - if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { - xfs_trans_cancel(tp, 0); - return; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - xfs_trans_commit(tp, 0, NULL); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); -} - int xfs_fs_goingdown( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 
803c4d17a057..44be188674a6 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -100,9 +100,13 @@ xfs_inofree_t xfs_inobt_mask(int i); #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE) int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i); -#define XFS_INOBT_IS_FREE(rp,i) xfs_inobt_is_free(rp,i) +#define XFS_INOBT_IS_FREE(rp,i) xfs_inobt_is_free(rp,i) +#define XFS_INOBT_IS_FREE_DISK(rp,i) xfs_inobt_is_free_disk(rp,i) #else -#define XFS_INOBT_IS_FREE(rp,i) (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) +#define XFS_INOBT_IS_FREE(rp,i) \ + (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) +#define XFS_INOBT_IS_FREE_DISK(rp,i) \ + ((INT_GET((rp)->ir_free, ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE) void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc8c8c7f9039..34bdf5909687 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -146,51 +146,6 @@ xfs_inobp_check( #endif /* - * called from bwrite on xfs inode buffers - */ -void -xfs_inobp_bwcheck(xfs_buf_t *bp) -{ - xfs_mount_t *mp; - int i; - int j; - xfs_dinode_t *dip; - - ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL); - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - - - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - - for (i = 0; i < j; i++) { - dip = (xfs_dinode_t *) xfs_buf_offset(bp, - i * mp->m_sb.sb_inodesize); - if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { - cmn_err(CE_WARN, -"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x", - INT_GET(dip->di_core.di_magic, ARCH_CONVERT), - (__uint64_t)(__psunsigned_t) bp, - (__int64_t) XFS_BUF_ADDR(bp), - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); - xfs_fs_cmn_err(CE_WARN, mp, - "corrupt, unmount and run xfs_repair"); - } - if (!dip->di_next_unlinked) { - cmn_err(CE_WARN, -"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting 
blockno %Ld, offset 0x%x", - (__uint64_t)(__psunsigned_t) bp, - (__int64_t) XFS_BUF_ADDR(bp), - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); - xfs_fs_cmn_err(CE_WARN, mp, - "corrupt, unmount and run xfs_repair"); - } - } - - return; -} - -/* * This routine is called to map an inode number within a file * system to the buffer containing the on-disk version of the * inode. It returns a pointer to the buffer containing the @@ -203,7 +158,7 @@ xfs_inobp_bwcheck(xfs_buf_t *bp) * Use xfs_imap() to determine the size and location of the * buffer to read from disk. */ -int +STATIC int xfs_inotobp( xfs_mount_t *mp, xfs_trans_t *tp, @@ -1247,26 +1202,32 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { - if ((mode & S_IFMT) == S_IFDIR) { - ip->i_d.di_flags |= XFS_DIFLAG_RTINHERIT; - } else { - ip->i_d.di_flags |= XFS_DIFLAG_REALTIME; + uint di_flags = 0; + + if ((mode & S_IFMT) == S_IFDIR) { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + } else { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { + di_flags |= XFS_DIFLAG_REALTIME; ip->i_iocore.io_flags |= XFS_IOCORE_RT; } } if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && xfs_inherit_noatime) - ip->i_d.di_flags |= XFS_DIFLAG_NOATIME; + di_flags |= XFS_DIFLAG_NOATIME; if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && xfs_inherit_nodump) - ip->i_d.di_flags |= XFS_DIFLAG_NODUMP; + di_flags |= XFS_DIFLAG_NODUMP; if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && xfs_inherit_sync) - ip->i_d.di_flags |= XFS_DIFLAG_SYNC; + di_flags |= XFS_DIFLAG_SYNC; if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && xfs_inherit_nosymlinks) - ip->i_d.di_flags |= XFS_DIFLAG_NOSYMLINKS; + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ case S_IFLNK: @@ -2156,7 +2117,7 @@ static __inline__ int 
xfs_inode_clean(xfs_inode_t *ip) (ip->i_update_core == 0)); } -void +STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, xfs_trans_t *tp, @@ -2875,7 +2836,7 @@ xfs_iunpin( * be subsequently pinned once someone is waiting for it to be * unpinned. */ -void +STATIC void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -3601,107 +3562,43 @@ corrupt_out: /* - * Flush all inactive inodes in mp. Return true if no user references - * were found, false otherwise. + * Flush all inactive inodes in mp. */ -int +void xfs_iflush_all( - xfs_mount_t *mp, - int flag) + xfs_mount_t *mp) { - int busy; - int done; - int purged; xfs_inode_t *ip; - vmap_t vmap; vnode_t *vp; - busy = done = 0; - while (!done) { - purged = 0; - XFS_MOUNT_ILOCK(mp); - ip = mp->m_inodes; - if (ip == NULL) { - break; - } - do { - /* Make sure we skip markers inserted by sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - /* - * It's up to our caller to purge the root - * and quota vnodes later. - */ - vp = XFS_ITOV_NULL(ip); - - if (!vp) { - XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); - purged = 1; - break; - } + again: + XFS_MOUNT_ILOCK(mp); + ip = mp->m_inodes; + if (ip == NULL) + goto out; - if (vn_count(vp) != 0) { - if (vn_count(vp) == 1 && - (ip == mp->m_rootip || - (mp->m_quotainfo && - (ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino)))) { + do { + /* Make sure we skip markers inserted by sync */ + if (ip->i_mount == NULL) { + ip = ip->i_mnext; + continue; + } - ip = ip->i_mnext; - continue; - } - if (!(flag & XFS_FLUSH_ALL)) { - busy = 1; - done = 1; - break; - } - /* - * Ignore busy inodes but continue flushing - * others. - */ - ip = ip->i_mnext; - continue; - } - /* - * Sample vp mapping while holding mp locked on MP - * systems, so we don't purge a reclaimed or - * nonexistent vnode. We break from the loop - * since we know that we modify - * it by pulling ourselves from it in xfs_reclaim() - * called via vn_purge() below. 
Set ip to the next - * entry in the list anyway so we'll know below - * whether we reached the end or not. - */ - VMAP(vp, vmap); + vp = XFS_ITOV_NULL(ip); + if (!vp) { XFS_MOUNT_IUNLOCK(mp); + xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); + goto again; + } - vn_purge(vp, &vmap); + ASSERT(vn_count(vp) == 0); - purged = 1; - break; - } while (ip != mp->m_inodes); - /* - * We need to distinguish between when we exit the loop - * after a purge and when we simply hit the end of the - * list. We can't use the (ip == mp->m_inodes) test, - * because when we purge an inode at the start of the list - * the next inode on the list becomes mp->m_inodes. That - * would cause such a test to bail out early. The purged - * variable tells us how we got out of the loop. - */ - if (!purged) { - done = 1; - } - } + ip = ip->i_mnext; + } while (ip != mp->m_inodes); + out: XFS_MOUNT_IUNLOCK(mp); - return !busy; } - /* * xfs_iaccess: check accessibility of inode for mode. */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 37e1c316f3b6..54d9e54c7c95 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -412,11 +412,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); #define XFS_IFLUSH_DELWRI 5 /* - * Flags for xfs_iflush_all. - */ -#define XFS_FLUSH_ALL 0x1 - -/* * Flags for xfs_itruncate_start(). */ #define XFS_ITRUNC_DEFINITE 0x1 @@ -487,8 +482,6 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); /* * xfs_inode.c prototypes. 
*/ -int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_dinode_t **, struct xfs_buf **, int *); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, xfs_daddr_t); @@ -522,7 +515,7 @@ void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); -int xfs_iflush_all(struct xfs_mount *, int); +void xfs_iflush_all(struct xfs_mount *); int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *); uint xfs_iroundup(uint); void xfs_ichgtime(xfs_inode_t *, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 768cb1816b8e..0eed30f5cb19 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -910,7 +910,7 @@ xfs_inode_item_committing( /* * This is the ops vector shared by all buf log items. */ -struct xfs_item_ops xfs_inode_item_ops = { +STATIC struct xfs_item_ops xfs_inode_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_inode_item_format, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 469e1a7939d4..2edd6769e5d3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -385,15 +385,15 @@ xfs_iomap_write_direct( int nimaps, maps; int error; int bmapi_flag; + int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; xfs_bmap_free_t free_list; int aeof; - xfs_filblks_t datablocks; + xfs_filblks_t datablocks, qblocks, resblks; int committed; int numrtextents; - uint resblks; /* * Make sure that the dquots are there. 
This doesn't hold @@ -419,7 +419,6 @@ xfs_iomap_write_direct( xfs_fileoff_t map_last_fsb; map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff; - if (map_last_fsb < last_fsb) { last_fsb = map_last_fsb; count_fsb = last_fsb - offset_fsb; @@ -428,56 +427,47 @@ xfs_iomap_write_direct( } /* - * determine if reserving space on - * the data or realtime partition. + * Determine if reserving space on the data or realtime partition. */ if ((rt = XFS_IS_REALTIME_INODE(ip))) { - int sbrtextsize, iprtextsize; + xfs_extlen_t extsz; - sbrtextsize = mp->m_sb.sb_rextsize; - iprtextsize = - ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize; - numrtextents = (count_fsb + iprtextsize - 1); - do_div(numrtextents, sbrtextsize); + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + numrtextents = qblocks = (count_fsb + extsz - 1); + do_div(numrtextents, mp->m_sb.sb_rextsize); + quota_flag = XFS_QMOPT_RES_RTBLKS; datablocks = 0; } else { - datablocks = count_fsb; + datablocks = qblocks = count_fsb; + quota_flag = XFS_QMOPT_RES_REGBLKS; numrtextents = 0; } /* - * allocate and setup the transaction + * Allocate and setup the transaction */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); - error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), numrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); /* - * check for running out of space + * Check for running out of space, note: need lock to return */ if (error) - /* - * Free the transaction structure. - */ xfs_trans_cancel(tp, 0); - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (error) - goto error_out; /* Don't return in above if .. 
trans .., - need lock to return */ + goto error_out; - if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) { + if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) { error = (EDQUOT); goto error1; } - nimaps = 1; bmapi_flag = XFS_BMAPI_WRITE; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -487,31 +477,29 @@ xfs_iomap_write_direct( bmapi_flag |= XFS_BMAPI_PREALLOC; /* - * issue the bmapi() call to allocate the blocks + * Issue the bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); + nimaps = 1; imapp = &imap[0]; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list); - if (error) { + if (error) goto error0; - } /* - * complete the transaction + * Complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); - if (error) { + if (error) goto error0; - } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + if (error) goto error_out; - } - /* copy any maps to caller's array and return any error. */ + /* + * Copy any maps to caller's array and return any error. 
+ */ if (nimaps == 0) { error = (ENOSPC); goto error_out; @@ -530,10 +518,11 @@ xfs_iomap_write_direct( } return 0; - error0: /* Cancel bmap, unlock inode, and cancel trans */ +error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); - error1: /* Just cancel transaction */ +error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); *nmaps = 0; /* nothing set-up here */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 092d5fb096b1..1cd2ac163877 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -134,7 +134,7 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #define xlog_verify_tail_lsn(a,b,c) #endif -int xlog_iclogs_empty(xlog_t *log); +STATIC int xlog_iclogs_empty(xlog_t *log); #ifdef DEBUG int xlog_do_error = 0; @@ -1857,7 +1857,7 @@ xlog_write(xfs_mount_t * mp, * * State Change: DIRTY -> ACTIVE */ -void +STATIC void xlog_state_clean_log(xlog_t *log) { xlog_in_core_t *iclog; @@ -3542,7 +3542,7 @@ xfs_log_force_umount( return (retval); } -int +STATIC int xlog_iclogs_empty(xlog_t *log) { xlog_in_core_t *iclog; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c31e3ce3be66..1a1d452f15f9 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -535,7 +535,6 @@ typedef struct log { /* common routines */ extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -extern int xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk); extern int xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk, @@ -548,7 +547,6 @@ extern void xlog_recover_process_iunlinks(xlog_t *log); extern struct xfs_buf *xlog_get_bp(xlog_t *, int); extern void xlog_put_bp(struct xfs_buf *); extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); -extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); /* iclog tracing */ #define 
XLOG_TRACE_GRAB_FLUSH 1 diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9824b5bf0ec0..0aac28ddb81c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -148,7 +148,7 @@ xlog_bread( * The buffer is kept locked across the write and is returned locked. * This can only be used for synchronous log writes. */ -int +STATIC int xlog_bwrite( xlog_t *log, xfs_daddr_t blk_no, @@ -179,7 +179,7 @@ xlog_bwrite( return error; } -xfs_caddr_t +STATIC xfs_caddr_t xlog_align( xlog_t *log, xfs_daddr_t blk_no, @@ -528,7 +528,7 @@ out: * * Return: zero if normal, non-zero if error. */ -int +STATIC int xlog_find_head( xlog_t *log, xfs_daddr_t *return_head_blk) @@ -1964,7 +1964,8 @@ xlog_recover_do_reg_buffer( * probably a good thing to do for other buf types also. */ error = 0; - if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { + if (buf_f->blf_flags & + (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, @@ -2030,6 +2031,7 @@ xfs_qm_dqcheck( } if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER && + INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_PROJ && INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) { if (flags & XFS_QMOPT_DOWARN) cmn_err(CE_ALERT, @@ -2135,6 +2137,8 @@ xlog_recover_do_dquot_buffer( type = 0; if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) type |= XFS_DQ_USER; + if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) + type |= XFS_DQ_PROJ; if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) type |= XFS_DQ_GROUP; /* @@ -2247,7 +2251,8 @@ xlog_recover_do_buffer_trans( error = 0; if (flags & XFS_BLI_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); - } else if (flags & (XFS_BLI_UDQUOT_BUF | XFS_BLI_GDQUOT_BUF)) { + } else if (flags & + (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f); @@ 
-2619,7 +2624,7 @@ xlog_recover_do_dquot_trans( * This type of quotas was turned off, so ignore this record. */ type = INT_GET(recddq->d_flags, ARCH_CONVERT) & - (XFS_DQ_USER | XFS_DQ_GROUP); + (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) return (0); @@ -2742,7 +2747,6 @@ xlog_recover_do_efd_trans( xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; int gen; - int nexts; __uint64_t efi_id; SPLDECL(s); @@ -2777,22 +2781,15 @@ xlog_recover_do_efd_trans( } lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - if (lip == NULL) { - AIL_UNLOCK(mp, s); - } /* * If we found it, then free it up. If it wasn't there, it * must have been overwritten in the log. Oh well. */ if (lip != NULL) { - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - kmem_free(lip, sizeof(xfs_efi_log_item_t) + - ((nexts - 1) * sizeof(xfs_extent_t))); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); + } else { + AIL_UNLOCK(mp, s); } } diff --git a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c index ce4f46c6b3ab..698c2cd62858 100644 --- a/fs/xfs/xfs_macros.c +++ b/fs/xfs/xfs_macros.c @@ -1658,6 +1658,11 @@ xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i) { return XFS_INOBT_IS_FREE(rp, i); } +int +xfs_inobt_is_free_disk(xfs_inobt_rec_t *rp, int i) +{ + return XFS_INOBT_IS_FREE_DISK(rp, i); +} #endif #if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_LAST_REC) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2ec967d93e5a..82e1646e6243 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -64,6 +64,7 @@ STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); STATIC int xfs_uuid_mount(xfs_mount_t *); STATIC void xfs_uuid_unmount(xfs_mount_t *mp); +STATIC void xfs_unmountfs_wait(xfs_mount_t *); static struct { short offset; @@ -555,7 +556,7 @@ xfs_readsb(xfs_mount_t *mp) * fields from the superblock associated with the given * mount structure */ -void +STATIC void 
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) { int i; @@ -1081,7 +1082,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) int64_t fsid; #endif - xfs_iflush_all(mp, XFS_FLUSH_ALL); + xfs_iflush_all(mp); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING); @@ -1111,15 +1112,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) */ ASSERT(mp->m_inodes == NULL); - /* - * We may have bufs that are in the process of getting written still. - * We must wait for the I/O completion of those. The sync flag here - * does a two pass iteration thru the bufcache. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */ - } - xfs_unmountfs_close(mp, cr); if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) xfs_uuid_unmount(mp); @@ -1146,7 +1138,7 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) xfs_free_buftarg(mp->m_ddev_targp, 0); } -void +STATIC void xfs_unmountfs_wait(xfs_mount_t *mp) { if (mp->m_logdev_targp != mp->m_ddev_targp) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 30dd08fb9f57..5affba38a577 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -141,7 +141,7 @@ typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); typedef void (*xfs_dqdetach_t)(struct xfs_inode *); typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, - struct xfs_inode *, uid_t, gid_t, uint, + struct xfs_inode *, uid_t, gid_t, prid_t, uint, struct xfs_dquot **, struct xfs_dquot **); typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot *, struct xfs_dquot *); @@ -185,8 +185,8 @@ typedef struct xfs_qmops { (*(mp)->m_qm_ops.xfs_dqdetach)(ip) #define XFS_QM_DQPURGEALL(mp, fl) \ (*(mp)->m_qm_ops.xfs_dqpurgeall)(mp, fl) -#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, fl, dq1, dq2) \ - (*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, fl, dq1, dq2) +#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ + 
(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) #define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ (*(mp)->m_qm_ops.xfs_dqvopcreate)(tp, ip, dq1, dq2) #define XFS_QM_DQVOPRENAME(mp, ip) \ @@ -544,7 +544,6 @@ extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); extern int xfs_unmountfs(xfs_mount_t *, struct cred *); -extern void xfs_unmountfs_wait(xfs_mount_t *); extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_unmount_flush(xfs_mount_t *, int); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 703ec4efcb41..7134576ae7fa 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -96,7 +96,7 @@ typedef struct xfs_dqblk { * flags for q_flags field in the dquot. */ #define XFS_DQ_USER 0x0001 /* a user quota */ -/* #define XFS_DQ_PROJ 0x0002 -- project quota (IRIX) */ +#define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ #define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ #define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ @@ -104,6 +104,8 @@ typedef struct xfs_dqblk { #define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */ #define XFS_DQ_MARKER 0x0080 /* sentinel */ +#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) + /* * In the worst case, when both user and group quotas are on, * we can have a max of three dquots changing in a single transaction. 
@@ -124,7 +126,7 @@ typedef struct xfs_dqblk { typedef struct xfs_dq_logformat { __uint16_t qlf_type; /* dquot log item type */ __uint16_t qlf_size; /* size of this item */ - xfs_dqid_t qlf_id; /* usr/grp id number : 32 bits */ + xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ __int64_t qlf_blkno; /* blkno of dquot buffer */ __int32_t qlf_len; /* len of dquot buffer */ __uint32_t qlf_boffset; /* off of dquot in buffer */ @@ -152,9 +154,9 @@ typedef struct xfs_qoff_logformat { #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ -#define XFS_PQUOTA_ACCT 0x0008 /* (IRIX) project quota accounting ON */ -#define XFS_GQUOTA_ENFD 0x0010 /* group quota limits enforced */ -#define XFS_GQUOTA_CHKD 0x0020 /* quotacheck run on grp quotas */ +#define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ +#define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ +#define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ /* @@ -162,17 +164,22 @@ typedef struct xfs_qoff_logformat { * are in the process of getting turned off. These flags are in m_qflags but * never in sb_qflags. */ -#define XFS_UQUOTA_ACTIVE 0x0080 /* uquotas are being turned off */ -#define XFS_GQUOTA_ACTIVE 0x0100 /* gquotas are being turned off */ +#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ +#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ +#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ /* * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees * quota will be not be switched off as long as that inode lock is held. 
*/ #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE)) + XFS_GQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE)) +#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE)) #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) +#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) /* * Flags to tell various functions what to do. Not all of these are meaningful @@ -182,7 +189,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_DQLOCK 0x0000001 /* dqlock */ #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ -#define XFS_QMOPT_GQUOTA 0x0000008 /* group dquot requested */ +#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ @@ -192,6 +199,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if necessary */ #define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot, if damaged. 
*/ +#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be @@ -231,7 +239,8 @@ typedef struct xfs_qoff_logformat { #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT -#define XFS_QMOPT_QUOTALL (XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA) +#define XFS_QMOPT_QUOTALL \ + (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) #ifdef __KERNEL__ @@ -246,21 +255,33 @@ typedef struct xfs_qoff_logformat { */ #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ (ip)->i_udquot == NULL) || \ - (XFS_IS_GQUOTA_ON(mp) && \ + (XFS_IS_OQUOTA_ON(mp) && \ (ip)->i_gdquot == NULL)) -#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \ - (mp->m_sb.sb_qflags & \ - XFS_UQUOTA_CHKD) == 0) || \ - (XFS_IS_GQUOTA_ON(mp) && \ - (mp->m_sb.sb_qflags & \ - XFS_GQUOTA_CHKD) == 0)) +#define XFS_QM_NEED_QUOTACHECK(mp) \ + ((XFS_IS_UQUOTA_ON(mp) && \ + (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ + (XFS_IS_GQUOTA_ON(mp) && \ + ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ + (XFS_IS_PQUOTA_ON(mp) && \ + ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) + +#define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) + +#define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ + XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ - XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD) + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ + XFS_GQUOTA_ACCT) #define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE) + XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) /* @@ -331,15 +352,8 @@ typedef struct xfs_dqtrxops { 
#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) -#define XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \ - XFS_QMOPT_RES_REGBLKS) -#define XFS_TRANS_RESERVE_BLKQUOTA_FORCE(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \ - XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES) -#define XFS_TRANS_UNRESERVE_BLKQUOTA(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), 0, \ - XFS_QMOPT_RES_REGBLKS) +#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) #define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index cb13f9a1d45b..23b48ac1cb7e 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -234,9 +234,6 @@ xfs_lock_for_rename( return 0; } - -int rename_which_error_return = 0; - /* * xfs_rename */ @@ -316,7 +313,6 @@ xfs_rename( &num_inodes); if (error) { - rename_which_error_return = __LINE__; /* * We have nothing locked, no inode references, and * no transaction, so just get out. 
@@ -332,7 +328,6 @@ xfs_rename( */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { - rename_which_error_return = __LINE__; error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; @@ -359,7 +354,6 @@ xfs_rename( XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { - rename_which_error_return = __LINE__; xfs_trans_cancel(tp, 0); goto rele_return; } @@ -369,7 +363,6 @@ xfs_rename( */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); - rename_which_error_return = __LINE__; goto rele_return; } @@ -413,7 +406,6 @@ xfs_rename( if (spaceres == 0 && (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name, target_namelen))) { - rename_which_error_return = __LINE__; goto error_return; } /* @@ -425,11 +417,9 @@ xfs_rename( target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) { - rename_which_error_return = __LINE__; goto error_return; } if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -437,7 +427,6 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -455,7 +444,6 @@ xfs_rename( if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); - rename_which_error_return = __LINE__; goto error_return; } } @@ -473,7 +461,6 @@ xfs_rename( target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -484,7 +471,6 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } target_ip_dropped = 1; @@ -495,7 +481,6 @@ xfs_rename( */ error = xfs_droplink(tp, 
target_ip); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -519,7 +504,6 @@ xfs_rename( &free_list, spaceres); ASSERT(error != EEXIST); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -550,7 +534,6 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -558,7 +541,6 @@ xfs_rename( error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 3db0e2200775..06dfca531f79 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -332,25 +332,6 @@ undo_blocks: /* - * This is called to set the a callback to be called when the given - * transaction is committed to disk. The transaction pointer and the - * argument pointer will be passed to the callback routine. - * - * Only one callback can be associated with any single transaction. - */ -void -xfs_trans_callback( - xfs_trans_t *tp, - xfs_trans_callback_t callback, - void *arg) -{ - ASSERT(tp->t_callback == NULL); - tp->t_callback = callback; - tp->t_callarg = arg; -} - - -/* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. * For now, just store the change in the transaction structure. @@ -551,7 +532,7 @@ xfs_trans_apply_sb_deltas( * * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). 
*/ -void +STATIC void xfs_trans_unreserve_and_mod_sb( xfs_trans_t *tp) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index bd37ccb85e76..ec541d66fa2a 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -987,8 +987,6 @@ xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); -void xfs_trans_callback(xfs_trans_t *, - void (*)(xfs_trans_t *, void *), void *); void xfs_trans_mod_sb(xfs_trans_t *, uint, long); struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t, int, uint); @@ -1010,7 +1008,6 @@ int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, xfs_ino_t , uint, uint, struct xfs_inode **); void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); -void xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *); void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index a9682b9510c1..144da7a85466 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -976,6 +976,7 @@ xfs_trans_dquot_buf( ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(type == XFS_BLI_UDQUOT_BUF || + type == XFS_BLI_PDQUOT_BUF || type == XFS_BLI_GDQUOT_BUF); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index e2c3706f453d..7e7631ca4979 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -253,24 +253,6 @@ xfs_trans_ihold( ip->i_itemp->ili_flags |= XFS_ILI_HOLD; } -/* - * Cancel the previous inode hold request made on this inode - * for this transaction. 
- */ -/*ARGSUSED*/ -void -xfs_trans_ihold_release( - xfs_trans_t *tp, - xfs_inode_t *ip) -{ - ASSERT(ip->i_transp == tp); - ASSERT(ip->i_itemp != NULL); - ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); - ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); - - ip->i_itemp->ili_flags &= ~XFS_ILI_HOLD; -} - /* * This is called to mark the fields indicated in fieldmask as needing diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index e4bf711e48ff..16f5371ce102 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -55,7 +55,7 @@ typedef signed long long int __int64_t; typedef unsigned long long int __uint64_t; typedef enum { B_FALSE,B_TRUE } boolean_t; -typedef __int64_t prid_t; /* project ID */ +typedef __uint32_t prid_t; /* project ID */ typedef __uint32_t inst_t; /* an instruction */ typedef __s64 xfs_off_t; /* <file offset> type */ diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index d1f8146a06ea..11351f08d438 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -428,7 +428,7 @@ xfs_truncate_file( if (ip->i_ino != mp->m_sb.sb_uquotino) ASSERT(ip->i_udquot); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { if (ip->i_ino != mp->m_sb.sb_gquotino) ASSERT(ip->i_gdquot); } diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b53736650100..42bcc0215203 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -368,16 +368,6 @@ xfs_finish_flags( } /* - * disallow mount attempts with (IRIX) project quota enabled - */ - if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) { - cmn_err(CE_WARN, - "XFS: cannot mount a filesystem with IRIX project quota enabled"); - return XFS_ERROR(ENOSYS); - } - - /* * check for shared mount. 
*/ if (ap->flags & XFSMNT_SHARED) { @@ -622,7 +612,34 @@ out: return XFS_ERROR(error); } -#define REMOUNT_READONLY_FLAGS (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) +STATIC int +xfs_quiesce_fs( + xfs_mount_t *mp) +{ + int count = 0, pincount; + + xfs_refcache_purge_mp(mp); + xfs_flush_buftarg(mp->m_ddev_targp, 0); + xfs_finish_reclaim_all(mp, 0); + + /* This loop must run at least twice. + * The first instance of the loop will flush + * most meta data but that will generate more + * meta data (typically directory updates). + * Which then must be flushed and logged before + * we can write the unmount record. + */ + do { + xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); + + return 0; +} STATIC int xfs_mntupdate( @@ -632,8 +649,7 @@ xfs_mntupdate( { struct vfs *vfsp = bhvtovfs(bdp); xfs_mount_t *mp = XFS_BHVTOM(bdp); - int pincount, error; - int count = 0; + int error; if (args->flags & XFSMNT_NOATIME) mp->m_flags |= XFS_MOUNT_NOATIME; @@ -645,25 +661,7 @@ xfs_mntupdate( } if (*flags & MS_RDONLY) { - xfs_refcache_purge_mp(mp); - xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0); - - /* This loop must run at least twice. - * The first instance of the loop will flush - * most meta data but that will generate more - * meta data (typically directory updates). - * Which then must be flushed and logged before - * we can write the unmount record. 
- */ - do { - VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error); - pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); - if (!pincount) { - delay(50); - count++; - } - } while (count < 2); + xfs_quiesce_fs(mp); /* Ok now write out an unmount record */ xfs_log_unmount_write(mp); @@ -879,10 +877,12 @@ xfs_sync( int flags, cred_t *credp) { - xfs_mount_t *mp; + xfs_mount_t *mp = XFS_BHVTOM(bdp); - mp = XFS_BHVTOM(bdp); - return (xfs_syncsub(mp, flags, 0, NULL)); + if (unlikely(flags == SYNC_QUIESCE)) + return xfs_quiesce_fs(mp); + else + return xfs_syncsub(mp, flags, 0, NULL); } /* @@ -1681,7 +1681,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base) return simple_strtoul(cp, endp, base) << shift_left_factor; } -int +STATIC int xfs_parseargs( struct bhv_desc *bhv, char *options, @@ -1867,7 +1867,7 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n"); return 0; } -int +STATIC int xfs_showargs( struct bhv_desc *bhv, struct seq_file *m) diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 25a526629b12..1377c868f3f4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -351,21 +351,28 @@ xfs_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. 
*/ - if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) { + if (XFS_IS_QUOTA_ON(mp) && + (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; - if (mask & XFS_AT_UID) { + if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = ip->i_d.di_uid; } - if (mask & XFS_AT_GID) { + if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = vap->va_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = ip->i_d.di_gid; } + if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { + projid = vap->va_projid; + qflags |= XFS_QMOPT_PQUOTA; + } else { + projid = ip->i_d.di_projid; + } /* * We take a reference when we initialize udqp and gdqp, * so it is important that we never blindly double trip on @@ -373,7 +380,8 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp); + code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, + &udqp, &gdqp); if (code) return (code); } @@ -499,8 +507,6 @@ xfs_setattr( * that the group ID supplied to the chown() function * shall be equal to either the group ID or one of the * supplementary group IDs of the calling process. - * - * XXX: How does restricted_chown affect projid? */ if (restricted_chown && (iuid != uid || (igid != gid && @@ -510,10 +516,11 @@ xfs_setattr( goto error_return; } /* - * Do a quota reservation only if uid or gid is actually + * Do a quota reservation only if uid/projid/gid is actually * going to change. 
*/ if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, @@ -774,6 +781,7 @@ xfs_setattr( } if (igid != gid) { if (XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & XFS_AT_GID); ASSERT(gdqp); olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, @@ -782,6 +790,13 @@ xfs_setattr( ip->i_d.di_gid = gid; } if (iprojid != projid) { + if (XFS_IS_PQUOTA_ON(mp)) { + ASSERT(!XFS_IS_GQUOTA_ON(mp)); + ASSERT(mask & XFS_AT_PROJID); + ASSERT(gdqp); + olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + &ip->i_gdquot, gdqp); + } ip->i_d.di_projid = projid; /* * We may have to rev the inode as well as @@ -843,6 +858,8 @@ xfs_setattr( di_flags |= XFS_DIFLAG_NOATIME; if (vap->va_xflags & XFS_XFLAG_NODUMP) di_flags |= XFS_DIFLAG_NODUMP; + if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; @@ -1898,7 +1915,9 @@ xfs_create( /* Return through std_return after this point. */ udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -1907,7 +1926,7 @@ xfs_create( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2604,17 +2623,7 @@ xfs_link( if (src_vp->v_type == VDIR) return XFS_ERROR(EPERM); - /* - * For now, manually find the XFS behavior descriptor for - * the source vnode. If it doesn't exist then something - * is wrong and we should just return an error. 
- * Eventually we need to figure out how link is going to - * work in the face of stacked vnodes. - */ src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); - if (src_bdp == NULL) { - return XFS_ERROR(EXDEV); - } sip = XFS_BHVTOI(src_bdp); tdp = XFS_BHVTOI(target_dir_bdp); mp = tdp->i_mount; @@ -2681,6 +2690,17 @@ xfs_link( goto error_return; } + /* + * If we are using project inheritance, we only allow hard link + * creation in our tree when the project IDs are the same; else + * the tree quota mechanism could be circumvented. + */ + if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (tdp->i_d.di_projid != sip->i_d.di_projid))) { + error = XFS_ERROR(EPERM); + goto error_return; + } + if (resblks == 0 && (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, target_namelen))) @@ -2803,7 +2823,9 @@ xfs_mkdir( mp = dp->i_mount; udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -2812,7 +2834,7 @@ xfs_mkdir( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -3357,7 +3379,9 @@ xfs_symlink( /* Return through std_return after this point. */ udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -3366,7 +3390,7 @@ xfs_symlink( * Make sure that we have allocated dquot(s) on disk. 
*/ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -4028,7 +4052,7 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) * errno on error * */ -int +STATIC int xfs_alloc_file_space( xfs_inode_t *ip, xfs_off_t offset, @@ -4151,9 +4175,8 @@ retry: break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + error = XFS_TRANS_RESERVE_QUOTA(mp, tp, + ip->i_udquot, ip->i_gdquot, resblks, 0, 0); if (error) goto error1; @@ -4305,6 +4328,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { + vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4325,9 +4349,11 @@ xfs_free_file_space( xfs_trans_t *tp; int need_iolock = 1; - vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); + vp = XFS_ITOV(ip); mp = ip->i_mount; + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4344,7 +4370,7 @@ xfs_free_file_space( DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { if (end_dmi_offset > ip->i_d.di_size) end_dmi_offset = ip->i_d.di_size; - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); if (error) @@ -4363,7 +4389,14 @@ xfs_free_file_space( ioffset = offset & ~(rounding - 1); if (ilen & (rounding - 1)) ilen = (ilen + rounding) & ~(rounding - 1); - xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0); + + if (VN_CACHED(vp) != 0) { + xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, + ctooff(offtoct(ioffset)), -1); + VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), + -1, FI_REMAPF_LOCKED); + } + /* * Need to zero the stuff we're not freeing, 
on disk. * If its a realtime file & can't use unwritten extents then we |