diff options
99 files changed, 2322 insertions, 1370 deletions
diff --git a/.gitignore b/.gitignore index 8363e48cdcdc..fdcce40226d7 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,5 @@ cscope.* *.orig *.rej +*~ +\#*# diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 6015347b41e2..866b9cd9a959 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -1,4 +1,8 @@ -Memory Controller +Memory Resource Controller + +NOTE: The Memory Resource Controller has generically been referred +to as the memory controller in this document. Do not confuse the memory controller +used here with the memory controller that is used in hardware. Salient features @@ -152,7 +156,7 @@ The memory controller uses the following hierarchy a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_CONT +c. Enable CONFIG_CGROUP_MEM_RES_CTLR 1. Prepare the cgroups # mkdir -p /cgroups @@ -164,7 +168,7 @@ c. Enable CONFIG_CGROUP_MEM_CONT Since now we're in the 0 cgroup, We can alter the memory limit: -# echo -n 4M > /cgroups/0/memory.limit_in_bytes +# echo 4M > /cgroups/0/memory.limit_in_bytes NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, mega or gigabytes. @@ -185,7 +189,7 @@ number of factors, such as rounding up to page boundaries or the total availability of memory on the system. The user is required to re-read this file after a write to guarantee the value committed by the kernel. -# echo -n 1 > memory.limit_in_bytes +# echo 1 > memory.limit_in_bytes # cat memory.limit_in_bytes 4096 @@ -197,7 +201,7 @@ caches, RSS and Active pages/Inactive pages are shown. The memory.force_empty gives an interface to drop *all* charges by force. -# echo -n 1 > memory.force_empty +# echo 1 > memory.force_empty will drop all charges in cgroup. Currently, this is maintained for test.
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 8da724e2a0ff..54630095aa3c 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -2,6 +2,9 @@ GPIO Interfaces This provides an overview of GPIO access conventions on Linux. +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + What is a GPIO? =============== @@ -69,11 +72,13 @@ in this document, but drivers acting as clients to the GPIO interface must not care how it's implemented.) That said, if the convention is supported on their platform, drivers should -use it when possible. Platforms should declare GENERIC_GPIO support in -Kconfig (boolean true), which multi-platform drivers can depend on when -using the include file: +use it when possible. Platforms must declare GENERIC_GPIO support in their +Kconfig (boolean true), and provide an <asm/gpio.h> file. Drivers that can't +work without standard GPIO calls should have Kconfig entries which depend +on GENERIC_GPIO. The GPIO calls are available, either as "real code" or as +optimized-away stubs, when drivers use the include file: - #include <asm/gpio.h> + #include <linux/gpio.h> If you stick to this convention then it'll be easier for other developers to see what your code is doing, and help maintain it. @@ -316,6 +321,9 @@ pulldowns integrated on some platforms. Not all platforms support them, or support them in the same way; and any given board might use external pullups (or pulldowns) so that the on-chip ones should not be used. (When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) are +platform-specific issues, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. There are other system-specific mechanisms that are not specified here, like the aforementioned options for input de-glitching and wire-OR output.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 83f515c2905a..be89f393274f 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -192,7 +192,8 @@ code mapping. The Kprobes API includes a "register" function and an "unregister" function for each type of probe. Here are terse, mini-man-page specifications for these functions and the associated probe handlers -that you'll write. See the latter half of this document for examples. +that you'll write. See the files in the samples/kprobes/ sub-directory +for examples. 4.1 register_kprobe @@ -420,249 +421,15 @@ e. Watchpoint probes (which fire on data references). 8. Kprobes Example -Here's a sample kernel module showing the use of kprobes to dump a -stack trace and selected i386 registers when do_fork() is called. ------ cut here ----- -/*kprobe_example.c*/ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/kprobes.h> -#include <linux/sched.h> - -/*For each probe you need to allocate a kprobe structure*/ -static struct kprobe kp; - -/*kprobe pre_handler: called just before the probed instruction is executed*/ -int handler_pre(struct kprobe *p, struct pt_regs *regs) -{ - printk("pre_handler: p->addr=0x%p, eip=%lx, eflags=0x%lx\n", - p->addr, regs->eip, regs->eflags); - dump_stack(); - return 0; -} - -/*kprobe post_handler: called after the probed instruction is executed*/ -void handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags) -{ - printk("post_handler: p->addr=0x%p, eflags=0x%lx\n", - p->addr, regs->eflags); -} - -/* fault_handler: this is called if an exception is generated for any - * instruction within the pre- or post-handler, or when Kprobes - * single-steps the probed instruction. - */ -int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) -{ - printk("fault_handler: p->addr=0x%p, trap #%dn", - p->addr, trapnr); - /* Return 0 because we don't handle the fault. 
*/ - return 0; -} - -static int __init kprobe_init(void) -{ - int ret; - kp.pre_handler = handler_pre; - kp.post_handler = handler_post; - kp.fault_handler = handler_fault; - kp.symbol_name = "do_fork"; - - ret = register_kprobe(&kp); - if (ret < 0) { - printk("register_kprobe failed, returned %d\n", ret); - return ret; - } - printk("kprobe registered\n"); - return 0; -} - -static void __exit kprobe_exit(void) -{ - unregister_kprobe(&kp); - printk("kprobe unregistered\n"); -} - -module_init(kprobe_init) -module_exit(kprobe_exit) -MODULE_LICENSE("GPL"); ------ cut here ----- - -You can build the kernel module, kprobe-example.ko, using the following -Makefile: ------ cut here ----- -obj-m := kprobe-example.o -KDIR := /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) -default: - $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules -clean: - rm -f *.mod.c *.ko *.o ------ cut here ----- - -$ make -$ su - -... -# insmod kprobe-example.ko - -You will see the trace data in /var/log/messages and on the console -whenever do_fork() is invoked to create a new process. +See samples/kprobes/kprobe_example.c 9. Jprobes Example -Here's a sample kernel module showing the use of jprobes to dump -the arguments of do_fork(). ------ cut here ----- -/*jprobe-example.c */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/uio.h> -#include <linux/kprobes.h> - -/* - * Jumper probe for do_fork. - * Mirror principle enables access to arguments of the probed routine - * from the probe handler. - */ - -/* Proxy routine having the same arguments as actual do_fork() routine */ -long jdo_fork(unsigned long clone_flags, unsigned long stack_start, - struct pt_regs *regs, unsigned long stack_size, - int __user * parent_tidptr, int __user * child_tidptr) -{ - printk("jprobe: clone_flags=0x%lx, stack_size=0x%lx, regs=0x%p\n", - clone_flags, stack_size, regs); - /* Always end with a call to jprobe_return(). 
*/ - jprobe_return(); - /*NOTREACHED*/ - return 0; -} - -static struct jprobe my_jprobe = { - .entry = jdo_fork -}; - -static int __init jprobe_init(void) -{ - int ret; - my_jprobe.kp.symbol_name = "do_fork"; - - if ((ret = register_jprobe(&my_jprobe)) <0) { - printk("register_jprobe failed, returned %d\n", ret); - return -1; - } - printk("Planted jprobe at %p, handler addr %p\n", - my_jprobe.kp.addr, my_jprobe.entry); - return 0; -} - -static void __exit jprobe_exit(void) -{ - unregister_jprobe(&my_jprobe); - printk("jprobe unregistered\n"); -} - -module_init(jprobe_init) -module_exit(jprobe_exit) -MODULE_LICENSE("GPL"); ------ cut here ----- - -Build and insert the kernel module as shown in the above kprobe -example. You will see the trace data in /var/log/messages and on -the console whenever do_fork() is invoked to create a new process. -(Some messages may be suppressed if syslogd is configured to -eliminate duplicate messages.) +See samples/kprobes/jprobe_example.c 10. Kretprobes Example -Here's a sample kernel module showing the use of return probes to -report failed calls to sys_open(). ------ cut here ----- -/*kretprobe-example.c*/ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/kprobes.h> -#include <linux/ktime.h> - -/* per-instance private data */ -struct my_data { - ktime_t entry_stamp; -}; - -static const char *probed_func = "sys_open"; - -/* Timestamp function entry. */ -static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - struct my_data *data; - - if(!current->mm) - return 1; /* skip kernel threads */ - - data = (struct my_data *)ri->data; - data->entry_stamp = ktime_get(); - return 0; -} - -/* If the probed function failed, log the return value and duration. - * Duration may turn out to be zero consistently, depending upon the - * granularity of time accounting on the platform. 
*/ -static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - int retval = regs_return_value(regs); - struct my_data *data = (struct my_data *)ri->data; - s64 delta; - ktime_t now; - - if (retval < 0) { - now = ktime_get(); - delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); - printk("%s: return val = %d (duration = %lld ns)\n", - probed_func, retval, delta); - } - return 0; -} - -static struct kretprobe my_kretprobe = { - .handler = return_handler, - .entry_handler = entry_handler, - .data_size = sizeof(struct my_data), - .maxactive = 20, /* probe up to 20 instances concurrently */ -}; - -static int __init kretprobe_init(void) -{ - int ret; - my_kretprobe.kp.symbol_name = (char *)probed_func; - - if ((ret = register_kretprobe(&my_kretprobe)) < 0) { - printk("register_kretprobe failed, returned %d\n", ret); - return -1; - } - printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name); - return 0; -} - -static void __exit kretprobe_exit(void) -{ - unregister_kretprobe(&my_kretprobe); - printk("kretprobe unregistered\n"); - /* nmissed > 0 suggests that maxactive was set too low. */ - printk("Missed probing %d instances of %s\n", - my_kretprobe.nmissed, probed_func); -} - -module_init(kretprobe_init) -module_exit(kretprobe_exit) -MODULE_LICENSE("GPL"); ------ cut here ----- - -Build and insert the kernel module as shown in the above kprobe -example. You will see the trace data in /var/log/messages and on the -console whenever sys_open() returns a negative value. (Some messages -may be suppressed if syslogd is configured to eliminate duplicate -messages.) 
+See samples/kprobes/kretprobe_example.c For additional information on Kprobes, refer to the following URLs: http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe diff --git a/MAINTAINERS b/MAINTAINERS index a0f78e764329..558636e3a954 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1138,6 +1138,12 @@ L: accessrunner-general@lists.sourceforge.net W: http://accessrunner.sourceforge.net/ S: Maintained +CONTROL GROUPS (CGROUPS) +P: Paul Menage +M: menage@google.com +L: containers@lists.linux-foundation.org +S: Maintained + CORETEMP HARDWARE MONITORING DRIVER P: Rudolf Marek M: r.marek@assembler.cz @@ -2633,6 +2639,17 @@ L: linux-kernel@vger.kernel.org W: http://www.linux-mm.org S: Maintained +MEMORY RESOURCE CONTROLLER +P: Balbir Singh +M: balbir@linux.vnet.ibm.com +P: Pavel Emelyanov +M: xemul@openvz.org +P: KAMEZAWA Hiroyuki +M: kamezawa.hiroyu@jp.fujitsu.com +L: linux-mm@kvack.org +L: linux-kernel@vger.kernel.org +S: Maintained + MEI MN10300/AM33 PORT P: David Howells M: dhowells@redhat.com diff --git a/arch/Kconfig b/arch/Kconfig index 3d72dc3fc8f5..694c9af520bb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -27,5 +27,12 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". 
+config KRETPROBES + def_bool y + depends on KPROBES && HAVE_KRETPROBES + config HAVE_KPROBES def_bool n + +config HAVE_KRETPROBES + def_bool n diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 26d3789dfdd0..be6fa105cd34 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -31,7 +31,6 @@ #endif #define DEBUG_NODIRECT 0 -#define DEBUG_FORCEDAC 0 #define ISA_DMA_MASK 0x00ffffff @@ -126,39 +125,67 @@ iommu_arena_new(struct pci_controller *hose, dma_addr_t base, return iommu_arena_new_node(0, hose, base, window_size, align); } +static inline int is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) +{ + shift = (shift + index) & (boundary_size - 1); + return shift + nr > boundary_size; +} + /* Must be called with the arena lock held */ static long -iommu_arena_find_pages(struct pci_iommu_arena *arena, long n, long mask) +iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, + long n, long mask) { unsigned long *ptes; long i, p, nent; + int pass = 0; + unsigned long base; + unsigned long boundary_size; + + BUG_ON(arena->dma_base & ~PAGE_MASK); + base = arena->dma_base >> PAGE_SHIFT; + if (dev) + boundary_size = ALIGN(dma_get_max_seg_size(dev) + 1, PAGE_SIZE) + >> PAGE_SHIFT; + else + boundary_size = ALIGN(1UL << 32, PAGE_SIZE) >> PAGE_SHIFT; + + BUG_ON(!is_power_of_2(boundary_size)); /* Search forward for the first mask-aligned sequence of N free ptes */ ptes = arena->ptes; nent = arena->size >> PAGE_SHIFT; - p = (arena->next_entry + mask) & ~mask; + p = ALIGN(arena->next_entry, mask + 1); i = 0; + +again: while (i < n && p+i < nent) { + if (!i && is_span_boundary(p, n, base, boundary_size)) { + p = ALIGN(p + 1, mask + 1); + goto again; + } + if (ptes[p+i]) - p = (p + i + 1 + mask) & ~mask, i = 0; + p = ALIGN(p + i + 1, mask + 1), i = 0; else i = i + 1; } if (i < n) { - /* Reached the end. 
Flush the TLB and restart the - search from the beginning. */ - alpha_mv.mv_pci_tbi(arena->hose, 0, -1); - - p = 0, i = 0; - while (i < n && p+i < nent) { - if (ptes[p+i]) - p = (p + i + 1 + mask) & ~mask, i = 0; - else - i = i + 1; - } - - if (i < n) + if (pass < 1) { + /* + * Reached the end. Flush the TLB and restart + * the search from the beginning. + */ + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + + pass++; + p = 0; + i = 0; + goto again; + } else return -1; } @@ -168,7 +195,8 @@ iommu_arena_find_pages(struct pci_iommu_arena *arena, long n, long mask) } static long -iommu_arena_alloc(struct pci_iommu_arena *arena, long n, unsigned int align) +iommu_arena_alloc(struct device *dev, struct pci_iommu_arena *arena, long n, + unsigned int align) { unsigned long flags; unsigned long *ptes; @@ -179,7 +207,7 @@ iommu_arena_alloc(struct pci_iommu_arena *arena, long n, unsigned int align) /* Search for N empty ptes */ ptes = arena->ptes; mask = max(align, arena->align_entry) - 1; - p = iommu_arena_find_pages(arena, n, mask); + p = iommu_arena_find_pages(dev, arena, n, mask); if (p < 0) { spin_unlock_irqrestore(&arena->lock, flags); return -1; @@ -229,6 +257,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, unsigned long paddr; dma_addr_t ret; unsigned int align = 0; + struct device *dev = pdev ? &pdev->dev : NULL; paddr = __pa(cpu_addr); @@ -276,7 +305,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, /* Force allocation to 64KB boundary for ISA bridges. 
*/ if (pdev && pdev == isa_bridge) align = 8; - dma_ofs = iommu_arena_alloc(arena, npages, align); + dma_ofs = iommu_arena_alloc(dev, arena, npages, align); if (dma_ofs < 0) { printk(KERN_WARNING "pci_map_single failed: " "could not allocate dma page tables\n"); @@ -563,7 +592,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, paddr &= ~PAGE_MASK; npages = calc_npages(paddr + size); - dma_ofs = iommu_arena_alloc(arena, npages, 0); + dma_ofs = iommu_arena_alloc(dev, arena, npages, 0); if (dma_ofs < 0) { /* If we attempted a direct map above but failed, die. */ if (leader->dma_address == 0) @@ -830,7 +859,7 @@ iommu_reserve(struct pci_iommu_arena *arena, long pg_count, long align_mask) /* Search for N empty ptes. */ ptes = arena->ptes; - p = iommu_arena_find_pages(arena, pg_count, align_mask); + p = iommu_arena_find_pages(NULL, arena, pg_count, align_mask); if (p < 0) { spin_unlock_irqrestore(&arena->lock, flags); return -1; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 16b82e1272b0..955fc53c1c01 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,6 +12,7 @@ config ARM select SYS_SUPPORTS_APM_EMULATION select HAVE_OPROFILE select HAVE_KPROBES if (!XIP_KERNEL) + select HAVE_KRETPROBES if (HAVE_KPROBES) help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 9310a7b476e9..525483f0ddf8 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -13,7 +13,7 @@ #include <linux/swap.h> #include <linux/sched.h> #include <linux/init.h> -#include <linux/vmstat.h> +#include <linux/mm.h> #include <asm/arch/svinto.h> #include <asm/types.h> #include <asm/signal.h> diff --git a/arch/cris/arch-v10/lib/string.c b/arch/cris/arch-v10/lib/string.c index 7161a2bef4fe..c7bd6ebdc93c 100644 --- a/arch/cris/arch-v10/lib/string.c +++ 
b/arch/cris/arch-v10/lib/string.c @@ -1,55 +1,59 @@ -/*#************************************************************************#*/ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# FUNCTION NAME: memcpy() */ -/*# */ -/*# PARAMETERS: void* dst; Destination address. */ -/*# void* src; Source address. */ -/*# int len; Number of bytes to copy. */ -/*# */ -/*# RETURNS: dst. */ -/*# */ -/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */ -/*# about copying of overlapping memory areas. This routine is */ -/*# very sensitive to compiler changes in register allocation. */ -/*# Should really be rewritten to avoid this problem. */ -/*# */ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# HISTORY */ -/*# */ -/*# DATE NAME CHANGES */ -/*# ---- ---- ------- */ -/*# 941007 Kenny R Creation */ -/*# 941011 Kenny R Lots of optimizations and inlining. */ -/*# 941129 Ulf A Adapted for use in libc. */ -/*# 950216 HP N==0 forgotten if non-aligned src/dst. */ -/*# Added some optimizations. */ -/*# 001025 HP Make src and dst char *. Align dst to */ -/*# dword, not just word-if-both-src-and-dst- */ -/*# are-misaligned. */ -/*# */ -/*#-------------------------------------------------------------------------*/ - -#include <linux/types.h> - -void *memcpy(void *pdst, - const void *psrc, - size_t pn) +/* A memcpy for CRIS. + Copyright (C) 1994-2005 Axis Communications. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Neither the name of Axis Communications nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS + COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. */ + +/* FIXME: This file should really only be used for reference, as the + result is somewhat depending on gcc generating what we expect rather + than what we describe. An assembly file should be used instead. */ + +#include <stddef.h> + +/* Break even between movem and move16 is really at 38.7 * 2, but + modulo 44, so up to the next multiple of 44, we use ordinary code. */ +#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) + +/* No name ambiguities in this file. */ +__asm__ (".syntax no_register_prefix"); + +void * +memcpy(void *pdst, const void *psrc, size_t pn) { - /* Ok. Now we want the parameters put in special registers. + /* Now we want the parameters put in special registers. Make sure the compiler is able to make something useful of this. - As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). + As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). - If gcc was alright, it really would need no temporaries, and no - stack space to save stuff on. */ + If gcc was allright, it really would need no temporaries, and no + stack space to save stuff on. 
*/ register void *return_dst __asm__ ("r10") = pdst; - register char *dst __asm__ ("r13") = pdst; - register const char *src __asm__ ("r11") = psrc; + register unsigned char *dst __asm__ ("r13") = pdst; + register unsigned const char *src __asm__ ("r11") = psrc; register int n __asm__ ("r12") = pn; - - + /* When src is aligned but not dst, this makes a few extra needless cycles. I believe it would take as many to check that the re-alignment was unnecessary. */ @@ -59,167 +63,174 @@ void *memcpy(void *pdst, && n >= 3) { if ((unsigned long) dst & 1) - { - n--; - *(char*)dst = *(char*)src; - src++; - dst++; - } + { + n--; + *dst = *src; + src++; + dst++; + } if ((unsigned long) dst & 2) - { - n -= 2; - *(short*)dst = *(short*)src; - src += 2; - dst += 2; - } + { + n -= 2; + *(short *) dst = *(short *) src; + src += 2; + dst += 2; + } } - /* Decide which copying method to use. */ - if (n >= 44*2) /* Break even between movem and - move16 is at 38.7*2, but modulo 44. */ - { - /* For large copies we use 'movem' */ - - /* It is not optimal to tell the compiler about clobbering any - registers; that will move the saving/restoring of those registers - to the function prologue/epilogue, and make non-movem sizes - suboptimal. - - This method is not foolproof; it assumes that the "asm reg" - declarations at the beginning of the function really are used - here (beware: they may be moved to temporary registers). - This way, we do not have to save/move the registers around into - temporaries; we can safely use them straight away. - - If you want to check that the allocation was right; then - check the equalities in the first comment. It should say - "r13=r13, r11=r11, r12=r12" */ - __asm__ volatile ("\n\ - ;; Check that the following is true (same register names on \n\ - ;; both sides of equal sign, as in r8=r8): \n\ - ;; %0=r13, %1=r11, %2=r12 \n\ - ;; \n\ - ;; Save the registers we'll use in the movem process \n\ - ;; on the stack. 
\n\ - subq 11*4,$sp \n\ - movem $r10,[$sp] \n\ + /* Decide which copying method to use. */ + if (n >= MEMCPY_BY_BLOCK_THRESHOLD) + { + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-movem sizes + suboptimal. */ + __asm__ volatile + ("\ + ;; GCC does promise correct register allocations, but let's \n\ + ;; make sure it keeps its promises. \n\ + .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ + .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\ + .endif \n\ + \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ \n\ - ;; Now we've got this: \n\ - ;; r11 - src \n\ - ;; r13 - dst \n\ - ;; r12 - n \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ \n\ - ;; Update n for the first loop \n\ - subq 44,$r12 \n\ + ;; Update n for the first loop. \n\ + subq 44,r12 \n\ 0: \n\ - movem [$r11+],$r10 \n\ - subq 44,$r12 \n\ - bge 0b \n\ - movem $r10,[$r13+] \n\ +" +#ifdef __arch_common_v10_v32 + /* Cater to branch offset difference between v32 and v10. We + assume the branch below has an 8-bit offset. */ +" setf\n" +#endif +" movem [r11+],r10 \n\ + subq 44,r12 \n\ + bge 0b \n\ + movem r10,[r13+] \n\ \n\ - addq 44,$r12 ;; compensate for last loop underflowing n \n\ + ;; Compensate for last loop underflowing n. \n\ + addq 44,r12 \n\ \n\ - ;; Restore registers from stack \n\ - movem [$sp+],$r10" + ;; Restore registers from stack. \n\ + movem [sp+],r10" - /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) - /* Inputs */ : "0" (dst), "1" (src), "2" (n)); - - } + /* Outputs. */ + : "=r" (dst), "=r" (src), "=r" (n) - /* Either we directly starts copying, using dword copying - in a loop, or we copy as much as possible with 'movem' - and then the last block (<44 bytes) is copied here. - This will work since 'movem' will have updated src,dst,n. */ + /* Inputs. 
*/ + : "0" (dst), "1" (src), "2" (n)); + } - while ( n >= 16 ) - { - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - n -= 16; - } + while (n >= 16) + { + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + + n -= 16; + } - /* A switch() is definitely the fastest although it takes a LOT of code. - * Particularly if you inline code this. - */ switch (n) - { + { case 0: break; + case 1: - *(char*)dst = *(char*)src; + *dst = *src; break; + case 2: - *(short*)dst = *(short*)src; + *(short *) dst = *(short *) src; break; + case 3: - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 4: - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; break; + case 5: - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 6: - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 7: - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 8: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 9: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = 
*src; break; + case 10: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 11: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 12: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 13: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 14: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 15: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; - } + 
} - return return_dst; /* destination pointer. */ -} /* memcpy() */ + return return_dst; +} diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index b8e6c0430e5b..b0a608da7bd1 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -193,7 +193,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) inaccessible. */ unsigned long -__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) +__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. diff --git a/arch/cris/arch-v32/lib/string.c b/arch/cris/arch-v32/lib/string.c index 6740b2cebae5..c7bd6ebdc93c 100644 --- a/arch/cris/arch-v32/lib/string.c +++ b/arch/cris/arch-v32/lib/string.c @@ -1,55 +1,59 @@ -/*#************************************************************************#*/ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# FUNCTION NAME: memcpy() */ -/*# */ -/*# PARAMETERS: void* dst; Destination address. */ -/*# void* src; Source address. */ -/*# int len; Number of bytes to copy. */ -/*# */ -/*# RETURNS: dst. */ -/*# */ -/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */ -/*# about copying of overlapping memory areas. This routine is */ -/*# very sensitive to compiler changes in register allocation. */ -/*# Should really be rewritten to avoid this problem. */ -/*# */ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# HISTORY */ -/*# */ -/*# DATE NAME CHANGES */ -/*# ---- ---- ------- */ -/*# 941007 Kenny R Creation */ -/*# 941011 Kenny R Lots of optimizations and inlining. */ -/*# 941129 Ulf A Adapted for use in libc. */ -/*# 950216 HP N==0 forgotten if non-aligned src/dst. */ -/*# Added some optimizations. */ -/*# 001025 HP Make src and dst char *. 
Align dst to */ -/*# dword, not just word-if-both-src-and-dst- */ -/*# are-misaligned. */ -/*# */ -/*#-------------------------------------------------------------------------*/ - -#include <linux/types.h> - -void *memcpy(void *pdst, - const void *psrc, - size_t pn) +/* A memcpy for CRIS. + Copyright (C) 1994-2005 Axis Communications. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Neither the name of Axis Communications nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS + COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. */ + +/* FIXME: This file should really only be used for reference, as the + result is somewhat depending on gcc generating what we expect rather + than what we describe. An assembly file should be used instead. 
*/ + +#include <stddef.h> + +/* Break even between movem and move16 is really at 38.7 * 2, but + modulo 44, so up to the next multiple of 44, we use ordinary code. */ +#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) + +/* No name ambiguities in this file. */ +__asm__ (".syntax no_register_prefix"); + +void * +memcpy(void *pdst, const void *psrc, size_t pn) { - /* Ok. Now we want the parameters put in special registers. + /* Now we want the parameters put in special registers. Make sure the compiler is able to make something useful of this. - As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). + As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). - If gcc was alright, it really would need no temporaries, and no - stack space to save stuff on. */ + If gcc was allright, it really would need no temporaries, and no + stack space to save stuff on. */ register void *return_dst __asm__ ("r10") = pdst; - register char *dst __asm__ ("r13") = pdst; - register const char *src __asm__ ("r11") = psrc; + register unsigned char *dst __asm__ ("r13") = pdst; + register unsigned const char *src __asm__ ("r11") = psrc; register int n __asm__ ("r12") = pn; - /* When src is aligned but not dst, this makes a few extra needless cycles. I believe it would take as many to check that the re-alignment was unnecessary. */ @@ -59,161 +63,174 @@ void *memcpy(void *pdst, && n >= 3) { if ((unsigned long) dst & 1) - { - n--; - *(char*)dst = *(char*)src; - src++; - dst++; - } + { + n--; + *dst = *src; + src++; + dst++; + } if ((unsigned long) dst & 2) - { - n -= 2; - *(short*)dst = *(short*)src; - src += 2; - dst += 2; - } + { + n -= 2; + *(short *) dst = *(short *) src; + src += 2; + dst += 2; + } } - /* Decide which copying method to use. Movem is dirt cheap, so the - overheap is low enough to always use the minimum block size as the - threshold. 
*/ - if (n >= 44) - { - /* For large copies we use 'movem' */ - - /* It is not optimal to tell the compiler about clobbering any - registers; that will move the saving/restoring of those registers - to the function prologue/epilogue, and make non-movem sizes - suboptimal. */ - __asm__ volatile (" \n\ - ;; Check that the register asm declaration got right. \n\ - ;; The GCC manual explicitly says TRT will happen. \n\ - .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ - .err \n\ - .endif \n\ - \n\ - ;; Save the registers we'll use in the movem process \n\ + /* Decide which copying method to use. */ + if (n >= MEMCPY_BY_BLOCK_THRESHOLD) + { + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-movem sizes + suboptimal. */ + __asm__ volatile + ("\ + ;; GCC does promise correct register allocations, but let's \n\ + ;; make sure it keeps its promises. \n\ + .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ + .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\ + .endif \n\ \n\ - ;; on the stack. \n\ - subq 11*4,$sp \n\ - movem $r10,[$sp] \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ \n\ - ;; Now we've got this: \n\ - ;; r11 - src \n\ - ;; r13 - dst \n\ - ;; r12 - n \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ \n\ - ;; Update n for the first loop \n\ - subq 44,$r12 \n\ + ;; Update n for the first loop. \n\ + subq 44,r12 \n\ 0: \n\ - movem [$r11+],$r10 \n\ - subq 44,$r12 \n\ - bge 0b \n\ - movem $r10,[$r13+] \n\ +" +#ifdef __arch_common_v10_v32 + /* Cater to branch offset difference between v32 and v10. We + assume the branch below has an 8-bit offset. 
*/ +" setf\n" +#endif +" movem [r11+],r10 \n\ + subq 44,r12 \n\ + bge 0b \n\ + movem r10,[r13+] \n\ \n\ - addq 44,$r12 ;; compensate for last loop underflowing n \n\ + ;; Compensate for last loop underflowing n. \n\ + addq 44,r12 \n\ \n\ - ;; Restore registers from stack \n\ - movem [$sp+],$r10" + ;; Restore registers from stack. \n\ + movem [sp+],r10" - /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) - /* Inputs */ : "0" (dst), "1" (src), "2" (n)); + /* Outputs. */ + : "=r" (dst), "=r" (src), "=r" (n) - } + /* Inputs. */ + : "0" (dst), "1" (src), "2" (n)); + } - /* Either we directly starts copying, using dword copying - in a loop, or we copy as much as possible with 'movem' - and then the last block (<44 bytes) is copied here. - This will work since 'movem' will have updated src,dst,n. */ + while (n >= 16) + { + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; - while ( n >= 16 ) - { - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - n -= 16; - } + n -= 16; + } - /* A switch() is definitely the fastest although it takes a LOT of code. - * Particularly if you inline code this. 
- */ switch (n) - { + { case 0: break; + case 1: - *(char*)dst = *(char*)src; + *dst = *src; break; + case 2: - *(short*)dst = *(short*)src; + *(short *) dst = *(short *) src; break; + case 3: - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 4: - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; break; + case 5: - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 6: - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 7: - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 8: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 9: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 10: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 11: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = 
*src; break; + case 12: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 13: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 14: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *(short*)dst = *(short*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 15: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *(char*)dst = *(char*)src; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; - } + } - return return_dst; /* destination pointer. */ -} /* memcpy() */ + return return_dst; +} diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c index 04d0cf35a276..0b5b70d5f58a 100644 --- a/arch/cris/arch-v32/lib/usercopy.c +++ b/arch/cris/arch-v32/lib/usercopy.c @@ -161,7 +161,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) inaccessible. 
*/ unsigned long -__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) +__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index dff9edfc7465..56762d3c2a6a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -18,6 +18,7 @@ config IA64 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5b8d8382b762..1189d8d6170d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,6 +90,7 @@ config PPC select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config EARLY_PRINTK bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b21444b681b6..9892827b6176 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -61,6 +61,7 @@ config S390 def_bool y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES source "init/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 3af378ddb6ae..463d1be32c98 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -10,6 +10,7 @@ config SPARC default y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config SPARC64 bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 53800b80a204..f41c9538ca30 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 674cd66dcaba..18feb1c7c33b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -849,7 +849,8 @@ static int pkt_flush_cache(struct 
pktcdvd_device *pd) /* * speed is given as the normal factor, e.g. 4 for 4x */ -static int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsigned read_speed) +static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, + unsigned write_speed, unsigned read_speed) { struct packet_command cgc; struct request_sense sense; @@ -1776,7 +1777,8 @@ static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, return pkt_generic_packet(pd, &cgc); } -static int pkt_get_last_written(struct pktcdvd_device *pd, long *last_written) +static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, + long *last_written) { disc_information di; track_information ti; @@ -1813,7 +1815,7 @@ static int pkt_get_last_written(struct pktcdvd_device *pd, long *last_written) /* * write mode select package based on pd->settings */ -static int pkt_set_write_settings(struct pktcdvd_device *pd) +static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) { struct packet_command cgc; struct request_sense sense; @@ -1972,7 +1974,7 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) return 1; } -static int pkt_probe_settings(struct pktcdvd_device *pd) +static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) { struct packet_command cgc; unsigned char buf[12]; @@ -2071,7 +2073,8 @@ static int pkt_probe_settings(struct pktcdvd_device *pd) /* * enable/disable write caching on drive */ -static int pkt_write_caching(struct pktcdvd_device *pd, int set) +static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, + int set) { struct packet_command cgc; struct request_sense sense; @@ -2116,7 +2119,8 @@ static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag) /* * Returns drive maximum write speed */ -static int pkt_get_max_speed(struct pktcdvd_device *pd, unsigned *write_speed) +static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, + 
unsigned *write_speed) { struct packet_command cgc; struct request_sense sense; @@ -2177,7 +2181,8 @@ static char us_clv_to_speed[16] = { /* * reads the maximum media speed from ATIP */ -static int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed) +static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, + unsigned *speed) { struct packet_command cgc; struct request_sense sense; @@ -2249,7 +2254,7 @@ static int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed) } } -static int pkt_perform_opc(struct pktcdvd_device *pd) +static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) { struct packet_command cgc; struct request_sense sense; diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 85d596a3c18c..eba2883b630e 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -1527,7 +1527,7 @@ static int __devinit reset_card(struct pci_dev *pdev, msleep(10); portcount = inw(base + 0x2); - if (!inw(base + 0xe) & 0x1 || (portcount != 0 && portcount != 4 && + if (!(inw(base + 0xe) & 0x1) || (portcount != 0 && portcount != 4 && portcount != 8 && portcount != 16)) { dev_err(&pdev->dev, "ISILoad:PCI Card%d reset failure.\n", card + 1); diff --git a/drivers/char/pcmcia/ipwireless/network.c b/drivers/char/pcmcia/ipwireless/network.c index ff35230058d3..d793e68b3e0d 100644 --- a/drivers/char/pcmcia/ipwireless/network.c +++ b/drivers/char/pcmcia/ipwireless/network.c @@ -377,13 +377,16 @@ void ipwireless_network_packet_received(struct ipw_network *network, for (i = 0; i < MAX_ASSOCIATED_TTYS; i++) { struct ipw_tty *tty = network->associated_ttys[channel_idx][i]; + if (!tty) + continue; + /* * If it's associated with a tty (other than the RAS channel * when we're online), then send the data to that tty. The RAS * channel's data is handled above - it always goes through * ppp_generic. 
*/ - if (tty && channel_idx == IPW_CHANNEL_RAS + if (channel_idx == IPW_CHANNEL_RAS && (network->ras_control_lines & IPW_CONTROL_LINE_DCD) != 0 && ipwireless_tty_is_modem(tty)) { diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index c0e08c7bca2f..5ff83df67b44 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -2109,7 +2109,6 @@ static void sx_throttle(struct tty_struct * tty) sx_out(bp, CD186x_CAR, port_No(port)); spin_unlock_irqrestore(&bp->lock, flags); if (I_IXOFF(tty)) { - spin_unlock_irqrestore(&bp->lock, flags); sx_wait_CCR(bp); spin_lock_irqsave(&bp->lock, flags); sx_out(bp, CD186x_CCR, CCR_SSCH2); diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 367be9175061..9b58b894f823 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -702,6 +702,7 @@ void redraw_screen(struct vc_data *vc, int is_switch) if (is_switch) { set_leds(); compute_shiftstate(); + notify_update(vc); } } diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 8b10d9f23bef..c5263d63aca3 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -42,14 +42,14 @@ config INPUT_M68K_BEEP config INPUT_APANEL tristate "Fujitsu Lifebook Application Panel buttons" - depends on X86 - select I2C_I801 + depends on X86 && I2C && LEDS_CLASS select INPUT_POLLDEV select CHECK_SIGNATURE help Say Y here for support of the Application Panel buttons, used on Fujitsu Lifebook. These are attached to the mainboard through - an SMBus interface managed by the I2C Intel ICH (i801) driver. + an SMBus interface managed by the I2C Intel ICH (i801) driver, + which you should also build for this kernel. To compile this driver as a module, choose M here: the module will be called apanel. 
diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.c b/drivers/isdn/hisax/hisax_fcpcipnp.c index 7993e01f9fc5..76043dedba5b 100644 --- a/drivers/isdn/hisax/hisax_fcpcipnp.c +++ b/drivers/isdn/hisax/hisax_fcpcipnp.c @@ -725,23 +725,6 @@ static int __devinit fcpcipnp_setup(struct fritz_adapter *adapter) switch (adapter->type) { case AVM_FRITZ_PCIV2: - retval = request_irq(adapter->irq, fcpci2_irq, IRQF_SHARED, - "fcpcipnp", adapter); - break; - case AVM_FRITZ_PCI: - retval = request_irq(adapter->irq, fcpci_irq, IRQF_SHARED, - "fcpcipnp", adapter); - break; - case AVM_FRITZ_PNP: - retval = request_irq(adapter->irq, fcpci_irq, 0, - "fcpcipnp", adapter); - break; - } - if (retval) - goto err_region; - - switch (adapter->type) { - case AVM_FRITZ_PCIV2: case AVM_FRITZ_PCI: val = inl(adapter->io); break; @@ -796,6 +779,23 @@ static int __devinit fcpcipnp_setup(struct fritz_adapter *adapter) switch (adapter->type) { case AVM_FRITZ_PCIV2: + retval = request_irq(adapter->irq, fcpci2_irq, IRQF_SHARED, + "fcpcipnp", adapter); + break; + case AVM_FRITZ_PCI: + retval = request_irq(adapter->irq, fcpci_irq, IRQF_SHARED, + "fcpcipnp", adapter); + break; + case AVM_FRITZ_PNP: + retval = request_irq(adapter->irq, fcpci_irq, 0, + "fcpcipnp", adapter); + break; + } + if (retval) + goto err_region; + + switch (adapter->type) { + case AVM_FRITZ_PCIV2: fcpci2_init(adapter); isacsx_setup(&adapter->isac); break; diff --git a/drivers/isdn/i4l/isdn_ttyfax.c b/drivers/isdn/i4l/isdn_ttyfax.c index f93de4a30355..78f7660c1d0e 100644 --- a/drivers/isdn/i4l/isdn_ttyfax.c +++ b/drivers/isdn/i4l/isdn_ttyfax.c @@ -906,7 +906,8 @@ isdn_tty_cmd_FCLASS2(char **p, modem_info * info) sprintf(rs, "\r\n0-2"); isdn_tty_at_cout(rs, info); } else { - if ((f->phase != ISDN_FAX_PHASE_D) || (!info->faxonline & 1)) + if ((f->phase != ISDN_FAX_PHASE_D) || + (!(info->faxonline & 1))) PARSE_ERROR1; par = isdn_getnum(p); if ((par < 0) || (par > 2)) diff --git a/drivers/isdn/isdnloop/isdnloop.c 
b/drivers/isdn/isdnloop/isdnloop.c index 655ef9a3f4df..a335c85a736e 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -1289,7 +1289,7 @@ isdnloop_command(isdn_ctrl * c, isdnloop_card * card) } break; case ISDN_CMD_CLREAZ: - if (!card->flags & ISDNLOOP_FLAGS_RUNNING) + if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) return -ENODEV; if (card->leased) break; @@ -1333,7 +1333,7 @@ isdnloop_command(isdn_ctrl * c, isdnloop_card * card) } break; case ISDN_CMD_SETL3: - if (!card->flags & ISDNLOOP_FLAGS_RUNNING) + if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) return -ENODEV; return 0; default: @@ -1380,7 +1380,7 @@ if_writecmd(const u_char __user *buf, int len, int id, int channel) isdnloop_card *card = isdnloop_findcard(id); if (card) { - if (!card->flags & ISDNLOOP_FLAGS_RUNNING) + if (!(card->flags & ISDNLOOP_FLAGS_RUNNING)) return -ENODEV; return (isdnloop_writecmd(buf, len, 1, card)); } diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7aeceedcf7d4..831aed9c56ff 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1047,6 +1047,11 @@ void bitmap_daemon_work(struct bitmap *bitmap) if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) return; bitmap->daemon_lastrun = jiffies; + if (bitmap->allclean) { + bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + return; + } + bitmap->allclean = 1; for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; @@ -1068,8 +1073,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) + if (need_write) { write_page(bitmap, page, 0); + bitmap->allclean = 0; + } continue; } @@ -1098,6 +1105,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); */ + if (*bmc) + bitmap->allclean = 0; + if (*bmc == 2) { *bmc=1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, 
BITMAP_PAGE_CLEAN); @@ -1132,6 +1142,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) } } + if (bitmap->allclean == 0) + bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1226,6 +1238,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect sectors -= blocks; else sectors = 0; } + bitmap->allclean = 0; return 0; } @@ -1296,6 +1309,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, } } spin_unlock_irq(&bitmap->lock); + bitmap->allclean = 0; return rv; } @@ -1332,6 +1346,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab } unlock: spin_unlock_irqrestore(&bitmap->lock, flags); + bitmap->allclean = 0; } void bitmap_close_sync(struct bitmap *bitmap) @@ -1399,7 +1414,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } spin_unlock_irq(&bitmap->lock); - + bitmap->allclean = 0; } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 7da6ec244e15..827824a9f3e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1105,7 +1105,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) - rdev-> sb_size = (rdev->sb_size | bmask)+1; + rdev->sb_size = (rdev->sb_size | bmask) + 1; + + if (minor_version + && rdev->data_offset < sb_offset + (rdev->sb_size/512)) + return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) rdev->desc_nr = -1; @@ -1137,7 +1141,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) else ret = 0; } - if (minor_version) + if (minor_version) rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; else 
rdev->size = rdev->sb_offset; @@ -1499,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev) free_disk_sb(rdev); list_del_init(&rdev->same_set); #ifndef MODULE - md_autodetect_dev(rdev->bdev->bd_dev); + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); #endif unlock_rdev(rdev); kobject_put(&rdev->kobj); @@ -1996,9 +2001,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) char *e; unsigned long long size = simple_strtoull(buf, &e, 10); unsigned long long oldsize = rdev->size; + mddev_t *my_mddev = rdev->mddev; + if (e==buf || (*e && *e != '\n')) return -EINVAL; - if (rdev->mddev->pers) + if (my_mddev->pers) return -EBUSY; rdev->size = size; if (size > oldsize && rdev->mddev->external) { @@ -2011,7 +2018,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) int overlap = 0; struct list_head *tmp, *tmp2; - mddev_unlock(rdev->mddev); + mddev_unlock(my_mddev); for_each_mddev(mddev, tmp) { mdk_rdev_t *rdev2; @@ -2031,7 +2038,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) break; } } - mddev_lock(rdev->mddev); + mddev_lock(my_mddev); if (overlap) { /* Someone else could have slipped in a size * change here, but doing so is just silly. @@ -2043,8 +2050,8 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) return -EBUSY; } } - if (size < rdev->mddev->size || rdev->mddev->size == 0) - rdev->mddev->size = size; + if (size < my_mddev->size || my_mddev->size == 0) + my_mddev->size = size; return len; } @@ -2065,10 +2072,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); + mddev_t *mddev = rdev->mddev; + ssize_t rv; if (!entry->show) return -EIO; - return entry->show(rdev, page); + + rv = mddev ? 
mddev_lock(mddev) : -EBUSY; + if (!rv) { + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->show(rdev, page); + mddev_unlock(mddev); + } + return rv; } static ssize_t @@ -2077,15 +2095,19 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); - int rv; + ssize_t rv; + mddev_t *mddev = rdev->mddev; if (!entry->store) return -EIO; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - rv = mddev_lock(rdev->mddev); + rv = mddev ? mddev_lock(mddev): -EBUSY; if (!rv) { - rv = entry->store(rdev, page, length); + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->store(rdev, page, length); mddev_unlock(rdev->mddev); } return rv; @@ -5351,6 +5373,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) mddev->ro = 0; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); + md_wakeup_thread(mddev->sync_thread); } atomic_inc(&mddev->writes_pending); if (mddev->in_sync) { @@ -6021,6 +6044,7 @@ static void autostart_arrays(int part) MD_BUG(); continue; } + set_bit(AutoDetected, &rdev->flags); list_add(&rdev->same_set, &pending_raid_disks); i_passed++; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5c7fef091cec..ff61b309129a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits) } +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queued but are awaiting + * bitmap updates get flushed here. + * We return 1 if any requests were actually submitted. 
+ */ + int rv = 0; + + spin_lock_irq(&conf->device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(&conf->device_lock); + /* flush any pending bitmap writes to + * disk before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(&conf->device_lock); + return rv; +} + /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -673,15 +704,23 @@ static void freeze_array(conf_t *conf) /* stop syncio and normal IO and wait for everything to * go quite. * We increment barrier and nr_waiting, and then - * wait until barrier+nr_pending match nr_queued+2 + * wait until nr_pending match nr_queued+1 + * This is called in the context of one normal IO request + * that has failed. Thus any sync request that might be pending + * will be blocked by nr_pending, and we need to wait for + * pending IO requests to complete or be queued for re-try. + * Thus the number queued (nr_queued) plus this request (1) + * must match the number of pending IOs (nr_pending) before + * we continue. 
*/ spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, - conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - raid1_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid1_unplug(conf->mddev->queue); })); spin_unlock_irq(&conf->resync_lock); } static void unfreeze_array(conf_t *conf) @@ -907,6 +946,9 @@ static int make_request(struct request_queue *q, struct bio * bio) blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); + /* In case raid1d snuck into freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); #if 0 @@ -1473,28 +1515,14 @@ static void raid1d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - - if (conf->pending_bio_list.head) { - bio = bio_list_get(&conf->pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - unplug = 1; + unplug += flush_pending_writes(conf); - continue; - } - - if (list_empty(head)) + spin_lock_irqsave(&conf->device_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&conf->device_lock, flags); break; + } r1_bio = list_entry(head->prev, r1bio_t, retry_list); list_del(head->prev); conf->nr_queued--; @@ -1590,7 +1618,6 @@ static void raid1d(mddev_t *mddev) } } } - spin_unlock_irqrestore(&conf->device_lock, flags); if (unplug) unplug_slaves(mddev); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 017f58113c33..32389d2f18fc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) 
current_distance = abs(r10_bio->devs[slot].addr - conf->mirrors[disk].head_position); - /* Find the disk whose head is closest */ + /* Find the disk whose head is closest, + * or - for far > 1 - find the closest to partition beginning */ for (nslot = slot; nslot < conf->copies; nslot++) { int ndisk = r10_bio->devs[nslot].devnum; @@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = nslot; break; } - new_distance = abs(r10_bio->devs[nslot].addr - - conf->mirrors[ndisk].head_position); + + /* for far > 1 always use the lowest address */ + if (conf->far_copies > 1) + new_distance = r10_bio->devs[nslot].addr; + else + new_distance = abs(r10_bio->devs[nslot].addr - + conf->mirrors[ndisk].head_position); if (new_distance < current_distance) { current_distance = new_distance; disk = ndisk; @@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits) return ret; } - +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queued but are awaiting + * bitmap updates get flushed here. + * We return 1 if any requests were actually submitted. + */ + int rv = 0; + + spin_lock_irq(&conf->device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(&conf->device_lock); + /* flush any pending bitmap writes to disk + * before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(&conf->device_lock); + return rv; +} /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf) /* stop syncio and normal IO and wait for everything to * go quiet. 
* We increment barrier and nr_waiting, and then - * wait until barrier+nr_pending match nr_queued+2 + * wait until nr_pending match nr_queued+1 + * This is called in the context of one normal IO request + * that has failed. Thus any sync request that might be pending + * will be blocked by nr_pending, and we need to wait for + * pending IO requests to complete or be queued for re-try. + * Thus the number queued (nr_queued) plus this request (1) + * must match the number of pending IOs (nr_pending) before + * we continue. */ spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, - conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - raid10_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid10_unplug(conf->mddev->queue); })); spin_unlock_irq(&conf->resync_lock); } @@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio) blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); + /* In case raid10d snuck in to freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); @@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - if (conf->pending_bio_list.head) { - bio = bio_list_get(&conf->pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - unplug = 1; - - continue; - } + unplug += flush_pending_writes(conf); - if (list_empty(head)) + spin_lock_irqsave(&conf->device_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&conf->device_lock, flags); 
break; + } r10_bio = list_entry(head->prev, r10bio_t, retry_list); list_del(head->prev); conf->nr_queued--; @@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev) } } } - spin_unlock_irqrestore(&conf->device_lock, flags); if (unplug) unplug_slaves(mddev); } @@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (j == conf->copies) { /* Cannot recover, so abort the recovery */ put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); r10_bio = rb2; if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index afd82966f9a0..13bac53db69a 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -48,31 +48,13 @@ struct sm501_devdata { unsigned int pdev_id; unsigned int irq; void __iomem *regs; + unsigned int rev; }; #define MHZ (1000 * 1000) #ifdef DEBUG -static const unsigned int misc_div[] = { - [0] = 1, - [1] = 2, - [2] = 4, - [3] = 8, - [4] = 16, - [5] = 32, - [6] = 64, - [7] = 128, - [8] = 3, - [9] = 6, - [10] = 12, - [11] = 24, - [12] = 48, - [13] = 96, - [14] = 192, - [15] = 384, -}; - -static const unsigned int px_div[] = { +static const unsigned int div_tab[] = { [0] = 1, [1] = 2, [2] = 4, @@ -101,12 +83,12 @@ static const unsigned int px_div[] = { static unsigned long decode_div(unsigned long pll2, unsigned long val, unsigned int lshft, unsigned int selbit, - unsigned long mask, const unsigned int *dtab) + unsigned long mask) { if (val & selbit) pll2 = 288 * MHZ; - return pll2 / dtab[(val >> lshft) & mask]; + return pll2 / div_tab[(val >> lshft) & mask]; } #define fmt_freq(x) ((x) / MHZ), ((x) % MHZ), (x) @@ -141,10 +123,10 @@ static void sm501_dump_clk(struct sm501_devdata *sm) } sdclk0 = (misct & (1<<12)) ? pll2 : 288 * MHZ; - sdclk0 /= misc_div[((misct >> 8) & 0xf)]; + sdclk0 /= div_tab[((misct >> 8) & 0xf)]; sdclk1 = (misct & (1<<20)) ? 
pll2 : 288 * MHZ; - sdclk1 /= misc_div[((misct >> 16) & 0xf)]; + sdclk1 /= div_tab[((misct >> 16) & 0xf)]; dev_dbg(sm->dev, "MISCT=%08lx, PM0=%08lx, PM1=%08lx\n", misct, pm0, pm1); @@ -158,19 +140,19 @@ static void sm501_dump_clk(struct sm501_devdata *sm) "P2 %ld.%ld MHz (%ld), V2 %ld.%ld (%ld), " "M %ld.%ld (%ld), MX1 %ld.%ld (%ld)\n", (pmc & 3 ) == 0 ? '*' : '-', - fmt_freq(decode_div(pll2, pm0, 24, 1<<29, 31, px_div)), - fmt_freq(decode_div(pll2, pm0, 16, 1<<20, 15, misc_div)), - fmt_freq(decode_div(pll2, pm0, 8, 1<<12, 15, misc_div)), - fmt_freq(decode_div(pll2, pm0, 0, 1<<4, 15, misc_div))); + fmt_freq(decode_div(pll2, pm0, 24, 1<<29, 31)), + fmt_freq(decode_div(pll2, pm0, 16, 1<<20, 15)), + fmt_freq(decode_div(pll2, pm0, 8, 1<<12, 15)), + fmt_freq(decode_div(pll2, pm0, 0, 1<<4, 15))); dev_dbg(sm->dev, "PM1[%c]: " "P2 %ld.%ld MHz (%ld), V2 %ld.%ld (%ld), " "M %ld.%ld (%ld), MX1 %ld.%ld (%ld)\n", (pmc & 3 ) == 1 ? '*' : '-', - fmt_freq(decode_div(pll2, pm1, 24, 1<<29, 31, px_div)), - fmt_freq(decode_div(pll2, pm1, 16, 1<<20, 15, misc_div)), - fmt_freq(decode_div(pll2, pm1, 8, 1<<12, 15, misc_div)), - fmt_freq(decode_div(pll2, pm1, 0, 1<<4, 15, misc_div))); + fmt_freq(decode_div(pll2, pm1, 24, 1<<29, 31)), + fmt_freq(decode_div(pll2, pm1, 16, 1<<20, 15)), + fmt_freq(decode_div(pll2, pm1, 8, 1<<12, 15)), + fmt_freq(decode_div(pll2, pm1, 0, 1<<4, 15))); } static void sm501_dump_regs(struct sm501_devdata *sm) @@ -436,46 +418,108 @@ struct sm501_clock { unsigned long mclk; int divider; int shift; + unsigned int m, n, k; }; +/* sm501_calc_clock + * + * Calculates the nearest discrete clock frequency that + * can be achieved with the specified input clock. 
+ * the maximum divisor is 3 or 5 + */ + +static int sm501_calc_clock(unsigned long freq, + struct sm501_clock *clock, + int max_div, + unsigned long mclk, + long *best_diff) +{ + int ret = 0; + int divider; + int shift; + long diff; + + /* try dividers 1 and 3 for CRT and for panel, + try divider 5 for panel only.*/ + + for (divider = 1; divider <= max_div; divider += 2) { + /* try all 8 shift values.*/ + for (shift = 0; shift < 8; shift++) { + /* Calculate difference to requested clock */ + diff = sm501fb_round_div(mclk, divider << shift) - freq; + if (diff < 0) + diff = -diff; + + /* If it is less than the current, use it */ + if (diff < *best_diff) { + *best_diff = diff; + + clock->mclk = mclk; + clock->divider = divider; + clock->shift = shift; + ret = 1; + } + } + } + + return ret; +} + +/* sm501_calc_pll + * + * Calculates the nearest discrete clock frequency that can be + * achieved using the programmable PLL. + * the maximum divisor is 3 or 5 + */ + +static unsigned long sm501_calc_pll(unsigned long freq, + struct sm501_clock *clock, + int max_div) +{ + unsigned long mclk; + unsigned int m, n, k; + long best_diff = 999999999; + + /* + * The SM502 datasheet doesn't specify the min/max values for M and N. + * N = 1 at least doesn't work in practice. + */ + for (m = 2; m <= 255; m++) { + for (n = 2; n <= 127; n++) { + for (k = 0; k <= 1; k++) { + mclk = (24000000UL * m / n) >> k; + + if (sm501_calc_clock(freq, clock, max_div, + mclk, &best_diff)) { + clock->m = m; + clock->n = n; + clock->k = k; + } + } + } + } + + /* Return best clock. */ + return clock->mclk / (clock->divider << clock->shift); +} + /* sm501_select_clock * - * selects nearest discrete clock frequency the SM501 can achive + * Calculates the nearest discrete clock frequency that can be + * achieved using the 288MHz and 336MHz PLLs. 
* the maximum divisor is 3 or 5 */ + static unsigned long sm501_select_clock(unsigned long freq, struct sm501_clock *clock, int max_div) { unsigned long mclk; - int divider; - int shift; - long diff; long best_diff = 999999999; /* Try 288MHz and 336MHz clocks. */ for (mclk = 288000000; mclk <= 336000000; mclk += 48000000) { - /* try dividers 1 and 3 for CRT and for panel, - try divider 5 for panel only.*/ - - for (divider = 1; divider <= max_div; divider += 2) { - /* try all 8 shift values.*/ - for (shift = 0; shift < 8; shift++) { - /* Calculate difference to requested clock */ - diff = sm501fb_round_div(mclk, divider << shift) - freq; - if (diff < 0) - diff = -diff; - - /* If it is less than the current, use it */ - if (diff < best_diff) { - best_diff = diff; - - clock->mclk = mclk; - clock->divider = divider; - clock->shift = shift; - } - } - } + sm501_calc_clock(freq, clock, max_div, mclk, &best_diff); } /* Return best clock. */ @@ -497,6 +541,7 @@ unsigned long sm501_set_clock(struct device *dev, unsigned long gate = readl(sm->regs + SM501_CURRENT_GATE); unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK); unsigned char reg; + unsigned int pll_reg = 0; unsigned long sm501_freq; /* the actual frequency acheived */ struct sm501_clock to; @@ -511,14 +556,28 @@ unsigned long sm501_set_clock(struct device *dev, * requested frequency the value must be multiplied by * 2. 
This clock also has an additional pre divisor */ - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); - reg=to.shift & 0x07;/* bottom 3 bits are shift */ - if (to.divider == 3) - reg |= 0x08; /* /3 divider required */ - else if (to.divider == 5) - reg |= 0x10; /* /5 divider required */ - if (to.mclk != 288000000) - reg |= 0x20; /* which mclk pll is source */ + if (sm->rev >= 0xC0) { + /* SM502 -> use the programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + reg |= 0x40; /* select the programmable PLL */ + pll_reg = 0x20000 | (to.k << 15) | (to.n << 8) | to.m; + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + if (to.mclk != 288000000) + reg |= 0x20; /* which mclk pll is source */ + } break; case SM501_CLOCK_V2XCLK: @@ -579,6 +638,10 @@ unsigned long sm501_set_clock(struct device *dev, } writel(mode, sm->regs + SM501_POWER_MODE_CONTROL); + + if (pll_reg) + writel(pll_reg, sm->regs + SM501_PROGRAMMABLE_PLL_CONTROL); + sm501_sync_regs(sm); dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n", @@ -599,15 +662,24 @@ EXPORT_SYMBOL_GPL(sm501_set_clock); * finds the closest available frequency for a given clock */ -unsigned long sm501_find_clock(int clksrc, +unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq) { + struct sm501_devdata *sm = dev_get_drvdata(dev); unsigned long sm501_freq; /* the frequency achiveable by the 501 */ struct sm501_clock to; switch (clksrc) { case SM501_CLOCK_P2XCLK: - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); + if (sm->rev >= 0xC0) { + /* SM502 -> use the 
programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + } break; case SM501_CLOCK_V2XCLK: @@ -914,6 +986,8 @@ static int sm501_init_dev(struct sm501_devdata *sm) dev_info(sm->dev, "SM501 At %p: Version %08lx, %ld Mb, IRQ %d\n", sm->regs, devid, (unsigned long)mem_avail >> 20, sm->irq); + sm->rev = devid & SM501_DEVICEID_REVMASK; + sm501_dump_gate(sm); ret = device_create_file(sm->dev, &dev_attr_dbg_regs); diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c index bb269d0c677e..6cb781262f94 100644 --- a/drivers/misc/thinkpad_acpi.c +++ b/drivers/misc/thinkpad_acpi.c @@ -1078,7 +1078,8 @@ static int hotkey_get_tablet_mode(int *status) if (!acpi_evalf(hkey_handle, &s, "MHKG", "d")) return -EIO; - return ((s & TP_HOTKEY_TABLET_MASK) != 0); + *status = ((s & TP_HOTKEY_TABLET_MASK) != 0); + return 0; } /* diff --git a/drivers/parisc/Kconfig b/drivers/parisc/Kconfig index 1d3b84b4af3f..553a9905299a 100644 --- a/drivers/parisc/Kconfig +++ b/drivers/parisc/Kconfig @@ -103,6 +103,11 @@ config IOMMU_SBA depends on PCI_LBA default PCI_LBA +config IOMMU_HELPER + bool + depends on IOMMU_SBA || IOMMU_CCIO + default y + #config PCI_EPIC # bool "EPIC/SAGA PCI support" # depends on PCI diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index d08b284de196..60d338cd8009 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -43,6 +43,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/scatterlist.h> +#include <linux/iommu-helper.h> #include <asm/byteorder.h> #include <asm/cache.h> /* for L1_CACHE_BYTES */ @@ -302,13 +303,17 @@ static int ioc_count; */ #define CCIO_SEARCH_LOOP(ioc, res_idx, mask, size) \ for(; res_ptr < res_end; ++res_ptr) { \ - if(0 == (*res_ptr & mask)) { \ - *res_ptr |= mask; \ - res_idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \ - ioc->res_hint = res_idx + 
(size >> 3); \ - goto resource_found; \ - } \ - } + int ret;\ + unsigned int idx;\ + idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \ + ret = iommu_is_span_boundary(idx << 3, pages_needed, 0, boundary_size);\ + if ((0 == (*res_ptr & mask)) && !ret) { \ + *res_ptr |= mask; \ + res_idx = idx;\ + ioc->res_hint = res_idx + (size >> 3); \ + goto resource_found; \ + } \ + } #define CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, size) \ u##size *res_ptr = (u##size *)&((ioc)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \ @@ -341,10 +346,11 @@ static int ioc_count; * of available pages for the requested size. */ static int -ccio_alloc_range(struct ioc *ioc, size_t size) +ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size) { unsigned int pages_needed = size >> IOVP_SHIFT; unsigned int res_idx; + unsigned long boundary_size; #ifdef CCIO_SEARCH_TIME unsigned long cr_start = mfctl(16); #endif @@ -360,6 +366,9 @@ ccio_alloc_range(struct ioc *ioc, size_t size) ** ggg sacrifices another 710 to the computer gods. 
*/ + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 1 << IOVP_SHIFT); + boundary_size >>= IOVP_SHIFT; + if (pages_needed <= 8) { /* * LAN traffic will not thrash the TLB IFF the same NIC @@ -760,7 +769,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, ioc->msingle_pages += size >> IOVP_SHIFT; #endif - idx = ccio_alloc_range(ioc, size); + idx = ccio_alloc_range(ioc, dev, size); iovp = (dma_addr_t)MKIOVP(idx); pdir_start = &(ioc->pdir_base[idx]); diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index 97ba8286c596..a9c46cc2db37 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -96,8 +96,8 @@ iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, static inline unsigned int iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, - struct scatterlist *startsg, int nents, - int (*iommu_alloc_range)(struct ioc *, size_t)) + struct scatterlist *startsg, int nents, + int (*iommu_alloc_range)(struct ioc *, struct device *, size_t)) { struct scatterlist *contig_sg; /* contig chunk head */ unsigned long dma_offset, dma_len; /* start/len of DMA stream */ @@ -166,7 +166,7 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); sg_dma_address(contig_sg) = PIDE_FLAG - | (iommu_alloc_range(ioc, dma_len) << IOVP_SHIFT) + | (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT) | dma_offset; n_mappings++; } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index d06627c3f353..e834127a8505 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -29,6 +29,7 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/scatterlist.h> +#include <linux/iommu-helper.h> #include <asm/byteorder.h> #include <asm/io.h> @@ -313,6 +314,12 @@ sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) #define RESMAP_MASK(n) (~0UL << (BITS_PER_LONG - (n))) #define RESMAP_IDX_MASK 
(sizeof(unsigned long) - 1) +unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr, + unsigned int bitshiftcnt) +{ + return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3) + + bitshiftcnt; +} /** * sba_search_bitmap - find free space in IO PDIR resource bitmap @@ -324,19 +331,36 @@ sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) * Cool perf optimization: search for log2(size) bits at a time. */ static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) +sba_search_bitmap(struct ioc *ioc, struct device *dev, + unsigned long bits_wanted) { unsigned long *res_ptr = ioc->res_hint; unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long pide = ~0UL; + unsigned long pide = ~0UL, tpide; + unsigned long boundary_size; + unsigned long shift; + int ret; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 1 << IOVP_SHIFT); + boundary_size >>= IOVP_SHIFT; + +#if defined(ZX1_SUPPORT) + BUG_ON(ioc->ibase & ~IOVP_MASK); + shift = ioc->ibase >> IOVP_SHIFT; +#else + shift = 0; +#endif if (bits_wanted > (BITS_PER_LONG/2)) { /* Search word at a time - no mask needed */ for(; res_ptr < res_end; ++res_ptr) { - if (*res_ptr == 0) { + tpide = ptr_to_pide(ioc, res_ptr, 0); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, + boundary_size); + if ((*res_ptr == 0) && !ret) { *res_ptr = RESMAP_MASK(bits_wanted); - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ + pide = tpide; break; } } @@ -365,11 +389,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) { DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); WARN_ON(mask == 0); - if(((*res_ptr) & mask) == 0) { + tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, + boundary_size); + if ((((*res_ptr) & mask) == 0) && !ret) { *res_ptr |= mask; /* mark resources busy! 
*/ - pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); - pide <<= 3; /* convert to bit address */ - pide += bitshiftcnt; + pide = tpide; break; } mask >>= o; @@ -404,7 +430,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) * resource bit map. */ static int -sba_alloc_range(struct ioc *ioc, size_t size) +sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size) { unsigned int pages_needed = size >> IOVP_SHIFT; #ifdef SBA_COLLECT_STATS @@ -412,9 +438,9 @@ sba_alloc_range(struct ioc *ioc, size_t size) #endif unsigned long pide; - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, dev, pages_needed); if (pide >= (ioc->res_size << 3)) { - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, dev, pages_needed); if (pide >= (ioc->res_size << 3)) panic("%s: I/O MMU @ %p is out of mapping resources\n", __FILE__, ioc->ioc_hpa); @@ -710,7 +736,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, ioc->msingle_calls++; ioc->msingle_pages += size >> IOVP_SHIFT; #endif - pide = sba_alloc_range(ioc, size); + pide = sba_alloc_range(ioc, dev, size); iovp = (dma_addr_t) pide << IOVP_SHIFT; DBG_RUN("%s() 0x%p -> 0x%lx\n", diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6402d699072b..82f5ad9c3af4 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -250,6 +250,15 @@ config RTC_DRV_TWL92330 platforms. The support is integrated with the rest of the Menelaus driver; it's not separate module. +config RTC_DRV_S35390A + tristate "Seiko Instruments S-35390A" + help + If you say yes here you will get support for the Seiko + Instruments S-35390A. + + This driver can also be built as a module. If so the module + will be called rtc-s35390a. 
+ endif # I2C comment "SPI RTC drivers" diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index ec703f34ab86..872f1218ff9f 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o +obj-$(CONFIG_RTC_DRV_S35390A) += rtc-s35390a.o obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o obj-$(CONFIG_RTC_DRV_SA1100) += rtc-sa1100.o obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c new file mode 100644 index 000000000000..e8abc90c32c5 --- /dev/null +++ b/drivers/rtc/rtc-s35390a.c @@ -0,0 +1,316 @@ +/* + * Seiko Instruments S-35390A RTC Driver + * + * Copyright (c) 2007 Byron Bradley + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/module.h> +#include <linux/rtc.h> +#include <linux/i2c.h> +#include <linux/bitrev.h> +#include <linux/bcd.h> +#include <linux/slab.h> + +#define S35390A_CMD_STATUS1 0 +#define S35390A_CMD_STATUS2 1 +#define S35390A_CMD_TIME1 2 + +#define S35390A_BYTE_YEAR 0 +#define S35390A_BYTE_MONTH 1 +#define S35390A_BYTE_DAY 2 +#define S35390A_BYTE_WDAY 3 +#define S35390A_BYTE_HOURS 4 +#define S35390A_BYTE_MINS 5 +#define S35390A_BYTE_SECS 6 + +#define S35390A_FLAG_POC 0x01 +#define S35390A_FLAG_BLD 0x02 +#define S35390A_FLAG_24H 0x40 +#define S35390A_FLAG_RESET 0x80 +#define S35390A_FLAG_TEST 0x01 + +struct s35390a { + struct i2c_client *client[8]; + struct rtc_device *rtc; + int twentyfourhour; +}; + +static int s35390a_set_reg(struct s35390a *s35390a, int reg, char *buf, int len) +{ + struct i2c_client *client = s35390a->client[reg]; + struct i2c_msg msg[] = { + { client->addr, 0, len, buf }, + }; + + if ((i2c_transfer(client->adapter, msg, 1)) != 1) + return -EIO; + + return 0; +} + +static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) +{ + struct i2c_client *client = s35390a->client[reg]; + struct i2c_msg msg[] = { + { client->addr, I2C_M_RD, len, buf }, + }; + + if ((i2c_transfer(client->adapter, msg, 1)) != 1) + return -EIO; + + return 0; +} + +static int s35390a_reset(struct s35390a *s35390a) +{ + char buf[1]; + + if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0) + return -EIO; + + if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD))) + return 0; + + buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H); + buf[0] &= 0xf0; + return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); +} + +static int s35390a_disable_test_mode(struct s35390a *s35390a) +{ + char buf[1]; + + if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS2, buf, sizeof(buf)) < 0) + return -EIO; + + if (!(buf[0] & S35390A_FLAG_TEST)) + return 0; + + buf[0] &= ~S35390A_FLAG_TEST; + return s35390a_set_reg(s35390a, 
S35390A_CMD_STATUS2, buf, sizeof(buf)); +} + +static char s35390a_hr2reg(struct s35390a *s35390a, int hour) +{ + if (s35390a->twentyfourhour) + return BIN2BCD(hour); + + if (hour < 12) + return BIN2BCD(hour); + + return 0x40 | BIN2BCD(hour - 12); +} + +static int s35390a_reg2hr(struct s35390a *s35390a, char reg) +{ + unsigned hour; + + if (s35390a->twentyfourhour) + return BCD2BIN(reg & 0x3f); + + hour = BCD2BIN(reg & 0x3f); + if (reg & 0x40) + hour += 12; + + return hour; +} + +static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm) +{ + struct s35390a *s35390a = i2c_get_clientdata(client); + int i, err; + char buf[7]; + + dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d mday=%d, " + "mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec, + tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_wday); + + buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100); + buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1); + buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday); + buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday); + buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour); + buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min); + buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec); + + /* This chip expects the bits of each byte to be in reverse order */ + for (i = 0; i < 7; ++i) + buf[i] = bitrev8(buf[i]); + + err = s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); + + return err; +} + +static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm) +{ + struct s35390a *s35390a = i2c_get_clientdata(client); + char buf[7]; + int i, err; + + err = s35390a_get_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf)); + if (err < 0) + return err; + + /* This chip returns the bits of each byte in reverse order */ + for (i = 0; i < 7; ++i) + buf[i] = bitrev8(buf[i]); + + tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]); + tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]); + tm->tm_hour = s35390a_reg2hr(s35390a, 
buf[S35390A_BYTE_HOURS]); + tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]); + tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]); + tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1; + tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100; + + dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, " + "mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec, + tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year, + tm->tm_wday); + + return rtc_valid_tm(tm); +} + +static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + return s35390a_get_datetime(to_i2c_client(dev), tm); +} + +static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + return s35390a_set_datetime(to_i2c_client(dev), tm); +} + +static const struct rtc_class_ops s35390a_rtc_ops = { + .read_time = s35390a_rtc_read_time, + .set_time = s35390a_rtc_set_time, +}; + +static struct i2c_driver s35390a_driver; + +static int s35390a_probe(struct i2c_client *client) +{ + int err; + unsigned int i; + struct s35390a *s35390a; + struct rtc_time tm; + char buf[1]; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + err = -ENODEV; + goto exit; + } + + s35390a = kzalloc(sizeof(struct s35390a), GFP_KERNEL); + if (!s35390a) { + err = -ENOMEM; + goto exit; + } + + s35390a->client[0] = client; + i2c_set_clientdata(client, s35390a); + + /* This chip uses multiple addresses, use dummy devices for them */ + for (i = 1; i < 8; ++i) { + s35390a->client[i] = i2c_new_dummy(client->adapter, + client->addr + i, "rtc-s35390a"); + if (!s35390a->client[i]) { + dev_err(&client->dev, "Address %02x unavailable\n", + client->addr + i); + err = -EBUSY; + goto exit_dummy; + } + } + + err = s35390a_reset(s35390a); + if (err < 0) { + dev_err(&client->dev, "error resetting chip\n"); + goto exit_dummy; + } + + err = s35390a_disable_test_mode(s35390a); + if (err < 0) { + dev_err(&client->dev, "error disabling test mode\n"); + goto exit_dummy; + } + + err = 
s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); + if (err < 0) { + dev_err(&client->dev, "error checking 12/24 hour mode\n"); + goto exit_dummy; + } + if (buf[0] & S35390A_FLAG_24H) + s35390a->twentyfourhour = 1; + else + s35390a->twentyfourhour = 0; + + if (s35390a_get_datetime(client, &tm) < 0) + dev_warn(&client->dev, "clock needs to be set\n"); + + s35390a->rtc = rtc_device_register(s35390a_driver.driver.name, + &client->dev, &s35390a_rtc_ops, THIS_MODULE); + + if (IS_ERR(s35390a->rtc)) { + err = PTR_ERR(s35390a->rtc); + goto exit_dummy; + } + return 0; + +exit_dummy: + for (i = 1; i < 8; ++i) + if (s35390a->client[i]) + i2c_unregister_device(s35390a->client[i]); + kfree(s35390a); + i2c_set_clientdata(client, NULL); + +exit: + return err; +} + +static int s35390a_remove(struct i2c_client *client) +{ + unsigned int i; + + struct s35390a *s35390a = i2c_get_clientdata(client); + for (i = 1; i < 8; ++i) + if (s35390a->client[i]) + i2c_unregister_device(s35390a->client[i]); + + rtc_device_unregister(s35390a->rtc); + kfree(s35390a); + i2c_set_clientdata(client, NULL); + + return 0; +} + +static struct i2c_driver s35390a_driver = { + .driver = { + .name = "rtc-s35390a", + }, + .probe = s35390a_probe, + .remove = s35390a_remove, +}; + +static int __init s35390a_rtc_init(void) +{ + return i2c_add_driver(&s35390a_driver); +} + +static void __exit s35390a_rtc_exit(void) +{ + i2c_del_driver(&s35390a_driver); +} + +MODULE_AUTHOR("Byron Bradley <byron.bbradley@gmail.com>"); +MODULE_DESCRIPTION("S35390A RTC driver"); +MODULE_LICENSE("GPL"); + +module_init(s35390a_rtc_init); +module_exit(s35390a_rtc_exit); diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 6f09cbd7fc48..97c68d021d28 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -91,6 +91,8 @@ static const struct pnp_device_id pnp_dev_table[] = { /* Archtek America Corp. 
*/ /* Archtek SmartLink Modem 3334BT Plug & Play */ { "GVC000F", 0 }, + /* Archtek SmartLink Modem 3334BRV 33.6K Data Fax Voice */ + { "GVC0303", 0 }, /* Hayes */ /* Hayes Optima 288 V.34-V.FC + FAX + Voice Plug & Play */ { "HAY0001", 0 }, diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c index 348ee2c19b58..c2bb11c02bde 100644 --- a/drivers/serial/m32r_sio.c +++ b/drivers/serial/m32r_sio.c @@ -421,7 +421,7 @@ static void transmit_chars(struct uart_sio_port *up) up->port.icount.tx++; if (uart_circ_empty(xmit)) break; - while (!serial_in(up, UART_LSR) & UART_LSR_THRE); + while (!(serial_in(up, UART_LSR) & UART_LSR_THRE)); } while (--count > 0); diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c index 253ed5682a6d..a86315a0c5b8 100644 --- a/drivers/spi/mpc52xx_psc_spi.c +++ b/drivers/spi/mpc52xx_psc_spi.c @@ -42,6 +42,7 @@ struct mpc52xx_psc_spi { /* driver internal data */ struct mpc52xx_psc __iomem *psc; + struct mpc52xx_psc_fifo __iomem *fifo; unsigned int irq; u8 bits_per_word; u8 busy; @@ -139,6 +140,7 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, { struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master); struct mpc52xx_psc __iomem *psc = mps->psc; + struct mpc52xx_psc_fifo __iomem *fifo = mps->fifo; unsigned rb = 0; /* number of bytes receieved */ unsigned sb = 0; /* number of bytes sent */ unsigned char *rx_buf = (unsigned char *)t->rx_buf; @@ -190,11 +192,11 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, out_8(&psc->mode, 0); } else { out_8(&psc->mode, MPC52xx_PSC_MODE_FFULL); - out_be16(&psc->rfalarm, rfalarm); + out_be16(&fifo->rfalarm, rfalarm); } out_be16(&psc->mpc52xx_psc_imr, MPC52xx_PSC_IMR_RXRDY); wait_for_completion(&mps->done); - recv_at_once = in_be16(&psc->rfnum); + recv_at_once = in_be16(&fifo->rfnum); dev_dbg(&spi->dev, "%d bytes received\n", recv_at_once); send_at_once = recv_at_once; @@ -331,6 +333,7 @@ static void mpc52xx_psc_spi_cleanup(struct 
spi_device *spi) static int mpc52xx_psc_spi_port_config(int psc_id, struct mpc52xx_psc_spi *mps) { struct mpc52xx_psc __iomem *psc = mps->psc; + struct mpc52xx_psc_fifo __iomem *fifo = mps->fifo; u32 mclken_div; int ret = 0; @@ -346,7 +349,7 @@ static int mpc52xx_psc_spi_port_config(int psc_id, struct mpc52xx_psc_spi *mps) /* Disable interrupts, interrupts are based on alarm level */ out_be16(&psc->mpc52xx_psc_imr, 0); out_8(&psc->command, MPC52xx_PSC_SEL_MODE_REG_1); - out_8(&psc->rfcntl, 0); + out_8(&fifo->rfcntl, 0); out_8(&psc->mode, MPC52xx_PSC_MODE_FFULL); /* Configure 8bit codec mode as a SPI master and use EOF flags */ @@ -419,6 +422,8 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr, ret = -EFAULT; goto free_master; } + /* On the 5200, fifo regs are immediately ajacent to the psc regs */ + mps->fifo = ((void __iomem *)mps->psc) + sizeof(struct mpc52xx_psc); ret = request_irq(mps->irq, mpc52xx_psc_spi_isr, 0, "mpc52xx-psc-spi", mps); diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index e83dfba7e636..742b5c656d66 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -237,12 +237,14 @@ static int sm501fb_check_var(struct fb_var_screeninfo *var, /* check we can fit these values into the registers */ - if (var->hsync_len > 255 || var->vsync_len > 255) + if (var->hsync_len > 255 || var->vsync_len > 63) return -EINVAL; - if ((var->xres + var->right_margin) >= 4096) + /* hdisplay end and hsync start */ + if ((var->xres + var->right_margin) > 4096) return -EINVAL; + /* vdisplay end and vsync start */ if ((var->yres + var->lower_margin) > 2048) return -EINVAL; @@ -281,19 +283,21 @@ static int sm501fb_check_var(struct fb_var_screeninfo *var, var->blue.length = var->bits_per_pixel; var->blue.offset = 0; var->transp.length = 0; + var->transp.offset = 0; break; case 16: if (sm->pdata->flags & SM501_FBPD_SWAP_FB_ENDIAN) { - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - } else { 
var->blue.offset = 11; var->green.offset = 5; var->red.offset = 0; + } else { + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; } + var->transp.offset = 0; var->red.length = 5; var->green.length = 6; @@ -397,7 +401,7 @@ static int sm501fb_set_par_common(struct fb_info *info, break; case 16: - info->fix.visual = FB_VISUAL_DIRECTCOLOR; + info->fix.visual = FB_VISUAL_TRUECOLOR; break; case 32: @@ -613,6 +617,7 @@ static int sm501fb_set_par_crt(struct fb_info *info) case 16: control |= SM501_DC_CRT_CONTROL_16BPP; + sm501fb_setup_gamma(fbi, SM501_DC_CRT_PALETTE); break; case 32: @@ -750,6 +755,7 @@ static int sm501fb_set_par_pnl(struct fb_info *info) case 16: control |= SM501_DC_PANEL_CONTROL_16BPP; + sm501fb_setup_gamma(fbi, SM501_DC_PANEL_PALETTE); break; case 32: diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c index 70fb4ee2b421..919ce75db9e2 100644 --- a/drivers/video/tridentfb.c +++ b/drivers/video/tridentfb.c @@ -564,19 +564,46 @@ static inline void write3CE(int reg, unsigned char val) t_outb(val, 0x3CF); } -static inline void enable_mmio(void) +static void enable_mmio(void) { + unsigned char tmp; + /* Goto New Mode */ outb(0x0B, 0x3C4); inb(0x3C5); /* Unprotect registers */ outb(NewMode1, 0x3C4); + tmp = inb(0x3C5); outb(0x80, 0x3C5); /* Enable MMIO */ outb(PCIReg, 0x3D4); outb(inb(0x3D5) | 0x01, 0x3D5); + + t_outb(NewMode1, 0x3C4); + t_outb(tmp, 0x3C5); +} + +static void disable_mmio(void) +{ + unsigned char tmp; + + /* Goto New Mode */ + t_outb(0x0B, 0x3C4); + t_inb(0x3C5); + + /* Unprotect registers */ + t_outb(NewMode1, 0x3C4); + tmp = t_inb(0x3C5); + t_outb(0x80, 0x3C5); + + /* Disable MMIO */ + t_outb(PCIReg, 0x3D4); + t_outb(t_inb(0x3D5) & ~0x01, 0x3D5); + + outb(NewMode1, 0x3C4); + outb(tmp, 0x3C5); } #define crtc_unlock() write3X4(CRTVSyncEnd, read3X4(CRTVSyncEnd) & 0x7F) @@ -1239,9 +1266,9 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, default_par.io_virt = 
ioremap_nocache(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); if (!default_par.io_virt) { - release_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); debug("ioremap failed\n"); - return -1; + err = -1; + goto out_unmap1; } enable_mmio(); @@ -1252,25 +1279,21 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) { debug("request_mem_region failed!\n"); + disable_mmio(); err = -1; - goto out_unmap; + goto out_unmap1; } fb_info.screen_base = ioremap_nocache(tridentfb_fix.smem_start, tridentfb_fix.smem_len); if (!fb_info.screen_base) { - release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len); debug("ioremap failed\n"); err = -1; - goto out_unmap; + goto out_unmap2; } output("%s board found\n", pci_name(dev)); -#if 0 - output("Trident board found : mem = %X, io = %X, mem_v = %X, io_v = %X\n", - tridentfb_fix.smem_start, tridentfb_fix.mmio_start, fb_info.screen_base, default_par.io_virt); -#endif displaytype = get_displaytype(); if (flatpanel) @@ -1288,9 +1311,12 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, if (!fb_find_mode(&default_var, &fb_info, mode, NULL, 0, NULL, bpp)) { err = -EINVAL; - goto out_unmap; + goto out_unmap2; } - fb_alloc_cmap(&fb_info.cmap, 256, 0); + err = fb_alloc_cmap(&fb_info.cmap, 256, 0); + if (err < 0) + goto out_unmap2; + if (defaultaccel && acc) default_var.accel_flags |= FB_ACCELF_TEXT; else @@ -1300,19 +1326,24 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, fb_info.device = &dev->dev; if (register_framebuffer(&fb_info) < 0) { printk(KERN_ERR "tridentfb: could not register Trident framebuffer\n"); + fb_dealloc_cmap(&fb_info.cmap); err = -EINVAL; - goto out_unmap; + goto out_unmap2; } output("fb%d: %s frame buffer device %dx%d-%dbpp\n", fb_info.node, fb_info.fix.id, default_var.xres, default_var.yres, default_var.bits_per_pixel); return 0; -out_unmap: - if (default_par.io_virt) - 
iounmap(default_par.io_virt); +out_unmap2: if (fb_info.screen_base) iounmap(fb_info.screen_base); + release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len); + disable_mmio(); +out_unmap1: + if (default_par.io_virt) + iounmap(default_par.io_virt); + release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); return err; } @@ -1323,7 +1354,7 @@ static void __devexit trident_pci_remove(struct pci_dev *dev) iounmap(par->io_virt); iounmap(fb_info.screen_base); release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len); - release_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); + release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); } /* List of boards that we are trying to support */ diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c index 688e435b4d9a..10211e493001 100644 --- a/drivers/w1/masters/ds1wm.c +++ b/drivers/w1/masters/ds1wm.c @@ -17,6 +17,7 @@ #include <linux/pm.h> #include <linux/platform_device.h> #include <linux/clk.h> +#include <linux/err.h> #include <linux/delay.h> #include <linux/ds1wm.h> @@ -102,12 +103,12 @@ struct ds1wm_data { static inline void ds1wm_write_register(struct ds1wm_data *ds1wm_data, u32 reg, u8 val) { - __raw_writeb(val, ds1wm_data->map + (reg << ds1wm_data->bus_shift)); + __raw_writeb(val, ds1wm_data->map + (reg << ds1wm_data->bus_shift)); } static inline u8 ds1wm_read_register(struct ds1wm_data *ds1wm_data, u32 reg) { - return __raw_readb(ds1wm_data->map + (reg << ds1wm_data->bus_shift)); + return __raw_readb(ds1wm_data->map + (reg << ds1wm_data->bus_shift)); } @@ -149,8 +150,8 @@ static int ds1wm_reset(struct ds1wm_data *ds1wm_data) timeleft = wait_for_completion_timeout(&reset_done, DS1WM_TIMEOUT); ds1wm_data->reset_complete = NULL; if (!timeleft) { - dev_dbg(&ds1wm_data->pdev->dev, "reset failed\n"); - return 1; + dev_err(&ds1wm_data->pdev->dev, "reset failed\n"); + return 1; } /* Wait for the end of the reset. 
According to the specs, the time @@ -167,11 +168,11 @@ static int ds1wm_reset(struct ds1wm_data *ds1wm_data) (ds1wm_data->active_high ? DS1WM_INTEN_IAS : 0)); if (!ds1wm_data->slave_present) { - dev_dbg(&ds1wm_data->pdev->dev, "reset: no devices found\n"); - return 1; - } + dev_dbg(&ds1wm_data->pdev->dev, "reset: no devices found\n"); + return 1; + } - return 0; + return 0; } static int ds1wm_write(struct ds1wm_data *ds1wm_data, u8 data) @@ -334,7 +335,7 @@ static int ds1wm_probe(struct platform_device *pdev) if (!pdev) return -ENODEV; - ds1wm_data = kzalloc(sizeof (*ds1wm_data), GFP_KERNEL); + ds1wm_data = kzalloc(sizeof(*ds1wm_data), GFP_KERNEL); if (!ds1wm_data) return -ENOMEM; @@ -374,8 +375,8 @@ static int ds1wm_probe(struct platform_device *pdev) goto err1; ds1wm_data->clk = clk_get(&pdev->dev, "ds1wm"); - if (!ds1wm_data->clk) { - ret = -ENOENT; + if (IS_ERR(ds1wm_data->clk)) { + ret = PTR_ERR(ds1wm_data->clk); goto err2; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 41a958a7585e..5e1a4fb5cacb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1424,6 +1424,18 @@ struct elf_note_info { int thread_notes; }; +/* + * When a regset has a writeback hook, we call it on each thread before + * dumping user memory. On register window machines, this makes sure the + * user memory backing the register data is up to date before we read it. + */ +static void do_thread_regset_writeback(struct task_struct *task, + const struct user_regset *regset) +{ + if (regset->writeback) + regset->writeback(task, regset, 1); +} + static int fill_thread_core_info(struct elf_thread_core_info *t, const struct user_regset_view *view, long signr, size_t *total) @@ -1445,6 +1457,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, sizeof(t->prstatus), &t->prstatus); *total += notesize(&t->notes[0]); + do_thread_regset_writeback(t->task, &view->regsets[0]); + /* * Each other regset might generate a note too. 
For each regset * that has no core_note_type or is inactive, we leave t->notes[i] @@ -1452,6 +1466,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, */ for (i = 1; i < view->n; ++i) { const struct user_regset *regset = &view->regsets[i]; + do_thread_regset_writeback(t->task, regset); if (regset->core_note_type && (!regset->active || regset->active(t->task, regset))) { int ret; diff --git a/fs/buffer.c b/fs/buffer.c index 897cd7477b34..ddfdd2c80bf9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -835,7 +835,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) smp_mb(); if (buffer_dirty(bh)) { list_add(&bh->b_assoc_buffers, - &bh->b_assoc_map->private_list); + &mapping->private_list); bh->b_assoc_map = mapping; } spin_unlock(lock); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index dc74b186145d..6df1debdccce 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -263,52 +263,102 @@ out: return 0; } -/* This function must zero any hole we create */ +/** + * ecryptfs_prepare_write + * @file: The eCryptfs file + * @page: The eCryptfs page + * @from: The start byte from which we will write + * @to: The end byte to which we will write + * + * This function must zero any hole we create + * + * Returns zero on success; non-zero otherwise + */ static int ecryptfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - int rc = 0; loff_t prev_page_end_size; + int rc = 0; if (!PageUptodate(page)) { - rc = ecryptfs_read_lower_page_segment(page, page->index, 0, - PAGE_CACHE_SIZE, - page->mapping->host); - if (rc) { - printk(KERN_ERR "%s: Error attemping to read lower " - "page segment; rc = [%d]\n", __FUNCTION__, rc); - ClearPageUptodate(page); - goto out; - } else + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private( + file->f_path.dentry->d_inode)->crypt_stat; + + if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) + || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { + rc = 
ecryptfs_read_lower_page_segment( + page, page->index, 0, PAGE_CACHE_SIZE, + page->mapping->host); + if (rc) { + printk(KERN_ERR "%s: Error attemping to read " + "lower page segment; rc = [%d]\n", + __FUNCTION__, rc); + ClearPageUptodate(page); + goto out; + } else + SetPageUptodate(page); + } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) { + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + rc = ecryptfs_copy_up_encrypted_with_header( + page, crypt_stat); + if (rc) { + printk(KERN_ERR "%s: Error attempting " + "to copy the encrypted content " + "from the lower file whilst " + "inserting the metadata from " + "the xattr into the header; rc " + "= [%d]\n", __FUNCTION__, rc); + ClearPageUptodate(page); + goto out; + } + SetPageUptodate(page); + } else { + rc = ecryptfs_read_lower_page_segment( + page, page->index, 0, PAGE_CACHE_SIZE, + page->mapping->host); + if (rc) { + printk(KERN_ERR "%s: Error reading " + "page; rc = [%d]\n", + __FUNCTION__, rc); + ClearPageUptodate(page); + goto out; + } + SetPageUptodate(page); + } + } else { + rc = ecryptfs_decrypt_page(page); + if (rc) { + printk(KERN_ERR "%s: Error decrypting page " + "at index [%ld]; rc = [%d]\n", + __FUNCTION__, page->index, rc); + ClearPageUptodate(page); + goto out; + } SetPageUptodate(page); + } } - prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); - - /* - * If creating a page or more of holes, zero them out via truncate. - * Note, this will increase i_size. - */ + /* If creating a page or more of holes, zero them out via truncate. + * Note, this will increase i_size. 
*/ if (page->index != 0) { if (prev_page_end_size > i_size_read(page->mapping->host)) { rc = ecryptfs_truncate(file->f_path.dentry, prev_page_end_size); if (rc) { - printk(KERN_ERR "Error on attempt to " + printk(KERN_ERR "%s: Error on attempt to " "truncate to (higher) offset [%lld];" - " rc = [%d]\n", prev_page_end_size, rc); + " rc = [%d]\n", __FUNCTION__, + prev_page_end_size, rc); goto out; } } } - /* - * Writing to a new page, and creating a small hole from start of page? - * Zero it out. - */ - if ((i_size_read(page->mapping->host) == prev_page_end_size) && - (from != 0)) { + /* Writing to a new page, and creating a small hole from start + * of page? Zero it out. */ + if ((i_size_read(page->mapping->host) == prev_page_end_size) + && (from != 0)) zero_user(page, 0, PAGE_CACHE_SIZE); - } out: return rc; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 18769cc32377..ad5360664082 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -806,8 +806,8 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, - {Opt_err, NULL}, {Opt_resize, "resize"}, + {Opt_err, NULL}, }; static ext3_fsblk_t get_sb_block(void **data) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 6841452e0dea..393cc22c1717 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2031,7 +2031,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, return -EXDEV; } /* We must not pack tails for quota files on reiserfs for quota IO to work */ - if (!REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask) { + if (!(REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask)) { reiserfs_warning(sb, "reiserfs: Quota file must have tail packing disabled."); path_put(&nd.path); diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h index 4e7bd32288ae..c042194d3ab5 100644 --- a/include/asm-arm/kprobes.h +++ b/include/asm-arm/kprobes.h @@ -20,7 +20,6 @@ #include <linux/ptrace.h> 
#include <linux/percpu.h> -#define ARCH_SUPPORTS_KRETPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 2 #define MAX_STACK_SIZE 64 /* 32 would probably be OK */ diff --git a/include/asm-cris/uaccess.h b/include/asm-cris/uaccess.h index 69d48a2dc8e1..ea11eaf0e922 100644 --- a/include/asm-cris/uaccess.h +++ b/include/asm-cris/uaccess.h @@ -1,43 +1,6 @@ /* * Authors: Bjorn Wesen (bjornw@axis.com) * Hans-Peter Nilsson (hp@axis.com) - * - * $Log: uaccess.h,v $ - * Revision 1.8 2001/10/29 13:01:48 bjornw - * Removed unused variable tmp2 in strnlen_user - * - * Revision 1.7 2001/10/02 12:44:52 hp - * Add support for 64-bit put_user/get_user - * - * Revision 1.6 2001/10/01 14:51:17 bjornw - * Added register prefixes and removed underscores - * - * Revision 1.5 2000/10/25 03:33:21 hp - * - Provide implementation for everything else but get_user and put_user; - * copying inline to/from user for constant length 0..16, 20, 24, and - * clearing for 0..4, 8, 12, 16, 20, 24, strncpy_from_user and strnlen_user - * always inline. - * - Constraints for destination addr in get_user cannot be memory, only reg. - * - Correct labels for PC at expected fault points. - * - Nits with assembly code. - * - Don't use statement expressions without value; use "do {} while (0)". - * - Return correct values from __generic_... functions. - * - * Revision 1.4 2000/09/12 16:28:25 bjornw - * * Removed comments from the get/put user asm code - * * Constrains for destination addr in put_user cannot be memory, only reg - * - * Revision 1.3 2000/09/12 14:30:20 bjornw - * MAX_ADDR_USER does not exist anymore - * - * Revision 1.2 2000/07/13 15:52:48 bjornw - * New user-access functions - * - * Revision 1.1.1.1 2000/07/10 16:32:31 bjornw - * CRIS architecture, working draft - * - * - * */ /* Asm:s have been tweaked (within the domain of correctness) to give @@ -209,9 +172,9 @@ extern long __get_user_bad(void); /* More complex functions. 
Most are inline, but some call functions that live in lib/usercopy.c */ -extern unsigned long __copy_user(void *to, const void *from, unsigned long n); -extern unsigned long __copy_user_zeroing(void *to, const void *from, unsigned long n); -extern unsigned long __do_clear_user(void *to, unsigned long n); +extern unsigned long __copy_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); +extern unsigned long __do_clear_user(void __user *to, unsigned long n); static inline unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n) @@ -253,7 +216,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) } -/* Note that if these expand awfully if made into switch constructs, so +/* Note that these expand awfully if made into switch constructs, so don't do that. */ static inline unsigned long @@ -407,19 +370,21 @@ __constant_clear_user(void __user *to, unsigned long n) */ static inline unsigned long -__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +__generic_copy_from_user_nocheck(void *to, const void __user *from, + unsigned long n) { return __copy_user_zeroing(to,from,n); } static inline unsigned long -__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +__generic_copy_to_user_nocheck(void __user *to, const void *from, + unsigned long n) { return __copy_user(to,from,n); } static inline unsigned long -__generic_clear_user_nocheck(void *to, unsigned long n) +__generic_clear_user_nocheck(void __user *to, unsigned long n) { return __do_clear_user(to,n); } diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h index 007cb16a6b5b..76398ef87e9b 100644 --- a/include/asm-cris/unistd.h +++ b/include/asm-cris/unistd.h @@ -329,12 +329,12 @@ #define __NR_timerfd_create 322 #define __NR_eventfd 323 #define __NR_fallocate 324 -#define __NR_timerfd_settime 315 -#define 
__NR_timerfd_gettime 316 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 #ifdef __KERNEL__ -#define NR_syscalls 325 +#define NR_syscalls 327 #include <asm/arch/unistd.h> diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index a93ce9ef07ff..adbaba14eb0a 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -82,7 +82,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; }; -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define SLOT0_OPCODE_SHIFT (37) diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index afabad230dbb..d0e7701fa1f6 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -80,7 +80,6 @@ typedef unsigned int kprobe_opcode_t; #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h index 948db3d0d05c..330f68caffe4 100644 --- a/include/asm-s390/kprobes.h +++ b/include/asm-s390/kprobes.h @@ -46,7 +46,6 @@ typedef u16 kprobe_opcode_t; ? 
(MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define KPROBE_SWAP_INST 0x10 diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index 7237dd87663e..5879d71afdaa 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -14,8 +14,6 @@ typedef u32 kprobe_opcode_t; #define arch_remove_kprobe(p) do {} while (0) -#define ARCH_SUPPORTS_KRETPROBES - #define flush_insn_slot(p) \ do { flushi(&(p)->ainsn.insn[0]); \ flushi(&(p)->ainsn.insn[1]); \ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 143476a3cb52..61ad7b5d142e 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t; : (((unsigned long)current_thread_info()) + THREAD_SIZE \ - (unsigned long)(ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) extern const int kretprobe_blacklist_size; diff --git a/include/linux/Kbuild b/include/linux/Kbuild index aada32fffec2..994df3780007 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -61,6 +61,7 @@ header-y += efs_fs_sb.h header-y += elf-fdpic.h header-y += elf-em.h header-y += fadvise.h +header-y += falloc.h header-y += fd.h header-y += fdreg.h header-y += fib_rules.h diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac6aad98b607..1ddebfc52565 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -37,7 +37,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR SUBSYS(mem_cgroup) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d0e17e1657dc..dcae0c8d97e6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -138,6 +138,12 @@ extern void __chk_io_ptr(const volatile void __iomem *); #define noinline #endif +/* + * Rather than using noinline to prevent 
stack consumption, use + * noinline_for_stack instead. For documentation reasons. + */ +#define noinline_for_stack noinline + #ifndef __always_inline #define __always_inline inline #endif diff --git a/include/linux/delay.h b/include/linux/delay.h index 17ddb55430ae..54552d21296e 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -7,6 +7,8 @@ * Delay routines, using a pre-computed "loops_per_jiffy" value. */ +#include <linux/kernel.h> + extern unsigned long loops_per_jiffy; #include <asm/delay.h> @@ -32,7 +34,11 @@ extern unsigned long loops_per_jiffy; #endif #ifndef ndelay -#define ndelay(x) udelay(((x)+999)/1000) +static inline void ndelay(unsigned long x) +{ + udelay(DIV_ROUND_UP(x, 1000)); +} +#define ndelay(x) ndelay(x) #endif void calibrate_delay(void); diff --git a/include/linux/gpio.h b/include/linux/gpio.h new file mode 100644 index 000000000000..4987a84078ef --- /dev/null +++ b/include/linux/gpio.h @@ -0,0 +1,95 @@ +#ifndef __LINUX_GPIO_H +#define __LINUX_GPIO_H + +/* see Documentation/gpio.txt */ + +#ifdef CONFIG_GENERIC_GPIO +#include <asm/gpio.h> + +#else + +/* + * Some platforms don't support the GPIO programming interface. + * + * In case some driver uses it anyway (it should normally have + * depended on GENERIC_GPIO), these routines help the compiler + * optimize out much GPIO-related code ... or trigger a runtime + * warning when something is wrongly called. 
+ */ + +static inline int gpio_is_valid(int number) +{ + return 0; +} + +static inline int gpio_request(unsigned gpio, const char *label) +{ + return -ENOSYS; +} + +static inline void gpio_free(unsigned gpio) +{ + /* GPIO can never have been requested */ + WARN_ON(1); +} + +static inline int gpio_direction_input(unsigned gpio) +{ + return -ENOSYS; +} + +static inline int gpio_direction_output(unsigned gpio, int value) +{ + return -ENOSYS; +} + +static inline int gpio_get_value(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline int gpio_get_value_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value_cansleep(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + /* GPIO can never have been requested or set as input */ + WARN_ON(1); + return -EINVAL; +} + +static inline int irq_to_gpio(unsigned irq) +{ + /* irq can never have been returned from gpio_to_irq() */ + WARN_ON(1); + return -EINVAL; +} + +#endif + +#endif /* __LINUX_GPIO_H */ diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 4dd4c04ff2f4..c975caf75385 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,6 @@ +extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, diff --git 
a/include/linux/kprobes.h b/include/linux/kprobes.h index 4a6ce82ba039..0f28486f6360 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -125,11 +125,11 @@ struct jprobe { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef ARCH_SUPPORTS_KRETPROBES +#ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); -#else /* ARCH_SUPPORTS_KRETPROBES */ +#else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -138,7 +138,7 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) { return 0; } -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ /* * Function-return probe - * Note: diff --git a/include/linux/marker.h b/include/linux/marker.h index 5df879dc3776..430f6adf9762 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -104,10 +104,16 @@ static inline void marker_update_probe_range(struct marker *begin, #define MARK_NOARGS " " /* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) +static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) { } +#define __mark_check_format(format, args...) 
\ + do { \ + if (0) \ + ___mark_check_format(format, ## args); \ + } while (0) + extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 04075628cb9a..8b1c4295848b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -25,18 +25,20 @@ struct page_cgroup; struct page; struct mm_struct; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); -extern void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc); + +#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) + extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_uncharge(struct page_cgroup *pc); +extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); -extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); +extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -44,11 +46,9 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); -#define vm_match_cgroup(mm, cgroup) \ +#define mm_match_cgroup(mm, cgroup) \ ((cgroup) == rcu_dereference((mm)->mem_cgroup)) extern int mem_cgroup_prepare_migration(struct page *page); @@ -72,7 +72,7 @@ 
extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); -#else /* CONFIG_CGROUP_MEM_CONT */ +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) { @@ -82,8 +82,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm) { } -static inline void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc) +static inline void page_reset_bad_cgroup(struct page *page) { } @@ -92,33 +91,27 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) return NULL; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } -static inline void mem_cgroup_uncharge(struct page_cgroup *pc) +static inline int mem_cgroup_cache_charge(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { + return 0; } static inline void mem_cgroup_uncharge_page(struct page *page) { } -static inline void mem_cgroup_move_lists(struct page_cgroup *pc, - bool active) -{ -} - -static inline int mem_cgroup_cache_charge(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) +static inline void mem_cgroup_move_lists(struct page *page, bool active) { - return 0; } -static inline int vm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) +static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 34023c65d466..af190ceab971 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -88,7 +88,7 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. 
highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long page_cgroup; #endif }; @@ -222,7 +222,7 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR struct mem_cgroup *mem_cgroup; #endif }; diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index e51b531cd0b2..47fbcba11850 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -235,6 +235,8 @@ struct bitmap { unsigned long flags; + int allclean; + unsigned long max_write_behind; /* write-behind mode */ atomic_t behind_writes; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 85a068bab625..7bb6d1abf71e 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -83,6 +83,7 @@ struct mdk_rdev_s #define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ #define AllReserved 6 /* If whole device is reserved for * one array */ +#define AutoDetected 7 /* added by auto-detect */ int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h index 64236b73c724..d53642d2d899 100644 --- a/include/linux/sm501-regs.h +++ b/include/linux/sm501-regs.h @@ -129,11 +129,14 @@ #define SM501_DEVICEID_SM501 (0x05010000) #define SM501_DEVICEID_IDMASK (0xffff0000) +#define SM501_DEVICEID_REVMASK (0x000000ff) #define SM501_PLLCLOCK_COUNT (0x000064) #define SM501_MISC_TIMING (0x000068) #define SM501_CURRENT_SDRAM_CLOCK (0x00006C) +#define SM501_PROGRAMMABLE_PLL_CONTROL (0x000074) + /* GPIO base */ #define SM501_GPIO (0x010000) #define SM501_GPIO_DATA_LOW (0x00) diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 932a9efee8a5..bca134544700 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -24,7 +24,8 @@ extern int sm501_unit_power(struct device *dev, extern 
unsigned long sm501_set_clock(struct device *dev, int clksrc, unsigned long freq); -extern unsigned long sm501_find_clock(int clksrc, unsigned long req_freq); +extern unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq); /* sm501_misc_control * diff --git a/init/Kconfig b/init/Kconfig index f698a5af5007..442850b984be 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -366,6 +366,21 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config CGROUP_MEM_RES_CTLR + bool "Memory Resource Controller for Control Groups" + depends on CGROUPS && RESOURCE_COUNTERS + help + Provides a memory resource controller that manages both page cache and + RSS memory. + + Note that setting this option increases fixed memory overhead + associated with each page of memory in the system by 4/8 bytes + and also increases cache misses because struct page on many 64bit + systems will not fit into a single cache line anymore. + + Only enable when you're ok with these trade offs and really + sure you need the memory resource controller. + config SYSFS_DEPRECATED bool "Create deprecated sysfs files" depends on SYSFS @@ -387,21 +402,6 @@ config SYSFS_DEPRECATED If you are using a distro that was released in 2006 or later, it should be safe to say N here. -config CGROUP_MEM_CONT - bool "Memory controller for cgroups" - depends on CGROUPS && RESOURCE_COUNTERS - help - Provides a memory controller that manages both page cache and - RSS memory. - - Note that setting this option increases fixed memory overhead - associated with each page of memory in the system by 4/8 bytes - and also increases cache misses because struct page on many 64bit - systems will not fit into a single cache line anymore. - - Only enable when you're ok with these trade offs and really - sure you need the memory controller. 
- config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" depends on CPUSETS diff --git a/init/main.c b/init/main.c index 8b1982082ad8..fbb0167c6b8a 100644 --- a/init/main.c +++ b/init/main.c @@ -254,7 +254,7 @@ early_param("quiet", quiet_kernel); static int __init loglevel(char *str) { get_option(&str, &console_loglevel); - return 1; + return 0; } early_param("loglevel", loglevel); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d8abe996e009..e9c2fb01e89b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2232,7 +2232,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, mutex_lock(&cgroup_mutex); - cgrp->flags = 0; INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); @@ -2242,6 +2241,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, cgrp->root = parent->root; cgrp->top_cgroup = parent->top_cgroup; + if (notify_on_release(parent)) + set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); if (IS_ERR(css)) { diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a86e6432338..fcfb580c3afc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -498,27 +498,36 @@ static int __kprobes in_kprobes_functions(unsigned long addr) return 0; } +/* + * If we have a symbol_name argument, look it up and add the offset field + * to it. This way, we can specify a relative address to a symbol. 
+ */ +static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) +{ + kprobe_opcode_t *addr = p->addr; + if (p->symbol_name) { + if (addr) + return NULL; + kprobe_lookup_name(p->symbol_name, addr); + } + + if (!addr) + return NULL; + return (kprobe_opcode_t *)(((char *)addr) + p->offset); +} + static int __kprobes __register_kprobe(struct kprobe *p, unsigned long called_from) { int ret = 0; struct kprobe *old_p; struct module *probed_mod; + kprobe_opcode_t *addr; - /* - * If we have a symbol_name argument look it up, - * and add it to the address. That way the addr - * field can either be global or relative to a symbol. - */ - if (p->symbol_name) { - if (p->addr) - return -EINVAL; - kprobe_lookup_name(p->symbol_name, p->addr); - } - - if (!p->addr) + addr = kprobe_addr(p); + if (!addr) return -EINVAL; - p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset); + p->addr = addr; if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr)) @@ -678,8 +687,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp) unregister_kprobe(&jp->kp); } -#ifdef ARCH_SUPPORTS_KRETPROBES - +#ifdef CONFIG_KRETPROBES /* * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. 
@@ -722,12 +730,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp) int ret = 0; struct kretprobe_instance *inst; int i; - void *addr = rp->kp.addr; + void *addr; if (kretprobe_blacklist_size) { - if (addr == NULL) - kprobe_lookup_name(rp->kp.symbol_name, addr); - addr += rp->kp.offset; + addr = kprobe_addr(&rp->kp); + if (!addr) + return -EINVAL; for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { if (kretprobe_blacklist[i].addr == addr) @@ -769,8 +777,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) return ret; } -#else /* ARCH_SUPPORTS_KRETPROBES */ - +#else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; @@ -781,8 +788,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, { return 0; } - -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ void __kprobes unregister_kretprobe(struct kretprobe *rp) { diff --git a/kernel/marker.c b/kernel/marker.c index 50effc01d9a2..48a4ea5afffd 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -698,14 +698,12 @@ int marker_probe_unregister(const char *name, { struct marker_entry *entry; struct marker_probe_closure *old; - int ret = 0; + int ret = -ENOENT; mutex_lock(&markers_mutex); entry = get_marker(name); - if (!entry) { - ret = -ENOENT; + if (!entry) goto end; - } if (entry->rcu_pending) rcu_barrier(); old = marker_entry_remove_probe(entry, probe, probe_private); @@ -713,12 +711,15 @@ int marker_probe_unregister(const char *name, marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); entry = get_marker(name); + if (!entry) + goto end; entry->oldptr = old; entry->rcu_pending = 1; /* write rcu_pending before calling the RCU callback */ smp_wmb(); call_rcu(&entry->rcu, free_old_closure); remove_marker(name); /* Ignore busy error message */ + ret = 0; end: mutex_unlock(&markers_mutex); return ret; diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 16cbec2d5d60..efbfc0fc232f 100644 --- 
a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -113,6 +113,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member, ret = -EINVAL; + strstrip(buf); if (write_strategy) { if (write_strategy(buf, &tmp)) { goto out_free; diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 495575a59ca6..a3b8d4c3f77a 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -40,10 +40,12 @@ static inline void set_bit_area(unsigned long *map, unsigned long i, } } -static inline int is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size) +int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) { + BUG_ON(!is_power_of_2(boundary_size)); + shift = (shift + index) & (boundary_size - 1); return shift + nr > boundary_size; } @@ -57,7 +59,7 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, again: index = find_next_zero_area(map, size, start, nr, align_mask); if (index != -1) { - if (is_span_boundary(index, nr, shift, boundary_size)) { + if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { /* we could do more effectively */ start = index + 1; goto again; diff --git a/mm/Makefile b/mm/Makefile index 9f117bab5322..a5b0dd93427a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o -obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 7e58322b7134..b0012e27fea8 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -6,6 +6,10 @@ #include <linux/mm.h> #include <linux/module.h> +#ifndef cache_line_size +#define cache_line_size() L1_CACHE_BYTES +#endif + /** * percpu_depopulate - depopulate per-cpu data for given cpu * @__pdata: per-cpu data to depopulate @@ -52,6 +56,11 @@ 
void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) struct percpu_data *pdata = __percpu_disguise(__pdata); int node = cpu_to_node(cpu); + /* + * We should make sure each CPU gets private memory. + */ + size = roundup(size, cache_line_size()); + BUG_ON(pdata->ptrs[cpu]); if (node_online(node)) pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); @@ -98,7 +107,11 @@ EXPORT_SYMBOL_GPL(__percpu_populate_mask); */ void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { - void *pdata = kzalloc(nr_cpu_ids * sizeof(void *), gfp); + /* + * We allocate whole cache lines to avoid false sharing + */ + size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); + void *pdata = kzalloc(sz, gfp); void *__pdata = __percpu_disguise(pdata); if (unlikely(!pdata)) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 89e6286a7f57..dcacc811e70e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -71,7 +71,25 @@ static void enqueue_huge_page(struct page *page) free_huge_pages_node[nid]++; } -static struct page *dequeue_huge_page(struct vm_area_struct *vma, +static struct page *dequeue_huge_page(void) +{ + int nid; + struct page *page = NULL; + + for (nid = 0; nid < MAX_NUMNODES; ++nid) { + if (!list_empty(&hugepage_freelists[nid])) { + page = list_entry(hugepage_freelists[nid].next, + struct page, lru); + list_del(&page->lru); + free_huge_pages--; + free_huge_pages_node[nid]--; + break; + } + } + return page; +} + +static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, unsigned long address) { int nid; @@ -296,8 +314,10 @@ static int gather_surplus_pages(int delta) int needed, allocated; needed = (resv_huge_pages + delta) - free_huge_pages; - if (needed <= 0) + if (needed <= 0) { + resv_huge_pages += delta; return 0; + } allocated = 0; INIT_LIST_HEAD(&surplus_list); @@ -335,9 +355,12 @@ retry: * The surplus_list now contains _at_least_ the number of extra pages * needed to accomodate the reservation. 
Add the appropriate number * of pages to the hugetlb pool and free the extras back to the buddy - * allocator. + * allocator. Commit the entire reservation here to prevent another + * process from stealing the pages as they are added to the pool but + * before they are reserved. */ needed += allocated; + resv_huge_pages += delta; ret = 0; free: list_for_each_entry_safe(page, tmp, &surplus_list, lru) { @@ -371,6 +394,9 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) struct page *page; unsigned long nr_pages; + /* Uncommit the reservation */ + resv_huge_pages -= unused_resv_pages; + nr_pages = min(unused_resv_pages, surplus_huge_pages); while (nr_pages) { @@ -402,7 +428,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma, struct page *page; spin_lock(&hugetlb_lock); - page = dequeue_huge_page(vma, addr); + page = dequeue_huge_page_vma(vma, addr); spin_unlock(&hugetlb_lock); return page ? page : ERR_PTR(-VM_FAULT_OOM); } @@ -417,7 +443,7 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma, spin_lock(&hugetlb_lock); if (free_huge_pages > resv_huge_pages) - page = dequeue_huge_page(vma, addr); + page = dequeue_huge_page_vma(vma, addr); spin_unlock(&hugetlb_lock); if (!page) { page = alloc_buddy_huge_page(vma, addr); @@ -570,7 +596,7 @@ static unsigned long set_max_huge_pages(unsigned long count) min_count = max(count, min_count); try_to_free_low(min_count); while (min_count < persistent_huge_pages) { - struct page *page = dequeue_huge_page(NULL, 0); + struct page *page = dequeue_huge_page(); if (!page) break; update_and_free_page(page); @@ -1205,12 +1231,13 @@ static int hugetlb_acct_memory(long delta) if (gather_surplus_pages(delta) < 0) goto out; - if (delta > cpuset_mems_nr(free_huge_pages_node)) + if (delta > cpuset_mems_nr(free_huge_pages_node)) { + return_unused_surplus_pages(delta); goto out; + } } ret = 0; - resv_huge_pages += delta; if (delta < 0) return_unused_surplus_pages((unsigned 
long) -delta); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 631002d085d1..8b9f6cae938e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -137,14 +137,21 @@ struct mem_cgroup { */ struct mem_cgroup_stat stat; }; +static struct mem_cgroup init_mem_cgroup; /* * We use the lower bit of the page->page_cgroup pointer as a bit spin - * lock. We need to ensure that page->page_cgroup is atleast two - * byte aligned (based on comments from Nick Piggin) + * lock. We need to ensure that page->page_cgroup is at least two + * byte aligned (based on comments from Nick Piggin). But since + * bit_spin_lock doesn't actually set that lock bit in a non-debug + * uniprocessor kernel, we should avoid setting it here too. */ #define PAGE_CGROUP_LOCK_BIT 0x0 -#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#else +#define PAGE_CGROUP_LOCK 0x0 +#endif /* * A page_cgroup page is associated with every page descriptor. 
The @@ -154,37 +161,27 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ struct page *page; struct mem_cgroup *mem_cgroup; - atomic_t ref_cnt; /* Helpful when pages move b/w */ - /* mapped and cached states */ - int flags; + int ref_cnt; /* cached, mapped, migrating */ + int flags; }; #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ -static inline int page_cgroup_nid(struct page_cgroup *pc) +static int page_cgroup_nid(struct page_cgroup *pc) { return page_to_nid(pc->page); } -static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) +static enum zone_type page_cgroup_zid(struct page_cgroup *pc) { return page_zonenum(pc->page); } -enum { - MEM_CGROUP_TYPE_UNSPEC = 0, - MEM_CGROUP_TYPE_MAPPED, - MEM_CGROUP_TYPE_CACHED, - MEM_CGROUP_TYPE_ALL, - MEM_CGROUP_TYPE_MAX, -}; - enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, }; - /* * Always modified under lru lock. Then, not necessary to preempt_disable() */ @@ -193,23 +190,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, { int val = (charge)? 
1 : -1; struct mem_cgroup_stat *stat = &mem->stat; - VM_BUG_ON(!irqs_disabled()); + VM_BUG_ON(!irqs_disabled()); if (flags & PAGE_CGROUP_FLAG_CACHE) - __mem_cgroup_stat_add_safe(stat, - MEM_CGROUP_STAT_CACHE, val); + __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); else __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); } -static inline struct mem_cgroup_per_zone * +static struct mem_cgroup_per_zone * mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) { - BUG_ON(!mem->info.nodeinfo[nid]); return &mem->info.nodeinfo[nid]->zoneinfo[zid]; } -static inline struct mem_cgroup_per_zone * +static struct mem_cgroup_per_zone * page_cgroup_zoneinfo(struct page_cgroup *pc) { struct mem_cgroup *mem = pc->mem_cgroup; @@ -234,18 +229,14 @@ static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem, return total; } -static struct mem_cgroup init_mem_cgroup; - -static inline -struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) +static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) { return container_of(cgroup_subsys_state(cont, mem_cgroup_subsys_id), struct mem_cgroup, css); } -static inline -struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) +static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) { return container_of(task_subsys_state(p, mem_cgroup_subsys_id), struct mem_cgroup, css); @@ -267,81 +258,33 @@ void mm_free_cgroup(struct mm_struct *mm) static inline int page_cgroup_locked(struct page *page) { - return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, - &page->page_cgroup); + return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } -void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) +static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) { - int locked; - - /* - * While resetting the page_cgroup we might not hold the - * page_cgroup lock. 
free_hot_cold_page() is an example - * of such a scenario - */ - if (pc) - VM_BUG_ON(!page_cgroup_locked(page)); - locked = (page->page_cgroup & PAGE_CGROUP_LOCK); - page->page_cgroup = ((unsigned long)pc | locked); + VM_BUG_ON(!page_cgroup_locked(page)); + page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); } struct page_cgroup *page_get_page_cgroup(struct page *page) { - return (struct page_cgroup *) - (page->page_cgroup & ~PAGE_CGROUP_LOCK); + return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK); } -static void __always_inline lock_page_cgroup(struct page *page) +static void lock_page_cgroup(struct page *page) { bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); - VM_BUG_ON(!page_cgroup_locked(page)); -} - -static void __always_inline unlock_page_cgroup(struct page *page) -{ - bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } -/* - * Tie new page_cgroup to struct page under lock_page_cgroup() - * This can fail if the page has been tied to a page_cgroup. - * If success, returns 0. - */ -static int page_cgroup_assign_new_page_cgroup(struct page *page, - struct page_cgroup *pc) +static int try_lock_page_cgroup(struct page *page) { - int ret = 0; - - lock_page_cgroup(page); - if (!page_get_page_cgroup(page)) - page_assign_page_cgroup(page, pc); - else /* A page is tied to other pc. */ - ret = 1; - unlock_page_cgroup(page); - return ret; + return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } -/* - * Clear page->page_cgroup member under lock_page_cgroup(). - * If given "pc" value is different from one page->page_cgroup, - * page->cgroup is not cleared. - * Returns a value of page->page_cgroup at lock taken. 
- * A can can detect failure of clearing by following - * clear_page_cgroup(page, pc) == pc - */ - -static struct page_cgroup *clear_page_cgroup(struct page *page, - struct page_cgroup *pc) +static void unlock_page_cgroup(struct page *page) { - struct page_cgroup *ret; - /* lock and clear */ - lock_page_cgroup(page); - ret = page_get_page_cgroup(page); - if (likely(ret == pc)) - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); - return ret; + bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } static void __mem_cgroup_remove_list(struct page_cgroup *pc) @@ -399,7 +342,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) int ret; task_lock(task); - ret = task->mm && vm_match_cgroup(task->mm, mem); + ret = task->mm && mm_match_cgroup(task->mm, mem); task_unlock(task); return ret; } @@ -407,18 +350,30 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) /* * This routine assumes that the appropriate zone's lru lock is already held */ -void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) +void mem_cgroup_move_lists(struct page *page, bool active) { + struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; unsigned long flags; - if (!pc) + /* + * We cannot lock_page_cgroup while holding zone's lru_lock, + * because other holders of lock_page_cgroup can be interrupted + * with an attempt to rotate_reclaimable_page. But we cannot + * safely get to page_cgroup without it, so just try_lock it: + * mem_cgroup_isolate_pages allows for page left on wrong list. 
+ */ + if (!try_lock_page_cgroup(page)) return; - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_move_lists(pc, active); - spin_unlock_irqrestore(&mz->lru_lock, flags); + pc = page_get_page_cgroup(page); + if (pc) { + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_move_lists(pc, active); + spin_unlock_irqrestore(&mz->lru_lock, flags); + } + unlock_page_cgroup(page); } /* @@ -437,6 +392,7 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); return (int)((rss * 100L) / total); } + /* * This function is called from vmscan.c. In page reclaiming loop. balance * between active and inactive list is calculated. For memory controller @@ -500,7 +456,6 @@ long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE); - return (nr_inactive >> priority); } @@ -586,26 +541,21 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, * with it */ retry: - if (page) { - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - /* - * The page_cgroup exists and - * the page has already been accounted. - */ - if (pc) { - if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { - /* this page is under being uncharged ? */ - unlock_page_cgroup(page); - cpu_relax(); - goto retry; - } else { - unlock_page_cgroup(page); - goto done; - } - } + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); + /* + * The page_cgroup exists and + * the page has already been accounted. 
+ */ + if (pc) { + VM_BUG_ON(pc->page != page); + VM_BUG_ON(pc->ref_cnt <= 0); + + pc->ref_cnt++; unlock_page_cgroup(page); + goto done; } + unlock_page_cgroup(page); pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); if (pc == NULL) @@ -623,16 +573,11 @@ retry: rcu_read_lock(); mem = rcu_dereference(mm->mem_cgroup); /* - * For every charge from the cgroup, increment reference - * count + * For every charge from the cgroup, increment reference count */ css_get(&mem->css); rcu_read_unlock(); - /* - * If we created the page_cgroup, we should free it on exceeding - * the cgroup limit. - */ while (res_counter_charge(&mem->res, PAGE_SIZE)) { if (!(gfp_mask & __GFP_WAIT)) goto out; @@ -641,12 +586,12 @@ retry: continue; /* - * try_to_free_mem_cgroup_pages() might not give us a full - * picture of reclaim. Some pages are reclaimed and might be - * moved to swap cache or just unmapped from the cgroup. - * Check the limit again to see if the reclaim reduced the - * current usage of the cgroup before giving up - */ + * try_to_free_mem_cgroup_pages() might not give us a full + * picture of reclaim. Some pages are reclaimed and might be + * moved to swap cache or just unmapped from the cgroup. + * Check the limit again to see if the reclaim reduced the + * current usage of the cgroup before giving up + */ if (res_counter_check_under_limit(&mem->res)) continue; @@ -657,14 +602,16 @@ retry: congestion_wait(WRITE, HZ/10); } - atomic_set(&pc->ref_cnt, 1); + pc->ref_cnt = 1; pc->mem_cgroup = mem; pc->page = page; pc->flags = PAGE_CGROUP_FLAG_ACTIVE; if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) pc->flags |= PAGE_CGROUP_FLAG_CACHE; - if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { + lock_page_cgroup(page); + if (page_get_page_cgroup(page)) { + unlock_page_cgroup(page); /* * Another charge has been added to this page already. 
* We take lock_page_cgroup(page) again and read @@ -673,17 +620,16 @@ retry: res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); kfree(pc); - if (!page) - goto done; goto retry; } + page_assign_page_cgroup(page, pc); mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - /* Update statistics vector */ __mem_cgroup_add_list(pc); spin_unlock_irqrestore(&mz->lru_lock, flags); + unlock_page_cgroup(page); done: return 0; out: @@ -693,70 +639,61 @@ err: return -ENOMEM; } -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED); + MEM_CGROUP_CHARGE_TYPE_MAPPED); } -/* - * See if the cached pages should be charged at all? - */ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - int ret = 0; if (!mm) mm = &init_mm; - - ret = mem_cgroup_charge_common(page, mm, gfp_mask, + return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_CACHE); - return ret; } /* * Uncharging is always a welcome operation, we never complain, simply - * uncharge. This routine should be called with lock_page_cgroup held + * uncharge. */ -void mem_cgroup_uncharge(struct page_cgroup *pc) +void mem_cgroup_uncharge_page(struct page *page) { + struct page_cgroup *pc; struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; - struct page *page; unsigned long flags; /* * Check if our page_cgroup is valid */ + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); if (!pc) - return; + goto unlock; - if (atomic_dec_and_test(&pc->ref_cnt)) { - page = pc->page; + VM_BUG_ON(pc->page != page); + VM_BUG_ON(pc->ref_cnt <= 0); + + if (--(pc->ref_cnt) == 0) { mz = page_cgroup_zoneinfo(pc); - /* - * get page->cgroup and clear it under lock. - * force_empty can drop page->cgroup without checking refcnt. 
- */ + spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_remove_list(pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); + + page_assign_page_cgroup(page, NULL); unlock_page_cgroup(page); - if (clear_page_cgroup(page, pc) == pc) { - mem = pc->mem_cgroup; - css_put(&mem->css); - res_counter_uncharge(&mem->res, PAGE_SIZE); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); - kfree(pc); - } - lock_page_cgroup(page); + + mem = pc->mem_cgroup; + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + + kfree(pc); + return; } -} -void mem_cgroup_uncharge_page(struct page *page) -{ - lock_page_cgroup(page); - mem_cgroup_uncharge(page_get_page_cgroup(page)); +unlock: unlock_page_cgroup(page); } @@ -764,63 +701,59 @@ void mem_cgroup_uncharge_page(struct page *page) * Returns non-zero if a page (under migration) has valid page_cgroup member. * Refcnt of page_cgroup is incremented. */ - int mem_cgroup_prepare_migration(struct page *page) { struct page_cgroup *pc; - int ret = 0; + lock_page_cgroup(page); pc = page_get_page_cgroup(page); - if (pc && atomic_inc_not_zero(&pc->ref_cnt)) - ret = 1; + if (pc) + pc->ref_cnt++; unlock_page_cgroup(page); - return ret; + return pc != NULL; } void mem_cgroup_end_migration(struct page *page) { - struct page_cgroup *pc; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - mem_cgroup_uncharge(pc); - unlock_page_cgroup(page); + mem_cgroup_uncharge_page(page); } + /* - * We know both *page* and *newpage* are now not-on-LRU and Pg_locked. + * We know both *page* and *newpage* are now not-on-LRU and PG_locked. * And no race with uncharge() routines because page_cgroup for *page* * has extra one reference by mem_cgroup_prepare_migration. 
*/ - void mem_cgroup_page_migration(struct page *page, struct page *newpage) { struct page_cgroup *pc; - struct mem_cgroup *mem; - unsigned long flags; struct mem_cgroup_per_zone *mz; -retry: + unsigned long flags; + + lock_page_cgroup(page); pc = page_get_page_cgroup(page); - if (!pc) + if (!pc) { + unlock_page_cgroup(page); return; - mem = pc->mem_cgroup; + } + mz = page_cgroup_zoneinfo(pc); - if (clear_page_cgroup(page, pc) != pc) - goto retry; spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); spin_unlock_irqrestore(&mz->lru_lock, flags); + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); + pc->page = newpage; lock_page_cgroup(newpage); page_assign_page_cgroup(newpage, pc); - unlock_page_cgroup(newpage); mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_add_list(pc); spin_unlock_irqrestore(&mz->lru_lock, flags); - return; + + unlock_page_cgroup(newpage); } /* @@ -829,14 +762,13 @@ retry: * *And* this routine doesn't reclaim page itself, just removes page_cgroup. 
*/ #define FORCE_UNCHARGE_BATCH (128) -static void -mem_cgroup_force_empty_list(struct mem_cgroup *mem, +static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct mem_cgroup_per_zone *mz, int active) { struct page_cgroup *pc; struct page *page; - int count; + int count = FORCE_UNCHARGE_BATCH; unsigned long flags; struct list_head *list; @@ -845,46 +777,36 @@ mem_cgroup_force_empty_list(struct mem_cgroup *mem, else list = &mz->inactive_list; - if (list_empty(list)) - return; -retry: - count = FORCE_UNCHARGE_BATCH; spin_lock_irqsave(&mz->lru_lock, flags); - - while (--count && !list_empty(list)) { + while (!list_empty(list)) { pc = list_entry(list->prev, struct page_cgroup, lru); page = pc->page; - /* Avoid race with charge */ - atomic_set(&pc->ref_cnt, 0); - if (clear_page_cgroup(page, pc) == pc) { - css_put(&mem->css); - res_counter_uncharge(&mem->res, PAGE_SIZE); - __mem_cgroup_remove_list(pc); - kfree(pc); - } else /* being uncharged ? ...do relax */ - break; + get_page(page); + spin_unlock_irqrestore(&mz->lru_lock, flags); + mem_cgroup_uncharge_page(page); + put_page(page); + if (--count <= 0) { + count = FORCE_UNCHARGE_BATCH; + cond_resched(); + } + spin_lock_irqsave(&mz->lru_lock, flags); } spin_unlock_irqrestore(&mz->lru_lock, flags); - if (!list_empty(list)) { - cond_resched(); - goto retry; - } - return; } /* * make mem_cgroup's charge to be 0 if there is no task. * This enables deleting this mem_cgroup. */ - -int mem_cgroup_force_empty(struct mem_cgroup *mem) +static int mem_cgroup_force_empty(struct mem_cgroup *mem) { int ret = -EBUSY; int node, zid; + css_get(&mem->css); /* * page reclaim code (kswapd etc..) will move pages between -` * active_list <-> inactive_list while we don't take a lock. + * active_list <-> inactive_list while we don't take a lock. * So, we have to do loop here until all lists are empty. 
*/ while (mem->res.usage > 0) { @@ -906,9 +828,7 @@ out: return ret; } - - -int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) +static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) { *tmp = memparse(buf, &buf); if (*buf != '\0') @@ -945,8 +865,7 @@ static ssize_t mem_force_empty_write(struct cgroup *cont, size_t nbytes, loff_t *ppos) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - int ret; - ret = mem_cgroup_force_empty(mem); + int ret = mem_cgroup_force_empty(mem); if (!ret) ret = nbytes; return ret; @@ -955,7 +874,6 @@ static ssize_t mem_force_empty_write(struct cgroup *cont, /* * Note: This should be removed if cgroup supports write-only file. */ - static ssize_t mem_force_empty_read(struct cgroup *cont, struct cftype *cft, struct file *file, char __user *userbuf, @@ -964,7 +882,6 @@ static ssize_t mem_force_empty_read(struct cgroup *cont, return -EINVAL; } - static const struct mem_cgroup_stat_desc { const char *msg; u64 unit; @@ -1017,8 +934,6 @@ static int mem_control_stat_open(struct inode *unused, struct file *file) return single_open(file, mem_control_stat_show, cont); } - - static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -1084,9 +999,6 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) kfree(mem->info.nodeinfo[node]); } - -static struct mem_cgroup init_mem_cgroup; - static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -1176,7 +1088,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, out: mmput(mm); - return; } struct cgroup_subsys mem_cgroup_subsys = { diff --git a/mm/memory.c b/mm/memory.c index ce3c9e4492d8..0d14d1e58a5f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1711,7 +1711,7 @@ unlock: } return ret; oom_free_new: - __free_page(new_page); + page_cache_release(new_page); oom: if (old_page) page_cache_release(old_page); @@ -2093,12 +2093,9 @@ static int do_swap_page(struct mm_struct *mm, 
struct vm_area_struct *vma, unlock_page(page); if (write_access) { - /* XXX: We could OR the do_wp_page code with this one? */ - if (do_wp_page(mm, vma, address, - page_table, pmd, ptl, pte) & VM_FAULT_OOM) { - mem_cgroup_uncharge_page(page); - ret = VM_FAULT_OOM; - } + ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); + if (ret & VM_FAULT_ERROR) + ret &= VM_FAULT_ERROR; goto out; } @@ -2163,7 +2160,7 @@ release: page_cache_release(page); goto unlock; oom_free_page: - __free_page(page); + page_cache_release(page); oom: return VM_FAULT_OOM; } diff --git a/mm/migrate.c b/mm/migrate.c index a73504ff5ab9..4e0eccca5e26 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -153,11 +153,6 @@ static void remove_migration_pte(struct vm_area_struct *vma, return; } - if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { - pte_unmap(ptep); - return; - } - ptl = pte_lockptr(mm, pmd); spin_lock(ptl); pte = *ptep; @@ -169,6 +164,20 @@ static void remove_migration_pte(struct vm_area_struct *vma, if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) goto out; + /* + * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge. + * Failure is not an option here: we're now expected to remove every + * migration pte, and will cause crashes otherwise. Normally this + * is not an issue: mem_cgroup_prepare_migration bumped up the old + * page_cgroup count for safety, that's now attached to the new page, + * so this charge should just be another incrementation of the count, + * to keep in balance with rmap.c's mem_cgroup_uncharging. But if + * there's been a force_empty, those reference counts may no longer + * be reliable, and this charge can actually fail: oh well, we don't + * make the situation any worse by proceeding as if it had succeeded. 
+ */ + mem_cgroup_charge(new, mm, GFP_ATOMIC); + get_page(new); pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); if (is_write_migration_entry(entry)) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4194b9db0104..44b2da11bf43 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -412,7 +412,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, return oom_kill_task(p); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) { unsigned long points = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8896e874a67d..402a504f1228 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -19,6 +19,7 @@ #include <linux/swap.h> #include <linux/interrupt.h> #include <linux/pagemap.h> +#include <linux/jiffies.h> #include <linux/bootmem.h> #include <linux/compiler.h> #include <linux/kernel.h> @@ -221,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) static void bad_page(struct page *page) { - printk(KERN_EMERG "Bad page state in process '%s'\n" - KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" - KERN_EMERG "Trying to fix it up, but a reboot is needed\n" - KERN_EMERG "Backtrace:\n", + void *pc = page_get_page_cgroup(page); + + printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG + "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", current->comm, page, (int)(2*sizeof(unsigned long)), (unsigned long)page->flags, page->mapping, page_mapcount(page), page_count(page)); + if (pc) { + printk(KERN_EMERG "cgroup:%p\n", pc); + page_reset_bad_cgroup(page); + } + printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" + KERN_EMERG "Backtrace:\n"); dump_stack(); page->flags &= ~(1 << PG_lru | 1 << PG_private | @@ -453,6 +460,7 @@ static inline int free_pages_check(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 
0) | (page->flags & ( 1 << PG_lru | @@ -602,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | (page->flags & ( 1 << PG_lru | @@ -988,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) if (!PageHighMem(page)) debug_check_no_locks_freed(page_address(page), PAGE_SIZE); - VM_BUG_ON(page_get_page_cgroup(page)); arch_free_page(page, 0); kernel_map_pages(page, 1, 0); @@ -1276,7 +1284,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) if (!zlc) return NULL; - if (jiffies - zlc->last_full_zap > 1 * HZ) { + if (time_after(jiffies, zlc->last_full_zap + HZ)) { bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); zlc->last_full_zap = jiffies; } @@ -2527,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_page_links(page, zone, nid, pfn); init_page_count(page); reset_page_mapcount(page); - page_assign_page_cgroup(page, NULL); SetPageReserved(page); /* diff --git a/mm/rmap.c b/mm/rmap.c index 8fd527c4e2bf..0c9a2df06c39 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; referenced += page_referenced_one(page, vma, &mapcount); if (!mapcount) @@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) == (VM_LOCKED|VM_MAYSHARE)) { diff --git a/mm/shmem.c b/mm/shmem.c index 90b576cbc06e..3372bc579e89 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ 
-1370,14 +1370,17 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); unlock_page(swappage); - page_cache_release(swappage); if (error == -ENOMEM) { /* allow reclaim from this memory cgroup */ - error = mem_cgroup_cache_charge(NULL, + error = mem_cgroup_cache_charge(swappage, current->mm, gfp & ~__GFP_HIGHMEM); - if (error) + if (error) { + page_cache_release(swappage); goto failed; + } + mem_cgroup_uncharge_page(swappage); } + page_cache_release(swappage); goto repeat; } } else if (sgp == SGP_READ && !filepage) { diff --git a/mm/swap.c b/mm/swap.c index 710a20bb9749..d4ec59aa5c46 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -176,7 +176,7 @@ void activate_page(struct page *page) SetPageActive(page); add_page_to_active_list(zone, page); __count_vm_event(PGACTIVATE); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + mem_cgroup_move_lists(page, true); } spin_unlock_irq(&zone->lru_lock); } diff --git a/mm/vmscan.c b/mm/vmscan.c index a26dabd62fed..45711585684e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -126,7 +126,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR #define scan_global_lru(sc) (!(sc)->mem_cgroup) #else #define scan_global_lru(sc) (1) @@ -1128,7 +1128,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), false); + mem_cgroup_move_lists(page, false); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); @@ -1156,8 +1156,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, VM_BUG_ON(PageLRU(page)); SetPageLRU(page); VM_BUG_ON(!PageActive(page)); + list_move(&page->lru, &zone->active_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + 
mem_cgroup_move_lists(page, true); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); @@ -1427,7 +1428,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) return do_try_to_free_pages(zones, gfp_mask, &sc); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask) diff --git a/samples/Kconfig b/samples/Kconfig index 74d97cc24787..e1fb471cc501 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -22,5 +22,16 @@ config SAMPLE_KOBJECT If in doubt, say "N" here. +config SAMPLE_KPROBES + tristate "Build kprobes examples -- loadable modules only" + depends on KPROBES && m + help + This builds several kprobes example modules. + +config SAMPLE_KRETPROBES + tristate "Build kretprobes example -- loadable modules only" + default m + depends on SAMPLE_KPROBES && KRETPROBES + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 8652d0f268ad..2e02575f7794 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ +obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ diff --git a/samples/kprobes/Makefile b/samples/kprobes/Makefile new file mode 100644 index 000000000000..68739bc4fc6a --- /dev/null +++ b/samples/kprobes/Makefile @@ -0,0 +1,5 @@ +# builds the kprobes example kernel modules; +# then to use one (as root): insmod <module_name.ko> + +obj-$(CONFIG_SAMPLE_KPROBES) += kprobe_example.o jprobe_example.o +obj-$(CONFIG_SAMPLE_KRETPROBES) += kretprobe_example.o diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c new file mode 100644 index 000000000000..b7541355b92b --- /dev/null +++ b/samples/kprobes/jprobe_example.c @@ -0,0 +1,68 @@ +/* + * Here's a sample kernel module showing the use of jprobes to dump + * the arguments of do_fork(). 
+ * + * For more information on theory of operation of jprobes, see + * Documentation/kprobes.txt + * + * Build and insert the kernel module as done in the kprobe example. + * You will see the trace data in /var/log/messages and on the + * console whenever do_fork() is invoked to create a new process. + * (Some messages may be suppressed if syslogd is configured to + * eliminate duplicate messages.) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +/* + * Jumper probe for do_fork. + * Mirror principle enables access to arguments of the probed routine + * from the probe handler. + */ + +/* Proxy routine having the same arguments as actual do_fork() routine */ +static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, + struct pt_regs *regs, unsigned long stack_size, + int __user *parent_tidptr, int __user *child_tidptr) +{ + printk(KERN_INFO "jprobe: clone_flags = 0x%lx, stack_size = 0x%lx," + " regs = 0x%p\n", + clone_flags, stack_size, regs); + + /* Always end with a call to jprobe_return(). 
*/ + jprobe_return(); + return 0; +} + +static struct jprobe my_jprobe = { + .entry = jdo_fork, + .kp = { + .symbol_name = "do_fork", + }, +}; + +static int __init jprobe_init(void) +{ + int ret; + + ret = register_jprobe(&my_jprobe); + if (ret < 0) { + printk(KERN_INFO "register_jprobe failed, returned %d\n", ret); + return -1; + } + printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n", + my_jprobe.kp.addr, my_jprobe.entry); + return 0; +} + +static void __exit jprobe_exit(void) +{ + unregister_jprobe(&my_jprobe); + printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr); +} + +module_init(jprobe_init) +module_exit(jprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c new file mode 100644 index 000000000000..a681998a871c --- /dev/null +++ b/samples/kprobes/kprobe_example.c @@ -0,0 +1,91 @@ +/* + * NOTE: This example works on x86 and powerpc. + * Here's a sample kernel module showing the use of kprobes to dump a + * stack trace and selected registers when do_fork() is called. + * + * For more information on theory of operation of kprobes, see + * Documentation/kprobes.txt + * + * You will see the trace data in /var/log/messages and on the console + * whenever do_fork() is invoked to create a new process. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +/* For each probe you need to allocate a kprobe structure */ +static struct kprobe kp = { + .symbol_name = "do_fork", +}; + +/* kprobe pre_handler: called just before the probed instruction is executed */ +static int handler_pre(struct kprobe *p, struct pt_regs *regs) +{ +#ifdef CONFIG_X86 + printk(KERN_INFO "pre_handler: p->addr = 0x%p, ip = %lx," + " flags = 0x%lx\n", + p->addr, regs->ip, regs->flags); +#endif +#ifdef CONFIG_PPC + printk(KERN_INFO "pre_handler: p->addr = 0x%p, nip = 0x%lx," + " msr = 0x%lx\n", + p->addr, regs->nip, regs->msr); +#endif + + /* A dump_stack() here will give a stack backtrace */ + return 0; +} + +/* kprobe post_handler: called after the probed instruction is executed */ +static void handler_post(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ +#ifdef CONFIG_X86 + printk(KERN_INFO "post_handler: p->addr = 0x%p, flags = 0x%lx\n", + p->addr, regs->flags); +#endif +#ifdef CONFIG_PPC + printk(KERN_INFO "post_handler: p->addr = 0x%p, msr = 0x%lx\n", + p->addr, regs->msr); +#endif +} + +/* + * fault_handler: this is called if an exception is generated for any + * instruction within the pre- or post-handler, or when Kprobes + * single-steps the probed instruction. + */ +static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) +{ + printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%dn", + p->addr, trapnr); + /* Return 0 because we don't handle the fault. 
*/ + return 0; +} + +static int __init kprobe_init(void) +{ + int ret; + kp.pre_handler = handler_pre; + kp.post_handler = handler_post; + kp.fault_handler = handler_fault; + + ret = register_kprobe(&kp); + if (ret < 0) { + printk(KERN_INFO "register_kprobe failed, returned %d\n", ret); + return ret; + } + printk(KERN_INFO "Planted kprobe at %p\n", kp.addr); + return 0; +} + +static void __exit kprobe_exit(void) +{ + unregister_kprobe(&kp); + printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr); +} + +module_init(kprobe_init) +module_exit(kprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c new file mode 100644 index 000000000000..4e764b317d61 --- /dev/null +++ b/samples/kprobes/kretprobe_example.c @@ -0,0 +1,106 @@ +/* + * kretprobe_example.c + * + * Here's a sample kernel module showing the use of return probes to + * report the return value and total time taken for probed function + * to run. + * + * usage: insmod kretprobe_example.ko func=<func_name> + * + * If no func_name is specified, do_fork is instrumented + * + * For more information on theory of operation of kretprobes, see + * Documentation/kprobes.txt + * + * Build and insert the kernel module as done in the kprobe example. + * You will see the trace data in /var/log/messages and on the console + * whenever the probed function returns. (Some messages may be suppressed + * if syslogd is configured to eliminate duplicate messages.) 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/ktime.h> +#include <linux/limits.h> + +static char func_name[NAME_MAX] = "do_fork"; +module_param_string(func, func_name, NAME_MAX, S_IRUGO); +MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" + " function's execution time"); + +/* per-instance private data */ +struct my_data { + ktime_t entry_stamp; +}; + +/* Here we use the entry_handler to timestamp function entry */ +static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct my_data *data; + + if (!current->mm) + return 1; /* Skip kernel threads */ + + data = (struct my_data *)ri->data; + data->entry_stamp = ktime_get(); + return 0; +} + +/* + * Return-probe handler: Log the return value and duration. Duration may turn + * out to be zero consistently, depending upon the granularity of time + * accounting on the platform. + */ +static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + int retval = regs_return_value(regs); + struct my_data *data = (struct my_data *)ri->data; + s64 delta; + ktime_t now; + + now = ktime_get(); + delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); + printk(KERN_INFO "%s returned %d and took %lld ns to execute\n", + func_name, retval, (long long)delta); + return 0; +} + +static struct kretprobe my_kretprobe = { + .handler = ret_handler, + .entry_handler = entry_handler, + .data_size = sizeof(struct my_data), + /* Probe up to 20 instances concurrently. 
*/ + .maxactive = 20, +}; + +static int __init kretprobe_init(void) +{ + int ret; + + my_kretprobe.kp.symbol_name = func_name; + ret = register_kretprobe(&my_kretprobe); + if (ret < 0) { + printk(KERN_INFO "register_kretprobe failed, returned %d\n", + ret); + return -1; + } + printk(KERN_INFO "Planted return probe at %s: %p\n", + my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr); + return 0; +} + +static void __exit kretprobe_exit(void) +{ + unregister_kretprobe(&my_kretprobe); + printk(KERN_INFO "kretprobe at %p unregistered\n", + my_kretprobe.kp.addr); + + /* nmissed > 0 suggests that maxactive was set too low. */ + printk(KERN_INFO "Missed probing %d instances of %s\n", + my_kretprobe.nmissed, my_kretprobe.kp.symbol_name); +} + +module_init(kretprobe_init) +module_exit(kretprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2086a856400a..2a7cef9726e4 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -9,7 +9,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.14'; +my $V = '0.15'; use Getopt::Long qw(:config no_auto_abbrev); @@ -105,8 +105,7 @@ our $Sparse = qr{ __iomem| __must_check| __init_refok| - __kprobes| - fastcall + __kprobes }x; our $Attribute = qr{ const| @@ -158,7 +157,10 @@ sub build_types { \b (?:const\s+)? (?:unsigned\s+)? 
- $all + (?: + $all| + (?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\) + ) (?:\s+$Sparse|\s+const)* \b }x; @@ -362,6 +364,7 @@ sub ctx_statement_block { my $type = ''; my $level = 0; + my $p; my $c; my $len = 0; @@ -386,6 +389,7 @@ sub ctx_statement_block { last; } } + $p = $c; $c = substr($blk, $off, 1); $remainder = substr($blk, $off); @@ -397,8 +401,9 @@ sub ctx_statement_block { } # An else is really a conditional as long as its not else if - if ($level == 0 && $remainder =~ /(\s+else)(?:\s|{)/ && - $remainder !~ /\s+else\s+if\b/) { + if ($level == 0 && (!defined($p) || $p =~ /(?:\s|\})/) && + $remainder =~ /(else)(?:\s|{)/ && + $remainder !~ /else\s+if\b/) { $coff = $off + length($1); } @@ -445,21 +450,73 @@ sub ctx_statement_block { $line, $remain + 1, $off - $loff + 1, $level); } +sub statement_lines { + my ($stmt) = @_; + + # Strip the diff line prefixes and rip blank lines at start and end. + $stmt =~ s/(^|\n)./$1/g; + $stmt =~ s/^\s*//; + $stmt =~ s/\s*$//; + + my @stmt_lines = ($stmt =~ /\n/g); + + return $#stmt_lines + 2; +} + +sub statement_rawlines { + my ($stmt) = @_; + + my @stmt_lines = ($stmt =~ /\n/g); + + return $#stmt_lines + 2; +} + +sub statement_block_size { + my ($stmt) = @_; + + $stmt =~ s/(^|\n)./$1/g; + $stmt =~ s/^\s*{//; + $stmt =~ s/}\s*$//; + $stmt =~ s/^\s*//; + $stmt =~ s/\s*$//; + + my @stmt_lines = ($stmt =~ /\n/g); + my @stmt_statements = ($stmt =~ /;/g); + + my $stmt_lines = $#stmt_lines + 2; + my $stmt_statements = $#stmt_statements + 1; + + if ($stmt_lines > $stmt_statements) { + return $stmt_lines; + } else { + return $stmt_statements; + } +} + sub ctx_statement_full { my ($linenr, $remain, $off) = @_; my ($statement, $condition, $level); my (@chunks); + # Grab the first conditional/block pair. 
($statement, $condition, $linenr, $remain, $off, $level) = ctx_statement_block($linenr, $remain, $off); #print "F: c<$condition> s<$statement>\n"; + push(@chunks, [ $condition, $statement ]); + if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) { + return ($level, $linenr, @chunks); + } + + # Pull in the following conditional/block pairs and see if they + # could continue the statement. for (;;) { - push(@chunks, [ $condition, $statement ]); - last if (!($remain > 0 && $condition =~ /^.\s*(?:if|else|do)/)); ($statement, $condition, $linenr, $remain, $off, $level) = ctx_statement_block($linenr, $remain, $off); - #print "C: c<$condition> s<$statement>\n"; + #print "C: c<$condition> s<$statement> remain<$remain>\n"; + last if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:else|do)\b/s)); + #print "C: push\n"; + push(@chunks, [ $condition, $statement ]); } return ($level, $linenr, @chunks); @@ -593,13 +650,13 @@ sub cat_vet { } my $av_preprocessor = 0; -my $av_paren = 0; +my $av_pending; my @av_paren_type; sub annotate_reset { $av_preprocessor = 0; - $av_paren = 0; - @av_paren_type = (); + $av_pending = '_'; + @av_paren_type = ('E'); } sub annotate_values { @@ -611,12 +668,13 @@ sub annotate_values { print "$stream\n" if ($dbg_values > 1); while (length($cur)) { - print " <$type> " if ($dbg_values > 1); + print " <" . join('', @av_paren_type) . 
+ "> <$type> " if ($dbg_values > 1); if ($cur =~ /^(\s+)/o) { print "WS($1)\n" if ($dbg_values > 1); if ($1 =~ /\n/ && $av_preprocessor) { + $type = pop(@av_paren_type); $av_preprocessor = 0; - $type = 'N'; } } elsif ($cur =~ /^($Type)/) { @@ -626,11 +684,33 @@ sub annotate_values { } elsif ($cur =~ /^(#\s*define\s*$Ident)(\(?)/o) { print "DEFINE($1)\n" if ($dbg_values > 1); $av_preprocessor = 1; - $av_paren_type[$av_paren] = 'N'; + $av_pending = 'N'; - } elsif ($cur =~ /^(#\s*(?:ifdef|ifndef|if|else|elif|endif))/o) { - print "PRE($1)\n" if ($dbg_values > 1); + } elsif ($cur =~ /^(#\s*(?:ifdef|ifndef|if))/o) { + print "PRE_START($1)\n" if ($dbg_values > 1); $av_preprocessor = 1; + + push(@av_paren_type, $type); + push(@av_paren_type, $type); + $type = 'N'; + + } elsif ($cur =~ /^(#\s*(?:else|elif))/o) { + print "PRE_RESTART($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + + push(@av_paren_type, $av_paren_type[$#av_paren_type]); + + $type = 'N'; + + } elsif ($cur =~ /^(#\s*(?:endif))/o) { + print "PRE_END($1)\n" if ($dbg_values > 1); + + $av_preprocessor = 1; + + # Assume all arms of the conditional end as this + # one does, and continue as if the #endif was not here. 
+ pop(@av_paren_type); + push(@av_paren_type, $type); $type = 'N'; } elsif ($cur =~ /^(\\\n)/o) { @@ -639,13 +719,13 @@ sub annotate_values { } elsif ($cur =~ /^(sizeof)\s*(\()?/o) { print "SIZEOF($1)\n" if ($dbg_values > 1); if (defined $2) { - $av_paren_type[$av_paren] = 'V'; + $av_pending = 'V'; } $type = 'N'; } elsif ($cur =~ /^(if|while|typeof|__typeof__|for)\b/o) { print "COND($1)\n" if ($dbg_values > 1); - $av_paren_type[$av_paren] = 'N'; + $av_pending = 'N'; $type = 'N'; } elsif ($cur =~/^(return|case|else)/o) { @@ -654,14 +734,14 @@ sub annotate_values { } elsif ($cur =~ /^(\()/o) { print "PAREN('$1')\n" if ($dbg_values > 1); - $av_paren++; + push(@av_paren_type, $av_pending); + $av_pending = '_'; $type = 'N'; } elsif ($cur =~ /^(\))/o) { - $av_paren-- if ($av_paren > 0); - if (defined $av_paren_type[$av_paren]) { - $type = $av_paren_type[$av_paren]; - undef $av_paren_type[$av_paren]; + my $new_type = pop(@av_paren_type); + if ($new_type ne '_') { + $type = $new_type; print "PAREN('$1') -> $type\n" if ($dbg_values > 1); } else { @@ -670,7 +750,7 @@ sub annotate_values { } elsif ($cur =~ /^($Ident)\(/o) { print "FUNC($1)\n" if ($dbg_values > 1); - $av_paren_type[$av_paren] = 'V'; + $av_pending = 'V'; } elsif ($cur =~ /^($Ident|$Constant)/o) { print "IDENT($1)\n" if ($dbg_values > 1); @@ -680,11 +760,11 @@ sub annotate_values { print "ASSIGN($1)\n" if ($dbg_values > 1); $type = 'N'; - } elsif ($cur =~/^(;)/) { + } elsif ($cur =~/^(;|{|})/) { print "END($1)\n" if ($dbg_values > 1); $type = 'E'; - } elsif ($cur =~ /^(;|{|}|\?|:|\[)/o) { + } elsif ($cur =~ /^(;|\?|:|\[)/o) { print "CLOSE($1)\n" if ($dbg_values > 1); $type = 'N'; @@ -988,7 +1068,7 @@ sub process { } # check for RCS/CVS revision markers - if ($rawline =~ /\$(Revision|Log|Id)(?:\$|)/) { + if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) { WARN("CVS style keyword markers, these will _not_ be updated\n". 
$herecurr); } @@ -999,41 +1079,44 @@ sub process { # Check for potential 'bare' types if ($realcnt) { + my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); + $s =~ s/\n./ /g; + $s =~ s/{.*$//; + # Ignore goto labels. - if ($line =~ /$Ident:\*$/) { + if ($s =~ /$Ident:\*$/) { # Ignore functions being called - } elsif ($line =~ /^.\s*$Ident\s*\(/) { + } elsif ($s =~ /^.\s*$Ident\s*\(/) { # definitions in global scope can only start with types - } elsif ($line =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b/) { - possible($1, $line); + } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b/) { + possible($1, $s); # declarations always start with types - } elsif ($prev_values eq 'E' && $line =~ /^.\s*(?:$Storage\s+)?(?:const\s+)?($Ident)\b(:?\s+$Sparse)?\s*\**\s*$Ident\s*(?:;|=|,)/) { - possible($1); + } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:const\s+)?($Ident)\b(:?\s+$Sparse)?\s*\**\s*$Ident\s*(?:;|=|,)/) { + possible($1, $s); } # any (foo ... *) is a pointer cast, and foo is a type - while ($line =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/g) { - possible($1, $line); + while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/g) { + possible($1, $s); } # Check for any sort of function declaration. 
# int foo(something bar, other baz); # void (*store_gdt)(x86_descr_ptr *); - if ($prev_values eq 'E' && $line =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/) { + if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/) { my ($name_len) = length($1); - my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, $name_len); - my $ctx = join("\n", @ctx); - $ctx =~ s/\n.//; + my $ctx = $s; substr($ctx, 0, $name_len + 1) = ''; $ctx =~ s/\)[^\)]*$//; + for my $arg (split(/\s*,\s*/, $ctx)) { if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/ || $arg =~ /^($Ident)$/) { - possible($1, $line); + possible($1, $s); } } } @@ -1100,8 +1183,8 @@ sub process { $curr_values = $prev_values . $curr_values; if ($dbg_values) { my $outline = $opline; $outline =~ s/\t/ /g; - warn "--> .$outline\n"; - warn "--> $curr_values\n"; + print "$linenr > .$outline\n"; + print "$linenr > $curr_values\n"; } $prev_values = substr($curr_values, -1); @@ -1148,7 +1231,9 @@ sub process { if (($prevline !~ /^}/) && ($prevline !~ /^\+}/) && ($prevline !~ /^ }/) && - ($prevline !~ /\b\Q$name\E(?:\s+$Attribute)?\s*(?:;|=)/)) { + ($prevline !~ /^.DECLARE_$Ident\(\Q$name\E\)/) && + ($prevline !~ /^.LIST_HEAD\(\Q$name\E\)/) && + ($prevline !~ /\b\Q$name\E(?:\s+$Attribute)?\s*(?:;|=|\[)/)) { WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . 
$herecurr); } } @@ -1266,7 +1351,7 @@ sub process { =>|->|<<|>>|<|>|=|!|~| &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|% }x; - my @elements = split(/($;+|$ops|;)/, $opline); + my @elements = split(/($ops|;)/, $opline); my $off = 0; my $blank = copy_spacing($opline); @@ -1277,6 +1362,7 @@ sub process { my $a = ''; $a = 'V' if ($elements[$n] ne ''); $a = 'W' if ($elements[$n] =~ /\s$/); + $a = 'C' if ($elements[$n] =~ /$;$/); $a = 'B' if ($elements[$n] =~ /(\[|\()$/); $a = 'O' if ($elements[$n] eq ''); $a = 'E' if ($elements[$n] eq '' && $n == 0); @@ -1287,6 +1373,7 @@ sub process { if (defined $elements[$n + 2]) { $c = 'V' if ($elements[$n + 2] ne ''); $c = 'W' if ($elements[$n + 2] =~ /^\s/); + $c = 'C' if ($elements[$n + 2] =~ /^$;/); $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/); $c = 'O' if ($elements[$n + 2] eq ''); $c = 'E' if ($elements[$n + 2] =~ /\s*\\$/); @@ -1330,13 +1417,13 @@ sub process { if ($op_type ne 'V' && $ca =~ /\s$/ && $cc =~ /^\s*,/) { - # Ignore comments - } elsif ($op =~ /^$;+$/) { +# # Ignore comments +# } elsif ($op =~ /^$;+$/) { # ; should have either the end of line or a space or \ after it } elsif ($op eq ';') { - if ($ctx !~ /.x[WEB]/ && $cc !~ /^\\/ && - $cc !~ /^;/) { + if ($ctx !~ /.x[WEBC]/ && + $cc !~ /^\\/ && $cc !~ /^;/) { ERROR("need space after that '$op' $at\n" . $hereptr); } @@ -1351,7 +1438,7 @@ sub process { # , must have a space on the right. } elsif ($op eq ',') { - if ($ctx !~ /.xW|.xE/ && $cc !~ /^}/) { + if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) { ERROR("need space after that '$op' $at\n" . $hereptr); } @@ -1364,7 +1451,7 @@ sub process { # unary operator, or a cast } elsif ($op eq '!' || $op eq '~' || ($is_unary && ($op eq '*' || $op eq '-' || $op eq '&'))) { - if ($ctx !~ /[WEB]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { + if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { ERROR("need space before that '$op' $at\n" . 
$hereptr); } if ($ctx =~ /.xW/) { @@ -1373,7 +1460,7 @@ sub process { # unary ++ and unary -- are allowed no space on one side. } elsif ($op eq '++' or $op eq '--') { - if ($ctx !~ /[WOB]x[^W]/ && $ctx !~ /[^W]x[WOBE]/) { + if ($ctx !~ /[WOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) { ERROR("need space one side of that '$op' $at\n" . $hereptr); } if ($ctx =~ /WxB/ || ($ctx =~ /Wx./ && $cc =~ /^;/)) { @@ -1387,13 +1474,13 @@ sub process { $op eq '*' or $op eq '/' or $op eq '%') { - if ($ctx !~ /VxV|WxW|VxE|WxE|VxO/) { + if ($ctx !~ /VxV|WxW|VxE|WxE|VxO|Cx.|.xC/) { ERROR("need consistent spacing around '$op' $at\n" . $hereptr); } # All the others need spaces both sides. - } elsif ($ctx !~ /[EW]x[WE]/) { + } elsif ($ctx !~ /[EWC]x[CWE]/) { # Ignore email addresses <foo@bar> if (!($op eq '<' && $cb =~ /$;\S+\@\S+>/) && !($op eq '>' && $cb =~ /<\S+\@\S+$;/)) { @@ -1551,7 +1638,7 @@ sub process { # multi-statement macros should be enclosed in a do while loop, grab the # first statement and ensure its the whole macro if its not enclosed -# in a known goot container +# in a known good container if ($prevline =~ /\#define.*\\/ && $prevline !~/(?:do\s+{|\(\{|\{)/ && $line !~ /(?:do\s+{|\(\{|\{)/ && @@ -1599,84 +1686,95 @@ sub process { # check for redundant bracing round if etc if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) { my ($level, $endln, @chunks) = - ctx_statement_full($linenr, $realcnt, 0); + ctx_statement_full($linenr, $realcnt, 1); #print "chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n"; - if ($#chunks > 1 && $level == 0) { + #print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n"; + if ($#chunks > 0 && $level == 0) { my $allowed = 0; my $seen = 0; + my $herectx = $here . 
"\n";; + my $ln = $linenr - 1; for my $chunk (@chunks) { my ($cond, $block) = @{$chunk}; + $herectx .= "$rawlines[$ln]\n[...]\n"; + $ln += statement_rawlines($block) - 1; + substr($block, 0, length($cond)) = ''; $seen++ if ($block =~ /^\s*{/); - $block =~ s/(^|\n)./$1/g; - $block =~ s/^\s*{//; - $block =~ s/}\s*$//; - $block =~ s/^\s*//; - $block =~ s/\s*$//; - - my @lines = ($block =~ /\n/g); - my @statements = ($block =~ /;/g); - - #print "cond<$cond> block<$block> lines<" . scalar(@lines) . "> statements<" . scalar(@statements) . "> seen<$seen> allowed<$allowed>\n"; - if (scalar(@lines) != 0) { + #print "cond<$cond> block<$block> allowed<$allowed>\n"; + if (statement_lines($cond) > 1) { + #print "APW: ALLOWED: cond<$cond>\n"; $allowed = 1; } if ($block =~/\b(?:if|for|while)\b/) { + #print "APW: ALLOWED: block<$block>\n"; $allowed = 1; } - if (scalar(@statements) > 1) { + if (statement_block_size($block) > 1) { + #print "APW: ALLOWED: lines block<$block>\n"; $allowed = 1; } } if ($seen && !$allowed) { - WARN("braces {} are not necessary for any arm of this statement\n" . $herecurr); - $suppress_ifbraces = $endln; + WARN("braces {} are not necessary for any arm of this statement\n" . $herectx); } + # Either way we have looked over this whole + # statement and said what needs to be said. + $suppress_ifbraces = $endln; } } if ($linenr > $suppress_ifbraces && $line =~ /\b(if|while|for|else)\b/) { - # Locate the end of the opening statement. - my @control = ctx_statement($linenr, $realcnt, 0); - my $nr = $linenr + (scalar(@control) - 1); - my $cnt = $realcnt - (scalar(@control) - 1); - - my $off = $realcnt - $cnt; - #print "$off: line<$line>end<" . $lines[$nr - 1] . ">\n"; - - # If this is is a braced statement group check it - if ($lines[$nr - 1] =~ /{\s*$/) { - my ($lvl, @block) = ctx_block_level($nr, $cnt); - - my $stmt = join("\n", @block); - # Drop the diff line leader. - $stmt =~ s/\n./\n/g; - # Drop the code outside the block. 
- $stmt =~ s/(^[^{]*){\s*//; - my $before = $1; - $stmt =~ s/\s*}([^}]*$)//; - my $after = $1; - - #print "block<" . join(' ', @block) . "><" . scalar(@block) . ">\n"; - #print "before<$before> stmt<$stmt> after<$after>\n\n"; - - # Count the newlines, if there is only one - # then the block should not have {}'s. - my @lines = ($stmt =~ /\n/g); - my @statements = ($stmt =~ /;/g); - #print "lines<" . scalar(@lines) . ">\n"; - #print "statements<" . scalar(@statements) . ">\n"; - if ($lvl == 0 && scalar(@lines) == 0 && - scalar(@statements) < 2 && - $stmt !~ /{/ && $stmt !~ /\bif\b/ && - $before !~ /}/ && $after !~ /{/) { - my $herectx = "$here\n" . join("\n", @control, @block[1 .. $#block]) . "\n"; - shift(@block); - WARN("braces {} are not necessary for single statement blocks\n" . $herectx); + my ($level, $endln, @chunks) = + ctx_statement_full($linenr, $realcnt, $-[0]); + + my $allowed = 0; + + # Check the pre-context. + if (substr($line, 0, $-[0]) =~ /(\}\s*)$/) { + #print "APW: ALLOWED: pre<$1>\n"; + $allowed = 1; + } + # Check the condition. + my ($cond, $block) = @{$chunks[0]}; + if (defined $cond) { + substr($block, 0, length($cond)) = ''; + } + if (statement_lines($cond) > 1) { + #print "APW: ALLOWED: cond<$cond>\n"; + $allowed = 1; + } + if ($block =~/\b(?:if|for|while)\b/) { + #print "APW: ALLOWED: block<$block>\n"; + $allowed = 1; + } + if (statement_block_size($block) > 1) { + #print "APW: ALLOWED: lines block<$block>\n"; + $allowed = 1; + } + # Check the post-context. + if (defined $chunks[1]) { + my ($cond, $block) = @{$chunks[1]}; + if (defined $cond) { + substr($block, 0, length($cond)) = ''; + } + if ($block =~ /^\s*\{/) { + #print "APW: ALLOWED: chunk-1 block<$block>\n"; + $allowed = 1; + } + } + if ($level == 0 && $block =~ /^\s*\{/ && !$allowed) { + my $herectx = $here . "\n";; + my $end = $linenr + statement_rawlines($block) - 1; + + for (my $ln = $linenr - 1; $ln < $end; $ln++) { + $herectx .= $rawlines[$ln] . 
"\n";; } + + WARN("braces {} are not necessary for single statement blocks\n" . $herectx); } } @@ -1828,15 +1926,6 @@ sub process { print "are false positives report them to the maintainer, see\n"; print "CHECKPATCH in MAINTAINERS.\n"; } - print <<EOL if ($file == 1 && $quiet == 0); - -WARNING: Using --file mode. Please do not send patches to linux-kernel -that change whole existing files if you did not significantly change most -of the the file for other reasons anyways or just wrote the file newly -from scratch. Pure code style patches have a significant cost in a -quickly changing code base like Linux because they cause rejects -with other changes. -EOL return $clean; } |