summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile3
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/cputable.c4
-rw-r--r--arch/powerpc/kernel/dma.c8
-rw-r--r--arch/powerpc/kernel/eeh.c43
-rw-r--r--arch/powerpc/kernel/eeh_cache.c16
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/entry_64.S37
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S16
-rw-r--r--arch/powerpc/kernel/head_8xx.S110
-rw-r--r--arch/powerpc/kernel/idle_e500.S9
-rw-r--r--arch/powerpc/kernel/iommu.c245
-rw-r--r--arch/powerpc/kernel/msi.c11
-rw-r--r--arch/powerpc/kernel/pci-common.c11
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c5
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c6
-rw-r--r--arch/powerpc/kernel/sysfs.c38
-rw-r--r--arch/powerpc/kernel/tm.S4
-rw-r--r--arch/powerpc/kernel/traps.c45
-rw-r--r--arch/powerpc/kernel/vdso.c135
-rw-r--r--arch/powerpc/kernel/vio.c5
22 files changed, 444 insertions, 312 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c1ebbdaac28f..87c7d1473488 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -33,11 +33,12 @@ obj-y := cputable.o ptrace.o syscalls.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o dma.o \
- misc_$(CONFIG_WORD_SIZE).o vdso32/ \
+ misc_$(CONFIG_WORD_SIZE).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
+obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0034b6b3556a..98230579d99c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -247,7 +247,7 @@ int main(void)
#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
- DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
+ DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 60262fdf35ba..7d80bfdfb15e 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -108,7 +108,9 @@ extern void __restore_cpu_e6500(void);
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
- PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_HTM_COMP | \
+ PPC_FEATURE2_HTM_NOSC_COMP | \
+ PPC_FEATURE2_DSCR | \
PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
PPC_FEATURE2_VEC_CRYPTO)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 484b2d4462c1..35e4dcc5dce3 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -248,6 +248,14 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
return ppc_md.dma_set_mask(dev, dma_mask);
+
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+ if (phb->controller_ops.dma_set_mask)
+ return phb->controller_ops.dma_set_mask(pdev, dma_mask);
+ }
+
return __dma_set_mask(dev, dma_mask);
}
EXPORT_SYMBOL(dma_set_mask);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9ee61d15653d..af9b597b10af 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -144,8 +144,6 @@ struct eeh_stats {
static struct eeh_stats eeh_stats;
-#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-
static int __init eeh_setup(char *str)
{
if (!strcmp(str, "off"))
@@ -719,7 +717,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
- pci_save_state(pdev);
+ pci_restore_state(pdev);
return NULL;
}
@@ -1412,13 +1410,11 @@ static int dev_has_iommu_table(struct device *dev, void *data)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_dev **ppdev = data;
- struct iommu_table *tbl;
if (!dev)
return 0;
- tbl = get_iommu_table_base(dev);
- if (tbl && tbl->it_group) {
+ if (dev->iommu_group) {
*ppdev = pdev;
return 1;
}
@@ -1647,6 +1643,41 @@ int eeh_pe_configure(struct eeh_pe *pe)
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @function: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @function, to the indicated PE for
+ * testing purpose.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+ /* Invalid PE ? */
+ if (!pe)
+ return -ENODEV;
+
+ /* Unsupported operation ? */
+ if (!eeh_ops || !eeh_ops->err_inject)
+ return -ENOENT;
+
+ /* Check on PCI error type */
+ if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+ return -EINVAL;
+
+ /* Check on PCI error function */
+ if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+ return -EINVAL;
+
+ return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
static int proc_eeh_show(struct seq_file *m, void *v)
{
if (!eeh_enabled()) {
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index eeabeabea49c..a1e86e172e3c 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -48,11 +48,11 @@
*/
struct pci_io_addr_range {
struct rb_node rb_node;
- unsigned long addr_lo;
- unsigned long addr_hi;
+ resource_size_t addr_lo;
+ resource_size_t addr_hi;
struct eeh_dev *edev;
struct pci_dev *pcidev;
- unsigned int flags;
+ unsigned long flags;
};
static struct pci_io_addr_cache {
@@ -125,8 +125,8 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
/* Insert address range into the rb tree. */
static struct pci_io_addr_range *
-eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
- unsigned long ahi, unsigned int flags)
+eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
+ resource_size_t ahi, unsigned long flags)
{
struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -197,9 +197,9 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
/* Walk resources on this device, poke them into the tree */
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- unsigned long start = pci_resource_start(dev,i);
- unsigned long end = pci_resource_end(dev,i);
- unsigned int flags = pci_resource_flags(dev,i);
+ resource_size_t start = pci_resource_start(dev,i);
+ resource_size_t end = pci_resource_end(dev,i);
+ unsigned long flags = pci_resource_flags(dev,i);
/* We are interested only bus addresses, not dma or other stuff */
if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 24768ff3cb73..89eb4bc34d3a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -660,7 +660,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_report_error, &result);
/* Get the current PCI slot state. This can take a long time,
- * sometimes over 3 seconds for certain systems.
+ * sometimes over 300 seconds for certain systems.
*/
rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index afbc20019c2e..579e0f9a2d57 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,7 @@
#include <asm/ftrace.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
+#include <asm/tm.h>
/*
* System calls.
@@ -51,6 +52,12 @@ exception_marker:
.globl system_call_common
system_call_common:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+ bne tabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
andi. r10,r12,MSR_PR
mr r10,r1
addi r1,r1,-INT_FRAME_SIZE
@@ -311,6 +318,34 @@ syscall_exit_work:
bl do_syscall_trace_leave
b ret_from_except
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tabort_syscall:
+ /* Firstly we need to enable TM in the kernel */
+ mfmsr r10
+ li r13, 1
+ rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r10, 0
+
+ /* tabort, this dooms the transaction, nothing else */
+ li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+ TABORT(R13)
+
+ /*
+ * Return directly to userspace. We have corrupted user register state,
+ * but userspace will never see that register state. Execution will
+ * resume after the tbegin of the aborted transaction with the
+ * checkpointed register state.
+ */
+ li r13, MSR_RI
+ andc r10, r10, r13
+ mtmsrd r10, 1
+ mtspr SPRN_SRR0, r11
+ mtspr SPRN_SRR1, r12
+
+ rfid
+ b . /* prevent speculative execution */
+#endif
+
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
ld r11,_TRAP(r1)
@@ -556,7 +591,7 @@ BEGIN_FTR_SECTION
ld r0,THREAD_DSCR(r4)
cmpwi r6,0
bne 1f
- ld r0,PACA_DSCR(r13)
+ ld r0,PACA_DSCR_DEFAULT(r13)
1:
BEGIN_FTR_SECTION_NESTED(70)
mfspr r8, SPRN_FSCR
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9519e6bdc6d7..0a0399c2af11 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -59,14 +59,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#if defined(CONFIG_RELOCATABLE)
/*
- * We can't branch directly; in the direct case we use LR
- * and system_call_entry restores LR. (We thus need to move
- * LR to r10 in the RFID case too.)
+ * We can't branch directly so we do it via the CTR which
+ * is volatile across system calls.
*/
#define SYSCALL_PSERIES_2_DIRECT \
mflr r10 ; \
ld r12,PACAKBASE(r13) ; \
- LOAD_HANDLER(r12, system_call_entry_direct) ; \
+ LOAD_HANDLER(r12, system_call_entry) ; \
mtctr r12 ; \
mfspr r12,SPRN_SRR1 ; \
/* Re-use of r13... No spare regs to do this */ \
@@ -80,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_entry_direct ;
+ b system_call_common ;
#endif
/*
@@ -969,13 +968,6 @@ hv_facility_unavailable_relon_trampoline:
__end_interrupts:
.align 7
-system_call_entry_direct:
-#if defined(CONFIG_RELOCATABLE)
- /* The first level prologue may have used LR to get here, saving
- * orig in r10. To save hacking/ifdeffing common code, restore here.
- */
- mtlr r10
-#endif
system_call_entry:
b system_call_common
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9b53fe139bf6..78c1eba4c04a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -48,6 +48,19 @@
mtspr spr, reg
#endif
+/* Macro to test if an address is a kernel address */
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+#define IS_KERNEL(tmp, addr) \
+ andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */
+#define BRANCH_UNLESS_KERNEL(label) beq label
+#else
+#define IS_KERNEL(tmp, addr) \
+ rlwinm tmp, addr, 16, 16, 31; \
+ cmpli cr0, tmp, PAGE_OFFSET >> 16
+#define BRANCH_UNLESS_KERNEL(label) blt label
+#endif
+
+
/*
* Value for the bits that have fixed value in RPN entries.
* Also used for tagging DAR for DTLBerror.
@@ -116,13 +129,13 @@ turn_on_mmu:
*/
#define EXCEPTION_PROLOG \
EXCEPTION_PROLOG_0; \
+ mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
#define EXCEPTION_PROLOG_0 \
mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
#define EXCEPTION_PROLOG_1 \
mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
@@ -162,7 +175,6 @@ turn_on_mmu:
* Exception exit code.
*/
#define EXCEPTION_EPILOG_0 \
- mtcr r10; \
mfspr r10,SPRN_SPRG_SCRATCH0; \
mfspr r11,SPRN_SPRG_SCRATCH1
@@ -297,19 +309,22 @@ SystemCall:
* We have to use the MD_xxx registers for the tablewalk because the
* equivalent MI_xxx registers only perform the attribute functions.
*/
+
+#ifdef CONFIG_8xx_CPU15
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \
+ addi tmp, addr, PAGE_SIZE; \
+ tlbie tmp; \
+ addi tmp, addr, -PAGE_SIZE; \
+ tlbie tmp
+#else
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr)
+#endif
+
InstructionTLBMiss:
#ifdef CONFIG_8xx_CPU6
- mtspr SPRN_DAR, r3
+ mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
- mtspr SPRN_SPRG_SCRATCH2, r10
- mfspr r10, SPRN_SRR0 /* Get effective address of fault */
-#ifdef CONFIG_8xx_CPU15
- addi r11, r10, PAGE_SIZE
- tlbie r11
- addi r11, r10, -PAGE_SIZE
- tlbie r11
-#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -317,24 +332,34 @@ InstructionTLBMiss:
#ifdef CONFIG_MODULES
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
- andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
-#endif
+ mfspr r11, SPRN_SRR0 /* Get effective address of fault */
+ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
+ mfcr r10
+ IS_KERNEL(r11, r11)
mfspr r11, SPRN_M_TW /* Get level 1 table */
-#ifdef CONFIG_MODULES
- beq 3f
+ BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
+ mtcr r10
+ mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+#else
+ mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
+ mfspr r11, SPRN_M_TW /* Get level 1 table base address */
#endif
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
- /* Load the MI_TWC with the attributes for this "segment." */
- MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
- rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */
/* Extract level 2 index */
rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
- lwzx r10, r10, r11 /* Get the pte */
+ rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
+ lwz r10, 0(r10) /* Get the pte */
+
+ /* Insert the APG into the TWC from the Linux PTE. */
+ rlwimi r11, r10, 0, 25, 26
+ /* Load the MI_TWC with the attributes for this "segment." */
+ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
@@ -343,40 +368,41 @@ InstructionTLBMiss:
#endif
li r11, RPN_PATTERN
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 21 and 28 must be clear.
+ * Software indicator bits 20-23 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
- rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
+ rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */
MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
#ifdef CONFIG_8xx_CPU6
- mfspr r3, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r3, SPRN_SPRG_SCRATCH2
#endif
- mfspr r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
rfi
. = 0x1200
DataStoreTLBMiss:
#ifdef CONFIG_8xx_CPU6
- mtspr SPRN_DAR, r3
+ mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
- mtspr SPRN_SPRG_SCRATCH2, r10
- mfspr r10, SPRN_MD_EPN
+ mfcr r10
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- andis. r11, r10, 0x8000
+ mfspr r11, SPRN_MD_EPN
+ IS_KERNEL(r11, r11)
mfspr r11, SPRN_M_TW /* Get level 1 table */
- beq 3f
+ BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
+ mtcr r10
+ mfspr r10, SPRN_MD_EPN
+
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
@@ -388,13 +414,13 @@ DataStoreTLBMiss:
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
- /* Insert the Guarded flag into the TWC from the Linux PTE.
- * It is bit 27 of both the Linux PTE and the TWC (at least
+ /* Insert the Guarded flag and APG into the TWC from the Linux PTE.
+ * It is bit 26-27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, 27, 27
+ rlwimi r11, r10, 0, 26, 27
/* Insert the WriteThru flag into the TWC from the Linux PTE.
* It is bit 25 in the Linux PTE and bit 30 in the TWC
*/
@@ -423,14 +449,14 @@ DataStoreTLBMiss:
*/
li r11, RPN_PATTERN
rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
+ rlwimi r10, r11, 0, 20, 20 /* clear 20 */
MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
#ifdef CONFIG_8xx_CPU6
- mfspr r3, SPRN_DAR
+ mfspr r3, SPRN_SPRG_SCRATCH2
#endif
mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
rfi
@@ -456,6 +482,7 @@ InstructionTLBError:
. = 0x1400
DataTLBError:
EXCEPTION_PROLOG_0
+ mfcr r10
mfspr r11, SPRN_DAR
cmpwi cr0, r11, RPN_PATTERN
@@ -503,9 +530,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
- andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+ IS_KERNEL(r11, r10)
mfspr r11, SPRN_M_TW /* Get level 1 table */
- beq 3f
+ BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
/* Insert level 1 index */
3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -743,15 +770,20 @@ initial_mmu:
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
mtspr SPRN_MD_EPN, r8
- li r8, MI_PS8MEG /* Set 8M byte page */
+ li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */
ori r8, r8, MI_SVALID /* Make it valid */
mtspr SPRN_MI_TWC, r8
+ li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */
+ ori r8, r8, MI_SVALID /* Make it valid */
mtspr SPRN_MD_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
mtspr SPRN_MD_RPN, r8
- lis r8, MI_Kp@h /* Set the protection mode */
+ lis r8, MI_APG_INIT@h /* Set protection modes */
+ ori r8, r8, MI_APG_INIT@l
mtspr SPRN_MI_AP, r8
+ lis r8, MD_APG_INIT@h
+ ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
/* Map another 8 MByte at the IMMR to get the processor
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 15448668988d..b9b6ef510be1 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -58,15 +58,6 @@ BEGIN_FTR_SECTION
mtlr r0
lis r3,HID0_NAP@h
END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-BEGIN_FTR_SECTION
- msync
- li r7,L2CSR0_L2FL@l
- mtspr SPRN_L2CSR0,r7
-2:
- mfspr r7,SPRN_L2CSR0
- andi. r4,r7,L2CSR0_L2FL@l
- bne 2b
-END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
1:
/* Go to NAP or DOZE now */
mfspr r4,SPRN_HID0
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index b054f33ab1fb..a8e3490b54e3 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
(unsigned long)page &
IOMMU_PAGE_MASK(tbl), direction, attrs);
- /* ppc_md.tce_build() only returns non-zero for transient errors.
+ /* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
* DMA_ERROR_CODE. For all other errors the functionality is
* not altered.
@@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
@@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
if (!iommu_free_check(tbl, dma_addr, npages))
return;
- ppc_md.tce_free(tbl, entry, npages);
+ tbl->it_ops->clear(tbl, entry, npages);
spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
@@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
* not do an mb() here on purpose, it is not needed on any of
* the current platforms.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages, entry, dma_addr);
/* Insert into HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK(tbl),
direction, attrs);
if(unlikely(build_fail))
@@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
DBG("mapped %d elements:\n", outcount);
@@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
* do not do an mb() here, the affected platforms do not need it
* when freeing.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
static void iommu_table_clear(struct iommu_table *tbl)
@@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl)
*/
if (!is_kdump_kernel() || is_fadump_active()) {
/* Clear the table in case firmware left allocations in it */
- ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
return;
}
#ifdef CONFIG_CRASH_DUMP
- if (ppc_md.tce_get) {
+ if (tbl->it_ops->get) {
unsigned long index, tceval, tcecount = 0;
/* Reserve the existing mappings left by the first kernel. */
for (index = 0; index < tbl->it_size; index++) {
- tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+ tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
/*
* Freed TCE entry contains 0x7fffffffffffffff on JS20
*/
@@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned int i;
struct iommu_pool *p;
+ BUG_ON(!tbl->it_ops);
+
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -713,9 +715,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
unsigned long bitmap_sz;
unsigned int order;
- if (!tbl || !tbl->it_map) {
- printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
- node_name);
+ if (!tbl)
+ return;
+
+ if (!tbl->it_map) {
+ kfree(tbl);
return;
}
@@ -726,13 +730,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
-#ifdef CONFIG_IOMMU_API
- if (tbl->it_group) {
- iommu_group_put(tbl->it_group);
- BUG_ON(tbl->it_group);
- }
-#endif
-
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -871,17 +868,33 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
}
}
+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return TCE_PCI_READ | TCE_PCI_WRITE;
+ case DMA_FROM_DEVICE:
+ return TCE_PCI_WRITE;
+ case DMA_TO_DEVICE:
+ return TCE_PCI_READ;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
#ifdef CONFIG_IOMMU_API
/*
* SPAPR TCE API
*/
static void group_release(void *iommu_data)
{
- struct iommu_table *tbl = iommu_data;
- tbl->it_group = NULL;
+ struct iommu_table_group *table_group = iommu_data;
+
+ table_group->group = NULL;
}
-void iommu_register_group(struct iommu_table *tbl,
+void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num)
{
struct iommu_group *grp;
@@ -893,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl,
PTR_ERR(grp));
return;
}
- tbl->it_group = grp;
- iommu_group_set_iommudata(grp, tbl, group_release);
+ table_group->group = grp;
+ iommu_group_set_iommudata(grp, table_group, group_release);
name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
pci_domain_number, pe_num);
if (!name)
@@ -919,8 +932,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction);
void iommu_flush_tce(struct iommu_table *tbl)
{
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
@@ -931,7 +944,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce_value,
unsigned long npages)
{
- /* ppc_md.tce_free() does not support any value but 0 */
+ /* tbl->it_ops->clear() does not support any value but 0 */
if (tce_value)
return -EINVAL;
@@ -952,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce)
{
- if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- return -EINVAL;
-
- if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+ if (tce & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
if (ioba & ~IOMMU_PAGE_MASK(tbl))
@@ -972,68 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
}
EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
-{
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
-
- oldtce = ppc_md.tce_get(tbl, entry);
- if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
- ppc_md.tce_free(tbl, entry, 1);
- else
- oldtce = 0;
-
- spin_unlock(&(pool->lock));
-
- return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
- unsigned long entry, unsigned long pages)
-{
- unsigned long oldtce;
- struct page *page;
-
- for ( ; pages; --pages, ++entry) {
- oldtce = iommu_clear_tce(tbl, entry);
- if (!oldtce)
- continue;
-
- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
{
- int ret = -EBUSY;
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
+ long ret;
- oldtce = ppc_md.tce_get(tbl, entry);
- /* Add new entry if it is not busy */
- if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+ ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
- spin_unlock(&(pool->lock));
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL)))
+ SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
@@ -1042,84 +1000,72 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_build);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg);
-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
- unsigned long tce)
+int iommu_take_ownership(struct iommu_table *tbl)
{
- int ret;
- struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
- enum dma_data_direction direction = iommu_tce_direction(tce);
-
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << tbl->it_page_shift, ret); */
- return -EFAULT;
- }
- hwaddr = (unsigned long) page_address(page) + offset;
-
- ret = iommu_tce_build(tbl, entry, hwaddr, direction);
- if (ret)
- put_page(page);
-
- if (ret < 0)
- pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << tbl->it_page_shift, tce, ret);
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+ int ret = 0;
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+ /*
+ * VFIO does not control TCE entries allocation and the guest
+ * can write new TCEs on top of existing ones so iommu_tce_build()
+ * must be able to release old pages. This functionality
+ * requires exchange() callback defined so if it is not
+ * implemented, we disallow taking ownership over the table.
+ */
+ if (!tbl->it_ops->exchange)
+ return -EINVAL;
-int iommu_take_ownership(struct iommu_table *tbl)
-{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
- return -EBUSY;
+ ret = -EBUSY;
+ /* Restore bit#0 set by iommu_init_table() */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+ } else {
+ memset(tbl->it_map, 0xff, sz);
}
- memset(tbl->it_map, 0xff, sz);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
- /*
- * Disable iommu bypass, otherwise the user can DMA to all of
- * our physical memory via the bypass window instead of just
- * the pages that has been explicitly mapped into the iommu
- */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, false);
-
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);
void iommu_release_ownership(struct iommu_table *tbl)
{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
memset(tbl->it_map, 0, sz);
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- /* The kernel owns the device now, we can restore the iommu bypass */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, true);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
+ struct iommu_table_group_link *tgl;
/*
* The sysfs entries should be populated before
@@ -1137,15 +1083,22 @@ int iommu_add_device(struct device *dev)
}
tbl = get_iommu_table_base(dev);
- if (!tbl || !tbl->it_group) {
+ if (!tbl) {
pr_debug("%s: Skipping device %s with no tbl\n",
__func__, dev_name(dev));
return 0;
}
+ tgl = list_first_entry_or_null(&tbl->it_group_list,
+ struct iommu_table_group_link, next);
+ if (!tgl) {
+ pr_debug("%s: Skipping device %s with no group\n",
+ __func__, dev_name(dev));
+ return 0;
+ }
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev),
- iommu_group_id(tbl->it_group));
+ iommu_group_id(tgl->table_group->group));
if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
@@ -1154,7 +1107,7 @@ int iommu_add_device(struct device *dev)
return -EINVAL;
}
- return iommu_group_add_device(tbl->it_group, dev);
+ return iommu_group_add_device(tgl->table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 71bd161640cf..dab616a33b8d 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -15,7 +15,10 @@
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (!phb->controller_ops.setup_msi_irqs ||
+ !phb->controller_ops.teardown_msi_irqs) {
pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
return -ENOSYS;
}
@@ -24,10 +27,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
- return ppc_md.setup_msi_irqs(dev, nvec, type);
+ return phb->controller_ops.setup_msi_irqs(dev, nvec, type);
}
void arch_teardown_msi_irqs(struct pci_dev *dev)
{
- ppc_md.teardown_msi_irqs(dev);
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ phb->controller_ops.teardown_msi_irqs(dev);
}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0d054068a21d..b9de34d44fcb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -89,6 +89,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
#endif
return phb;
}
+EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
void pcibios_free_controller(struct pci_controller *phb)
{
@@ -1447,6 +1448,7 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
list_for_each_entry(child_bus, &bus->children, node)
pcibios_claim_one_bus(child_bus);
}
+EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
/* pcibios_finish_adding_to_bus
@@ -1488,6 +1490,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}
+void pcibios_disable_device(struct pci_dev *dev)
+{
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (phb->controller_ops.disable_device)
+ phb->controller_ops.disable_device(dev);
+}
+
resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
{
return (unsigned long) hose->io_base_virt - _IO_BASE;
@@ -1680,6 +1690,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
pcie_bus_configure_settings(child);
}
}
+EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 7ed85a69a9c2..7f9ed0c1f6b9 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -29,7 +29,12 @@
*/
void pcibios_release_device(struct pci_dev *dev)
{
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
eeh_remove_device(dev);
+
+ if (phb->controller_ops.release_device)
+ phb->controller_ops.release_device(dev);
}
/**
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index febb50dd5328..8005e18d1b40 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy a thread..
*/
-extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
/*
* Copy architecture-specific thread state
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c69671c03c3b..bdcbb716f4d6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -523,7 +523,8 @@ void __init setup_system(void)
smp_release_cpus();
#endif
- pr_info("Starting Linux PPC64 %s\n", init_utsname()->version);
+ pr_info("Starting Linux %s %s\n", init_utsname()->machine,
+ init_utsname()->version);
pr_info("-----------------------------------------------------\n");
pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
@@ -686,6 +687,9 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_64K_PAGES
init_mm.context.pte_frag = NULL;
#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(&init_mm.context);
+#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index fa1fd8a0c867..692873bff334 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
static unsigned long dscr_default;
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
static void read_dscr(void *val)
{
*(unsigned long *)val = get_paca()->dscr_default;
}
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
static void write_dscr(void *val)
{
get_paca()->dscr_default = *(unsigned long *)val;
@@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct device_attribute *attr)
attr->attr.mode |= 0200;
}
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
static ssize_t show_dscr_default(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%lx\n", dscr_default);
}
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
static ssize_t __used store_dscr_default(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 5754b226da7e..bf8f34a58670 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -293,7 +293,7 @@ dont_backup_fp:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
@@ -473,7 +473,7 @@ restore_gprs:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 19e4744b6eba..6530f1b8874d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
};
char *facility = "unknown";
u64 value;
+ u32 instword, rd;
u8 status;
bool hv;
@@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs *regs)
status = value >> 56;
if (status == FSCR_DSCR_LG) {
- /* User is acessing the DSCR. Set the inherit bit and allow
- * the user to set it directly in future by setting via the
- * FSCR DSCR bit. We always leave HFSCR DSCR set.
+ /*
+ * User is accessing the DSCR register using the problem
+ * state only SPR number (0x03) either through a mfspr or
+ * a mtspr instruction. If it is a write attempt through
+ * a mtspr, then we set the inherit bit. This also allows
+ * the user to write or read the register directly in the
+ * future by setting via the FSCR DSCR bit. But in case it
+ * is a read DSCR attempt through a mfspr instruction, we
+ * just emulate the instruction instead. This code path will
+ * always emulate all the mfspr instructions till the user
+ * has attempted atleast one mtspr instruction. This way it
+ * preserves the same behaviour when the user is accessing
+ * the DSCR through privilege level only SPR number (0x11)
+ * which is emulated through illegal instruction exception.
+ * We always leave HFSCR DSCR set.
*/
- current->thread.dscr_inherit = 1;
- mtspr(SPRN_FSCR, value | FSCR_DSCR);
+ if (get_user(instword, (u32 __user *)(regs->nip))) {
+ pr_err("Failed to fetch the user instruction\n");
+ return;
+ }
+
+ /* Write into DSCR (mtspr 0x03, RS) */
+ if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
+ == PPC_INST_MTSPR_DSCR_USER) {
+ rd = (instword >> 21) & 0x1f;
+ current->thread.dscr = regs->gpr[rd];
+ current->thread.dscr_inherit = 1;
+ mtspr(SPRN_FSCR, value | FSCR_DSCR);
+ }
+
+ /* Read from DSCR (mfspr RT, 0x03) */
+ if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
+ == PPC_INST_MFSPR_DSCR_USER) {
+ if (emulate_instruction(regs)) {
+ pr_err("DSCR based mfspr emulation failed\n");
+ return;
+ }
+ regs->nip += 4;
+ emulate_single_step(regs);
+ }
return;
}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 305eb0d9b768..b457bfa28436 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -49,13 +49,16 @@
/* The alignment of the vDSO */
#define VDSO_ALIGNMENT (1 << 16)
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
static unsigned int vdso32_pages;
+static void *vdso32_kbase;
static struct page **vdso32_pagelist;
unsigned long vdso32_sigtramp;
unsigned long vdso32_rt_sigtramp;
+#ifdef CONFIG_VDSO32
+extern char vdso32_start, vdso32_end;
+#endif
+
#ifdef CONFIG_PPC64
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
@@ -140,50 +143,6 @@ struct lib64_elfinfo
};
-#ifdef __DEBUG
-static void dump_one_vdso_page(struct page *pg, struct page *upg)
-{
- printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
- page_count(pg),
- pg->flags);
- if (upg && !IS_ERR(upg) /* && pg != upg*/) {
- printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
- << PAGE_SHIFT),
- page_count(upg),
- upg->flags);
- }
- printk("\n");
-}
-
-static void dump_vdso_pages(struct vm_area_struct * vma)
-{
- int i;
-
- if (!vma || is_32bit_task()) {
- printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
- for (i=0; i<vdso32_pages; i++) {
- struct page *pg = virt_to_page(vdso32_kbase +
- i*PAGE_SIZE);
- struct page *upg = (vma && vma->vm_mm) ?
- follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
- : NULL;
- dump_one_vdso_page(pg, upg);
- }
- }
- if (!vma || !is_32bit_task()) {
- printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
- for (i=0; i<vdso64_pages; i++) {
- struct page *pg = virt_to_page(vdso64_kbase +
- i*PAGE_SIZE);
- struct page *upg = (vma && vma->vm_mm) ?
- follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
- : NULL;
- dump_one_vdso_page(pg, upg);
- }
- }
-}
-#endif /* DEBUG */
-
/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
@@ -292,6 +251,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
+#ifdef CONFIG_VDSO32
static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
unsigned long *size)
{
@@ -379,6 +339,20 @@ static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
return 0;
}
+#else /* !CONFIG_VDSO32 */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ return 0;
+}
+
+static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ return 0;
+}
+#endif /* CONFIG_VDSO32 */
#ifdef CONFIG_PPC64
@@ -489,6 +463,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
* Locate symbol tables & text section
*/
+#ifdef CONFIG_VDSO32
v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
if (v32->dynsym == NULL || v32->dynstr == NULL) {
@@ -501,6 +476,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
return -1;
}
v32->text = sect - vdso32_kbase;
+#endif
#ifdef CONFIG_PPC64
v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
@@ -537,7 +513,9 @@ static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
+#ifdef CONFIG_VDSO32
Elf32_Sym *sym32;
+#endif
#ifdef CONFIG_PPC64
Elf64_Sym *sym64;
@@ -552,6 +530,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
(sym64->st_value - VDSO64_LBASE);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_VDSO32
sym32 = find_symbol32(v32, "__kernel_datapage_offset");
if (sym32 == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol "
@@ -561,6 +540,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
*((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
(vdso32_pages << PAGE_SHIFT) -
(sym32->st_value - VDSO32_LBASE);
+#endif
return 0;
}
@@ -569,55 +549,54 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
- void *start32;
- unsigned long size32;
+ unsigned long size;
+ void *start;
#ifdef CONFIG_PPC64
- void *start64;
- unsigned long size64;
-
- start64 = find_section64(v64->hdr, "__ftr_fixup", &size64);
- if (start64)
+ start = find_section64(v64->hdr, "__ftr_fixup", &size);
+ if (start)
do_feature_fixups(cur_cpu_spec->cpu_features,
- start64, start64 + size64);
+ start, start + size);
- start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64);
- if (start64)
+ start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size);
+ if (start)
do_feature_fixups(cur_cpu_spec->mmu_features,
- start64, start64 + size64);
+ start, start + size);
- start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64);
- if (start64)
+ start = find_section64(v64->hdr, "__fw_ftr_fixup", &size);
+ if (start)
do_feature_fixups(powerpc_firmware_features,
- start64, start64 + size64);
+ start, start + size);
- start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64);
- if (start64)
+ start = find_section64(v64->hdr, "__lwsync_fixup", &size);
+ if (start)
do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start64, start64 + size64);
+ start, start + size);
#endif /* CONFIG_PPC64 */
- start32 = find_section32(v32->hdr, "__ftr_fixup", &size32);
- if (start32)
+#ifdef CONFIG_VDSO32
+ start = find_section32(v32->hdr, "__ftr_fixup", &size);
+ if (start)
do_feature_fixups(cur_cpu_spec->cpu_features,
- start32, start32 + size32);
+ start, start + size);
- start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32);
- if (start32)
+ start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size);
+ if (start)
do_feature_fixups(cur_cpu_spec->mmu_features,
- start32, start32 + size32);
+ start, start + size);
#ifdef CONFIG_PPC64
- start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32);
- if (start32)
+ start = find_section32(v32->hdr, "__fw_ftr_fixup", &size);
+ if (start)
do_feature_fixups(powerpc_firmware_features,
- start32, start32 + size32);
+ start, start + size);
#endif /* CONFIG_PPC64 */
- start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32);
- if (start32)
+ start = find_section32(v32->hdr, "__lwsync_fixup", &size);
+ if (start)
do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start32, start32 + size32);
+ start, start + size);
+#endif
return 0;
}
@@ -779,11 +758,15 @@ static int __init vdso_init(void)
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_VDSO32
+ vdso32_kbase = &vdso32_start;
+
/*
* Calculate the size of the 32 bits vDSO
*/
vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+#endif
/*
@@ -804,6 +787,7 @@ static int __init vdso_init(void)
return 0;
}
+#ifdef CONFIG_VDSO32
/* Make sure pages are in the correct state */
vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
GFP_KERNEL);
@@ -816,6 +800,7 @@ static int __init vdso_init(void)
}
vdso32_pagelist[i++] = virt_to_page(vdso_data);
vdso32_pagelist[i] = NULL;
+#endif
#ifdef CONFIG_PPC64
vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 5bfdab9047be..b41426c60ef6 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1196,6 +1196,11 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
tbl->it_type = TCE_VB;
tbl->it_blocksize = 16;
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ else
+ tbl->it_ops = &iommu_table_pseries_ops;
+
return iommu_init_table(tbl, -1);
}