summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/fault.c7
-rw-r--r--arch/powerpc/mm/gup.c235
-rw-r--r--arch/powerpc/mm/hash_low_64.S19
-rw-r--r--arch/powerpc/mm/hash_native_64.c41
-rw-r--r--arch/powerpc/mm/hash_utils_64.c114
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c60
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage.c51
-rw-r--r--arch/powerpc/mm/init_32.c10
-rw-r--r--arch/powerpc/mm/init_64.c1
-rw-r--r--arch/powerpc/mm/mem.c77
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c8
-rw-r--r--arch/powerpc/mm/numa.c224
-rw-r--r--arch/powerpc/mm/pgtable_32.c3
-rw-r--r--arch/powerpc/mm/pgtable_64.c104
17 files changed, 264 insertions, 704 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 325e861616a1..438dcd3fd0d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
+obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(CONFIG_WORD_SIZE).o \
pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 08d659a9fcdb..eb79907f34fa 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -43,7 +43,6 @@
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
-#include <mm/mmu_decl.h>
#include "icswx.h"
@@ -380,12 +379,6 @@ good_area:
goto bad_area;
#endif /* CONFIG_6xx */
#if defined(CONFIG_8xx)
- /* 8xx sometimes need to load a invalid/non-present TLBs.
- * These must be invalidated separately as linux mm don't.
- */
- if (error_code & 0x40000000) /* no translation? */
- _tlbil_va(address, 0, 0, 0);
-
/* The MPC8xx seems to always set 0x80000000, which is
* "undefined". Of those that can be set, this is the only
* one which seems bad.
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
deleted file mode 100644
index d8746684f606..000000000000
--- a/arch/powerpc/mm/gup.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask, result;
- pte_t *ptep;
-
- result = _PAGE_PRESENT|_PAGE_USER;
- if (write)
- result |= _PAGE_RW;
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_kernel(&pmd, addr);
- do {
- pte_t pte = ACCESS_ONCE(*ptep);
- struct page *page;
- /*
- * Similar to the PMD case, NUMA hinting must take slow path
- */
- if (pte_numa(pte))
- return 0;
-
- if ((pte_val(pte) & mask) != result)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- if (!page_cache_get_speculative(page))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
- return 0;
- }
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = ACCESS_ONCE(*pmdp);
-
- next = pmd_addr_end(addr, end);
- /*
- * If we find a splitting transparent hugepage we
- * return zero. That will result in taking the slow
- * path which will call wait_split_huge_page()
- * if the pmd is still in splitting state
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
- return 0;
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_numa(pmd))
- return 0;
-
- if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pmdp)) {
- if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = ACCESS_ONCE(*pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (pud_huge(pud)) {
- if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pudp)) {
- if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
- return 0;
-
- pr_devel(" aligned: %lx .. %lx\n", start, end);
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables from being freed on powerpc.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
-
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = ACCESS_ONCE(*pgdp);
-
- pr_devel(" %016lx: normal pgd %p\n", addr,
- (void *)pgd_val(pgd));
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (pgd_huge(pgd)) {
- if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
- write, pages, &nr))
- break;
- } else if (is_hugepd(pgdp)) {
- if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
- addr, next, write, pages, &nr))
- break;
- } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
-
- local_irq_restore(flags);
-
- return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- int nr, ret;
-
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
-
- if (nr < nr_pages) {
- pr_devel(" slow path ! nr = %d\n", nr);
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- nr_pages - nr, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
- }
-
- return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 057cbbb4c576..463174a4a647 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -46,7 +46,8 @@
/*
* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local, int ssize)
+ * pte_t *ptep, unsigned long trap, unsigned long flags,
+ * int ssize)
*
* Adds a 4K page to the hash table in a segment of 4K pages only
*/
@@ -298,7 +299,7 @@ htab_modify_pte:
li r6,MMU_PAGE_4K /* base page size */
li r7,MMU_PAGE_4K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
+ ld r9,STK_PARAM(R8)(r1) /* get "flags" param */
.globl htab_call_hpte_updatepp
htab_call_hpte_updatepp:
bl . /* Patched by htab_finish_init() */
@@ -338,8 +339,8 @@ htab_pte_insert_failure:
*****************************************************************************/
/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local, int ssize,
- * int subpg_prot)
+ * pte_t *ptep, unsigned long trap, unsigned local flags,
+ * int ssize, int subpg_prot)
*/
/*
@@ -514,7 +515,7 @@ htab_insert_pte:
andis. r0,r31,_PAGE_4K_PFN@h
srdi r5,r31,PTE_RPN_SHIFT
bne- htab_special_pfn
- sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ sldi r5,r5,PAGE_FACTOR
add r5,r5,r25
htab_special_pfn:
sldi r5,r5,HW_PAGE_SHIFT
@@ -544,7 +545,7 @@ htab_call_hpte_insert1:
andis. r0,r31,_PAGE_4K_PFN@h
srdi r5,r31,PTE_RPN_SHIFT
bne- 3f
- sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ sldi r5,r5,PAGE_FACTOR
add r5,r5,r25
3: sldi r5,r5,HW_PAGE_SHIFT
@@ -594,7 +595,7 @@ htab_inval_old_hpte:
li r5,0 /* PTE.hidx */
li r6,MMU_PAGE_64K /* psize */
ld r7,STK_PARAM(R9)(r1) /* ssize */
- ld r8,STK_PARAM(R8)(r1) /* local */
+ ld r8,STK_PARAM(R8)(r1) /* flags */
bl flush_hash_page
/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
lis r0,_PAGE_HPTE_SUB@h
@@ -666,7 +667,7 @@ htab_modify_pte:
li r6,MMU_PAGE_4K /* base page size */
li r7,MMU_PAGE_4K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
+ ld r9,STK_PARAM(R8)(r1) /* get "flags" param */
.globl htab_call_hpte_updatepp
htab_call_hpte_updatepp:
bl . /* patched by htab_finish_init() */
@@ -962,7 +963,7 @@ ht64_modify_pte:
li r6,MMU_PAGE_64K /* base page size */
li r7,MMU_PAGE_64K /* actual page size */
ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
+ ld r9,STK_PARAM(R8)(r1) /* get "flags" param */
.globl ht64_call_hpte_updatepp
ht64_call_hpte_updatepp:
bl . /* patched by htab_finish_init() */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ae4962a06476..9c4880ddecd6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -283,19 +283,17 @@ static long native_hpte_remove(unsigned long hpte_group)
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long vpn, int bpsize,
- int apsize, int ssize, int local)
+ int apsize, int ssize, unsigned long flags)
{
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
- int ret = 0;
+ int ret = 0, local = 0;
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
vpn, want_v & HPTE_V_AVPN, slot, newpp);
- native_lock_hpte(hptep);
-
hpte_v = be64_to_cpu(hptep->v);
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
@@ -308,15 +306,30 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
DBG_LOW(" -> miss\n");
ret = -1;
} else {
- DBG_LOW(" -> hit\n");
- /* Update the HPTE */
- hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
+ native_lock_hpte(hptep);
+ /* recheck with locks held */
+ hpte_v = be64_to_cpu(hptep->v);
+ if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))) {
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+ ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N |
+ HPTE_R_C)));
+ }
+ native_unlock_hpte(hptep);
}
- native_unlock_hpte(hptep);
- /* Ensure it is out of the tlb too. */
- tlbie(vpn, bpsize, apsize, ssize, local);
+ if (flags & HPTE_LOCAL_UPDATE)
+ local = 1;
+ /*
+ * Ensure it is out of the tlb too if it is not a nohpte fault
+ */
+ if (!(flags & HPTE_NOHPTE_UPDATE))
+ tlbie(vpn, bpsize, apsize, ssize, local);
return ret;
}
@@ -419,7 +432,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
static void native_hugepage_invalidate(unsigned long vsid,
unsigned long addr,
unsigned char *hpte_slot_array,
- int psize, int ssize)
+ int psize, int ssize, int local)
{
int i;
struct hash_pte *hptep;
@@ -465,7 +478,7 @@ static void native_hugepage_invalidate(unsigned long vsid,
* instruction compares entry_VA in tlb with the VA specified
* here
*/
- tlbie(vpn, psize, actual_psize, ssize, 0);
+ tlbie(vpn, psize, actual_psize, ssize, local);
}
local_irq_restore(flags);
}
@@ -629,7 +642,7 @@ static void native_flush_hash_range(unsigned long number, int local)
unsigned long want_v;
unsigned long flags;
real_pte_t pte;
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
unsigned long psize = batch->psize;
int ssize = batch->ssize;
int i;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d5339a3b9945..e56a307bc676 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
* -1 - critical hash insertion error
* -2 - access not permitted by subpage protection mechanism
*/
-int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap,
+ unsigned long flags)
{
enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
@@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
pte_t *ptep;
unsigned hugeshift;
const struct cpumask *tmp;
- int rc, user_region = 0, local = 0;
+ int rc, user_region = 0;
int psize, ssize;
DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
@@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
/* Check CPU locality */
tmp = cpumask_of(smp_processor_id());
if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
- local = 1;
+ flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
/* If we use 4K pages and our psize is not 4K, then we might
@@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
if (hugeshift) {
if (pmd_trans_huge(*(pmd_t *)ptep))
rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
- trap, local, ssize, psize);
+ trap, flags, ssize, psize);
#ifdef CONFIG_HUGETLB_PAGE
else
rc = __hash_page_huge(ea, access, vsid, ptep, trap,
- local, ssize, hugeshift, psize);
+ flags, ssize, hugeshift, psize);
#else
else {
/*
@@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
#ifdef CONFIG_PPC_HAS_HASH_64K
if (psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ flags, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
{
@@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u
rc = -2;
else
rc = __hash_page_4K(ea, access, vsid, ptep, trap,
- local, ssize, spp);
+ flags, ssize, spp);
}
/* Dump some info in case of hash insertion failure, they should
@@ -1181,14 +1184,19 @@ bail:
}
EXPORT_SYMBOL_GPL(hash_page_mm);
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+ unsigned long dsisr)
{
+ unsigned long flags = 0;
struct mm_struct *mm = current->mm;
if (REGION_ID(ea) == VMALLOC_REGION_ID)
mm = &init_mm;
- return hash_page_mm(mm, ea, access, trap);
+ if (dsisr & DSISR_NOHPTE)
+ flags |= HPTE_NOHPTE_UPDATE;
+
+ return hash_page_mm(mm, ea, access, trap, flags);
}
EXPORT_SYMBOL_GPL(hash_page);
@@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
pgd_t *pgdir;
pte_t *ptep;
unsigned long flags;
- int rc, ssize, local = 0;
+ int rc, ssize, update_flags = 0;
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
@@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Is that local to this CPU ? */
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- local = 1;
+ update_flags |= HPTE_LOCAL_UPDATE;
/* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ update_flags, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
- subpage_protection(mm, ea));
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
+ ssize, subpage_protection(mm, ea));
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
@@ -1278,9 +1287,10 @@ out_exit:
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
- int local)
+ unsigned long flags)
{
unsigned long hash, index, shift, hidx, slot;
+ int local = flags & HPTE_LOCAL_UPDATE;
DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
@@ -1315,6 +1325,78 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
#endif
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags)
+{
+ int i, max_hpte_count, valid;
+ unsigned long s_addr;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, shift, vpn, hash, slot;
+ int local = flags & HPTE_LOCAL_UPDATE;
+
+ s_addr = addr & HPAGE_PMD_MASK;
+ hpte_slot_array = get_hpte_slot_array(pmdp);
+ /*
+ * IF we try to do a HUGE PTE update after a withdraw is done.
+ * we will find the below NULL. This happens when we do
+ * split_huge_page_pmd
+ */
+ if (!hpte_slot_array)
+ return;
+
+ if (ppc_md.hugepage_invalidate) {
+ ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize, local);
+ goto tm_abort;
+ }
+ /*
+ * No bluk hpte removal support, invalidate each entry
+ */
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HPAGE_PMD_SIZE >> shift;
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_valid(hpte_slot_array, i);
+ if (!valid)
+ continue;
+ hidx = hpte_hash_index(hpte_slot_array, i);
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ ppc_md.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, local);
+ }
+tm_abort:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Transactions are not aborted by tlbiel, only tlbie.
+ * Without, syncing a page back to a block device w/ PIO could pick up
+ * transactional data (bad!) so we force an abort here. Before the
+ * sync the page will be made read-only, which will flush_hash_page.
+ * BIG ISSUE here: if the kernel uses a page from userspace without
+ * unmapping it first, it may see the speculated version.
+ */
+ if (local && cpu_has_feature(CPU_FTR_TM) &&
+ current->thread.regs &&
+ MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ tm_enable();
+ tm_abort(TM_CAUSE_TLBI);
+ }
+#endif
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
void flush_hash_range(unsigned long number, int local)
{
if (ppc_md.flush_hash_range)
@@ -1322,7 +1404,7 @@ void flush_hash_range(unsigned long number, int local)
else {
int i;
struct ppc64_tlb_batch *batch =
- &__get_cpu_var(ppc64_tlb_batch);
+ this_cpu_ptr(&ppc64_tlb_batch);
for (i = 0; i < number; i++)
flush_hash_page(batch->vpn[i], batch->pte[i],
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 5f5e6328c21c..86686514ae13 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,60 +18,9 @@
#include <linux/mm.h>
#include <asm/machdep.h>
-static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
- pmd_t *pmdp, unsigned int psize, int ssize)
-{
- int i, max_hpte_count, valid;
- unsigned long s_addr;
- unsigned char *hpte_slot_array;
- unsigned long hidx, shift, vpn, hash, slot;
-
- s_addr = addr & HPAGE_PMD_MASK;
- hpte_slot_array = get_hpte_slot_array(pmdp);
- /*
- * IF we try to do a HUGE PTE update after a withdraw is done.
- * we will find the below NULL. This happens when we do
- * split_huge_page_pmd
- */
- if (!hpte_slot_array)
- return;
-
- if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
- psize, ssize);
- /*
- * No bluk hpte removal support, invalidate each entry
- */
- shift = mmu_psize_defs[psize].shift;
- max_hpte_count = HPAGE_PMD_SIZE >> shift;
- for (i = 0; i < max_hpte_count; i++) {
- /*
- * 8 bits per each hpte entries
- * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
- */
- valid = hpte_valid(hpte_slot_array, i);
- if (!valid)
- continue;
- hidx = hpte_hash_index(hpte_slot_array, i);
-
- /* get the vpn */
- addr = s_addr + (i * (1ul << shift));
- vpn = hpt_vpn(addr, vsid, ssize);
- hash = hpt_hash(vpn, shift, ssize);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
-
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, 0);
- }
-}
-
-
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
- pmd_t *pmdp, unsigned long trap, int local, int ssize,
- unsigned int psize)
+ pmd_t *pmdp, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int psize)
{
unsigned int index, valid;
unsigned char *hpte_slot_array;
@@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* hash page table entries.
*/
if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
- invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
+ flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
+ ssize, flags);
}
valid = hpte_valid(hpte_slot_array, index);
@@ -158,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
slot += hidx & _PTEIDX_GROUP_IX;
ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- psize, lpsize, ssize, local);
+ psize, lpsize, ssize, flags);
/*
* We failed to update, try to insert a new entry.
*/
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 5e4ee2573903..ba47aaf33a4b 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -33,13 +33,13 @@ static inline int tlb1_next(void)
ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
- index = __get_cpu_var(next_tlbcam_idx);
+ index = this_cpu_read(next_tlbcam_idx);
/* Just round-robin the entries and wrap when we hit the end */
if (unlikely(index == ncams - 1))
- __get_cpu_var(next_tlbcam_idx) = tlbcam_index;
+ __this_cpu_write(next_tlbcam_idx, tlbcam_index);
else
- __get_cpu_var(next_tlbcam_idx)++;
+ __this_cpu_inc(next_tlbcam_idx);
return index;
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index a5bcf9301196..d94b1af53a93 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long vflags, int psize, int ssize);
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
- pte_t *ptep, unsigned long trap, int local, int ssize,
- unsigned int shift, unsigned int mmu_psize)
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int shift, unsigned int mmu_psize)
{
unsigned long vpn;
unsigned long old_pte, new_pte;
@@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, local) == -1)
+ mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6a4a5fcb9730..5ff4e07d920a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -62,6 +62,9 @@ static unsigned nr_gpages;
/*
* We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
* 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
*/
int pmd_huge(pmd_t pmd)
{
@@ -230,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;
- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}
#else
@@ -270,13 +273,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL;
- return hugepte_offset(hpdp, addr, pdshift);
+ return hugepte_offset(*hpdp, addr, pdshift);
}
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy or bootmem allocator is setup.
+ * boot before the buddy allocator is setup.
*/
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
@@ -312,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
* If gpages can be in highmem we can't use the trick of storing the
* data structure in the page; allocate space for this
*/
- m = alloc_bootmem(sizeof(struct huge_bootmem_page));
+ m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
@@ -352,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val,
if (size != 0) {
if (sscanf(val, "%lu", &npages) <= 0)
npages = 0;
+ if (npages > MAX_NUMBER_GPAGES) {
+ pr_warn("MMU: %lu pages requested for page "
+ "size %llu KB, limiting to "
+ __stringify(MAX_NUMBER_GPAGES) "\n",
+ npages, size / 1024);
+ npages = MAX_NUMBER_GPAGES;
+ }
gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
size = 0;
}
@@ -399,7 +409,7 @@ void __init reserve_hugetlb_gpages(void)
#else /* !PPC_FSL_BOOK3E */
/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy or bootmem allocator is setup.
+ * boot before the buddy allocator is setup.
*/
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
@@ -462,7 +472,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
struct hugepd_freelist **batchp;
- batchp = &get_cpu_var(hugepd_freelist_cur);
+ batchp = this_cpu_ptr(&hugepd_freelist_cur);
if (atomic_read(&tlb->mm->mm_users) < 2 ||
cpumask_equal(mm_cpumask(tlb->mm),
@@ -536,7 +546,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
- if (!is_hugepd(pmd)) {
+ if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
/*
* if it is not hugepd pointer, we should already find
* it cleared.
@@ -585,7 +595,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
do {
pud = pud_offset(pgd, addr);
next = pud_addr_end(addr, end);
- if (!is_hugepd(pud)) {
+ if (!is_hugepd(__hugepd(pud_val(*pud)))) {
if (pud_none_or_clear_bad(pud))
continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
@@ -651,7 +661,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
do {
next = pgd_addr_end(addr, end);
pgd = pgd_offset(tlb->mm, addr);
- if (!is_hugepd(pgd)) {
+ if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
if (pgd_none_or_clear_bad(pgd))
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
@@ -711,12 +721,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end;
}
-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
- unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
+ unsigned long end, int write, struct page **pages, int *nr)
{
pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(*hugepd);
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
unsigned long next;
ptep = hugepte_offset(hugepd, addr, pdshift);
@@ -959,7 +968,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pgd_huge(pgd)) {
ret_pte = (pte_t *) pgdp;
goto out;
- } else if (is_hugepd(&pgd))
+ } else if (is_hugepd(__hugepd(pgd_val(pgd))))
hpdp = (hugepd_t *)&pgd;
else {
/*
@@ -976,7 +985,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pud_huge(pud)) {
ret_pte = (pte_t *) pudp;
goto out;
- } else if (is_hugepd(&pud))
+ } else if (is_hugepd(__hugepd(pud_val(pud))))
hpdp = (hugepd_t *)&pud;
else {
pdshift = PMD_SHIFT;
@@ -997,7 +1006,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (pmd_huge(pmd) || pmd_large(pmd)) {
ret_pte = (pte_t *) pmdp;
goto out;
- } else if (is_hugepd(&pmd))
+ } else if (is_hugepd(__hugepd(pmd_val(pmd))))
hpdp = (hugepd_t *)&pmd;
else
return pte_offset_kernel(&pmd, ea);
@@ -1006,7 +1015,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (!hpdp)
return NULL;
- ret_pte = hugepte_offset(hpdp, ea, pdshift);
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
pdshift = hugepd_shift(*hpdp);
out:
if (shift)
@@ -1036,14 +1045,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
if ((pte_val(pte) & mask) != mask)
return 0;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /*
- * check for splitting here
- */
- if (pmd_trans_splitting(pte_pmd(pte)))
- return 0;
-#endif
-
/* hugepages are never "special" */
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 415a51b028b9..a10be665b645 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -26,7 +26,6 @@
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
@@ -195,15 +194,6 @@ void __init MMU_init(void)
memblock_set_current_limit(lowmem_end_addr);
}
-/* This is only called until mem_init is done. */
-void __init *early_get_page(void)
-{
- if (init_bootmem_done)
- return alloc_bootmem_pages(PAGE_SIZE);
- else
- return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-}
-
#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3481556a1880..10471f9bb63f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -34,7 +34,6 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8ebaac75c940..b7285a5870f8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -35,6 +35,7 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -60,7 +61,6 @@
#define CPU_FTR_NOEXECUTE 0
#endif
-int init_bootmem_done;
int mem_init_done;
unsigned long long memory_limit;
@@ -144,8 +144,17 @@ int arch_remove_memory(u64 start, u64 size)
zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages);
- if (!ret && (ppc_md.remove_memory))
- ret = ppc_md.remove_memory(start, size);
+ if (ret)
+ return ret;
+
+ /* Remove htab bolted mappings for this section of memory */
+ start = (unsigned long)__va(start);
+ ret = remove_section_mapping(start, start + size);
+
+ /* Ensure all vmalloc mappings are flushed in case they also
+ * hit that section of memory
+ */
+ vm_unmap_aliases();
return ret;
}
@@ -180,70 +189,23 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
}
EXPORT_SYMBOL_GPL(walk_system_ram_range);
-/*
- * Initialize the bootmem system and give it all the memory we
- * have available. If we are using highmem, we only put the
- * lowmem into the bootmem system.
- */
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
{
- unsigned long start, bootmap_pages;
- unsigned long total_pages;
- struct memblock_region *reg;
- int boot_mapsize;
-
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
+ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
#ifdef CONFIG_HIGHMEM
- total_pages = total_lowmem >> PAGE_SHIFT;
max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
#endif
- /*
- * Find an area to use for the bootmem bitmap. Calculate the size of
- * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
- * Add 1 additional page in case the address isn't page-aligned.
- */
- bootmap_pages = bootmem_bootmap_pages(total_pages);
-
- start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-
- min_low_pfn = MEMORY_START >> PAGE_SHIFT;
- boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
-
/* Place all memblock_regions in the same node and merge contiguous
* memblock_regions
*/
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
- /* Add all physical memory to the bootmem map, mark each area
- * present.
- */
-#ifdef CONFIG_HIGHMEM
- free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
-
- /* reserve the sections we're already using */
- for_each_memblock(reserved, reg) {
- unsigned long top = reg->base + reg->size - 1;
- if (top < lowmem_end_addr)
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
- else if (reg->base < lowmem_end_addr) {
- unsigned long trunc_size = lowmem_end_addr - reg->base;
- reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
- }
- }
-#else
- free_bootmem_with_active_regions(0, max_pfn);
-
- /* reserve the sections we're already using */
- for_each_memblock(reserved, reg)
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-#endif
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
-
- init_bootmem_done = 1;
+ sparse_init();
}
/* mark pages that don't exist as nosave */
@@ -359,14 +321,6 @@ void __init paging_init(void)
mark_nonram_nosave();
}
-static void __init register_page_bootmem_info(void)
-{
- int i;
-
- for_each_online_node(i)
- register_page_bootmem_info_node(NODE_DATA(i));
-}
-
void __init mem_init(void)
{
/*
@@ -379,7 +333,6 @@ void __init mem_init(void)
swiotlb_init(0);
#endif
- register_page_bootmem_info();
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
set_max_mapnr(max_pfn);
free_all_bootmem();
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 928ebe79668b..9cba6cba2e50 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -421,12 +421,12 @@ void __init mmu_context_init(void)
/*
* Allocate the maps used by context management
*/
- context_map = alloc_bootmem(CTX_MAP_SIZE);
- context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
+ context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0);
+ context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0);
#ifndef CONFIG_SMP
- stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
+ stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
#else
- stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE);
+ stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
register_cpu_notifier(&mmu_context_cpu_nb);
#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fe6002c1d5a..0257a7d659ef 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
return 0;
}
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
- struct node_active_region *node_ar)
-{
- unsigned long start_pfn, end_pfn;
- int i, nid;
-
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (pfn >= start_pfn && pfn < end_pfn) {
- node_ar->nid = nid;
- node_ar->start_pfn = start_pfn;
- node_ar->end_pfn = end_pfn;
- break;
- }
- }
-}
-
static void reset_numa_cpu_lookup_table(void)
{
unsigned int cpu;
@@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
}
}
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
- unsigned long align,
- unsigned long end_pfn)
-{
- void *ret;
- int new_nid;
- unsigned long ret_paddr;
-
- ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
-
- /* retry over all memory */
- if (!ret_paddr)
- ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
-
- if (!ret_paddr)
- panic("numa.c: cannot allocate %lu bytes for node %d",
- size, nid);
-
- ret = __va(ret_paddr);
-
- /*
- * We initialize the nodes in numeric order: 0, 1, 2...
- * and hand over control from the MEMBLOCK allocator to the
- * bootmem allocator. If this function is called for
- * node 5, then we know that all nodes <5 are using the
- * bootmem allocator instead of the MEMBLOCK allocator.
- *
- * So, check the nid from which this allocation came
- * and double check to see if we need to use bootmem
- * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
- * since it would be useless.
- */
- new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
- if (new_nid < nid) {
- ret = __alloc_bootmem_node(NODE_DATA(new_nid),
- size, align, 0);
-
- dbg("alloc_bootmem %p %lx\n", ret, size);
- }
-
- memset(ret, 0, size);
- return ret;
-}
-
static struct notifier_block ppc64_numa_nb = {
.notifier_call = cpu_numa_callback,
.priority = 1 /* Must run before sched domains notifier. */
};
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
- struct pglist_data *node = NODE_DATA(nid);
- struct memblock_region *reg;
-
- for_each_memblock(reserved, reg) {
- unsigned long physbase = reg->base;
- unsigned long size = reg->size;
- unsigned long start_pfn = physbase >> PAGE_SHIFT;
- unsigned long end_pfn = PFN_UP(physbase + size);
- struct node_active_region node_ar;
- unsigned long node_end_pfn = pgdat_end_pfn(node);
-
- /*
- * Check to make sure that this memblock.reserved area is
- * within the bounds of the node that we care about.
- * Checking the nid of the start and end points is not
- * sufficient because the reserved area could span the
- * entire node.
- */
- if (end_pfn <= node->node_start_pfn ||
- start_pfn >= node_end_pfn)
- continue;
-
- get_node_active_region(start_pfn, &node_ar);
- while (start_pfn < end_pfn &&
- node_ar.start_pfn < node_ar.end_pfn) {
- unsigned long reserve_size = size;
- /*
- * if reserved region extends past active region
- * then trim size to active region
- */
- if (end_pfn > node_ar.end_pfn)
- reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
- - physbase;
- /*
- * Only worry about *this* node, others may not
- * yet have valid NODE_DATA().
- */
- if (node_ar.nid == nid) {
- dbg("reserve_bootmem %lx %lx nid=%d\n",
- physbase, reserve_size, node_ar.nid);
- reserve_bootmem_node(NODE_DATA(node_ar.nid),
- physbase, reserve_size,
- BOOTMEM_DEFAULT);
- }
- /*
- * if reserved region is contained in the active region
- * then done.
- */
- if (end_pfn <= node_ar.end_pfn)
- break;
-
- /*
- * reserved region extends past the active region
- * get next active region that contains this
- * reserved region
- */
- start_pfn = node_ar.end_pfn;
- physbase = start_pfn << PAGE_SHIFT;
- size = size - reserve_size;
- get_node_active_region(start_pfn, &node_ar);
- }
- }
+ u64 spanned_pages = end_pfn - start_pfn;
+ const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+ u64 nd_pa;
+ void *nd;
+ int tnid;
+
+ if (spanned_pages)
+ pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start_pfn << PAGE_SHIFT,
+ (end_pfn << PAGE_SHIFT) - 1);
+ else
+ pr_info("Initmem setup node %d\n", nid);
+
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd = __va(nd_pa);
+
+ /* report and initialize */
+ pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
+ tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+ if (tnid != nid)
+ pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid);
+
+ node_data[nid] = nd;
+ memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+ NODE_DATA(nid)->node_id = nid;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
{
int nid, cpu;
- min_low_pfn = 0;
max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
max_pfn = max_low_pfn;
@@ -1064,64 +956,18 @@ void __init do_init_bootmem(void)
else
dump_numa_memory_topology();
+ memblock_dump_all();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
- void *bootmem_vaddr;
- unsigned long bootmap_pages;
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
- /*
- * Allocate the node structure node local if possible
- *
- * Be careful moving this around, as it relies on all
- * previous nodes' bootmem to be initialized and have
- * all reserved areas marked.
- */
- NODE_DATA(nid) = careful_zallocation(nid,
- sizeof(struct pglist_data),
- SMP_CACHE_BYTES, end_pfn);
-
- dbg("node %d\n", nid);
- dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
- NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
- NODE_DATA(nid)->node_start_pfn = start_pfn;
- NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
- if (NODE_DATA(nid)->node_spanned_pages == 0)
- continue;
-
- dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
- dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmem_vaddr = careful_zallocation(nid,
- bootmap_pages << PAGE_SHIFT,
- PAGE_SIZE, end_pfn);
-
- dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
- init_bootmem_node(NODE_DATA(nid),
- __pa(bootmem_vaddr) >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nid, end_pfn);
- /*
- * Be very careful about moving this around. Future
- * calls to careful_zallocation() depend on this getting
- * done correctly.
- */
- mark_reserved_regions_for_nid(nid);
+ setup_node_data(nid, start_pfn, end_pfn);
sparse_memory_present_with_active_regions(nid);
}
- init_bootmem_done = 1;
+ sparse_init();
- /*
- * Now bootmem is initialised we can create the node to cpumask
- * lookup tables and setup the cpu callback to populate them.
- */
setup_node_to_cpumask_map();
reset_numa_cpu_lookup_table();
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cf11342bf519..d545b1231594 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
{
pte_t *pte;
extern int mem_init_done;
- extern void *early_get_page(void);
if (mem_init_done) {
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
} else {
- pte = (pte_t *)early_get_page();
+ pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
if (pte)
clear_page(pte);
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c8d709ab489d..4fe5f64cc179 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,9 +33,9 @@
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
-#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -51,6 +51,7 @@
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
+#include <asm/dma.h>
#include "mmu_decl.h"
@@ -75,11 +76,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
{
void *pt;
- if (init_bootmem_done)
- pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
- else
- pt = __va(memblock_alloc_base(size, size,
- __pa(MAX_DMA_ADDRESS)));
+ pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
memset(pt, 0, size);
return pt;
@@ -113,10 +110,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
__pgprot(flags)));
} else {
#ifdef CONFIG_PPC_MMU_NOHASH
- /* Warning ! This will blow up if bootmem is not initialized
- * which our ppc64 code is keen to do that, we'll need to
- * fix it and/or be more careful
- */
pgdp = pgd_offset_k(ea);
#ifdef PUD_TABLE_SIZE
if (pgd_none(*pgdp)) {
@@ -352,16 +345,31 @@ EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
+#ifndef __PAGETABLE_PUD_FOLDED
+/* 4 level page table */
+struct page *pgd_page(pgd_t pgd)
+{
+ if (pgd_huge(pgd))
+ return pte_page(pgd_pte(pgd));
+ return virt_to_page(pgd_page_vaddr(pgd));
+}
+#endif
+
+struct page *pud_page(pud_t pud)
+{
+ if (pud_huge(pud))
+ return pte_page(pud_pte(pud));
+ return virt_to_page(pud_page_vaddr(pud));
+}
+
/*
* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
*/
struct page *pmd_page(pmd_t pmd)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge(pmd))
+ if (pmd_trans_huge(pmd) || pmd_huge(pmd))
return pfn_to_page(pmd_pfn(pmd));
-#endif
return virt_to_page(pmd_page_vaddr(pmd));
}
@@ -731,29 +739,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long old_pmd)
{
- int ssize, i;
- unsigned long s_addr;
- int max_hpte_count;
- unsigned int psize, valid;
- unsigned char *hpte_slot_array;
- unsigned long hidx, vpn, vsid, hash, shift, slot;
-
- /*
- * Flush all the hptes mapping this hugepage
- */
- s_addr = addr & HPAGE_PMD_MASK;
- hpte_slot_array = get_hpte_slot_array(pmdp);
- /*
- * IF we try to do a HUGE PTE update after a withdraw is done.
- * we will find the below NULL. This happens when we do
- * split_huge_page_pmd
- */
- if (!hpte_slot_array)
- return;
+ int ssize;
+ unsigned int psize;
+ unsigned long vsid;
+ unsigned long flags = 0;
+ const struct cpumask *tmp;
/* get the base page size,vsid and segment size */
#ifdef CONFIG_DEBUG_VM
- psize = get_slice_psize(mm, s_addr);
+ psize = get_slice_psize(mm, addr);
BUG_ON(psize == MMU_PAGE_16M);
#endif
if (old_pmd & _PAGE_COMBO)
@@ -761,46 +755,20 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
else
psize = MMU_PAGE_64K;
- if (!is_kernel_addr(s_addr)) {
- ssize = user_segment_size(s_addr);
- vsid = get_vsid(mm->context.id, s_addr, ssize);
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
WARN_ON(vsid == 0);
} else {
- vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
- if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(vsid, s_addr,
- hpte_slot_array,
- psize, ssize);
- /*
- * No bluk hpte removal support, invalidate each entry
- */
- shift = mmu_psize_defs[psize].shift;
- max_hpte_count = HPAGE_PMD_SIZE >> shift;
- for (i = 0; i < max_hpte_count; i++) {
- /*
- * 8 bits per each hpte entries
- * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
- */
- valid = hpte_valid(hpte_slot_array, i);
- if (!valid)
- continue;
- hidx = hpte_hash_index(hpte_slot_array, i);
-
- /* get the vpn */
- addr = s_addr + (i * (1ul << shift));
- vpn = hpt_vpn(addr, vsid, ssize);
- hash = hpt_hash(vpn, shift, ssize);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
-
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, 0);
- }
+ tmp = cpumask_of(smp_processor_id());
+ if (cpumask_equal(mm_cpumask(mm), tmp))
+ flags |= HPTE_LOCAL_UPDATE;
+
+ return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)