Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c  91
1 file changed, 76 insertions(+), 15 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 977c641f78cf..fb678a7254f0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -442,6 +442,7 @@ static inline bool queue_pages_required(struct page *page,
  */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
+	__releases(ptl)
 {
 	int ret = 0;
 	struct page *page;
@@ -557,9 +558,10 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 			       unsigned long addr, unsigned long end,
 			       struct mm_walk *walk)
 {
+	int ret = 0;
 #ifdef CONFIG_HUGETLB_PAGE
 	struct queue_pages *qp = walk->private;
-	unsigned long flags = qp->flags;
+	unsigned long flags = (qp->flags & MPOL_MF_VALID);
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
@@ -571,16 +573,44 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
 	page = pte_page(entry);
 	if (!queue_pages_required(page, qp))
 		goto unlock;
+
+	if (flags == MPOL_MF_STRICT) {
+		/*
+		 * STRICT alone means only detecting misplaced page and no
+		 * need to further check other vma.
+		 */
+		ret = -EIO;
+		goto unlock;
+	}
+
+	if (!vma_migratable(walk->vma)) {
+		/*
+		 * Must be STRICT with MOVE*, otherwise .test_walk() have
+		 * stopped walking current vma.
+		 * Detecting misplaced page but allow migrating pages which
+		 * have been queued.
+		 */
+		ret = 1;
+		goto unlock;
+	}
+
 	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
 	if (flags & (MPOL_MF_MOVE_ALL) ||
-	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
-		isolate_huge_page(page, qp->pagelist);
+	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+		if (!isolate_huge_page(page, qp->pagelist) &&
+			(flags & MPOL_MF_STRICT))
+			/*
+			 * Failed to isolate page but allow migrating pages
+			 * which have been queued.
+			 */
+			ret = 1;
+	}
 unlock:
 	spin_unlock(ptl);
 #else
 	BUG();
 #endif
-	return 0;
+	return ret;
 }
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -598,7 +628,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 {
 	int nr_updated;
 
-	nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
+	nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA);
 	if (nr_updated)
 		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
 
@@ -621,7 +651,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	unsigned long flags = qp->flags;
 
 	/* range check first */
-	VM_BUG_ON((vma->vm_start > start) || (vma->vm_end < end));
+	VM_BUG_ON_VMA((vma->vm_start > start) || (vma->vm_end < end), vma);
 
 	if (!qp->first) {
 		qp->first = vma;
@@ -649,8 +679,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 
 	if (flags & MPOL_MF_LAZY) {
 		/* Similar to task_numa_work, skip inaccessible VMAs */
-		if (!is_vm_hugetlb_page(vma) &&
-			(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) &&
+		if (!is_vm_hugetlb_page(vma) && vma_is_accessible(vma) &&
 			!(vma->vm_flags & VM_MIXEDMAP))
 			change_prot_numa(vma, start, endvma);
 		return 1;
@@ -852,7 +881,6 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 
 	switch (p->mode) {
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		*nodes = p->v.nodes;
 		break;
@@ -868,12 +896,15 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 
 static int lookup_node(struct mm_struct *mm, unsigned long addr)
 {
-	struct page *p;
+	struct page *p = NULL;
 	int err;
 
 	int locked = 1;
 	err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
-	if (err >= 0) {
+	if (err == 0) {
+		/* E.g. GUP interrupted by fatal signal */
+		err = -EFAULT;
+	} else if (err > 0) {
 		err = page_to_nid(p);
 		put_page(p);
 	}
@@ -994,7 +1025,7 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
 		if (!isolate_lru_page(head)) {
 			list_add_tail(&head->lru, pagelist);
 			mod_node_page_state(page_pgdat(head),
-				NR_ISOLATED_ANON + page_is_file_cache(head),
+				NR_ISOLATED_ANON + page_is_file_lru(head),
 				hpage_nr_pages(head));
 		} else if (flags & MPOL_MF_STRICT) {
 			/*
@@ -1714,6 +1745,34 @@ COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
 
 #endif /* CONFIG_COMPAT */
 
+bool vma_migratable(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+		return false;
+
+	/*
+	 * DAX device mappings require predictable access latency, so avoid
+	 * incurring periodic faults.
+	 */
+	if (vma_is_dax(vma))
+		return false;
+
+	if (is_vm_hugetlb_page(vma) &&
+		!hugepage_migration_supported(hstate_vma(vma)))
+		return false;
+
+	/*
+	 * Migration allocates pages in the highest zone. If we cannot
+	 * do so then migration (at least from node to node) is not
+	 * possible.
+	 */
+	if (vma->vm_file &&
+		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
+			< policy_zone)
+		return false;
+	return true;
+}
+
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
@@ -2009,7 +2068,6 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 		break;
 
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		*mask = mempolicy->v.nodes;
 		break;
@@ -2276,7 +2334,6 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 
 	switch (a->mode) {
 	case MPOL_BIND:
-		/* Fall through */
 	case MPOL_INTERLEAVE:
 		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
@@ -2841,7 +2898,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
 	switch (mode) {
 	case MPOL_PREFERRED:
 		/*
-		 * Insist on a nodelist of one node only
+		 * Insist on a nodelist of one node only, although later
+		 * we use first_node(nodes) to grab a single node, so here
+		 * nodelist (or nodes) cannot be empty.
 		 */
 		if (nodelist) {
 			char *rest = nodelist;
@@ -2849,6 +2908,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
 				rest++;
 			if (*rest)
 				goto out;
+			if (nodes_empty(nodes))
+				goto out;
 		}
 		break;
 	case MPOL_INTERLEAVE:
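
Note on the queue_pages_hugetlb() hunks above: the new logic reduces to a small pure function of the masked mbind() flags plus a few per-page predicates, returning -EIO to abort the walk, 1 to keep walking while recording that some pages were left behind, and 0 when the page needed no action or was queued. The following is a minimal userspace sketch of that decision table, not kernel code; the MPOL_MF_* constants follow include/uapi/linux/mempolicy.h and the kernel-internal MPOL_MF_VALID mask, while model_queue_hugetlb() and its parameters are hypothetical stand-ins for the real walk state:

	/*
	 * Userspace model (not kernel code) of the return convention the
	 * diff above gives queue_pages_hugetlb().
	 */
	#include <errno.h>
	#include <stdio.h>

	#define MPOL_MF_STRICT   (1 << 0)  /* verify existing pages in the mapping */
	#define MPOL_MF_MOVE     (1 << 1)  /* move pages owned by this process */
	#define MPOL_MF_MOVE_ALL (1 << 2)  /* move every page in the mapping */
	#define MPOL_MF_VALID    (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)

	static int model_queue_hugetlb(unsigned long qp_flags, int misplaced,
				       int migratable, int isolated, int mapcount)
	{
		/* Mask to user-visible flags, as the diff does with MPOL_MF_VALID */
		unsigned long flags = qp_flags & MPOL_MF_VALID;

		if (!misplaced)
			return 0;	/* queue_pages_required() said no */
		if (flags == MPOL_MF_STRICT)
			return -EIO;	/* STRICT alone: report and abort */
		if (!migratable)
			return 1;	/* note the miss, keep walking */
		if ((flags & MPOL_MF_MOVE_ALL) ||
		    ((flags & MPOL_MF_MOVE) && mapcount == 1))
			if (!isolated && (flags & MPOL_MF_STRICT))
				return 1;	/* isolation failed, keep walking */
		return 0;
	}

	int main(void)
	{
		/* STRICT alone on a misplaced page: abort with -EIO (-5) */
		printf("%d\n", model_queue_hugetlb(MPOL_MF_STRICT, 1, 1, 1, 1));
		/* STRICT|MOVE on an unmigratable VMA: flag the miss, continue (1) */
		printf("%d\n", model_queue_hugetlb(MPOL_MF_STRICT | MPOL_MF_MOVE,
						   1, 0, 1, 1));
		/* MOVE on an unshared, isolatable page: queued fine (0) */
		printf("%d\n", model_queue_hugetlb(MPOL_MF_MOVE, 1, 1, 1, 1));
		return 0;
	}

Returning 1 rather than an error lets the walk continue so pages already isolated onto the pagelist can still be migrated, as the in-diff comments note; the caller then decides whether MPOL_MF_STRICT turns the recorded miss into -EIO.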