Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  |  61
1 file changed, 56 insertions(+), 5 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 90f92c53476f..0d63e7ce35cc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1088,6 +1088,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
+ struct page *subpage = page;
page = compound_head(page);
@@ -1101,6 +1102,7 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
* folio_test_anon()) will not see one without the other.
*/
WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
+ SetPageAnonExclusive(subpage);
}
/**
@@ -1118,7 +1120,7 @@ static void __page_set_anon_rmap(struct page *page,
BUG_ON(!anon_vma);
if (PageAnon(page))
- return;
+ goto out;
/*
* If the page isn't exclusively mapped into this vma,
@@ -1137,6 +1139,9 @@ static void __page_set_anon_rmap(struct page *page,
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
page->index = linear_page_index(vma, address);
+out:
+ if (exclusive)
+ SetPageAnonExclusive(page);
}
/**
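The hunks above teach the anon rmap code to remember exclusivity: __page_set_anon_rmap() now jumps to the new out label instead of returning early, so an already-anon page can still get PageAnonExclusive() set when the caller maps it exclusively, and page_move_anon_rmap() marks the reused subpage on write-fault reuse. A minimal userspace sketch of that behaviour, with toy names standing in for the kernel's page flags:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model (illustrative names, not kernel types): a page remembers
 * whether it is anon and whether it is exclusively mapped; setting the
 * anon rmap may mark it exclusive even if it was already anon, and it
 * never clears the flag.
 */
struct toy_page {
	bool anon;		/* has an anon rmap */
	bool anon_exclusive;	/* "only one process maps this" hint */
};

static void toy_set_anon_rmap(struct toy_page *page, bool exclusive)
{
	if (!page->anon)
		page->anon = true;	/* first anon mapping: set mapping/index */
	if (exclusive)
		page->anon_exclusive = true;	/* the out: label in the patch */
}

int main(void)
{
	struct toy_page p = { 0 };

	toy_set_anon_rmap(&p, true);	/* e.g. first write fault in a private VMA */
	assert(p.anon && p.anon_exclusive);

	toy_set_anon_rmap(&p, false);	/* a later non-exclusive add leaves the flag alone */
	assert(p.anon_exclusive);

	printf("anon=%d exclusive=%d\n", p.anon, p.anon_exclusive);
	return 0;
}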
@@ -1198,6 +1203,8 @@ void page_add_anon_rmap(struct page *page,
} else {
first = atomic_inc_and_test(&page->_mapcount);
}
+ VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
+ VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
if (first) {
int nr = compound ? thp_nr_pages(page) : 1;
@@ -1459,7 +1466,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
pte_t pteval;
struct page *subpage;
- bool ret = true;
+ bool anon_exclusive, ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
@@ -1515,6 +1522,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
subpage = folio_page(folio,
pte_pfn(*pvmw.pte) - folio_pfn(folio));
address = pvmw.address;
+ anon_exclusive = folio_test_anon(folio) &&
+ PageAnonExclusive(subpage);
if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
@@ -1550,9 +1559,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
}
}
- /* Nuke the page table entry. */
+ /*
+ * Nuke the page table entry. When having to clear
+ * PageAnonExclusive(), we always have to flush.
+ */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
- if (should_defer_flush(mm, flags)) {
+ if (should_defer_flush(mm, flags) && !anon_exclusive) {
/*
* We clear the PTE but do not flush so potentially
* a remote CPU could still be writing to the folio.
@@ -1677,6 +1689,24 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
page_vma_mapped_walk_done(&pvmw);
break;
}
+ if (anon_exclusive &&
+ page_try_share_anon_rmap(subpage)) {
+ swap_free(entry);
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ /*
+ * Note: We *don't* remember yet if the page was mapped
+ * exclusively in the swap entry, so swapin code has
+ * to re-determine that manually and might detect the
+ * page as possibly shared, for example, if there are
+ * other references on the page or if the page is under
+ * writeback. We made sure that there are no GUP pins
+ * on the page that would rely on it, so for GUP pins
+ * this is fine.
+ */
if (list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
if (list_empty(&mm->mmlist))
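The try_to_unmap_one() changes above only convert an exclusively mapped anon page into a swap entry if the exclusive marker can be cleared first: page_try_share_anon_rmap() refuses when GUP pins depend on exclusivity, in which case the PTE is restored and the walk aborted, and deferred TLB flushing is skipped for such pages so no stale writable mapping outlives the clearing. A small userspace sketch of that decision, using toy stand-ins for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model (illustrative names, not the kernel API): dropping the
 * exclusive marker fails while pins rely on it, and then the unmap of
 * this PTE is abandoned.
 */
struct toy_page {
	bool anon_exclusive;
	int pins;		/* stand-in for "page may be GUP-pinned" */
};

/* Analogue of page_try_share_anon_rmap(): 0 on success, nonzero on failure. */
static int toy_try_share_anon_rmap(struct toy_page *page)
{
	if (page->pins > 0)
		return -1;
	page->anon_exclusive = false;
	return 0;
}

/* Returns true if the PTE may be replaced by a swap entry. */
static bool toy_try_to_unmap_one(struct toy_page *page)
{
	bool anon_exclusive = page->anon_exclusive;

	if (anon_exclusive && toy_try_share_anon_rmap(page)) {
		/* kernel path: swap_free(), restore the PTE, ret = false */
		return false;
	}
	return true;
}

int main(void)
{
	struct toy_page pinned = { .anon_exclusive = true, .pins = 1 };
	struct toy_page plain  = { .anon_exclusive = true, .pins = 0 };

	printf("pinned: %d\n", toy_try_to_unmap_one(&pinned));	/* 0: back off */
	printf("plain:  %d\n", toy_try_to_unmap_one(&plain));	/* 1: unmapped */
	return 0;
}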
@@ -1776,7 +1806,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
pte_t pteval;
struct page *subpage;
- bool ret = true;
+ bool anon_exclusive, ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
@@ -1837,6 +1867,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
subpage = folio_page(folio,
pte_pfn(*pvmw.pte) - folio_pfn(folio));
address = pvmw.address;
+ anon_exclusive = folio_test_anon(folio) &&
+ PageAnonExclusive(subpage);
if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
@@ -1888,6 +1920,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
swp_entry_t entry;
pte_t swp_pte;
+ if (anon_exclusive)
+ BUG_ON(page_try_share_anon_rmap(subpage));
+
/*
* Store the pfn of the page in a special migration
* pte. do_swap_page() will wait until the migration
@@ -1896,6 +1931,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
entry = pte_to_swp_entry(pteval);
if (is_writable_device_private_entry(entry))
entry = make_writable_migration_entry(pfn);
+ else if (anon_exclusive)
+ entry = make_readable_exclusive_migration_entry(pfn);
else
entry = make_readable_migration_entry(pfn);
swp_pte = swp_entry_to_pte(entry);
@@ -1960,6 +1997,15 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
page_vma_mapped_walk_done(&pvmw);
break;
}
+ VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
+ !anon_exclusive, subpage);
+ if (anon_exclusive &&
+ page_try_share_anon_rmap(subpage)) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
/*
* Store the pfn of the page in a special migration
@@ -1969,6 +2015,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_write(pteval))
entry = make_writable_migration_entry(
page_to_pfn(subpage));
+ else if (anon_exclusive)
+ entry = make_readable_exclusive_migration_entry(
+ page_to_pfn(subpage));
else
entry = make_readable_migration_entry(
page_to_pfn(subpage));
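Both migration paths, the device-private hunk earlier and this one, now choose between three migration entry types: writable when the PTE was writable, the new readable-exclusive entry when a read-only PTE maps an exclusive anon page, and plain readable otherwise, so exclusivity survives migration. A toy sketch of that selection, using an enum in place of the kernel's swp_entry_t encodings:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative names only, not kernel types. */
enum toy_migration_entry {
	TOY_MIG_READABLE,
	TOY_MIG_READABLE_EXCLUSIVE,	/* preserves PageAnonExclusive() */
	TOY_MIG_WRITABLE,
};

static enum toy_migration_entry
toy_make_migration_entry(bool pte_write, bool anon_exclusive)
{
	if (pte_write)
		return TOY_MIG_WRITABLE;
	else if (anon_exclusive)
		return TOY_MIG_READABLE_EXCLUSIVE;
	return TOY_MIG_READABLE;
}

int main(void)
{
	/* A writable anon PTE implies exclusivity (see the VM_BUG_ON above). */
	printf("%d\n", toy_make_migration_entry(true, true));		/* 2 */
	printf("%d\n", toy_make_migration_entry(false, true));		/* 1 */
	printf("%d\n", toy_make_migration_entry(false, false));	/* 0 */
	return 0;
}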
@@ -2405,6 +2454,8 @@ void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
BUG_ON(!anon_vma);
/* address might be in next vma when migration races vma_adjust */
first = atomic_inc_and_test(compound_mapcount_ptr(page));
+ VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
+ VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
if (first)
__page_set_anon_rmap(page, vma, address,
!!(flags & RMAP_EXCLUSIVE));