xref: /OK3568_Linux_fs/kernel/mm/page_vma_mapped.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"

static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
	page_vma_mapped_walk_done(pvmw);
	return false;
}

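/*
 * Map the pte for pvmw->address and decide whether to take its lock.
 * Unless PVMW_SYNC is set, ptes that obviously cannot map the page are
 * skipped without locking: in a migration walk anything that is not a
 * swap pte, otherwise swap ptes that are not device-private entries and
 * ptes that are not present.
 */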
static bool map_pte(struct page_vma_mapped_walk *pvmw)
{
	pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
	if (!(pvmw->flags & PVMW_SYNC)) {
		if (pvmw->flags & PVMW_MIGRATION) {
			if (!is_swap_pte(*pvmw->pte))
				return false;
		} else {
			/*
			 * We get here when we are trying to unmap a private
			 * device page from the process address space. Such a
			 * page is not CPU accessible and thus is mapped as a
			 * special swap entry; nonetheless it still counts as
			 * a valid regular mapping for the page (and is
			 * accounted for as such in the page map counts).
			 *
			 * So handle this special case as if it were a normal
			 * page mapping, i.e. lock the CPU page table and
			 * return true.
			 *
			 * For more details on device private memory see HMM
			 * (include/linux/hmm.h or mm/hmm.c).
			 */
			if (is_swap_pte(*pvmw->pte)) {
				swp_entry_t entry;

				/* Handle un-addressable ZONE_DEVICE memory */
				entry = pte_to_swp_entry(*pvmw->pte);
				if (!is_device_private_entry(entry))
					return false;
			} else if (!pte_present(*pvmw->pte))
				return false;
		}
	}
	pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
	spin_lock(pvmw->ptl);
	return true;
}

static inline bool pfn_is_match(struct page *page, unsigned long pfn)
{
	unsigned long page_pfn = page_to_pfn(page);

	/* normal page and hugetlbfs page */
	if (!PageTransCompound(page) || PageHuge(page))
		return page_pfn == pfn;

	/* THP can be referenced by any subpage */
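	/*
	 * For example, with 4KB base pages a 2MB THP has thp_nr_pages() of
	 * 512, so a head page at pfn 0x80000 matches any pfn in the range
	 * [0x80000, 0x80200).  (Example values are illustrative only.)
	 */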
	return pfn >= page_pfn && pfn - page_pfn < thp_nr_pages(page);
}

/**
 * check_pte - check if @pvmw->page is mapped at @pvmw->pte
 *
 * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
 * mapped. check_pte() has to validate this.
 *
 * @pvmw->pte may point to an empty PTE, a swap PTE or a PTE pointing to an
 * arbitrary page.
 *
 * If the PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains a
 * migration entry that points to @pvmw->page, or to any subpage in the case
 * of THP.
 *
 * If the PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points
 * to @pvmw->page, or to any subpage in the case of THP.
 *
 * Otherwise, returns false.
 */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
	unsigned long pfn;

	if (pvmw->flags & PVMW_MIGRATION) {
		swp_entry_t entry;

		if (!is_swap_pte(*pvmw->pte))
			return false;
		entry = pte_to_swp_entry(*pvmw->pte);

		if (!is_migration_entry(entry))
			return false;

		pfn = migration_entry_to_pfn(entry);
	} else if (is_swap_pte(*pvmw->pte)) {
		swp_entry_t entry;

		/* Handle un-addressable ZONE_DEVICE memory */
		entry = pte_to_swp_entry(*pvmw->pte);
		if (!is_device_private_entry(entry))
			return false;

		pfn = device_private_entry_to_pfn(entry);
	} else {
		if (!pte_present(*pvmw->pte))
			return false;

		pfn = pte_pfn(*pvmw->pte);
	}

	return pfn_is_match(pvmw->page, pfn);
}

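/*
 * step_forward() advances the walk to the next @size-aligned boundary above
 * the current address, e.g. from 0x1234000 with size == PMD_SIZE (2MB with
 * 4KB pages) to 0x1400000 (example values are illustrative).  If the
 * addition wraps past the end of the address space, the address lands on 0,
 * which is forced to ULONG_MAX so the caller's "address < end" loop
 * terminates.
 */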
static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
{
	pvmw->address = (pvmw->address + size) & ~(size - 1);
	if (!pvmw->address)
		pvmw->address = ULONG_MAX;
}

/**
 * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
 * must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte
 * point to the relevant page table entries. @pvmw->ptl is locked.
 * @pvmw->address is adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped
 * page (usually a THP). For a PTE-mapped THP, you should run
 * page_vma_mapped_walk() in a loop to find all PTEs that map the THP
 * (a usage sketch follows the function body below).
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() has returned
 * false, use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct mm_struct *mm = pvmw->vma->vm_mm;
	struct page *page = pvmw->page;
	unsigned long end;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(PageHuge(page))) {
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);

		/* when pud is not present, pte will be NULL */
		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
		spin_lock(pvmw->ptl);
		if (!check_pte(pvmw))
			return not_found(pvmw);
		return true;
	}

	/*
	 * Seeking to the next pte only makes sense for THP.
	 * But more important than that optimization is to filter out
	 * any PageKsm page, whose page->index misleads vma_address()
	 * and vma_address_end() to disaster.
	 */
	end = PageTransCompound(page) ?
		vma_address_end(page, pvmw->vma) :
		pvmw->address + PAGE_SIZE;
	if (pvmw->pte)
		goto next_pte;
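	/*
	 * The loop below walks the page tables from the top for the current
	 * address (pgd -> p4d -> pud -> pmd).  Whenever a level is not
	 * present, step_forward() skips the address ahead to the next
	 * boundary of that level and the loop retries from the top.
	 */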
restart:
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = READ_ONCE(*pvmw->pmd);

		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (likely(pmd_trans_huge(pmde))) {
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (pmd_page(pmde) != page)
					return not_found(pvmw);
				return true;
			}
			if (!pmd_present(pmde)) {
				swp_entry_t entry;

				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = pmd_to_swp_entry(pmde);
				if (!is_migration_entry(entry) ||
				    migration_entry_to_page(entry) != page)
					return not_found(pvmw);
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    PageTransCompound(page)) {
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		if (!map_pte(pvmw))
			goto next_pte;
this_pte:
		if (check_pte(pvmw))
			return true;
next_pte:
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->pte++;
			if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
				pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
				spin_lock(pvmw->ptl);
			}
		} while (pte_none(*pvmw->pte));

		if (!pvmw->ptl) {
			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
			spin_lock(pvmw->ptl);
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}
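
/*
 * A minimal usage sketch of the calling pattern (not part of the original
 * file; the function name and loop body are illustrative only).  Callers
 * such as the rmap walkers in mm/rmap.c drive page_vma_mapped_walk() in a
 * loop like this so that every PTE of a PTE-mapped THP is visited.
 */
static inline void example_visit_mappings(struct page *page,
					  struct vm_area_struct *vma,
					  unsigned long address)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
		/* pmd, pte, ptl and flags start out as zero/NULL */
	};

	while (page_vma_mapped_walk(&pvmw)) {
		if (pvmw.pte) {
			/* one PTE mapping the page: pvmw.address, *pvmw.pte */
		} else {
			/* pvmw.pmd set, pvmw.pte NULL: PMD-mapped THP */
		}
		/*
		 * Breaking out of this loop early must be paired with
		 * page_vma_mapped_walk_done(&pvmw) to drop pvmw.ptl.
		 */
	}
}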

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.flags = PVMW_SYNC,
	};

	pvmw.address = vma_address(page, vma);
	if (pvmw.address == -EFAULT)
		return 0;
	if (!page_vma_mapped_walk(&pvmw))
		return 0;
	page_vma_mapped_walk_done(&pvmw);
	return 1;
}
319