// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <linux/init.h>
11*4882a593Smuzhiyun #include <linux/fs.h>
12*4882a593Smuzhiyun #include <linux/mm.h>
13*4882a593Smuzhiyun #include <linux/hugetlb.h>
14*4882a593Smuzhiyun #include <linux/pagemap.h>
15*4882a593Smuzhiyun #include <linux/err.h>
16*4882a593Smuzhiyun #include <linux/sysctl.h>
17*4882a593Smuzhiyun #include <asm/mman.h>
18*4882a593Smuzhiyun #include <asm/tlb.h>
19*4882a593Smuzhiyun #include <asm/tlbflush.h>
20*4882a593Smuzhiyun
/*
 * HugeTLB Support Matrix
 *
 * ---------------------------------------------------
 * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
 * ---------------------------------------------------
 * |     4K    |   64K    |   2M  |    32M   |   1G  |
 * |    16K    |    2M    |  32M  |     1G   |       |
 * |    64K    |    2M    | 512M  |    16G   |       |
 * ---------------------------------------------------
 */
32*4882a593Smuzhiyun
/*
 * When requested, reserve CMA areas sized for the largest gigantic huge
 * page this configuration supports. Smaller gigantic huge pages can still
 * be served from the same areas.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
	/* PUD-sized pages with 4K base pages, contiguous-PMD-sized otherwise. */
#ifdef CONFIG_ARM64_4K_PAGES
	const int order = PUD_SHIFT - PAGE_SHIFT;
#else
	const int order = CONT_PMD_SHIFT - PAGE_SHIFT;
#endif

	/*
	 * The CMA reservation exists for gigantic huge pages, i.e. those
	 * the page allocator cannot provide. Only warn (and carry on) if a
	 * future change breaks that assumption.
	 */
	WARN_ON(order <= MAX_ORDER);
	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */
58*4882a593Smuzhiyun
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
/*
 * Tell whether huge pages belonging to hstate @h may be migrated.
 *
 * Returns true for the PMD, contiguous-PMD and contiguous-PTE sizes, and
 * additionally for the PUD size when built with 4K base pages. Any other
 * size is reported through pr_warn() and rejected with false.
 */
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
	case CONT_PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}

	/* pagesize is a size_t, hence the 'z' length modifier. */
	pr_warn("%s: unrecognized huge page size 0x%zx\n",
		__func__, pagesize);
	return false;
}
#endif
78*4882a593Smuzhiyun
/*
 * Return 1 when @pmd is a non-zero entry whose PMD_TABLE_BIT is clear,
 * 0 otherwise.
 */
int pmd_huge(pmd_t pmd)
{
	if (!pmd_val(pmd))
		return 0;

	return !(pmd_val(pmd) & PMD_TABLE_BIT);
}
83*4882a593Smuzhiyun
/*
 * Return 1 when @pud is a non-zero entry whose PUD_TABLE_BIT is clear,
 * 0 otherwise. Always 0 when the PMD level is folded
 * (__PAGETABLE_PMD_FOLDED).
 */
int pud_huge(pud_t pud)
{
#ifdef __PAGETABLE_PMD_FOLDED
	return 0;
#else
	if (!pud_val(pud))
		return 0;

	return !(pud_val(pud) & PUD_TABLE_BIT);
#endif
}
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun /*
94*4882a593Smuzhiyun * Select all bits except the pfn
95*4882a593Smuzhiyun */
pte_pgprot(pte_t pte)96*4882a593Smuzhiyun static inline pgprot_t pte_pgprot(pte_t pte)
97*4882a593Smuzhiyun {
98*4882a593Smuzhiyun unsigned long pfn = pte_pfn(pte);
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
101*4882a593Smuzhiyun }
102*4882a593Smuzhiyun
/*
 * Work out the geometry of the contiguous set that @ptep belongs to.
 *
 * The page tables of @mm are walked down to the PMD slot for @addr. If
 * that slot is @ptep itself, the set consists of CONT_PMDS entries of
 * PMD_SIZE each; otherwise it consists of CONT_PTES entries of PAGE_SIZE
 * each. The per-entry size is stored in @pgsize and the number of
 * entries is returned.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp = p4d_offset(pgdp, addr);
	pud_t *pudp = pud_offset(p4dp, addr);
	pmd_t *pmdp = pmd_offset(pudp, addr);

	if ((pte_t *)pmdp != ptep) {
		*pgsize = PAGE_SIZE;
		return CONT_PTES;
	}

	*pgsize = PMD_SIZE;
	return CONT_PMDS;
}
121*4882a593Smuzhiyun
/*
 * Translate a huge page @size into the number of page table entries that
 * back it, storing the span of a single entry in @pgsize.
 *
 * PMD_SIZE (and PUD_SIZE with 4K base pages) use one entry of that same
 * size. CONT_PMD_SIZE uses CONT_PMDS entries of PMD_SIZE and
 * CONT_PTE_SIZE uses CONT_PTES entries of PAGE_SIZE. For any other size
 * 0 is returned and @pgsize is left equal to @size.
 */
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	*pgsize = size;

#ifdef CONFIG_ARM64_4K_PAGES
	if (size == PUD_SIZE)
		return 1;
#endif
	if (size == PMD_SIZE)
		return 1;

	if (size == CONT_PMD_SIZE) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}

	if (size == CONT_PTE_SIZE) {
		*pgsize = PAGE_SIZE;
		return CONT_PTES;
	}

	return 0;
}
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun /*
149*4882a593Smuzhiyun * Changing some bits of contiguous entries requires us to follow a
150*4882a593Smuzhiyun * Break-Before-Make approach, breaking the whole contiguous set
151*4882a593Smuzhiyun * before we can change any entries. See ARM DDI 0487A.k_iss10775,
152*4882a593Smuzhiyun * "Misprogramming of the Contiguous bit", page D4-1762.
153*4882a593Smuzhiyun *
154*4882a593Smuzhiyun * This helper performs the break step.
155*4882a593Smuzhiyun */
get_clear_flush(struct mm_struct * mm,unsigned long addr,pte_t * ptep,unsigned long pgsize,unsigned long ncontig)156*4882a593Smuzhiyun static pte_t get_clear_flush(struct mm_struct *mm,
157*4882a593Smuzhiyun unsigned long addr,
158*4882a593Smuzhiyun pte_t *ptep,
159*4882a593Smuzhiyun unsigned long pgsize,
160*4882a593Smuzhiyun unsigned long ncontig)
161*4882a593Smuzhiyun {
162*4882a593Smuzhiyun pte_t orig_pte = huge_ptep_get(ptep);
163*4882a593Smuzhiyun bool valid = pte_valid(orig_pte);
164*4882a593Smuzhiyun unsigned long i, saddr = addr;
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
167*4882a593Smuzhiyun pte_t pte = ptep_get_and_clear(mm, addr, ptep);
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun /*
170*4882a593Smuzhiyun * If HW_AFDBM is enabled, then the HW could turn on
171*4882a593Smuzhiyun * the dirty or accessed bit for any page in the set,
172*4882a593Smuzhiyun * so check them all.
173*4882a593Smuzhiyun */
174*4882a593Smuzhiyun if (pte_dirty(pte))
175*4882a593Smuzhiyun orig_pte = pte_mkdirty(orig_pte);
176*4882a593Smuzhiyun
177*4882a593Smuzhiyun if (pte_young(pte))
178*4882a593Smuzhiyun orig_pte = pte_mkyoung(orig_pte);
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun if (valid) {
182*4882a593Smuzhiyun struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
183*4882a593Smuzhiyun flush_tlb_range(&vma, saddr, addr);
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun return orig_pte;
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun
188*4882a593Smuzhiyun /*
189*4882a593Smuzhiyun * Changing some bits of contiguous entries requires us to follow a
190*4882a593Smuzhiyun * Break-Before-Make approach, breaking the whole contiguous set
191*4882a593Smuzhiyun * before we can change any entries. See ARM DDI 0487A.k_iss10775,
192*4882a593Smuzhiyun * "Misprogramming of the Contiguous bit", page D4-1762.
193*4882a593Smuzhiyun *
194*4882a593Smuzhiyun * This helper performs the break step for use cases where the
195*4882a593Smuzhiyun * original pte is not needed.
196*4882a593Smuzhiyun */
clear_flush(struct mm_struct * mm,unsigned long addr,pte_t * ptep,unsigned long pgsize,unsigned long ncontig)197*4882a593Smuzhiyun static void clear_flush(struct mm_struct *mm,
198*4882a593Smuzhiyun unsigned long addr,
199*4882a593Smuzhiyun pte_t *ptep,
200*4882a593Smuzhiyun unsigned long pgsize,
201*4882a593Smuzhiyun unsigned long ncontig)
202*4882a593Smuzhiyun {
203*4882a593Smuzhiyun struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
204*4882a593Smuzhiyun unsigned long i, saddr = addr;
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
207*4882a593Smuzhiyun pte_clear(mm, addr, ptep);
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun flush_tlb_range(&vma, saddr, addr);
210*4882a593Smuzhiyun }
211*4882a593Smuzhiyun
/*
 * Install the huge page entry @pte at @ptep for address @addr in @mm.
 *
 * An entry without the contiguous bit is written with a single
 * set_pte_at(). For a contiguous entry the existing set is first cleared
 * and flushed, then every entry of the set is written with the same
 * attributes and a pfn that advances by one entry's worth of pages.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int ncontig;
	int n;
	unsigned long pfn;
	unsigned long pfn_step;
	pgprot_t prot;

	/*
	 * Huge swap and migration entries are not handled here yet; the
	 * code needs to be extended for them. Needed for HUGETLB and
	 * MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	pfn_step = pgsize >> PAGE_SHIFT;
	prot = pte_pgprot(pte);

	/* Break step: tear down the whole set before rewriting it. */
	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (n = 0; n < ncontig; n++) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}
}
242*4882a593Smuzhiyun
/*
 * Write the entry @pte into every page table slot backing a huge page of
 * size @sz, starting at @ptep. The same value is stored in each slot.
 */
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte, unsigned long sz)
{
	size_t pgsize;	/* filled in by num_contig_ptes(), not used here */
	int remaining = num_contig_ptes(sz, &pgsize);

	while (remaining-- > 0)
		set_pte(ptep++, pte);
}
254*4882a593Smuzhiyun
huge_pte_alloc(struct mm_struct * mm,struct vm_area_struct * vma,unsigned long addr,unsigned long sz)255*4882a593Smuzhiyun pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
256*4882a593Smuzhiyun unsigned long addr, unsigned long sz)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun pgd_t *pgdp;
259*4882a593Smuzhiyun p4d_t *p4dp;
260*4882a593Smuzhiyun pud_t *pudp;
261*4882a593Smuzhiyun pmd_t *pmdp;
262*4882a593Smuzhiyun pte_t *ptep = NULL;
263*4882a593Smuzhiyun
264*4882a593Smuzhiyun pgdp = pgd_offset(mm, addr);
265*4882a593Smuzhiyun p4dp = p4d_offset(pgdp, addr);
266*4882a593Smuzhiyun pudp = pud_alloc(mm, p4dp, addr);
267*4882a593Smuzhiyun if (!pudp)
268*4882a593Smuzhiyun return NULL;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun if (sz == PUD_SIZE) {
271*4882a593Smuzhiyun ptep = (pte_t *)pudp;
272*4882a593Smuzhiyun } else if (sz == (CONT_PTE_SIZE)) {
273*4882a593Smuzhiyun pmdp = pmd_alloc(mm, pudp, addr);
274*4882a593Smuzhiyun if (!pmdp)
275*4882a593Smuzhiyun return NULL;
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun WARN_ON(addr & (sz - 1));
278*4882a593Smuzhiyun /*
279*4882a593Smuzhiyun * Note that if this code were ever ported to the
280*4882a593Smuzhiyun * 32-bit arm platform then it will cause trouble in
281*4882a593Smuzhiyun * the case where CONFIG_HIGHPTE is set, since there
282*4882a593Smuzhiyun * will be no pte_unmap() to correspond with this
283*4882a593Smuzhiyun * pte_alloc_map().
284*4882a593Smuzhiyun */
285*4882a593Smuzhiyun ptep = pte_alloc_map(mm, pmdp, addr);
286*4882a593Smuzhiyun } else if (sz == PMD_SIZE) {
287*4882a593Smuzhiyun if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
288*4882a593Smuzhiyun ptep = huge_pmd_share(mm, vma, addr, pudp);
289*4882a593Smuzhiyun else
290*4882a593Smuzhiyun ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
291*4882a593Smuzhiyun } else if (sz == (CONT_PMD_SIZE)) {
292*4882a593Smuzhiyun pmdp = pmd_alloc(mm, pudp, addr);
293*4882a593Smuzhiyun WARN_ON(addr & (sz - 1));
294*4882a593Smuzhiyun return (pte_t *)pmdp;
295*4882a593Smuzhiyun }
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun return ptep;
298*4882a593Smuzhiyun }
299*4882a593Smuzhiyun
huge_pte_offset(struct mm_struct * mm,unsigned long addr,unsigned long sz)300*4882a593Smuzhiyun pte_t *huge_pte_offset(struct mm_struct *mm,
301*4882a593Smuzhiyun unsigned long addr, unsigned long sz)
302*4882a593Smuzhiyun {
303*4882a593Smuzhiyun pgd_t *pgdp;
304*4882a593Smuzhiyun p4d_t *p4dp;
305*4882a593Smuzhiyun pud_t *pudp, pud;
306*4882a593Smuzhiyun pmd_t *pmdp, pmd;
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun pgdp = pgd_offset(mm, addr);
309*4882a593Smuzhiyun if (!pgd_present(READ_ONCE(*pgdp)))
310*4882a593Smuzhiyun return NULL;
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun p4dp = p4d_offset(pgdp, addr);
313*4882a593Smuzhiyun if (!p4d_present(READ_ONCE(*p4dp)))
314*4882a593Smuzhiyun return NULL;
315*4882a593Smuzhiyun
316*4882a593Smuzhiyun pudp = pud_offset(p4dp, addr);
317*4882a593Smuzhiyun pud = READ_ONCE(*pudp);
318*4882a593Smuzhiyun if (sz != PUD_SIZE && pud_none(pud))
319*4882a593Smuzhiyun return NULL;
320*4882a593Smuzhiyun /* hugepage or swap? */
321*4882a593Smuzhiyun if (pud_huge(pud) || !pud_present(pud))
322*4882a593Smuzhiyun return (pte_t *)pudp;
323*4882a593Smuzhiyun /* table; check the next level */
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun if (sz == CONT_PMD_SIZE)
326*4882a593Smuzhiyun addr &= CONT_PMD_MASK;
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun pmdp = pmd_offset(pudp, addr);
329*4882a593Smuzhiyun pmd = READ_ONCE(*pmdp);
330*4882a593Smuzhiyun if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
331*4882a593Smuzhiyun pmd_none(pmd))
332*4882a593Smuzhiyun return NULL;
333*4882a593Smuzhiyun if (pmd_huge(pmd) || !pmd_present(pmd))
334*4882a593Smuzhiyun return (pte_t *)pmdp;
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun if (sz == CONT_PTE_SIZE)
337*4882a593Smuzhiyun return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
338*4882a593Smuzhiyun
339*4882a593Smuzhiyun return NULL;
340*4882a593Smuzhiyun }
341*4882a593Smuzhiyun
/*
 * Adjust @entry for the huge page size used by @vma.
 *
 * For CONT_PTE_SIZE and CONT_PMD_SIZE the contiguous bit is set on the
 * entry (through the pte and pmd helpers respectively). PUD_SIZE and
 * PMD_SIZE entries are returned unchanged. Any other size is reported
 * through pr_warn() and the entry is likewise returned unchanged.
 *
 * @page and @writable are not used by this implementation.
 */
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writable)
{
	size_t pagesize = huge_page_size(hstate_vma(vma));

	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		/* pagesize is a size_t, hence the 'z' length modifier. */
		pr_warn("%s: unrecognized huge page size 0x%zx\n",
			__func__, pagesize);
	}
	return entry;
}
357*4882a593Smuzhiyun
/*
 * Clear every page table entry backing the huge page of size @sz that is
 * mapped at @addr, starting with the slot @ptep. No TLB flush is done
 * here.
 */
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	size_t pgsize;
	int remaining = num_contig_ptes(sz, &pgsize);

	while (remaining-- > 0) {
		pte_clear(mm, addr, ptep);
		addr += pgsize;
		ptep++;
	}
}
369*4882a593Smuzhiyun
/*
 * Clear the huge page mapping at @ptep and return the entry it held.
 *
 * A non-contiguous entry is handled by ptep_get_and_clear(). For a
 * contiguous entry the whole set is cleared and flushed, and the
 * returned entry carries the dirty and young state of the whole set.
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (pte_cont(huge_ptep_get(ptep))) {
		ncontig = find_num_contig(mm, addr, ptep, &pgsize);
		return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	}

	return ptep_get_and_clear(mm, addr, ptep);
}
384*4882a593Smuzhiyun
385*4882a593Smuzhiyun /*
386*4882a593Smuzhiyun * huge_ptep_set_access_flags will update access flags (dirty, accesssed)
387*4882a593Smuzhiyun * and write permission.
388*4882a593Smuzhiyun *
389*4882a593Smuzhiyun * For a contiguous huge pte range we need to check whether or not write
390*4882a593Smuzhiyun * permission has to change only on the first pte in the set. Then for
391*4882a593Smuzhiyun * all the contiguous ptes we need to check whether or not there is a
392*4882a593Smuzhiyun * discrepancy between dirty or young.
393*4882a593Smuzhiyun */
__cont_access_flags_changed(pte_t * ptep,pte_t pte,int ncontig)394*4882a593Smuzhiyun static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
395*4882a593Smuzhiyun {
396*4882a593Smuzhiyun int i;
397*4882a593Smuzhiyun
398*4882a593Smuzhiyun if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
399*4882a593Smuzhiyun return 1;
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun for (i = 0; i < ncontig; i++) {
402*4882a593Smuzhiyun pte_t orig_pte = huge_ptep_get(ptep + i);
403*4882a593Smuzhiyun
404*4882a593Smuzhiyun if (pte_dirty(pte) != pte_dirty(orig_pte))
405*4882a593Smuzhiyun return 1;
406*4882a593Smuzhiyun
407*4882a593Smuzhiyun if (pte_young(pte) != pte_young(orig_pte))
408*4882a593Smuzhiyun return 1;
409*4882a593Smuzhiyun }
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun return 0;
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun
/*
 * Update the access flags and write permission of the huge page mapping
 * at @ptep to those of @pte.
 *
 * A non-contiguous @pte is handed to ptep_set_access_flags(). For a
 * contiguous one, nothing is done if no entry of the set differs from
 * @pte; otherwise the set is cleared and flushed, and then rewritten
 * with the attributes of @pte plus any dirty or young state found in the
 * old entries.
 *
 * Returns 1 if the contiguous set was rewritten and 0 if it was left
 * alone; for a non-contiguous @pte the result of
 * ptep_set_access_flags() is returned.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	struct mm_struct *mm;
	size_t pgsize = 0;
	int ncontig;
	int n;
	unsigned long pfn;
	unsigned long pfn_step;
	pgprot_t prot;
	pte_t old;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	mm = vma->vm_mm;
	pfn = pte_pfn(pte);
	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn_step = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	/* Break step: tear down the whole set before rewriting it. */
	old = get_clear_flush(mm, addr, ptep, pgsize, ncontig);

	/* Carry over dirty and young so that neither state is lost. */
	if (pte_dirty(old))
		pte = pte_mkdirty(pte);

	if (pte_young(old))
		pte = pte_mkyoung(pte);

	prot = pte_pgprot(pte);
	for (n = 0; n < ncontig; n++) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}

	return 1;
}
448*4882a593Smuzhiyun
/*
 * Write-protect the huge page mapping at @ptep.
 *
 * A non-contiguous entry is handled by ptep_set_wrprotect(). For a
 * contiguous entry the set is cleared and flushed, the collected entry
 * is write-protected, and the whole set is rewritten from it.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;
	int n;
	unsigned long pfn;
	unsigned long pfn_step;
	pgprot_t prot;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn_step = pgsize >> PAGE_SHIFT;

	/* Break step; the returned entry carries dirty/young of the set. */
	pte = pte_wrprotect(get_clear_flush(mm, addr, ptep, pgsize, ncontig));

	prot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (n = 0; n < ncontig; n++) {
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, prot));
		ptep++;
		addr += pgsize;
		pfn += pfn_step;
	}
}
475*4882a593Smuzhiyun
/*
 * Clear the huge page mapping at @ptep and flush the TLB for it.
 *
 * A non-contiguous entry is handled by ptep_clear_flush(); for a
 * contiguous entry every entry of the set is cleared before the flush.
 */
void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (pte_cont(READ_ONCE(*ptep))) {
		ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
		clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
		return;
	}

	ptep_clear_flush(vma, addr, ptep);
}
490*4882a593Smuzhiyun
hugetlbpage_init(void)491*4882a593Smuzhiyun static int __init hugetlbpage_init(void)
492*4882a593Smuzhiyun {
493*4882a593Smuzhiyun #ifdef CONFIG_ARM64_4K_PAGES
494*4882a593Smuzhiyun hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
495*4882a593Smuzhiyun #endif
496*4882a593Smuzhiyun hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
497*4882a593Smuzhiyun hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
498*4882a593Smuzhiyun hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
499*4882a593Smuzhiyun
500*4882a593Smuzhiyun return 0;
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun arch_initcall(hugetlbpage_init);
503*4882a593Smuzhiyun
arch_hugetlb_valid_size(unsigned long size)504*4882a593Smuzhiyun bool __init arch_hugetlb_valid_size(unsigned long size)
505*4882a593Smuzhiyun {
506*4882a593Smuzhiyun switch (size) {
507*4882a593Smuzhiyun #ifdef CONFIG_ARM64_4K_PAGES
508*4882a593Smuzhiyun case PUD_SIZE:
509*4882a593Smuzhiyun #endif
510*4882a593Smuzhiyun case CONT_PMD_SIZE:
511*4882a593Smuzhiyun case PMD_SIZE:
512*4882a593Smuzhiyun case CONT_PTE_SIZE:
513*4882a593Smuzhiyun return true;
514*4882a593Smuzhiyun }
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun return false;
517*4882a593Smuzhiyun }
518