// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

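/*
 * The driver keeps two copies of every hop (page table): a physical copy in
 * the device PGT area that the H/W MMU walks, and a host-resident "shadow"
 * copy that the driver itself reads and walks. PTE updates are done on the
 * shadow copy and mirrored to the physical copy through the ASIC write_pte
 * callback, translating shadow hop addresses to their physical counterparts
 * on the way.
 */
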
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

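/*
 * Allocate a new hop: carve a physical page table out of the device PGT pool
 * and allocate a zeroed host shadow copy of the same size. The shadow address
 * is returned (it is also the hash key), or ULLONG_MAX on failure.
 */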
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

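/*
 * Hop0 tables are not allocated dynamically: the physical copies are reserved
 * per ASID at the start of the device PGT area, and the shadow copies live in
 * the preallocated mmu_shadow_hop0 array. Both are indexed by the context's
 * ASID.
 */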
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
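	/*
	 * Reading back a PTE from the device also serves as a flushing read
	 * for the posted writes issued above, so they should have reached the
	 * device by the time we return.
	 */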
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

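/*
 * The PTE address within a hop is the hop base plus the PTE size times the
 * index extracted from the virtual address by the per-hop mask and shift.
 */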
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

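/*
 * Return the next hop from the given PTE if it is present; otherwise allocate
 * a new hop and report the allocation through is_new_hop.
 */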
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

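/*
 * For contexts that use the DRAM default page mapping, build a complete
 * hop0 -> hop1 -> hop2 -> hop3 tree in which every hop3 PTE points to the
 * default DRAM page, so a DRAM access that was not explicitly mapped hits
 * that page.
 */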
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

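/*
 * Tear down the default DRAM mapping built by dram_default_mapping_init():
 * clear the hop3 PTEs, release the hop3/hop2/hop1 tables through put_pte(),
 * and finally clear hop0's first PTE.
 */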
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}

	mutex_destroy(&ctx->mmu_lock);
}

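/*
 * Unmap a single page: walk the shadow hops from hop0 down to the PTE that
 * maps virt_addr, then clear that PTE and release the hops bottom-up as their
 * reference counts drop to zero. When the DRAM default page mapping is in
 * use, the hop3 PTE is restored to point at the default page instead of being
 * cleared.
 */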
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
			"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

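/*
 * Map a single page: walk the hops for virt_addr, allocating missing hops on
 * the way, write the leaf PTE with the physical address, then link any newly
 * allocated hops into their parents and take a reference on each parent hop.
 * On failure, newly allocated hops are freed.
 */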
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all the
	 * three cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_set_funcs - set the MMU function pointers for working with MMU v1
 *
 * @hdev: pointer to the device structure
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
	struct hl_mmu_funcs *mmu = &hdev->mmu_func;

	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = _hl_mmu_v1_map;
	mmu->unmap = _hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
}