// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

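/*
 * get_pgt_info - look up the pgt_info of a hop by its shadow address.
 *
 * Hops are tracked in the per-context mmu_shadow_hash, keyed by their shadow
 * (host kernel) address. Returns NULL if the hop is not found.
 */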
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

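/*
 * _free_hop - release a hop: return its physical table to the page-table gen
 * pool, remove it from the shadow hash and free the shadow copy. free_hop()
 * below is a convenience wrapper that first resolves the pgt_info from a
 * shadow hop address.
 */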
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

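/*
 * alloc_hop - allocate a new hop.
 *
 * A hop needs two allocations: a physical hop table carved out of the device
 * page-table pool, and a zeroed shadow copy in host memory which the driver
 * reads instead of the device. The shadow address is the hash key and is what
 * callers use from here on. Returns ULLONG_MAX on failure.
 */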
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

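/*
 * Hop 0 tables are not allocated dynamically: both the physical tables and
 * their shadow copies are laid out as per-ASID arrays, so hop 0 of a context
 * is found by indexing with ctx->asid times the hop table size.
 */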
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

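/*
 * flush - make sure all previous PTE updates are visible to the device.
 *
 * The memory barrier orders the CPU writes; the dummy read_pte() of hop 0
 * then forces any posted writes out to the device before the MMU uses them.
 */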
static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

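/*
 * get_pte - increment the PTE reference count of a hop; paired with put_pte()
 * below, which frees the hop once the count drops to zero.
 */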
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

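/*
 * get_hopN_pte_addr - compute the shadow address of the PTE that translates
 * virt_addr at a given hop level. The index bits of the level are extracted
 * with the per-level mask and shift and scaled by the PTE size. For example
 * (illustrative values only): with mmu_pte_size == 8, shift == 12 and a mask
 * covering bits 12..20, virt_addr 0x3000 selects entry 3, i.e. hop_addr + 24.
 */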
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}

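/*
 * get_next_hop_addr - extract the address of the next hop from a PTE, or
 * ULLONG_MAX if the PTE is not present.
 */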
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

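/*
 * get_alloc_next_hop_addr - like get_next_hop_addr(), but allocates the next
 * hop if the PTE is not present. *is_new_hop tells the caller whether a new
 * hop was allocated, so it can link it into the parent PTE and unwind it on
 * error.
 */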
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

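/*
 * dram_default_mapping_init - map the whole DRAM range to the default page.
 *
 * When DRAM default page mapping is enabled, every huge DRAM page of a user
 * context initially points at prop->mmu_dram_default_page_addr. This builds
 * one hop 1, one hop 2 and num_of_hop3 hop 3 tables, where num_of_hop3 is the
 * DRAM size divided by the DRAM page size and by the PTEs per hop. The
 * allocated hops are saved in ctx->dram_default_hops (hop 3 tables first,
 * hop 2 and hop 1 at the end of the array) for dram_default_mapping_fini().
 */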
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

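/*
 * dram_default_mapping_fini - tear down the default DRAM mapping built by
 * dram_default_mapping_init(): clear every hop 3 PTE, then the hop 2, hop 1
 * and hop 0 entries, dropping the PTE reference counts so the hops get freed.
 */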
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}

	mutex_destroy(&ctx->mmu_lock);
}

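/*
 * _hl_mmu_v1_unmap - unmap a single page of virt_addr.
 *
 * Walks the shadow hops from hop 0 down to the mapping PTE (hop 3 for huge
 * pages, hop 4 otherwise). With DRAM default page mapping the PTE is restored
 * to point at the default page; otherwise the PTE is cleared and emptied hops
 * are freed bottom-up via put_pte(). Returns -EINVAL if the address is not
 * mapped.
 */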
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

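/*
 * _hl_mmu_v1_map - map a single page of phys_addr to virt_addr.
 *
 * Allocates any missing hops along the walk, writes the final PTE (in hop 3
 * for huge pages, hop 4 otherwise) and only then links the newly allocated
 * hops into their parents, taking a PTE reference on each allocated hop whose
 * table gained an entry (hop 0 is static and not reference-counted). On
 * failure, all newly allocated hops are freed.
 */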
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is a common code for all the
	 * three cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_set_funcs - set the device's MMU function pointers to the MMU v1
 * implementation
 *
 * @hdev: pointer to the device structure
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
	struct hl_mmu_funcs *mmu = &hdev->mmu_func;

	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = _hl_mmu_v1_map;
	mmu->unmap = _hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
}