xref: /OK3568_Linux_fs/kernel/drivers/misc/habanalabs/common/context.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include "habanalabs.h"
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include <linux/slab.h>
11*4882a593Smuzhiyun 
hl_ctx_fini(struct hl_ctx * ctx)12*4882a593Smuzhiyun static void hl_ctx_fini(struct hl_ctx *ctx)
13*4882a593Smuzhiyun {
14*4882a593Smuzhiyun 	struct hl_device *hdev = ctx->hdev;
15*4882a593Smuzhiyun 	u64 idle_mask = 0;
16*4882a593Smuzhiyun 	int i;
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun 	/*
19*4882a593Smuzhiyun 	 * If we arrived here, there are no jobs waiting for this context
20*4882a593Smuzhiyun 	 * on its queues so we can safely remove it.
21*4882a593Smuzhiyun 	 * This is because for each CS, we increment the ref count and for
22*4882a593Smuzhiyun 	 * every CS that was finished we decrement it and we won't arrive
23*4882a593Smuzhiyun 	 * to this function unless the ref count is 0
24*4882a593Smuzhiyun 	 */
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun 	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
27*4882a593Smuzhiyun 		hl_fence_put(ctx->cs_pending[i]);
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun 	kfree(ctx->cs_pending);
30*4882a593Smuzhiyun 
31*4882a593Smuzhiyun 	if (ctx->asid != HL_KERNEL_ASID_ID) {
32*4882a593Smuzhiyun 		dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun 		/* The engines are stopped as there is no executing CS, but the
35*4882a593Smuzhiyun 		 * Coresight might be still working by accessing addresses
36*4882a593Smuzhiyun 		 * related to the stopped engines. Hence stop it explicitly.
37*4882a593Smuzhiyun 		 * Stop only if this is the compute context, as there can be
38*4882a593Smuzhiyun 		 * only one compute context
39*4882a593Smuzhiyun 		 */
40*4882a593Smuzhiyun 		if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
41*4882a593Smuzhiyun 			hl_device_set_debug_mode(hdev, false);
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun 		hl_cb_va_pool_fini(ctx);
44*4882a593Smuzhiyun 		hl_vm_ctx_fini(ctx);
45*4882a593Smuzhiyun 		hl_asid_free(hdev, ctx->asid);
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun 		if ((!hdev->pldm) && (hdev->pdev) &&
48*4882a593Smuzhiyun 				(!hdev->asic_funcs->is_device_idle(hdev,
49*4882a593Smuzhiyun 							&idle_mask, NULL)))
50*4882a593Smuzhiyun 			dev_notice(hdev->dev,
51*4882a593Smuzhiyun 				"device not idle after user context is closed (0x%llx)\n",
52*4882a593Smuzhiyun 				idle_mask);
53*4882a593Smuzhiyun 	} else {
54*4882a593Smuzhiyun 		dev_dbg(hdev->dev, "closing kernel context\n");
55*4882a593Smuzhiyun 		hl_mmu_ctx_fini(ctx);
56*4882a593Smuzhiyun 	}
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun 
hl_ctx_do_release(struct kref * ref)59*4882a593Smuzhiyun void hl_ctx_do_release(struct kref *ref)
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun 	struct hl_ctx *ctx;
62*4882a593Smuzhiyun 
63*4882a593Smuzhiyun 	ctx = container_of(ref, struct hl_ctx, refcount);
64*4882a593Smuzhiyun 
65*4882a593Smuzhiyun 	hl_ctx_fini(ctx);
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun 	if (ctx->hpriv)
68*4882a593Smuzhiyun 		hl_hpriv_put(ctx->hpriv);
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	kfree(ctx);
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun 
hl_ctx_create(struct hl_device * hdev,struct hl_fpriv * hpriv)73*4882a593Smuzhiyun int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
74*4882a593Smuzhiyun {
75*4882a593Smuzhiyun 	struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
76*4882a593Smuzhiyun 	struct hl_ctx *ctx;
77*4882a593Smuzhiyun 	int rc;
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
80*4882a593Smuzhiyun 	if (!ctx) {
81*4882a593Smuzhiyun 		rc = -ENOMEM;
82*4882a593Smuzhiyun 		goto out_err;
83*4882a593Smuzhiyun 	}
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	mutex_lock(&mgr->ctx_lock);
86*4882a593Smuzhiyun 	rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
87*4882a593Smuzhiyun 	mutex_unlock(&mgr->ctx_lock);
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	if (rc < 0) {
90*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
91*4882a593Smuzhiyun 		goto free_ctx;
92*4882a593Smuzhiyun 	}
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun 	ctx->handle = rc;
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	rc = hl_ctx_init(hdev, ctx, false);
97*4882a593Smuzhiyun 	if (rc)
98*4882a593Smuzhiyun 		goto remove_from_idr;
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	hl_hpriv_get(hpriv);
101*4882a593Smuzhiyun 	ctx->hpriv = hpriv;
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	/* TODO: remove for multiple contexts per process */
104*4882a593Smuzhiyun 	hpriv->ctx = ctx;
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 	/* TODO: remove the following line for multiple process support */
107*4882a593Smuzhiyun 	hdev->compute_ctx = ctx;
108*4882a593Smuzhiyun 
109*4882a593Smuzhiyun 	return 0;
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun remove_from_idr:
112*4882a593Smuzhiyun 	mutex_lock(&mgr->ctx_lock);
113*4882a593Smuzhiyun 	idr_remove(&mgr->ctx_handles, ctx->handle);
114*4882a593Smuzhiyun 	mutex_unlock(&mgr->ctx_lock);
115*4882a593Smuzhiyun free_ctx:
116*4882a593Smuzhiyun 	kfree(ctx);
117*4882a593Smuzhiyun out_err:
118*4882a593Smuzhiyun 	return rc;
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun 
hl_ctx_free(struct hl_device * hdev,struct hl_ctx * ctx)121*4882a593Smuzhiyun void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
122*4882a593Smuzhiyun {
123*4882a593Smuzhiyun 	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
124*4882a593Smuzhiyun 		return;
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 	dev_warn(hdev->dev,
127*4882a593Smuzhiyun 		"user process released device but its command submissions are still executing\n");
128*4882a593Smuzhiyun }
129*4882a593Smuzhiyun 
/*
 * hl_ctx_init - initialize a context structure
 *
 * @hdev: pointer to device structure
 * @ctx: pointer to the context structure to initialize
 * @is_kernel_ctx: true to set up the kernel context, false for a user one
 *
 * Sets the common fields, allocates the zeroed pending-CS array and then
 * runs either the kernel-context path (fixed ASID plus hl_mmu_ctx_init())
 * or the user-context path (ASID allocation, VM context, CB VA pool and
 * the ASIC-specific ctx_init callback). On failure everything that was
 * set up in this function is undone in reverse order.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
	int rc;

	ctx->hdev = hdev;
	kref_init(&ctx->refcount);

	ctx->cs_sequence = 1;
	spin_lock_init(&ctx->cs_lock);
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;

	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
					sizeof(*ctx->cs_pending), GFP_KERNEL);
	if (!ctx->cs_pending)
		return -ENOMEM;

	if (is_kernel_ctx) {
		/* Kernel driver gets ASID 0 */
		ctx->asid = HL_KERNEL_ASID_ID;

		rc = hl_mmu_ctx_init(ctx);
		if (!rc)
			return 0;

		dev_err(hdev->dev, "Failed to init mmu ctx module\n");
		goto release_cs_pending;
	}

	/* A returned ASID of 0 is treated as an allocation failure */
	ctx->asid = hl_asid_alloc(hdev);
	if (!ctx->asid) {
		dev_err(hdev->dev, "No free ASID, failed to create context\n");
		rc = -ENOMEM;
		goto release_cs_pending;
	}

	rc = hl_vm_ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "Failed to init mem ctx module\n");
		/*
		 * NOTE(review): the error code returned by hl_vm_ctx_init()
		 * is replaced by -ENOMEM here - confirm this is intended.
		 */
		rc = -ENOMEM;
		goto release_asid;
	}

	rc = hl_cb_va_pool_init(ctx);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to init VA pool for mapped CB\n");
		goto release_vm_ctx;
	}

	rc = hdev->asic_funcs->ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "ctx_init failed\n");
		goto release_cb_va_pool;
	}

	dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);

	return 0;

release_cb_va_pool:
	hl_cb_va_pool_fini(ctx);
release_vm_ctx:
	hl_vm_ctx_fini(ctx);
release_asid:
	hl_asid_free(hdev, ctx->asid);
release_cs_pending:
	kfree(ctx->cs_pending);

	return rc;
}
199*4882a593Smuzhiyun 
hl_ctx_get(struct hl_device * hdev,struct hl_ctx * ctx)200*4882a593Smuzhiyun void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun 	kref_get(&ctx->refcount);
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun 
hl_ctx_put(struct hl_ctx * ctx)205*4882a593Smuzhiyun int hl_ctx_put(struct hl_ctx *ctx)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun 	return kref_put(&ctx->refcount, hl_ctx_do_release);
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun 
/*
 * hl_ctx_get_fence - look up the fence stored for a CS sequence number
 *
 * @ctx: pointer to the context structure
 * @seq: CS sequence number to look up
 *
 * The lookup is done under the context's cs_lock.
 *
 * Return: ERR_PTR(-EINVAL) if @seq is not below the context's current
 * cs_sequence, NULL if @seq is more than max_pending_cs behind it,
 * otherwise the content of the matching pending-CS slot after
 * hl_fence_get() was called on it.
 */
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);

	if (seq >= ctx->cs_sequence) {
		fence = ERR_PTR(-EINVAL);
	} else if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
		fence = NULL;
	} else {
		/*
		 * The slot index is derived by masking; presumably
		 * max_pending_cs is a power of two - TODO confirm.
		 */
		fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
		hl_fence_get(fence);
	}

	spin_unlock(&ctx->cs_lock);

	return fence;
}
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun /*
236*4882a593Smuzhiyun  * hl_ctx_mgr_init - initialize the context manager
237*4882a593Smuzhiyun  *
238*4882a593Smuzhiyun  * @mgr: pointer to context manager structure
239*4882a593Smuzhiyun  *
240*4882a593Smuzhiyun  * This manager is an object inside the hpriv object of the user process.
241*4882a593Smuzhiyun  * The function is called when a user process opens the FD.
242*4882a593Smuzhiyun  */
hl_ctx_mgr_init(struct hl_ctx_mgr * mgr)243*4882a593Smuzhiyun void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
244*4882a593Smuzhiyun {
245*4882a593Smuzhiyun 	mutex_init(&mgr->ctx_lock);
246*4882a593Smuzhiyun 	idr_init(&mgr->ctx_handles);
247*4882a593Smuzhiyun }
248*4882a593Smuzhiyun 
249*4882a593Smuzhiyun /*
250*4882a593Smuzhiyun  * hl_ctx_mgr_fini - finalize the context manager
251*4882a593Smuzhiyun  *
252*4882a593Smuzhiyun  * @hdev: pointer to device structure
253*4882a593Smuzhiyun  * @mgr: pointer to context manager structure
254*4882a593Smuzhiyun  *
255*4882a593Smuzhiyun  * This function goes over all the contexts in the manager and frees them.
256*4882a593Smuzhiyun  * It is called when a process closes the FD.
257*4882a593Smuzhiyun  */
hl_ctx_mgr_fini(struct hl_device * hdev,struct hl_ctx_mgr * mgr)258*4882a593Smuzhiyun void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
259*4882a593Smuzhiyun {
260*4882a593Smuzhiyun 	struct hl_ctx *ctx;
261*4882a593Smuzhiyun 	struct idr *idp;
262*4882a593Smuzhiyun 	u32 id;
263*4882a593Smuzhiyun 
264*4882a593Smuzhiyun 	idp = &mgr->ctx_handles;
265*4882a593Smuzhiyun 
266*4882a593Smuzhiyun 	idr_for_each_entry(idp, ctx, id)
267*4882a593Smuzhiyun 		hl_ctx_free(hdev, ctx);
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun 	idr_destroy(&mgr->ctx_handles);
270*4882a593Smuzhiyun 	mutex_destroy(&mgr->ctx_lock);
271*4882a593Smuzhiyun }
272