1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun /*
4*4882a593Smuzhiyun * Copyright 2016-2019 HabanaLabs, Ltd.
5*4882a593Smuzhiyun * All Rights Reserved.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun #define pr_fmt(fmt) "habanalabs: " fmt
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include "habanalabs.h"
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #include <linux/pci.h>
14*4882a593Smuzhiyun #include <linux/aer.h>
15*4882a593Smuzhiyun #include <linux/module.h>
16*4882a593Smuzhiyun
#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"

#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"

MODULE_AUTHOR(HL_DRIVER_AUTHOR);
MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2");

/* Char-device major number, allocated in hl_init() */
static int hl_major;
/* Device class under which per-device nodes are created */
static struct class *hl_class;
/* Maps char-device minors to their hl_device instance */
static DEFINE_IDR(hl_devs_idr);
/* Protects hl_devs_idr against concurrent probe/remove/open */
static DEFINE_MUTEX(hl_devs_idr_lock);

/* Module parameters - read-only after load (mode 0444) */
static int timeout_locked = 5;
static int reset_on_lockup = 1;

module_param(timeout_locked, int, 0444);
MODULE_PARM_DESC(timeout_locked,
	"Device lockup timeout in seconds (0 = disabled, default 5s)");

module_param(reset_on_lockup, int, 0444);
MODULE_PARM_DESC(reset_on_lockup,
	"Do device reset on lockup (0 = no, 1 = yes, default yes)");

#define PCI_VENDOR_ID_HABANALABS	0x1da3

/* PCI device ids of the supported ASICs */
#define PCI_IDS_GOYA			0x0001
#define PCI_IDS_GAUDI			0x1000
45*4882a593Smuzhiyun
/* PCI id table - one entry per supported ASIC, terminated by a zero entry */
static const struct pci_device_id ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun /*
54*4882a593Smuzhiyun * get_asic_type - translate device id to asic type
55*4882a593Smuzhiyun *
56*4882a593Smuzhiyun * @device: id of the PCI device
57*4882a593Smuzhiyun *
58*4882a593Smuzhiyun * Translate device id to asic type.
59*4882a593Smuzhiyun * In case of unidentified device, return -1
60*4882a593Smuzhiyun */
get_asic_type(u16 device)61*4882a593Smuzhiyun static enum hl_asic_type get_asic_type(u16 device)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun enum hl_asic_type asic_type;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun switch (device) {
66*4882a593Smuzhiyun case PCI_IDS_GOYA:
67*4882a593Smuzhiyun asic_type = ASIC_GOYA;
68*4882a593Smuzhiyun break;
69*4882a593Smuzhiyun case PCI_IDS_GAUDI:
70*4882a593Smuzhiyun asic_type = ASIC_GAUDI;
71*4882a593Smuzhiyun break;
72*4882a593Smuzhiyun default:
73*4882a593Smuzhiyun asic_type = ASIC_INVALID;
74*4882a593Smuzhiyun break;
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun
77*4882a593Smuzhiyun return asic_type;
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun
/*
 * hl_device_open - open function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process opens an habanalabs device.
 * Creates a compute context for the caller; only a single compute
 * context may be open at a time, so a second opener gets -EBUSY.
 */
int hl_device_open(struct inode *inode, struct file *filp)
{
	struct hl_device *hdev;
	struct hl_fpriv *hpriv;
	int rc;

	/* The char-device minor is the key into the devices IDR */
	mutex_lock(&hl_devs_idr_lock);
	hdev = idr_find(&hl_devs_idr, iminor(inode));
	mutex_unlock(&hl_devs_idr_lock);

	if (!hdev) {
		pr_err("Couldn't find device %d:%d\n",
			imajor(inode), iminor(inode));
		return -ENXIO;
	}

	/* Per-open private data; freed on release or in the error path below */
	hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
	if (!hpriv)
		return -ENOMEM;

	hpriv->hdev = hdev;
	filp->private_data = hpriv;
	hpriv->filp = filp;
	mutex_init(&hpriv->restore_phase_mutex);
	kref_init(&hpriv->refcount);
	nonseekable_open(inode, filp);

	hl_cb_mgr_init(&hpriv->cb_mgr);
	hl_ctx_mgr_init(&hpriv->ctx_mgr);

	hpriv->taskpid = find_get_pid(current->pid);

	/* fpriv_list_lock serializes open against reset/debug state changes,
	 * so the checks below and the list insertion are one atomic step
	 */
	mutex_lock(&hdev->fpriv_list_lock);

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_err_ratelimited(hdev->dev,
			"Can't open %s because it is disabled or in reset\n",
			dev_name(hdev->dev));
		rc = -EPERM;
		goto out_err;
	}

	if (hdev->in_debug) {
		dev_err_ratelimited(hdev->dev,
			"Can't open %s because it is being debugged by another user\n",
			dev_name(hdev->dev));
		rc = -EPERM;
		goto out_err;
	}

	/* Only one compute context may exist at a time */
	if (hdev->compute_ctx) {
		dev_dbg_ratelimited(hdev->dev,
			"Can't open %s because another user is working on it\n",
			dev_name(hdev->dev));
		rc = -EBUSY;
		goto out_err;
	}

	rc = hl_ctx_create(hdev, hpriv);
	if (rc) {
		dev_err(hdev->dev, "Failed to create context %d\n", rc);
		goto out_err;
	}

	/* Device is IDLE at this point so it is legal to change PLLs.
	 * There is no need to check anything because if the PLL is
	 * already HIGH, the set function will return without doing
	 * anything
	 */
	hl_device_set_frequency(hdev, PLL_HIGH);

	list_add(&hpriv->dev_node, &hdev->fpriv_list);
	mutex_unlock(&hdev->fpriv_list_lock);

	hl_debugfs_add_file(hpriv);

	return 0;

out_err:
	/* Undo everything done above, in reverse order, with the lock
	 * released first since the managers' fini functions may sleep
	 */
	mutex_unlock(&hdev->fpriv_list_lock);

	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
	filp->private_data = NULL;
	mutex_destroy(&hpriv->restore_phase_mutex);
	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return rc;
}
179*4882a593Smuzhiyun
hl_device_open_ctrl(struct inode * inode,struct file * filp)180*4882a593Smuzhiyun int hl_device_open_ctrl(struct inode *inode, struct file *filp)
181*4882a593Smuzhiyun {
182*4882a593Smuzhiyun struct hl_device *hdev;
183*4882a593Smuzhiyun struct hl_fpriv *hpriv;
184*4882a593Smuzhiyun int rc;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun mutex_lock(&hl_devs_idr_lock);
187*4882a593Smuzhiyun hdev = idr_find(&hl_devs_idr, iminor(inode));
188*4882a593Smuzhiyun mutex_unlock(&hl_devs_idr_lock);
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun if (!hdev) {
191*4882a593Smuzhiyun pr_err("Couldn't find device %d:%d\n",
192*4882a593Smuzhiyun imajor(inode), iminor(inode));
193*4882a593Smuzhiyun return -ENXIO;
194*4882a593Smuzhiyun }
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
197*4882a593Smuzhiyun if (!hpriv)
198*4882a593Smuzhiyun return -ENOMEM;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun mutex_lock(&hdev->fpriv_list_lock);
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun if (hl_device_disabled_or_in_reset(hdev)) {
203*4882a593Smuzhiyun dev_err_ratelimited(hdev->dev_ctrl,
204*4882a593Smuzhiyun "Can't open %s because it is disabled or in reset\n",
205*4882a593Smuzhiyun dev_name(hdev->dev_ctrl));
206*4882a593Smuzhiyun rc = -EPERM;
207*4882a593Smuzhiyun goto out_err;
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun
210*4882a593Smuzhiyun list_add(&hpriv->dev_node, &hdev->fpriv_list);
211*4882a593Smuzhiyun mutex_unlock(&hdev->fpriv_list_lock);
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun hpriv->hdev = hdev;
214*4882a593Smuzhiyun filp->private_data = hpriv;
215*4882a593Smuzhiyun hpriv->filp = filp;
216*4882a593Smuzhiyun hpriv->is_control = true;
217*4882a593Smuzhiyun nonseekable_open(inode, filp);
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun hpriv->taskpid = find_get_pid(current->pid);
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun return 0;
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun out_err:
224*4882a593Smuzhiyun mutex_unlock(&hdev->fpriv_list_lock);
225*4882a593Smuzhiyun kfree(hpriv);
226*4882a593Smuzhiyun return rc;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
set_driver_behavior_per_device(struct hl_device * hdev)229*4882a593Smuzhiyun static void set_driver_behavior_per_device(struct hl_device *hdev)
230*4882a593Smuzhiyun {
231*4882a593Smuzhiyun hdev->mmu_enable = 1;
232*4882a593Smuzhiyun hdev->cpu_enable = 1;
233*4882a593Smuzhiyun hdev->fw_loading = 1;
234*4882a593Smuzhiyun hdev->cpu_queues_enable = 1;
235*4882a593Smuzhiyun hdev->heartbeat = 1;
236*4882a593Smuzhiyun hdev->clock_gating_mask = ULONG_MAX;
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun hdev->reset_pcilink = 0;
239*4882a593Smuzhiyun hdev->axi_drain = 0;
240*4882a593Smuzhiyun hdev->sram_scrambler_enable = 1;
241*4882a593Smuzhiyun hdev->dram_scrambler_enable = 1;
242*4882a593Smuzhiyun hdev->bmc_enable = 1;
243*4882a593Smuzhiyun hdev->hard_reset_on_fw_events = 1;
244*4882a593Smuzhiyun }
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun /*
247*4882a593Smuzhiyun * create_hdev - create habanalabs device instance
248*4882a593Smuzhiyun *
249*4882a593Smuzhiyun * @dev: will hold the pointer to the new habanalabs device structure
250*4882a593Smuzhiyun * @pdev: pointer to the pci device
251*4882a593Smuzhiyun * @asic_type: in case of simulator device, which device is it
252*4882a593Smuzhiyun * @minor: in case of simulator device, the minor of the device
253*4882a593Smuzhiyun *
254*4882a593Smuzhiyun * Allocate memory for habanalabs device and initialize basic fields
255*4882a593Smuzhiyun * Identify the ASIC type
256*4882a593Smuzhiyun * Allocate ID (minor) for the device (only for real devices)
257*4882a593Smuzhiyun */
create_hdev(struct hl_device ** dev,struct pci_dev * pdev,enum hl_asic_type asic_type,int minor)258*4882a593Smuzhiyun int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
259*4882a593Smuzhiyun enum hl_asic_type asic_type, int minor)
260*4882a593Smuzhiyun {
261*4882a593Smuzhiyun struct hl_device *hdev;
262*4882a593Smuzhiyun int rc, main_id, ctrl_id = 0;
263*4882a593Smuzhiyun
264*4882a593Smuzhiyun *dev = NULL;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
267*4882a593Smuzhiyun if (!hdev)
268*4882a593Smuzhiyun return -ENOMEM;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun /* First, we must find out which ASIC are we handling. This is needed
271*4882a593Smuzhiyun * to configure the behavior of the driver (kernel parameters)
272*4882a593Smuzhiyun */
273*4882a593Smuzhiyun if (pdev) {
274*4882a593Smuzhiyun hdev->asic_type = get_asic_type(pdev->device);
275*4882a593Smuzhiyun if (hdev->asic_type == ASIC_INVALID) {
276*4882a593Smuzhiyun dev_err(&pdev->dev, "Unsupported ASIC\n");
277*4882a593Smuzhiyun rc = -ENODEV;
278*4882a593Smuzhiyun goto free_hdev;
279*4882a593Smuzhiyun }
280*4882a593Smuzhiyun } else {
281*4882a593Smuzhiyun hdev->asic_type = asic_type;
282*4882a593Smuzhiyun }
283*4882a593Smuzhiyun
284*4882a593Smuzhiyun hdev->major = hl_major;
285*4882a593Smuzhiyun hdev->reset_on_lockup = reset_on_lockup;
286*4882a593Smuzhiyun hdev->pldm = 0;
287*4882a593Smuzhiyun
288*4882a593Smuzhiyun set_driver_behavior_per_device(hdev);
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun if (timeout_locked)
291*4882a593Smuzhiyun hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
292*4882a593Smuzhiyun else
293*4882a593Smuzhiyun hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun hdev->disabled = true;
296*4882a593Smuzhiyun hdev->pdev = pdev; /* can be NULL in case of simulator device */
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun /* Set default DMA mask to 32 bits */
299*4882a593Smuzhiyun hdev->dma_mask = 32;
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun mutex_lock(&hl_devs_idr_lock);
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun /* Always save 2 numbers, 1 for main device and 1 for control.
304*4882a593Smuzhiyun * They must be consecutive
305*4882a593Smuzhiyun */
306*4882a593Smuzhiyun main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
307*4882a593Smuzhiyun GFP_KERNEL);
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun if (main_id >= 0)
310*4882a593Smuzhiyun ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
311*4882a593Smuzhiyun main_id + 2, GFP_KERNEL);
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun mutex_unlock(&hl_devs_idr_lock);
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun if ((main_id < 0) || (ctrl_id < 0)) {
316*4882a593Smuzhiyun if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
317*4882a593Smuzhiyun pr_err("too many devices in the system\n");
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun if (main_id >= 0) {
320*4882a593Smuzhiyun mutex_lock(&hl_devs_idr_lock);
321*4882a593Smuzhiyun idr_remove(&hl_devs_idr, main_id);
322*4882a593Smuzhiyun mutex_unlock(&hl_devs_idr_lock);
323*4882a593Smuzhiyun }
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun rc = -EBUSY;
326*4882a593Smuzhiyun goto free_hdev;
327*4882a593Smuzhiyun }
328*4882a593Smuzhiyun
329*4882a593Smuzhiyun hdev->id = main_id;
330*4882a593Smuzhiyun hdev->id_control = ctrl_id;
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun *dev = hdev;
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun return 0;
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun free_hdev:
337*4882a593Smuzhiyun kfree(hdev);
338*4882a593Smuzhiyun return rc;
339*4882a593Smuzhiyun }
340*4882a593Smuzhiyun
341*4882a593Smuzhiyun /*
342*4882a593Smuzhiyun * destroy_hdev - destroy habanalabs device instance
343*4882a593Smuzhiyun *
344*4882a593Smuzhiyun * @dev: pointer to the habanalabs device structure
345*4882a593Smuzhiyun *
346*4882a593Smuzhiyun */
destroy_hdev(struct hl_device * hdev)347*4882a593Smuzhiyun void destroy_hdev(struct hl_device *hdev)
348*4882a593Smuzhiyun {
349*4882a593Smuzhiyun /* Remove device from the device list */
350*4882a593Smuzhiyun mutex_lock(&hl_devs_idr_lock);
351*4882a593Smuzhiyun idr_remove(&hl_devs_idr, hdev->id);
352*4882a593Smuzhiyun idr_remove(&hl_devs_idr, hdev->id_control);
353*4882a593Smuzhiyun mutex_unlock(&hl_devs_idr_lock);
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun kfree(hdev);
356*4882a593Smuzhiyun }
357*4882a593Smuzhiyun
/* PM suspend callback - delegates to the device-level suspend flow */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	/* Nothing to do without a device; don't fail the system suspend */
	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
371*4882a593Smuzhiyun
/* PM resume callback - delegates to the device-level resume flow */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	/* Nothing to do without a device; don't fail the system resume */
	pr_err("device pointer is NULL in resume\n");
	return 0;
}
385*4882a593Smuzhiyun
386*4882a593Smuzhiyun /*
387*4882a593Smuzhiyun * hl_pci_probe - probe PCI habanalabs devices
388*4882a593Smuzhiyun *
389*4882a593Smuzhiyun * @pdev: pointer to pci device
390*4882a593Smuzhiyun * @id: pointer to pci device id structure
391*4882a593Smuzhiyun *
392*4882a593Smuzhiyun * Standard PCI probe function for habanalabs device.
393*4882a593Smuzhiyun * Create a new habanalabs device and initialize it according to the
394*4882a593Smuzhiyun * device's type
395*4882a593Smuzhiyun */
hl_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)396*4882a593Smuzhiyun static int hl_pci_probe(struct pci_dev *pdev,
397*4882a593Smuzhiyun const struct pci_device_id *id)
398*4882a593Smuzhiyun {
399*4882a593Smuzhiyun struct hl_device *hdev;
400*4882a593Smuzhiyun int rc;
401*4882a593Smuzhiyun
402*4882a593Smuzhiyun dev_info(&pdev->dev, HL_NAME
403*4882a593Smuzhiyun " device found [%04x:%04x] (rev %x)\n",
404*4882a593Smuzhiyun (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
405*4882a593Smuzhiyun
406*4882a593Smuzhiyun rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
407*4882a593Smuzhiyun if (rc)
408*4882a593Smuzhiyun return rc;
409*4882a593Smuzhiyun
410*4882a593Smuzhiyun pci_set_drvdata(pdev, hdev);
411*4882a593Smuzhiyun
412*4882a593Smuzhiyun pci_enable_pcie_error_reporting(pdev);
413*4882a593Smuzhiyun
414*4882a593Smuzhiyun rc = hl_device_init(hdev, hl_class);
415*4882a593Smuzhiyun if (rc) {
416*4882a593Smuzhiyun dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
417*4882a593Smuzhiyun rc = -ENODEV;
418*4882a593Smuzhiyun goto disable_device;
419*4882a593Smuzhiyun }
420*4882a593Smuzhiyun
421*4882a593Smuzhiyun return 0;
422*4882a593Smuzhiyun
423*4882a593Smuzhiyun disable_device:
424*4882a593Smuzhiyun pci_disable_pcie_error_reporting(pdev);
425*4882a593Smuzhiyun pci_set_drvdata(pdev, NULL);
426*4882a593Smuzhiyun destroy_hdev(hdev);
427*4882a593Smuzhiyun
428*4882a593Smuzhiyun return rc;
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun /*
432*4882a593Smuzhiyun * hl_pci_remove - remove PCI habanalabs devices
433*4882a593Smuzhiyun *
434*4882a593Smuzhiyun * @pdev: pointer to pci device
435*4882a593Smuzhiyun *
436*4882a593Smuzhiyun * Standard PCI remove function for habanalabs device
437*4882a593Smuzhiyun */
hl_pci_remove(struct pci_dev * pdev)438*4882a593Smuzhiyun static void hl_pci_remove(struct pci_dev *pdev)
439*4882a593Smuzhiyun {
440*4882a593Smuzhiyun struct hl_device *hdev;
441*4882a593Smuzhiyun
442*4882a593Smuzhiyun hdev = pci_get_drvdata(pdev);
443*4882a593Smuzhiyun if (!hdev)
444*4882a593Smuzhiyun return;
445*4882a593Smuzhiyun
446*4882a593Smuzhiyun hl_device_fini(hdev);
447*4882a593Smuzhiyun pci_disable_pcie_error_reporting(pdev);
448*4882a593Smuzhiyun pci_set_drvdata(pdev, NULL);
449*4882a593Smuzhiyun destroy_hdev(hdev);
450*4882a593Smuzhiyun }
451*4882a593Smuzhiyun
452*4882a593Smuzhiyun /**
453*4882a593Smuzhiyun * hl_pci_err_detected - a PCI bus error detected on this device
454*4882a593Smuzhiyun *
455*4882a593Smuzhiyun * @pdev: pointer to pci device
456*4882a593Smuzhiyun * @state: PCI error type
457*4882a593Smuzhiyun *
458*4882a593Smuzhiyun * Called by the PCI subsystem whenever a non-correctable
459*4882a593Smuzhiyun * PCI bus error is detected
460*4882a593Smuzhiyun */
461*4882a593Smuzhiyun static pci_ers_result_t
hl_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t state)462*4882a593Smuzhiyun hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
463*4882a593Smuzhiyun {
464*4882a593Smuzhiyun struct hl_device *hdev = pci_get_drvdata(pdev);
465*4882a593Smuzhiyun enum pci_ers_result result;
466*4882a593Smuzhiyun
467*4882a593Smuzhiyun switch (state) {
468*4882a593Smuzhiyun case pci_channel_io_normal:
469*4882a593Smuzhiyun return PCI_ERS_RESULT_CAN_RECOVER;
470*4882a593Smuzhiyun
471*4882a593Smuzhiyun case pci_channel_io_frozen:
472*4882a593Smuzhiyun dev_warn(hdev->dev, "frozen state error detected\n");
473*4882a593Smuzhiyun result = PCI_ERS_RESULT_NEED_RESET;
474*4882a593Smuzhiyun break;
475*4882a593Smuzhiyun
476*4882a593Smuzhiyun case pci_channel_io_perm_failure:
477*4882a593Smuzhiyun dev_warn(hdev->dev, "failure state error detected\n");
478*4882a593Smuzhiyun result = PCI_ERS_RESULT_DISCONNECT;
479*4882a593Smuzhiyun break;
480*4882a593Smuzhiyun
481*4882a593Smuzhiyun default:
482*4882a593Smuzhiyun result = PCI_ERS_RESULT_NONE;
483*4882a593Smuzhiyun }
484*4882a593Smuzhiyun
485*4882a593Smuzhiyun hdev->asic_funcs->halt_engines(hdev, true);
486*4882a593Smuzhiyun
487*4882a593Smuzhiyun return result;
488*4882a593Smuzhiyun }
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun /**
491*4882a593Smuzhiyun * hl_pci_err_resume - resume after a PCI slot reset
492*4882a593Smuzhiyun *
493*4882a593Smuzhiyun * @pdev: pointer to pci device
494*4882a593Smuzhiyun *
495*4882a593Smuzhiyun */
hl_pci_err_resume(struct pci_dev * pdev)496*4882a593Smuzhiyun static void hl_pci_err_resume(struct pci_dev *pdev)
497*4882a593Smuzhiyun {
498*4882a593Smuzhiyun struct hl_device *hdev = pci_get_drvdata(pdev);
499*4882a593Smuzhiyun
500*4882a593Smuzhiyun dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
501*4882a593Smuzhiyun hl_device_resume(hdev);
502*4882a593Smuzhiyun }
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun /**
505*4882a593Smuzhiyun * hl_pci_err_slot_reset - a PCI slot reset has just happened
506*4882a593Smuzhiyun *
507*4882a593Smuzhiyun * @pdev: pointer to pci device
508*4882a593Smuzhiyun *
509*4882a593Smuzhiyun * Determine if the driver can recover from the PCI slot reset
510*4882a593Smuzhiyun */
hl_pci_err_slot_reset(struct pci_dev * pdev)511*4882a593Smuzhiyun static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
512*4882a593Smuzhiyun {
513*4882a593Smuzhiyun return PCI_ERS_RESULT_RECOVERED;
514*4882a593Smuzhiyun }
515*4882a593Smuzhiyun
/* Power-management callbacks for system suspend/resume */
static const struct dev_pm_ops hl_pm_ops = {
	.suspend = hl_pmops_suspend,
	.resume = hl_pmops_resume,
};

/* PCI AER (error recovery) callbacks */
static const struct pci_error_handlers hl_pci_err_handler = {
	.error_detected = hl_pci_err_detected,
	.slot_reset = hl_pci_err_slot_reset,
	.resume = hl_pci_err_resume,
};

static struct pci_driver hl_pci_driver = {
	.name = HL_NAME,
	.id_table = ids,
	.probe = hl_pci_probe,
	.remove = hl_pci_remove,
	.shutdown = hl_pci_remove,	/* same teardown on shutdown as on remove */
	.driver.pm = &hl_pm_ops,
	.err_handler = &hl_pci_err_handler,
};
536*4882a593Smuzhiyun
/*
 * hl_init - Initialize the habanalabs kernel driver
 *
 * Init order: char-device region -> device class -> debugfs ->
 * PCI driver registration. The error unwind below releases them
 * in reverse order.
 */
static int __init hl_init(void)
{
	int rc;
	dev_t dev;

	pr_info("loading driver\n");

	/* Reserve a major with room for all minors (main + control pairs) */
	rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
	if (rc < 0) {
		pr_err("unable to get major\n");
		return rc;
	}

	hl_major = MAJOR(dev);

	hl_class = class_create(THIS_MODULE, HL_NAME);
	if (IS_ERR(hl_class)) {
		pr_err("failed to allocate class\n");
		rc = PTR_ERR(hl_class);
		goto remove_major;
	}

	hl_debugfs_init();

	/* Registration may probe devices immediately, so it goes last */
	rc = pci_register_driver(&hl_pci_driver);
	if (rc) {
		pr_err("failed to register pci device\n");
		goto remove_debugfs;
	}

	pr_debug("driver loaded\n");

	return 0;

remove_debugfs:
	hl_debugfs_fini();
	class_destroy(hl_class);
remove_major:
	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
	return rc;
}
581*4882a593Smuzhiyun
/*
 * hl_exit - Release all resources of the habanalabs kernel driver
 *
 * Teardown is the reverse of hl_init(): unregister the PCI driver
 * first so all devices are removed before shared resources go away.
 */
static void __exit hl_exit(void)
{
	pci_unregister_driver(&hl_pci_driver);

	/*
	 * Removing debugfs must be after all devices or simulator devices
	 * have been removed because otherwise we get a bug in the
	 * debugfs module for referencing NULL objects
	 */
	hl_debugfs_fini();

	class_destroy(hl_class);
	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);

	/* All devices are gone at this point, so the IDR must be empty */
	idr_destroy(&hl_devs_idr);

	pr_debug("driver removed\n");
}
603*4882a593Smuzhiyun
/* Module entry and exit points */
module_init(hl_init);
module_exit(hl_exit);
606