1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Copyright (c) 2019 HiSilicon Limited. */
3*4882a593Smuzhiyun #include <linux/dma-mapping.h>
4*4882a593Smuzhiyun #include <linux/module.h>
5*4882a593Smuzhiyun #include <linux/slab.h>
6*4882a593Smuzhiyun #include "qm.h"
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #define HISI_ACC_SGL_SGE_NR_MIN 1
9*4882a593Smuzhiyun #define HISI_ACC_SGL_NR_MAX 256
10*4882a593Smuzhiyun #define HISI_ACC_SGL_ALIGN_SIZE 64
11*4882a593Smuzhiyun #define HISI_ACC_MEM_BLOCK_NR 5
12*4882a593Smuzhiyun
/*
 * One hardware scatter-gather entry (SGE) in the layout the accelerator
 * consumes. Field order and padding are hardware-defined; do not reorder.
 */
struct acc_hw_sge {
	dma_addr_t buf;		/* DMA address of the data buffer */
	void *page_ctrl;	/* hardware-defined field; not written by this file */
	__le32 len;		/* byte length of the buffer at @buf */
	__le32 pad;
	__le32 pad0;
	__le32 pad1;
};
21*4882a593Smuzhiyun
/* use default sgl head size 64B */
struct hisi_acc_hw_sgl {
	dma_addr_t next_dma;		/* DMA address of the next sgl in a chain */
	__le16 entry_sum_in_chain;	/* total valid SGEs across the chain */
	__le16 entry_sum_in_sgl;	/* valid SGEs filled in this sgl */
	__le16 entry_length_in_sgl;	/* SGE slots available in this sgl */
	__le16 pad0;
	__le64 pad1[5];
	struct hisi_acc_hw_sgl *next;	/* CPU-side pointer to the next sgl */
	struct acc_hw_sge sge_entries[]; /* flexible array of hardware SGEs */
} __aligned(1);
33*4882a593Smuzhiyun
/*
 * Pool of pre-allocated hw sgls, carved out of up to HISI_ACC_MEM_BLOCK_NR
 * DMA-coherent memory blocks.
 */
struct hisi_acc_sgl_pool {
	struct mem_block {
		struct hisi_acc_hw_sgl *sgl;	/* CPU address of the block */
		dma_addr_t sgl_dma;		/* DMA address of the block */
		size_t size;			/* block size in bytes */
	} mem_block[HISI_ACC_MEM_BLOCK_NR];
	u32 sgl_num_per_block;	/* sgls that fit in one full block */
	u32 block_num;		/* blocks actually allocated (incl. remainder) */
	u32 count;		/* total sgls in the pool */
	u32 sge_nr;		/* SGE slots per sgl */
	size_t sgl_size;	/* byte size of one sgl (header + SGEs) */
};
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun /**
48*4882a593Smuzhiyun * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
49*4882a593Smuzhiyun * @dev: The device which hw sgl pool belongs to.
50*4882a593Smuzhiyun * @count: Count of hisi_acc_hw_sgl in pool.
51*4882a593Smuzhiyun * @sge_nr: The count of sge in hw_sgl
52*4882a593Smuzhiyun *
53*4882a593Smuzhiyun * This function creates a hw sgl pool, after this user can get hw sgl memory
54*4882a593Smuzhiyun * from it.
55*4882a593Smuzhiyun */
hisi_acc_create_sgl_pool(struct device * dev,u32 count,u32 sge_nr)56*4882a593Smuzhiyun struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
57*4882a593Smuzhiyun u32 count, u32 sge_nr)
58*4882a593Smuzhiyun {
59*4882a593Smuzhiyun u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl = 0;
60*4882a593Smuzhiyun struct hisi_acc_sgl_pool *pool;
61*4882a593Smuzhiyun struct mem_block *block;
62*4882a593Smuzhiyun u32 i, j;
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
65*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
68*4882a593Smuzhiyun sizeof(struct hisi_acc_hw_sgl);
69*4882a593Smuzhiyun block_size = 1 << (PAGE_SHIFT + MAX_ORDER <= 32 ?
70*4882a593Smuzhiyun PAGE_SHIFT + MAX_ORDER - 1 : 31);
71*4882a593Smuzhiyun sgl_num_per_block = block_size / sgl_size;
72*4882a593Smuzhiyun block_num = count / sgl_num_per_block;
73*4882a593Smuzhiyun remain_sgl = count % sgl_num_per_block;
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
76*4882a593Smuzhiyun (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
77*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun pool = kzalloc(sizeof(*pool), GFP_KERNEL);
80*4882a593Smuzhiyun if (!pool)
81*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
82*4882a593Smuzhiyun block = pool->mem_block;
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun for (i = 0; i < block_num; i++) {
85*4882a593Smuzhiyun block[i].sgl = dma_alloc_coherent(dev, block_size,
86*4882a593Smuzhiyun &block[i].sgl_dma,
87*4882a593Smuzhiyun GFP_KERNEL);
88*4882a593Smuzhiyun if (!block[i].sgl)
89*4882a593Smuzhiyun goto err_free_mem;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun block[i].size = block_size;
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun if (remain_sgl > 0) {
95*4882a593Smuzhiyun block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
96*4882a593Smuzhiyun &block[i].sgl_dma,
97*4882a593Smuzhiyun GFP_KERNEL);
98*4882a593Smuzhiyun if (!block[i].sgl)
99*4882a593Smuzhiyun goto err_free_mem;
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun block[i].size = remain_sgl * sgl_size;
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun pool->sgl_num_per_block = sgl_num_per_block;
105*4882a593Smuzhiyun pool->block_num = remain_sgl ? block_num + 1 : block_num;
106*4882a593Smuzhiyun pool->count = count;
107*4882a593Smuzhiyun pool->sgl_size = sgl_size;
108*4882a593Smuzhiyun pool->sge_nr = sge_nr;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun return pool;
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun err_free_mem:
113*4882a593Smuzhiyun for (j = 0; j < i; j++) {
114*4882a593Smuzhiyun dma_free_coherent(dev, block_size, block[j].sgl,
115*4882a593Smuzhiyun block[j].sgl_dma);
116*4882a593Smuzhiyun memset(block + j, 0, sizeof(*block));
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun kfree(pool);
119*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
120*4882a593Smuzhiyun }
121*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun /**
124*4882a593Smuzhiyun * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
125*4882a593Smuzhiyun * @dev: The device which hw sgl pool belongs to.
126*4882a593Smuzhiyun * @pool: Pointer of pool.
127*4882a593Smuzhiyun *
128*4882a593Smuzhiyun * This function frees memory of a hw sgl pool.
129*4882a593Smuzhiyun */
hisi_acc_free_sgl_pool(struct device * dev,struct hisi_acc_sgl_pool * pool)130*4882a593Smuzhiyun void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
131*4882a593Smuzhiyun {
132*4882a593Smuzhiyun struct mem_block *block;
133*4882a593Smuzhiyun int i;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun if (!dev || !pool)
136*4882a593Smuzhiyun return;
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun block = pool->mem_block;
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun for (i = 0; i < pool->block_num; i++)
141*4882a593Smuzhiyun dma_free_coherent(dev, block[i].size, block[i].sgl,
142*4882a593Smuzhiyun block[i].sgl_dma);
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun kfree(pool);
145*4882a593Smuzhiyun }
146*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
147*4882a593Smuzhiyun
/*
 * Look up the @index-th hw sgl in @pool.
 *
 * Stores the sgl's DMA address in *@hw_sgl_dma and returns its CPU
 * address, or ERR_PTR(-EINVAL) on bad arguments.
 */
static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
					   u32 index, dma_addr_t *hw_sgl_dma)
{
	struct mem_block *blk;
	u32 blk_idx, sgl_slot;
	size_t byte_off;

	if (!pool || !hw_sgl_dma || index >= pool->count)
		return ERR_PTR(-EINVAL);

	/* map the flat index onto (block, slot-within-block) */
	blk_idx = index / pool->sgl_num_per_block;
	sgl_slot = index % pool->sgl_num_per_block;
	blk = &pool->mem_block[blk_idx];
	byte_off = pool->sgl_size * sgl_slot;

	*hw_sgl_dma = blk->sgl_dma + byte_off;
	return (void *)blk->sgl + byte_off;
}
164*4882a593Smuzhiyun
sg_map_to_hw_sg(struct scatterlist * sgl,struct acc_hw_sge * hw_sge)165*4882a593Smuzhiyun static void sg_map_to_hw_sg(struct scatterlist *sgl,
166*4882a593Smuzhiyun struct acc_hw_sge *hw_sge)
167*4882a593Smuzhiyun {
168*4882a593Smuzhiyun hw_sge->buf = sg_dma_address(sgl);
169*4882a593Smuzhiyun hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
170*4882a593Smuzhiyun }
171*4882a593Smuzhiyun
inc_hw_sgl_sge(struct hisi_acc_hw_sgl * hw_sgl)172*4882a593Smuzhiyun static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
173*4882a593Smuzhiyun {
174*4882a593Smuzhiyun u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun var++;
177*4882a593Smuzhiyun hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun
/* Record the total valid SGE count for the whole chain (one sgl here). */
static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
{
	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
}
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun /**
186*4882a593Smuzhiyun * hisi_acc_sg_buf_map_to_hw_sgl - Map a scatterlist to a hw sgl.
187*4882a593Smuzhiyun * @dev: The device which hw sgl belongs to.
188*4882a593Smuzhiyun * @sgl: Scatterlist which will be mapped to hw sgl.
189*4882a593Smuzhiyun * @pool: Pool which hw sgl memory will be allocated in.
190*4882a593Smuzhiyun * @index: Index of hisi_acc_hw_sgl in pool.
191*4882a593Smuzhiyun * @hw_sgl_dma: The dma address of allocated hw sgl.
192*4882a593Smuzhiyun *
193*4882a593Smuzhiyun * This function builds hw sgl according input sgl, user can use hw_sgl_dma
194*4882a593Smuzhiyun * as src/dst in its BD. Only support single hw sgl currently.
195*4882a593Smuzhiyun */
196*4882a593Smuzhiyun struct hisi_acc_hw_sgl *
hisi_acc_sg_buf_map_to_hw_sgl(struct device * dev,struct scatterlist * sgl,struct hisi_acc_sgl_pool * pool,u32 index,dma_addr_t * hw_sgl_dma)197*4882a593Smuzhiyun hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
198*4882a593Smuzhiyun struct scatterlist *sgl,
199*4882a593Smuzhiyun struct hisi_acc_sgl_pool *pool,
200*4882a593Smuzhiyun u32 index, dma_addr_t *hw_sgl_dma)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun struct hisi_acc_hw_sgl *curr_hw_sgl;
203*4882a593Smuzhiyun dma_addr_t curr_sgl_dma = 0;
204*4882a593Smuzhiyun struct acc_hw_sge *curr_hw_sge;
205*4882a593Smuzhiyun struct scatterlist *sg;
206*4882a593Smuzhiyun int i, sg_n, sg_n_mapped;
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun if (!dev || !sgl || !pool || !hw_sgl_dma)
209*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun sg_n = sg_nents(sgl);
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
214*4882a593Smuzhiyun if (!sg_n_mapped)
215*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun if (sg_n_mapped > pool->sge_nr) {
218*4882a593Smuzhiyun dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
219*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
220*4882a593Smuzhiyun }
221*4882a593Smuzhiyun
222*4882a593Smuzhiyun curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
223*4882a593Smuzhiyun if (IS_ERR(curr_hw_sgl)) {
224*4882a593Smuzhiyun dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
225*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
226*4882a593Smuzhiyun
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
229*4882a593Smuzhiyun curr_hw_sge = curr_hw_sgl->sge_entries;
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun for_each_sg(sgl, sg, sg_n_mapped, i) {
232*4882a593Smuzhiyun sg_map_to_hw_sg(sg, curr_hw_sge);
233*4882a593Smuzhiyun inc_hw_sgl_sge(curr_hw_sgl);
234*4882a593Smuzhiyun curr_hw_sge++;
235*4882a593Smuzhiyun }
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
238*4882a593Smuzhiyun *hw_sgl_dma = curr_sgl_dma;
239*4882a593Smuzhiyun
240*4882a593Smuzhiyun return curr_hw_sgl;
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun /**
245*4882a593Smuzhiyun * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
246*4882a593Smuzhiyun * @dev: The device which hw sgl belongs to.
247*4882a593Smuzhiyun * @sgl: Related scatterlist.
248*4882a593Smuzhiyun * @hw_sgl: Virtual address of hw sgl.
249*4882a593Smuzhiyun * @hw_sgl_dma: DMA address of hw sgl.
250*4882a593Smuzhiyun * @pool: Pool which hw sgl is allocated in.
251*4882a593Smuzhiyun *
252*4882a593Smuzhiyun * This function unmaps allocated hw sgl.
253*4882a593Smuzhiyun */
hisi_acc_sg_buf_unmap(struct device * dev,struct scatterlist * sgl,struct hisi_acc_hw_sgl * hw_sgl)254*4882a593Smuzhiyun void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
255*4882a593Smuzhiyun struct hisi_acc_hw_sgl *hw_sgl)
256*4882a593Smuzhiyun {
257*4882a593Smuzhiyun if (!dev || !sgl || !hw_sgl)
258*4882a593Smuzhiyun return;
259*4882a593Smuzhiyun
260*4882a593Smuzhiyun dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun hw_sgl->entry_sum_in_chain = 0;
263*4882a593Smuzhiyun hw_sgl->entry_sum_in_sgl = 0;
264*4882a593Smuzhiyun hw_sgl->entry_length_in_sgl = 0;
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
267