1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun /*
4*4882a593Smuzhiyun * Copyright 2020 IBM Corp.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Author: Bulent Abali <abali@us.ibm.com>
7*4882a593Smuzhiyun *
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun #include <stdio.h>
10*4882a593Smuzhiyun #include <stdlib.h>
11*4882a593Smuzhiyun #include <string.h>
12*4882a593Smuzhiyun #include <unistd.h>
13*4882a593Smuzhiyun #include <stdint.h>
14*4882a593Smuzhiyun #include <sys/types.h>
15*4882a593Smuzhiyun #include <sys/stat.h>
16*4882a593Smuzhiyun #include <sys/time.h>
17*4882a593Smuzhiyun #include <sys/fcntl.h>
18*4882a593Smuzhiyun #include <sys/mman.h>
19*4882a593Smuzhiyun #include <endian.h>
20*4882a593Smuzhiyun #include <bits/endian.h>
21*4882a593Smuzhiyun #include <sys/ioctl.h>
22*4882a593Smuzhiyun #include <assert.h>
23*4882a593Smuzhiyun #include <errno.h>
24*4882a593Smuzhiyun #include <signal.h>
25*4882a593Smuzhiyun #include "vas-api.h"
26*4882a593Smuzhiyun #include "nx.h"
27*4882a593Smuzhiyun #include "copy-paste.h"
28*4882a593Smuzhiyun #include "nxu.h"
29*4882a593Smuzhiyun #include "nx_dbg.h"
30*4882a593Smuzhiyun #include <sys/platform/ppc.h>
31*4882a593Smuzhiyun
/* Placeholder that expands to nothing. */
#define barrier()

/*
 * Execute the PowerPC "sync" instruction.  The "memory" clobber also keeps
 * the compiler from moving memory accesses across this point.
 */
#define hwsync() ({ asm volatile("sync" ::: "memory"); })

/*
 * Hardware-thread priority helpers used around the CSB polling loop.
 * Building with NX_NO_CPU_PRI defined turns both into no-ops; otherwise
 * each one issues a special register-to-itself "or" instruction.
 */
#ifdef NX_NO_CPU_PRI
#define cpu_pri_default()
#define cpu_pri_low()
#else
#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); })
#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); })
#endif
42*4882a593Smuzhiyun
/*
 * Address reported by the most recent fault signal.  It is stored by
 * nxu_sigsegv_handler(), checked by nx_wait_for_csb() while polling and
 * reset to 0 by nxu_run_job() once the page has been touched.
 */
void *nx_fault_storage_address;

/* Per-open state returned to callers as an opaque handle. */
struct nx_handle {
	int fd;			/* descriptor of the opened device node */
	int function;		/* function code passed to nx_function_begin() */
	void *paste_addr;	/* mapped window base plus 0x400 */
};
50*4882a593Smuzhiyun
open_device_nodes(char * devname,int pri,struct nx_handle * handle)51*4882a593Smuzhiyun static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
52*4882a593Smuzhiyun {
53*4882a593Smuzhiyun int rc, fd;
54*4882a593Smuzhiyun void *addr;
55*4882a593Smuzhiyun struct vas_tx_win_open_attr txattr;
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun fd = open(devname, O_RDWR);
58*4882a593Smuzhiyun if (fd < 0) {
59*4882a593Smuzhiyun fprintf(stderr, " open device name %s\n", devname);
60*4882a593Smuzhiyun return -errno;
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun memset(&txattr, 0, sizeof(txattr));
64*4882a593Smuzhiyun txattr.version = 1;
65*4882a593Smuzhiyun txattr.vas_id = pri;
66*4882a593Smuzhiyun rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
67*4882a593Smuzhiyun if (rc < 0) {
68*4882a593Smuzhiyun fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
69*4882a593Smuzhiyun rc = -errno;
70*4882a593Smuzhiyun goto out;
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
74*4882a593Smuzhiyun if (addr == MAP_FAILED) {
75*4882a593Smuzhiyun fprintf(stderr, "mmap() failed, errno %d\n", errno);
76*4882a593Smuzhiyun rc = -errno;
77*4882a593Smuzhiyun goto out;
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun handle->fd = fd;
80*4882a593Smuzhiyun handle->paste_addr = (void *)((char *)addr + 0x400);
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun rc = 0;
83*4882a593Smuzhiyun out:
84*4882a593Smuzhiyun close(fd);
85*4882a593Smuzhiyun return rc;
86*4882a593Smuzhiyun }
87*4882a593Smuzhiyun
nx_function_begin(int function,int pri)88*4882a593Smuzhiyun void *nx_function_begin(int function, int pri)
89*4882a593Smuzhiyun {
90*4882a593Smuzhiyun int rc;
91*4882a593Smuzhiyun char *devname = "/dev/crypto/nx-gzip";
92*4882a593Smuzhiyun struct nx_handle *nxhandle;
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun if (function != NX_FUNC_COMP_GZIP) {
95*4882a593Smuzhiyun errno = EINVAL;
96*4882a593Smuzhiyun fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
97*4882a593Smuzhiyun return NULL;
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun nxhandle = malloc(sizeof(*nxhandle));
102*4882a593Smuzhiyun if (!nxhandle) {
103*4882a593Smuzhiyun errno = ENOMEM;
104*4882a593Smuzhiyun fprintf(stderr, " No memory\n");
105*4882a593Smuzhiyun return NULL;
106*4882a593Smuzhiyun }
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun nxhandle->function = function;
109*4882a593Smuzhiyun rc = open_device_nodes(devname, pri, nxhandle);
110*4882a593Smuzhiyun if (rc < 0) {
111*4882a593Smuzhiyun errno = -rc;
112*4882a593Smuzhiyun fprintf(stderr, " open_device_nodes failed\n");
113*4882a593Smuzhiyun return NULL;
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun return nxhandle;
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
nx_function_end(void * handle)119*4882a593Smuzhiyun int nx_function_end(void *handle)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun int rc = 0;
122*4882a593Smuzhiyun struct nx_handle *nxhandle = handle;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun rc = munmap(nxhandle->paste_addr - 0x400, 4096);
125*4882a593Smuzhiyun if (rc < 0) {
126*4882a593Smuzhiyun fprintf(stderr, "munmap() failed, errno %d\n", errno);
127*4882a593Smuzhiyun return rc;
128*4882a593Smuzhiyun }
129*4882a593Smuzhiyun close(nxhandle->fd);
130*4882a593Smuzhiyun free(nxhandle);
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun return rc;
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun
nx_wait_for_csb(struct nx_gzip_crb_cpb_t * cmdp)135*4882a593Smuzhiyun static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun long poll = 0;
138*4882a593Smuzhiyun uint64_t t;
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun /* Save power and let other threads use the h/w. top may show
141*4882a593Smuzhiyun * 100% but only because OS doesn't know we slowed the this
142*4882a593Smuzhiyun * h/w thread while polling. We're letting other threads have
143*4882a593Smuzhiyun * higher throughput on the core.
144*4882a593Smuzhiyun */
145*4882a593Smuzhiyun cpu_pri_low();
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun #define CSB_MAX_POLL 200000000UL
148*4882a593Smuzhiyun #define USLEEP_TH 300000UL
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun t = __ppc_get_timebase();
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun while (getnn(cmdp->crb.csb, csb_v) == 0) {
153*4882a593Smuzhiyun ++poll;
154*4882a593Smuzhiyun hwsync();
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun cpu_pri_low();
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /* usleep(0) takes around 29000 ticks ~60 us.
159*4882a593Smuzhiyun * 300000 is spinning for about 600 us then
160*4882a593Smuzhiyun * start sleeping.
161*4882a593Smuzhiyun */
162*4882a593Smuzhiyun if ((__ppc_get_timebase() - t) > USLEEP_TH) {
163*4882a593Smuzhiyun cpu_pri_default();
164*4882a593Smuzhiyun usleep(1);
165*4882a593Smuzhiyun }
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun if (poll > CSB_MAX_POLL)
168*4882a593Smuzhiyun break;
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun /* Fault address from signal handler */
171*4882a593Smuzhiyun if (nx_fault_storage_address) {
172*4882a593Smuzhiyun cpu_pri_default();
173*4882a593Smuzhiyun return -EAGAIN;
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun }
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun cpu_pri_default();
179*4882a593Smuzhiyun
180*4882a593Smuzhiyun /* hw has updated csb and output buffer */
181*4882a593Smuzhiyun hwsync();
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun /* Check CSB flags. */
184*4882a593Smuzhiyun if (getnn(cmdp->crb.csb, csb_v) == 0) {
185*4882a593Smuzhiyun fprintf(stderr, "CSB still not valid after %d polls.\n",
186*4882a593Smuzhiyun (int) poll);
187*4882a593Smuzhiyun prt_err("CSB still not valid after %d polls, giving up.\n",
188*4882a593Smuzhiyun (int) poll);
189*4882a593Smuzhiyun return -ETIMEDOUT;
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun return 0;
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun
nxu_run_job(struct nx_gzip_crb_cpb_t * cmdp,void * handle)195*4882a593Smuzhiyun static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
196*4882a593Smuzhiyun {
197*4882a593Smuzhiyun int i, ret, retries;
198*4882a593Smuzhiyun struct nx_handle *nxhandle = handle;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun assert(handle != NULL);
201*4882a593Smuzhiyun i = 0;
202*4882a593Smuzhiyun retries = 5000;
203*4882a593Smuzhiyun while (i++ < retries) {
204*4882a593Smuzhiyun hwsync();
205*4882a593Smuzhiyun vas_copy(&cmdp->crb, 0);
206*4882a593Smuzhiyun ret = vas_paste(nxhandle->paste_addr, 0);
207*4882a593Smuzhiyun hwsync();
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
210*4882a593Smuzhiyun i, retries, ret));
211*4882a593Smuzhiyun
212*4882a593Smuzhiyun if ((ret == 2) || (ret == 3)) {
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun ret = nx_wait_for_csb(cmdp);
215*4882a593Smuzhiyun if (!ret) {
216*4882a593Smuzhiyun goto out;
217*4882a593Smuzhiyun } else if (ret == -EAGAIN) {
218*4882a593Smuzhiyun long x;
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun prt_err("Touching address %p, 0x%lx\n",
221*4882a593Smuzhiyun nx_fault_storage_address,
222*4882a593Smuzhiyun *(long *) nx_fault_storage_address);
223*4882a593Smuzhiyun x = *(long *) nx_fault_storage_address;
224*4882a593Smuzhiyun *(long *) nx_fault_storage_address = x;
225*4882a593Smuzhiyun nx_fault_storage_address = 0;
226*4882a593Smuzhiyun continue;
227*4882a593Smuzhiyun } else {
228*4882a593Smuzhiyun prt_err("wait_for_csb() returns %d\n", ret);
229*4882a593Smuzhiyun break;
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun } else {
232*4882a593Smuzhiyun if (i < 10) {
233*4882a593Smuzhiyun /* spin for few ticks */
234*4882a593Smuzhiyun #define SPIN_TH 500UL
235*4882a593Smuzhiyun uint64_t fail_spin;
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun fail_spin = __ppc_get_timebase();
238*4882a593Smuzhiyun while ((__ppc_get_timebase() - fail_spin) <
239*4882a593Smuzhiyun SPIN_TH)
240*4882a593Smuzhiyun ;
241*4882a593Smuzhiyun } else {
242*4882a593Smuzhiyun /* sleep */
243*4882a593Smuzhiyun unsigned int pr = 0;
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun if (pr++ % 100 == 0) {
246*4882a593Smuzhiyun prt_err("Paste attempt %d/", i);
247*4882a593Smuzhiyun prt_err("%d, failed pid= %d\n", retries,
248*4882a593Smuzhiyun getpid());
249*4882a593Smuzhiyun }
250*4882a593Smuzhiyun usleep(1);
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun continue;
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun
256*4882a593Smuzhiyun out:
257*4882a593Smuzhiyun cpu_pri_default();
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun return ret;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun
nxu_submit_job(struct nx_gzip_crb_cpb_t * cmdp,void * handle)262*4882a593Smuzhiyun int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun int cc;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun cc = nxu_run_job(cmdp, handle);
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun if (!cc)
269*4882a593Smuzhiyun cc = getnn(cmdp->crb.csb, csb_cc); /* CC Table 6-8 */
270*4882a593Smuzhiyun
271*4882a593Smuzhiyun return cc;
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun
/*
 * Signal handler with the three-argument (siginfo) signature.
 *
 * Logs the signal details and stores the reported address in
 * nx_fault_storage_address, where nx_wait_for_csb() picks it up.
 *
 * @sig:  signal number
 * @info: signal details; si_code and si_addr are read
 * @ctx:  unused
 *
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; confirm this is acceptable for the contexts in which this
 * handler can run.
 */
void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
{
	void *fault_addr = info->si_addr;

	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
		sig, info->si_code, fault_addr);

	nx_fault_storage_address = fault_addr;
}
282*4882a593Smuzhiyun
/*
 * Fault in pages prior to NX job submission by reading (and optionally
 * writing back) one byte per page.
 *
 * @buf:      start of the buffer
 * @buf_len:  buffer length in bytes, must be > 0
 * @page_len: distance in bytes between touched locations, must be > 0
 * @wr:       non-zero to also write each byte back.  wr=1 may be required
 *            to touch writeable pages, since a read that hits the system
 *            zero page does not fault the page in as intended.  Typically
 *            set wr=1 for NX target pages and wr=0 for NX source pages.
 *
 * The bytes at offsets 0, page_len, 2 * page_len, ... below buf_len - 1
 * are touched, followed by the last byte of the buffer, which covers
 * small buffers and a tail that lies in another page.
 *
 * Returns 0 on success, -1 when @buf is NULL or when @buf_len or
 * @page_len is not positive.
 */
int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
{
	char *base = buf;
	volatile char t;
	long last, off;

	/*
	 * Validate before doing any pointer arithmetic.  A non-positive
	 * page_len would never advance (or would walk backwards).
	 */
	if (buf == NULL || buf_len <= 0 || page_len <= 0)
		return -1;

	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
			(void *)(base + buf_len), buf_len, wr));

	last = buf_len - 1;

	for (off = 0; ; off += page_len) {
		t = base[off];
		if (wr)
			base[off] = t;
		/*
		 * Stop when the next step would reach or pass the last byte.
		 * Written as a subtraction so that the offset never leaves
		 * the buffer and the sum cannot overflow.
		 */
		if (page_len >= last - off)
			break;
	}

	/* When buf_len is small or the buffer tail is in another page */
	t = base[last];
	if (wr)
		base[last] = t;

	return 0;
}
317