xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun 
3*4882a593Smuzhiyun /* P9 gunzip sample code for demonstrating the P9 NX hardware
4*4882a593Smuzhiyun  * interface.  Not intended for production use or for performance or
5*4882a593Smuzhiyun  * compression ratio measurements.  Note also that /dev/crypto/gzip,
6*4882a593Smuzhiyun  * VAS and skiboot support are required
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * Copyright 2020 IBM Corp.
9*4882a593Smuzhiyun  *
10*4882a593Smuzhiyun  * Author: Bulent Abali <abali@us.ibm.com>
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  * https://github.com/libnxz/power-gzip for zlib api and other utils
13*4882a593Smuzhiyun  * Definitions of acronyms used here.  See
14*4882a593Smuzhiyun  * P9 NX Gzip Accelerator User's Manual for details:
15*4882a593Smuzhiyun  * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
16*4882a593Smuzhiyun  *
17*4882a593Smuzhiyun  * adler/crc: 32 bit checksums appended to stream tail
18*4882a593Smuzhiyun  * ce:       completion extension
19*4882a593Smuzhiyun  * cpb:      coprocessor parameter block (metadata)
20*4882a593Smuzhiyun  * crb:      coprocessor request block (command)
21*4882a593Smuzhiyun  * csb:      coprocessor status block (status)
22*4882a593Smuzhiyun  * dht:      dynamic huffman table
23*4882a593Smuzhiyun  * dde:      data descriptor element (address, length)
24*4882a593Smuzhiyun  * ddl:      list of ddes
25*4882a593Smuzhiyun  * dh/fh:    dynamic and fixed huffman types
26*4882a593Smuzhiyun  * fc:       coprocessor function code
27*4882a593Smuzhiyun  * histlen:  history/dictionary length
28*4882a593Smuzhiyun  * history:  sliding window of up to 32KB of data
29*4882a593Smuzhiyun  * lzcount:  Deflate LZ symbol counts
30*4882a593Smuzhiyun  * rembytecnt: remaining byte count
31*4882a593Smuzhiyun  * sfbt:     source final block type; last block's type during decomp
32*4882a593Smuzhiyun  * spbc:     source processed byte count
33*4882a593Smuzhiyun  * subc:     source unprocessed bit count
34*4882a593Smuzhiyun  * tebc:     target ending bit count; valid bits in the last byte
35*4882a593Smuzhiyun  * tpbc:     target processed byte count
36*4882a593Smuzhiyun  * vas:      virtual accelerator switch; the user mode interface
37*4882a593Smuzhiyun  */
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun #define _ISOC11_SOURCE	// For aligned_alloc()
40*4882a593Smuzhiyun #define _DEFAULT_SOURCE	// For endian.h
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun #include <stdio.h>
43*4882a593Smuzhiyun #include <stdlib.h>
44*4882a593Smuzhiyun #include <string.h>
45*4882a593Smuzhiyun #include <unistd.h>
46*4882a593Smuzhiyun #include <stdint.h>
47*4882a593Smuzhiyun #include <sys/types.h>
48*4882a593Smuzhiyun #include <sys/stat.h>
49*4882a593Smuzhiyun #include <sys/time.h>
50*4882a593Smuzhiyun #include <sys/fcntl.h>
51*4882a593Smuzhiyun #include <sys/mman.h>
52*4882a593Smuzhiyun #include <endian.h>
53*4882a593Smuzhiyun #include <bits/endian.h>
54*4882a593Smuzhiyun #include <sys/ioctl.h>
55*4882a593Smuzhiyun #include <assert.h>
56*4882a593Smuzhiyun #include <errno.h>
57*4882a593Smuzhiyun #include <signal.h>
58*4882a593Smuzhiyun #include "nxu.h"
59*4882a593Smuzhiyun #include "nx.h"
60*4882a593Smuzhiyun #include "crb.h"
61*4882a593Smuzhiyun 
/* Debug level and log stream objects defined by this program.
 * NOTE(review): presumably consumed by the NXPRT tracing macro that
 * comes from the nx headers - confirm.
 */
int nx_dbg;
FILE *nx_gzip_log;

/* Smaller / larger of two values.  Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define NX_MIN(a, b) ((a) < (b) ? (a) : (b))
#define NX_MAX(a, b) ((a) > (b) ? (a) : (b))

/* Fetch one byte from the input stream */
#define GETINPC(X) fgetc(X)
/* Capacity of the buffer that holds the gzip header FNAME field */
#define FNAME_MAX 1024

/*
 * Circular fifo bookkeeping.  A fifo is described by the offset of its
 * first used byte (head), the number of used bytes (count) and its
 * total size (cap).  Used data, and likewise free space, can be split
 * in two pieces: the "first" piece ends at or before the end of the
 * buffer, the "last" piece is the one that wrapped around to offset 0.
 * The used data has wrapped exactly when head + count > cap.
 */
#define fifo_used_bytes(count) (count)
#define fifo_free_bytes(count, cap) ((cap) - (count))

/* Free bytes between the end of the used data and the end of the buffer */
#define fifo_free_first_bytes(head, count, cap) \
	((((head) + (count)) > (cap)) ? 0 : (cap) - ((head) + (count)))
/* Free bytes in the other free piece (in front of head when not wrapped) */
#define fifo_free_last_bytes(head, count, cap) \
	((((head) + (count)) > (cap)) ? (cap) - (count) : (head))

/* Used bytes from head up to the end of the buffer */
#define fifo_used_first_bytes(head, count, cap) \
	((((head) + (count)) > (cap)) ? (cap) - (head) : (count))
/* Used bytes that wrapped around to the start of the buffer */
#define fifo_used_last_bytes(head, count, cap) \
	((((head) + (count)) > (cap)) ? ((head) + (count)) - (cap) : 0)

/* Offsets at which the first and last free pieces begin */
#define fifo_free_first_offset(head, count) ((head) + (count))
#define fifo_free_last_offset(head, count, cap) \
	fifo_used_last_bytes(head, count, cap)

/* Offsets at which the first and last used pieces begin */
#define fifo_used_first_offset(head) (head)
#define fifo_used_last_offset(head) (0)

const int fifo_in_len = 1 << 24;	/* 16 MiB */
const int fifo_out_len = 1 << 24;	/* 16 MiB */
const int page_sz = 1 << 16;		/* 64 KiB */
const int line_sz = 1 << 7;		/* 128 bytes */
const int window_max = 1 << 15;		/* 32 KiB */
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun /*
99*4882a593Smuzhiyun  * Adds an (address, len) pair to the list of ddes (ddl) and updates
100*4882a593Smuzhiyun  * the base dde.  ddl[0] is the only dde in a direct dde which
101*4882a593Smuzhiyun  * contains a single (addr,len) pair.  For more pairs, ddl[0] becomes
102*4882a593Smuzhiyun  * the indirect (base) dde that points to a list of direct ddes.
103*4882a593Smuzhiyun  * See Section 6.4 of the NX-gzip user manual for DDE description.
104*4882a593Smuzhiyun  * Addr=NULL, len=0 clears the ddl[0].  Returns the total number of
105*4882a593Smuzhiyun  * bytes in ddl.  Caller is responsible for allocting the array of
106*4882a593Smuzhiyun  * nx_dde_t *ddl.  If N addresses are required in the scatter-gather
107*4882a593Smuzhiyun  * list, the ddl array must have N+1 entries minimum.
108*4882a593Smuzhiyun  */
nx_append_dde(struct nx_dde_t * ddl,void * addr,uint32_t len)109*4882a593Smuzhiyun static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
110*4882a593Smuzhiyun 					uint32_t len)
111*4882a593Smuzhiyun {
112*4882a593Smuzhiyun 	uint32_t ddecnt;
113*4882a593Smuzhiyun 	uint32_t bytes;
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	if (addr == NULL && len == 0) {
116*4882a593Smuzhiyun 		clearp_dde(ddl);
117*4882a593Smuzhiyun 		return 0;
118*4882a593Smuzhiyun 	}
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
121*4882a593Smuzhiyun 			__func__, len));
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun 	/* Number of ddes in the dde list ; == 0 when it is a direct dde */
124*4882a593Smuzhiyun 	ddecnt = getpnn(ddl, dde_count);
125*4882a593Smuzhiyun 	bytes = getp32(ddl, ddebc);
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 	if (ddecnt == 0 && bytes == 0) {
128*4882a593Smuzhiyun 		/* First dde is unused; make it a direct dde */
129*4882a593Smuzhiyun 		bytes = len;
130*4882a593Smuzhiyun 		putp32(ddl, ddebc, bytes);
131*4882a593Smuzhiyun 		putp64(ddl, ddead, (uint64_t) addr);
132*4882a593Smuzhiyun 	} else if (ddecnt == 0) {
133*4882a593Smuzhiyun 		/* Converting direct to indirect dde
134*4882a593Smuzhiyun 		 * ddl[0] becomes head dde of ddl
135*4882a593Smuzhiyun 		 * copy direct to indirect first.
136*4882a593Smuzhiyun 		 */
137*4882a593Smuzhiyun 		ddl[1] = ddl[0];
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 		/* Add the new dde next */
140*4882a593Smuzhiyun 		clear_dde(ddl[2]);
141*4882a593Smuzhiyun 		put32(ddl[2], ddebc, len);
142*4882a593Smuzhiyun 		put64(ddl[2], ddead, (uint64_t) addr);
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 		/* Ddl head points to 2 direct ddes */
145*4882a593Smuzhiyun 		ddecnt = 2;
146*4882a593Smuzhiyun 		putpnn(ddl, dde_count, ddecnt);
147*4882a593Smuzhiyun 		bytes = bytes + len;
148*4882a593Smuzhiyun 		putp32(ddl, ddebc, bytes);
149*4882a593Smuzhiyun 		/* Pointer to the first direct dde */
150*4882a593Smuzhiyun 		putp64(ddl, ddead, (uint64_t) &ddl[1]);
151*4882a593Smuzhiyun 	} else {
152*4882a593Smuzhiyun 		/* Append a dde to an existing indirect ddl */
153*4882a593Smuzhiyun 		++ddecnt;
154*4882a593Smuzhiyun 		clear_dde(ddl[ddecnt]);
155*4882a593Smuzhiyun 		put64(ddl[ddecnt], ddead, (uint64_t) addr);
156*4882a593Smuzhiyun 		put32(ddl[ddecnt], ddebc, len);
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 		putpnn(ddl, dde_count, ddecnt);
159*4882a593Smuzhiyun 		bytes = bytes + len;
160*4882a593Smuzhiyun 		putp32(ddl, ddebc, bytes); /* byte sum of all dde */
161*4882a593Smuzhiyun 	}
162*4882a593Smuzhiyun 	return bytes;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun /*
166*4882a593Smuzhiyun  * Touch specified number of pages represented in number bytes
167*4882a593Smuzhiyun  * beginning from the first buffer in a dde list.
168*4882a593Smuzhiyun  * Do not touch the pages past buf_sz-th byte's page.
169*4882a593Smuzhiyun  *
170*4882a593Smuzhiyun  * Set buf_sz = 0 to touch all pages described by the ddep.
171*4882a593Smuzhiyun  */
nx_touch_pages_dde(struct nx_dde_t * ddep,long buf_sz,long page_sz,int wr)172*4882a593Smuzhiyun static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
173*4882a593Smuzhiyun 				int wr)
174*4882a593Smuzhiyun {
175*4882a593Smuzhiyun 	uint32_t indirect_count;
176*4882a593Smuzhiyun 	uint32_t buf_len;
177*4882a593Smuzhiyun 	long total;
178*4882a593Smuzhiyun 	uint64_t buf_addr;
179*4882a593Smuzhiyun 	struct nx_dde_t *dde_list;
180*4882a593Smuzhiyun 	int i;
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun 	assert(!!ddep);
183*4882a593Smuzhiyun 
184*4882a593Smuzhiyun 	indirect_count = getpnn(ddep, dde_count);
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
187*4882a593Smuzhiyun 			indirect_count));
188*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	if (indirect_count == 0) {
191*4882a593Smuzhiyun 		/* Direct dde */
192*4882a593Smuzhiyun 		buf_len = getp32(ddep, ddebc);
193*4882a593Smuzhiyun 		buf_addr = getp64(ddep, ddead);
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
196*4882a593Smuzhiyun 				buf_len, (void *)buf_addr));
197*4882a593Smuzhiyun 
198*4882a593Smuzhiyun 		if (buf_sz == 0)
199*4882a593Smuzhiyun 			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
200*4882a593Smuzhiyun 		else
201*4882a593Smuzhiyun 			nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
202*4882a593Smuzhiyun 					buf_sz), page_sz, wr);
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 		return ERR_NX_OK;
205*4882a593Smuzhiyun 	}
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	/* Indirect dde */
208*4882a593Smuzhiyun 	if (indirect_count > MAX_DDE_COUNT)
209*4882a593Smuzhiyun 		return ERR_NX_EXCESSIVE_DDE;
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun 	/* First address of the list */
212*4882a593Smuzhiyun 	dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	if (buf_sz == 0)
215*4882a593Smuzhiyun 		buf_sz = getp32(ddep, ddebc);
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 	total = 0;
218*4882a593Smuzhiyun 	for (i = 0; i < indirect_count; i++) {
219*4882a593Smuzhiyun 		buf_len = get32(dde_list[i], ddebc);
220*4882a593Smuzhiyun 		buf_addr = get64(dde_list[i], ddead);
221*4882a593Smuzhiyun 		total += buf_len;
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
224*4882a593Smuzhiyun 				buf_len, (void *)buf_addr));
225*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "0x%lx\n", total));
226*4882a593Smuzhiyun 
227*4882a593Smuzhiyun 		/* Touching fewer pages than encoded in the ddebc */
228*4882a593Smuzhiyun 		if (total > buf_sz) {
229*4882a593Smuzhiyun 			buf_len = NX_MIN(buf_len, total - buf_sz);
230*4882a593Smuzhiyun 			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
231*4882a593Smuzhiyun 			NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
232*4882a593Smuzhiyun 				      buf_len));
233*4882a593Smuzhiyun 			NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
234*4882a593Smuzhiyun 			break;
235*4882a593Smuzhiyun 		}
236*4882a593Smuzhiyun 		nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
237*4882a593Smuzhiyun 	}
238*4882a593Smuzhiyun 	return ERR_NX_OK;
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun /*
242*4882a593Smuzhiyun  * Src and dst buffers are supplied in scatter gather lists.
243*4882a593Smuzhiyun  * NX function code and other parameters supplied in cmdp.
244*4882a593Smuzhiyun  */
nx_submit_job(struct nx_dde_t * src,struct nx_dde_t * dst,struct nx_gzip_crb_cpb_t * cmdp,void * handle)245*4882a593Smuzhiyun static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
246*4882a593Smuzhiyun 			 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
247*4882a593Smuzhiyun {
248*4882a593Smuzhiyun 	uint64_t csbaddr;
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
251*4882a593Smuzhiyun 
252*4882a593Smuzhiyun 	cmdp->crb.source_dde = *src;
253*4882a593Smuzhiyun 	cmdp->crb.target_dde = *dst;
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 	/* Status, output byte count in tpbc */
256*4882a593Smuzhiyun 	csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
257*4882a593Smuzhiyun 	put64(cmdp->crb, csb_address, csbaddr);
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	/* NX reports input bytes in spbc; cleared */
260*4882a593Smuzhiyun 	cmdp->cpb.out_spbc_comp_wrap = 0;
261*4882a593Smuzhiyun 	cmdp->cpb.out_spbc_comp_with_count = 0;
262*4882a593Smuzhiyun 	cmdp->cpb.out_spbc_decomp = 0;
263*4882a593Smuzhiyun 
264*4882a593Smuzhiyun 	/* Clear output */
265*4882a593Smuzhiyun 	put32(cmdp->cpb, out_crc, INIT_CRC);
266*4882a593Smuzhiyun 	put32(cmdp->cpb, out_adler, INIT_ADLER);
267*4882a593Smuzhiyun 
268*4882a593Smuzhiyun 	/* Submit the crb, the job descriptor, to the accelerator. */
269*4882a593Smuzhiyun 	return nxu_submit_job(cmdp, handle);
270*4882a593Smuzhiyun }
271*4882a593Smuzhiyun 
decompress_file(int argc,char ** argv,void * devhandle)272*4882a593Smuzhiyun int decompress_file(int argc, char **argv, void *devhandle)
273*4882a593Smuzhiyun {
274*4882a593Smuzhiyun 	FILE *inpf = NULL;
275*4882a593Smuzhiyun 	FILE *outf = NULL;
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	int c, expect, i, cc, rc = 0;
278*4882a593Smuzhiyun 	char gzfname[FNAME_MAX];
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun 	/* Queuing, file ops, byte counting */
281*4882a593Smuzhiyun 	char *fifo_in, *fifo_out;
282*4882a593Smuzhiyun 	int used_in, cur_in, used_out, cur_out, read_sz, n;
283*4882a593Smuzhiyun 	int first_free, last_free, first_used, last_used;
284*4882a593Smuzhiyun 	int first_offset, last_offset;
285*4882a593Smuzhiyun 	int write_sz, free_space, source_sz;
286*4882a593Smuzhiyun 	int source_sz_estimate, target_sz_estimate;
287*4882a593Smuzhiyun 	uint64_t last_comp_ratio = 0; /* 1000 max */
288*4882a593Smuzhiyun 	uint64_t total_out = 0;
289*4882a593Smuzhiyun 	int is_final, is_eof;
290*4882a593Smuzhiyun 
291*4882a593Smuzhiyun 	/* nx hardware */
292*4882a593Smuzhiyun 	int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
293*4882a593Smuzhiyun 	int history_len = 0;
294*4882a593Smuzhiyun 	struct nx_gzip_crb_cpb_t cmd, *cmdp;
295*4882a593Smuzhiyun 	struct nx_dde_t *ddl_in;
296*4882a593Smuzhiyun 	struct nx_dde_t dde_in[6] __aligned(128);
297*4882a593Smuzhiyun 	struct nx_dde_t *ddl_out;
298*4882a593Smuzhiyun 	struct nx_dde_t dde_out[6] __aligned(128);
299*4882a593Smuzhiyun 	int pgfault_retries;
300*4882a593Smuzhiyun 
301*4882a593Smuzhiyun 	/* when using mmap'ed files */
302*4882a593Smuzhiyun 	off_t input_file_offset;
303*4882a593Smuzhiyun 
304*4882a593Smuzhiyun 	if (argc > 2) {
305*4882a593Smuzhiyun 		fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
306*4882a593Smuzhiyun 		fprintf(stderr, "    writes to stdout or <fname>.nx.gunzip\n");
307*4882a593Smuzhiyun 		return -1;
308*4882a593Smuzhiyun 	}
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun 	if (argc == 1) {
311*4882a593Smuzhiyun 		inpf = stdin;
312*4882a593Smuzhiyun 		outf = stdout;
313*4882a593Smuzhiyun 	} else if (argc == 2) {
314*4882a593Smuzhiyun 		char w[1024];
315*4882a593Smuzhiyun 		char *wp;
316*4882a593Smuzhiyun 
317*4882a593Smuzhiyun 		inpf = fopen(argv[1], "r");
318*4882a593Smuzhiyun 		if (inpf == NULL) {
319*4882a593Smuzhiyun 			perror(argv[1]);
320*4882a593Smuzhiyun 			return -1;
321*4882a593Smuzhiyun 		}
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun 		/* Make a new file name to write to.  Ignoring '.gz' */
324*4882a593Smuzhiyun 		wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
325*4882a593Smuzhiyun 		strcpy(w, wp);
326*4882a593Smuzhiyun 		strcat(w, ".nx.gunzip");
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 		outf = fopen(w, "w");
329*4882a593Smuzhiyun 		if (outf == NULL) {
330*4882a593Smuzhiyun 			perror(w);
331*4882a593Smuzhiyun 			return -1;
332*4882a593Smuzhiyun 		}
333*4882a593Smuzhiyun 	}
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 	/* Decode the gzip header */
336*4882a593Smuzhiyun 	c = GETINPC(inpf); expect = 0x1f; /* ID1 */
337*4882a593Smuzhiyun 	if (c != expect)
338*4882a593Smuzhiyun 		goto err1;
339*4882a593Smuzhiyun 
340*4882a593Smuzhiyun 	c = GETINPC(inpf); expect = 0x8b; /* ID2 */
341*4882a593Smuzhiyun 	if (c != expect)
342*4882a593Smuzhiyun 		goto err1;
343*4882a593Smuzhiyun 
344*4882a593Smuzhiyun 	c = GETINPC(inpf); expect = 0x08; /* CM */
345*4882a593Smuzhiyun 	if (c != expect)
346*4882a593Smuzhiyun 		goto err1;
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun 	int flg = GETINPC(inpf); /* FLG */
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	if (flg & 0xE0 || flg & 0x4 || flg == EOF)
351*4882a593Smuzhiyun 		goto err2;
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun 	fprintf(stderr, "gzHeader FLG %x\n", flg);
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun 	/* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
356*4882a593Smuzhiyun 	 * sample code.
357*4882a593Smuzhiyun 	 */
358*4882a593Smuzhiyun 	for (i = 0; i < 6; i++) {
359*4882a593Smuzhiyun 		char tmp[10];
360*4882a593Smuzhiyun 
361*4882a593Smuzhiyun 		tmp[i] = GETINPC(inpf);
362*4882a593Smuzhiyun 		if (tmp[i] == EOF)
363*4882a593Smuzhiyun 			goto err3;
364*4882a593Smuzhiyun 		fprintf(stderr, "%02x ", tmp[i]);
365*4882a593Smuzhiyun 		if (i == 5)
366*4882a593Smuzhiyun 			fprintf(stderr, "\n");
367*4882a593Smuzhiyun 	}
368*4882a593Smuzhiyun 	fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
369*4882a593Smuzhiyun 
370*4882a593Smuzhiyun 	/* FNAME */
371*4882a593Smuzhiyun 	if (flg & 0x8) {
372*4882a593Smuzhiyun 		int k = 0;
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 		do {
375*4882a593Smuzhiyun 			c = GETINPC(inpf);
376*4882a593Smuzhiyun 			if (c == EOF || k >= FNAME_MAX)
377*4882a593Smuzhiyun 				goto err3;
378*4882a593Smuzhiyun 			gzfname[k++] = c;
379*4882a593Smuzhiyun 		} while (c);
380*4882a593Smuzhiyun 		fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
381*4882a593Smuzhiyun 	}
382*4882a593Smuzhiyun 
383*4882a593Smuzhiyun 	/* FHCRC */
384*4882a593Smuzhiyun 	if (flg & 0x2) {
385*4882a593Smuzhiyun 		c = GETINPC(inpf);
386*4882a593Smuzhiyun 		if (c == EOF)
387*4882a593Smuzhiyun 			goto err3;
388*4882a593Smuzhiyun 		c = GETINPC(inpf);
389*4882a593Smuzhiyun 		if (c == EOF)
390*4882a593Smuzhiyun 			goto err3;
391*4882a593Smuzhiyun 		fprintf(stderr, "gzHeader FHCRC: ignored\n");
392*4882a593Smuzhiyun 	}
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun 	used_in = cur_in = used_out = cur_out = 0;
395*4882a593Smuzhiyun 	is_final = is_eof = 0;
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun 	/* Allocate one page larger to prevent page faults due to NX
398*4882a593Smuzhiyun 	 * overfetching.
399*4882a593Smuzhiyun 	 * Either do this (char*)(uintptr_t)aligned_alloc or use
400*4882a593Smuzhiyun 	 * -std=c11 flag to make the int-to-pointer warning go away.
401*4882a593Smuzhiyun 	 */
402*4882a593Smuzhiyun 	assert((fifo_in  = (char *)(uintptr_t)aligned_alloc(line_sz,
403*4882a593Smuzhiyun 				   fifo_in_len + page_sz)) != NULL);
404*4882a593Smuzhiyun 	assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
405*4882a593Smuzhiyun 				   fifo_out_len + page_sz + line_sz)) != NULL);
406*4882a593Smuzhiyun 	/* Leave unused space due to history rounding rules */
407*4882a593Smuzhiyun 	fifo_out = fifo_out + line_sz;
408*4882a593Smuzhiyun 	nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
409*4882a593Smuzhiyun 
410*4882a593Smuzhiyun 	ddl_in  = &dde_in[0];
411*4882a593Smuzhiyun 	ddl_out = &dde_out[0];
412*4882a593Smuzhiyun 	cmdp = &cmd;
413*4882a593Smuzhiyun 	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun read_state:
416*4882a593Smuzhiyun 
417*4882a593Smuzhiyun 	/* Read from .gz file */
418*4882a593Smuzhiyun 
419*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "read_state:\n"));
420*4882a593Smuzhiyun 
421*4882a593Smuzhiyun 	if (is_eof != 0)
422*4882a593Smuzhiyun 		goto write_state;
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 	/* We read in to fifo_in in two steps: first: read in to from
425*4882a593Smuzhiyun 	 * cur_in to the end of the buffer.  last: if free space wrapped
426*4882a593Smuzhiyun 	 * around, read from fifo_in offset 0 to offset cur_in.
427*4882a593Smuzhiyun 	 */
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun 	/* Reset fifo head to reduce unnecessary wrap arounds */
430*4882a593Smuzhiyun 	cur_in = (used_in == 0) ? 0 : cur_in;
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun 	/* Free space total is reduced by a gap */
433*4882a593Smuzhiyun 	free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
434*4882a593Smuzhiyun 			    - line_sz);
435*4882a593Smuzhiyun 
436*4882a593Smuzhiyun 	/* Free space may wrap around as first and last */
437*4882a593Smuzhiyun 	first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
438*4882a593Smuzhiyun 	last_free  = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
439*4882a593Smuzhiyun 
440*4882a593Smuzhiyun 	/* Start offsets of the free memory */
441*4882a593Smuzhiyun 	first_offset = fifo_free_first_offset(cur_in, used_in);
442*4882a593Smuzhiyun 	last_offset  = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
443*4882a593Smuzhiyun 
444*4882a593Smuzhiyun 	/* Reduce read_sz because of the line_sz gap */
445*4882a593Smuzhiyun 	read_sz = NX_MIN(free_space, first_free);
446*4882a593Smuzhiyun 	n = 0;
447*4882a593Smuzhiyun 	if (read_sz > 0) {
448*4882a593Smuzhiyun 		/* Read in to offset cur_in + used_in */
449*4882a593Smuzhiyun 		n = fread(fifo_in + first_offset, 1, read_sz, inpf);
450*4882a593Smuzhiyun 		used_in = used_in + n;
451*4882a593Smuzhiyun 		free_space = free_space - n;
452*4882a593Smuzhiyun 		assert(n <= read_sz);
453*4882a593Smuzhiyun 		if (n != read_sz) {
454*4882a593Smuzhiyun 			/* Either EOF or error; exit the read loop */
455*4882a593Smuzhiyun 			is_eof = 1;
456*4882a593Smuzhiyun 			goto write_state;
457*4882a593Smuzhiyun 		}
458*4882a593Smuzhiyun 	}
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 	/* If free space wrapped around */
461*4882a593Smuzhiyun 	if (last_free > 0) {
462*4882a593Smuzhiyun 		/* Reduce read_sz because of the line_sz gap */
463*4882a593Smuzhiyun 		read_sz = NX_MIN(free_space, last_free);
464*4882a593Smuzhiyun 		n = 0;
465*4882a593Smuzhiyun 		if (read_sz > 0) {
466*4882a593Smuzhiyun 			n = fread(fifo_in + last_offset, 1, read_sz, inpf);
467*4882a593Smuzhiyun 			used_in = used_in + n;       /* Increase used space */
468*4882a593Smuzhiyun 			free_space = free_space - n; /* Decrease free space */
469*4882a593Smuzhiyun 			assert(n <= read_sz);
470*4882a593Smuzhiyun 			if (n != read_sz) {
471*4882a593Smuzhiyun 				/* Either EOF or error; exit the read loop */
472*4882a593Smuzhiyun 				is_eof = 1;
473*4882a593Smuzhiyun 				goto write_state;
474*4882a593Smuzhiyun 			}
475*4882a593Smuzhiyun 		}
476*4882a593Smuzhiyun 	}
477*4882a593Smuzhiyun 
478*4882a593Smuzhiyun 	/* At this point we have used_in bytes in fifo_in with the
479*4882a593Smuzhiyun 	 * data head starting at cur_in and possibly wrapping around.
480*4882a593Smuzhiyun 	 */
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun write_state:
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	/* Write decompressed data to output file */
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "write_state:\n"));
487*4882a593Smuzhiyun 
488*4882a593Smuzhiyun 	if (used_out == 0)
489*4882a593Smuzhiyun 		goto decomp_state;
490*4882a593Smuzhiyun 
491*4882a593Smuzhiyun 	/* If fifo_out has data waiting, write it out to the file to
492*4882a593Smuzhiyun 	 * make free target space for the accelerator used bytes in
493*4882a593Smuzhiyun 	 * the first and last parts of fifo_out.
494*4882a593Smuzhiyun 	 */
495*4882a593Smuzhiyun 
496*4882a593Smuzhiyun 	first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
497*4882a593Smuzhiyun 	last_used  = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 	write_sz = first_used;
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun 	n = 0;
502*4882a593Smuzhiyun 	if (write_sz > 0) {
503*4882a593Smuzhiyun 		n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
504*4882a593Smuzhiyun 		used_out = used_out - n;
505*4882a593Smuzhiyun 		/* Move head of the fifo */
506*4882a593Smuzhiyun 		cur_out = (cur_out + n) % fifo_out_len;
507*4882a593Smuzhiyun 		assert(n <= write_sz);
508*4882a593Smuzhiyun 		if (n != write_sz) {
509*4882a593Smuzhiyun 			fprintf(stderr, "error: write\n");
510*4882a593Smuzhiyun 			rc = -1;
511*4882a593Smuzhiyun 			goto err5;
512*4882a593Smuzhiyun 		}
513*4882a593Smuzhiyun 	}
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun 	if (last_used > 0) { /* If more data available in the last part */
516*4882a593Smuzhiyun 		write_sz = last_used; /* Keep it here for later */
517*4882a593Smuzhiyun 		n = 0;
518*4882a593Smuzhiyun 		if (write_sz > 0) {
519*4882a593Smuzhiyun 			n = fwrite(fifo_out, 1, write_sz, outf);
520*4882a593Smuzhiyun 			used_out = used_out - n;
521*4882a593Smuzhiyun 			cur_out = (cur_out + n) % fifo_out_len;
522*4882a593Smuzhiyun 			assert(n <= write_sz);
523*4882a593Smuzhiyun 			if (n != write_sz) {
524*4882a593Smuzhiyun 				fprintf(stderr, "error: write\n");
525*4882a593Smuzhiyun 				rc = -1;
526*4882a593Smuzhiyun 				goto err5;
527*4882a593Smuzhiyun 			}
528*4882a593Smuzhiyun 		}
529*4882a593Smuzhiyun 	}
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun decomp_state:
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 	/* NX decompresses input data */
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "decomp_state:\n"));
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun 	if (is_final)
538*4882a593Smuzhiyun 		goto finish_state;
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	/* Address/len lists */
541*4882a593Smuzhiyun 	clearp_dde(ddl_in);
542*4882a593Smuzhiyun 	clearp_dde(ddl_out);
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun 	/* FC, CRC, HistLen, Table 6-6 */
545*4882a593Smuzhiyun 	if (resuming) {
546*4882a593Smuzhiyun 		/* Resuming a partially decompressed input.
547*4882a593Smuzhiyun 		 * The key to resume is supplying the 32KB
548*4882a593Smuzhiyun 		 * dictionary (history) to NX, which is basically
549*4882a593Smuzhiyun 		 * the last 32KB of output produced.
550*4882a593Smuzhiyun 		 */
551*4882a593Smuzhiyun 		fc = GZIP_FC_DECOMPRESS_RESUME;
552*4882a593Smuzhiyun 
553*4882a593Smuzhiyun 		cmdp->cpb.in_crc   = cmdp->cpb.out_crc;
554*4882a593Smuzhiyun 		cmdp->cpb.in_adler = cmdp->cpb.out_adler;
555*4882a593Smuzhiyun 
556*4882a593Smuzhiyun 		/* Round up the history size to quadword.  Section 2.10 */
557*4882a593Smuzhiyun 		history_len = (history_len + 15) / 16;
558*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_histlen, history_len);
559*4882a593Smuzhiyun 		history_len = history_len * 16; /* bytes */
560*4882a593Smuzhiyun 
561*4882a593Smuzhiyun 		if (history_len > 0) {
562*4882a593Smuzhiyun 			/* Chain in the history buffer to the DDE list */
563*4882a593Smuzhiyun 			if (cur_out >= history_len) {
564*4882a593Smuzhiyun 				nx_append_dde(ddl_in, fifo_out
565*4882a593Smuzhiyun 					      + (cur_out - history_len),
566*4882a593Smuzhiyun 					      history_len);
567*4882a593Smuzhiyun 			} else {
568*4882a593Smuzhiyun 				nx_append_dde(ddl_in, fifo_out
569*4882a593Smuzhiyun 					      + ((fifo_out_len + cur_out)
570*4882a593Smuzhiyun 					      - history_len),
571*4882a593Smuzhiyun 					      history_len - cur_out);
572*4882a593Smuzhiyun 				/* Up to 32KB history wraps around fifo_out */
573*4882a593Smuzhiyun 				nx_append_dde(ddl_in, fifo_out, cur_out);
574*4882a593Smuzhiyun 			}
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun 		}
577*4882a593Smuzhiyun 	} else {
578*4882a593Smuzhiyun 		/* First decompress job */
579*4882a593Smuzhiyun 		fc = GZIP_FC_DECOMPRESS;
580*4882a593Smuzhiyun 
581*4882a593Smuzhiyun 		history_len = 0;
582*4882a593Smuzhiyun 		/* Writing 0 clears out subc as well */
583*4882a593Smuzhiyun 		cmdp->cpb.in_histlen = 0;
584*4882a593Smuzhiyun 		total_out = 0;
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 		put32(cmdp->cpb, in_crc, INIT_CRC);
587*4882a593Smuzhiyun 		put32(cmdp->cpb, in_adler, INIT_ADLER);
588*4882a593Smuzhiyun 		put32(cmdp->cpb, out_crc, INIT_CRC);
589*4882a593Smuzhiyun 		put32(cmdp->cpb, out_adler, INIT_ADLER);
590*4882a593Smuzhiyun 
591*4882a593Smuzhiyun 		/* Assuming 10% compression ratio initially; use the
592*4882a593Smuzhiyun 		 * most recently measured compression ratio as a
593*4882a593Smuzhiyun 		 * heuristic to estimate the input and output
594*4882a593Smuzhiyun 		 * sizes.  If we give too much input, the target buffer
595*4882a593Smuzhiyun 		 * overflows and NX cycles are wasted, and then we
596*4882a593Smuzhiyun 		 * must retry with smaller input size.  1000 is 100%.
597*4882a593Smuzhiyun 		 */
598*4882a593Smuzhiyun 		last_comp_ratio = 100UL;
599*4882a593Smuzhiyun 	}
600*4882a593Smuzhiyun 	cmdp->crb.gzip_fc = 0;
601*4882a593Smuzhiyun 	putnn(cmdp->crb, gzip_fc, fc);
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun 	/*
604*4882a593Smuzhiyun 	 * NX source buffers
605*4882a593Smuzhiyun 	 */
606*4882a593Smuzhiyun 	first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
607*4882a593Smuzhiyun 	last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun 	if (first_used > 0)
610*4882a593Smuzhiyun 		nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
611*4882a593Smuzhiyun 
612*4882a593Smuzhiyun 	if (last_used > 0)
613*4882a593Smuzhiyun 		nx_append_dde(ddl_in, fifo_in, last_used);
614*4882a593Smuzhiyun 
615*4882a593Smuzhiyun 	/*
616*4882a593Smuzhiyun 	 * NX target buffers
617*4882a593Smuzhiyun 	 */
618*4882a593Smuzhiyun 	first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
619*4882a593Smuzhiyun 	last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
620*4882a593Smuzhiyun 
621*4882a593Smuzhiyun 	/* Reduce output free space amount not to overwrite the history */
622*4882a593Smuzhiyun 	int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
623*4882a593Smuzhiyun 				- (1<<16));
624*4882a593Smuzhiyun 
625*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
626*4882a593Smuzhiyun 		      target_max));
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun 	first_free = NX_MIN(target_max, first_free);
629*4882a593Smuzhiyun 	if (first_free > 0) {
630*4882a593Smuzhiyun 		first_offset = fifo_free_first_offset(cur_out, used_out);
631*4882a593Smuzhiyun 		nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
632*4882a593Smuzhiyun 	}
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun 	if (last_free > 0) {
635*4882a593Smuzhiyun 		last_free = NX_MIN(target_max - first_free, last_free);
636*4882a593Smuzhiyun 		if (last_free > 0) {
637*4882a593Smuzhiyun 			last_offset = fifo_free_last_offset(cur_out, used_out,
638*4882a593Smuzhiyun 							    fifo_out_len);
639*4882a593Smuzhiyun 			nx_append_dde(ddl_out, fifo_out + last_offset,
640*4882a593Smuzhiyun 				      last_free);
641*4882a593Smuzhiyun 		}
642*4882a593Smuzhiyun 	}
643*4882a593Smuzhiyun 
644*4882a593Smuzhiyun 	/* Target buffer size is used to limit the source data size
645*4882a593Smuzhiyun 	 * based on previous measurements of compression ratio.
646*4882a593Smuzhiyun 	 */
647*4882a593Smuzhiyun 
648*4882a593Smuzhiyun 	/* source_sz includes history */
649*4882a593Smuzhiyun 	source_sz = getp32(ddl_in, ddebc);
650*4882a593Smuzhiyun 	assert(source_sz > history_len);
651*4882a593Smuzhiyun 	source_sz = source_sz - history_len;
652*4882a593Smuzhiyun 
653*4882a593Smuzhiyun 	/* Estimating how much source is needed to 3/4 fill a
654*4882a593Smuzhiyun 	 * target_max size target buffer.  If we overshoot, then NX
655*4882a593Smuzhiyun 	 * must repeat the job with smaller input and we waste
656*4882a593Smuzhiyun 	 * bandwidth.  If we undershoot then we use more NX calls than
657*4882a593Smuzhiyun 	 * necessary.
658*4882a593Smuzhiyun 	 */
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun 	source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
661*4882a593Smuzhiyun 				/ 4000;
662*4882a593Smuzhiyun 
663*4882a593Smuzhiyun 	if (source_sz_estimate < source_sz) {
664*4882a593Smuzhiyun 		/* Target might be small, therefore limiting the
665*4882a593Smuzhiyun 		 * source data.
666*4882a593Smuzhiyun 		 */
667*4882a593Smuzhiyun 		source_sz = source_sz_estimate;
668*4882a593Smuzhiyun 		target_sz_estimate = target_max;
669*4882a593Smuzhiyun 	} else {
670*4882a593Smuzhiyun 		/* Source file might be small, therefore limiting target
671*4882a593Smuzhiyun 		 * touch pages to a smaller value to save processor cycles.
672*4882a593Smuzhiyun 		 */
673*4882a593Smuzhiyun 		target_sz_estimate = ((uint64_t)source_sz * 1000UL)
674*4882a593Smuzhiyun 					/ (last_comp_ratio + 1);
675*4882a593Smuzhiyun 		target_sz_estimate = NX_MIN(2 * target_sz_estimate,
676*4882a593Smuzhiyun 					    target_max);
677*4882a593Smuzhiyun 	}
678*4882a593Smuzhiyun 
679*4882a593Smuzhiyun 	source_sz = source_sz + history_len;
680*4882a593Smuzhiyun 
681*4882a593Smuzhiyun 	/* Some NX condition codes require submitting the NX job again.
682*4882a593Smuzhiyun 	 * Kernel doesn't handle NX page faults. Expects user code to
683*4882a593Smuzhiyun 	 * touch pages.
684*4882a593Smuzhiyun 	 */
685*4882a593Smuzhiyun 	pgfault_retries = NX_MAX_FAULTS;
686*4882a593Smuzhiyun 
687*4882a593Smuzhiyun restart_nx:
688*4882a593Smuzhiyun 
689*4882a593Smuzhiyun 	putp32(ddl_in, ddebc, source_sz);
690*4882a593Smuzhiyun 
691*4882a593Smuzhiyun 	/* Fault in pages */
692*4882a593Smuzhiyun 	nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
693*4882a593Smuzhiyun 	nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
694*4882a593Smuzhiyun 	nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
695*4882a593Smuzhiyun 
696*4882a593Smuzhiyun 	/* Send job to NX */
697*4882a593Smuzhiyun 	cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
698*4882a593Smuzhiyun 
699*4882a593Smuzhiyun 	switch (cc) {
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun 	case ERR_NX_AT_FAULT:
702*4882a593Smuzhiyun 
703*4882a593Smuzhiyun 		/* We touched the pages ahead of time.  In the most common case
704*4882a593Smuzhiyun 		 * we shouldn't be here.  But maybe some pages were paged out.
705*4882a593Smuzhiyun 		 * Kernel should have placed the faulting address to fsaddr.
706*4882a593Smuzhiyun 		 */
707*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "ERR_NX_AT_FAULT %p\n",
708*4882a593Smuzhiyun 			      (void *)cmdp->crb.csb.fsaddr));
709*4882a593Smuzhiyun 
710*4882a593Smuzhiyun 		if (pgfault_retries == NX_MAX_FAULTS) {
711*4882a593Smuzhiyun 			/* Try once with exact number of pages */
712*4882a593Smuzhiyun 			--pgfault_retries;
713*4882a593Smuzhiyun 			goto restart_nx;
714*4882a593Smuzhiyun 		} else if (pgfault_retries > 0) {
715*4882a593Smuzhiyun 			/* If still faulting try fewer input pages
716*4882a593Smuzhiyun 			 * assuming memory outage
717*4882a593Smuzhiyun 			 */
718*4882a593Smuzhiyun 			if (source_sz > page_sz)
719*4882a593Smuzhiyun 				source_sz = NX_MAX(source_sz / 2, page_sz);
720*4882a593Smuzhiyun 			--pgfault_retries;
721*4882a593Smuzhiyun 			goto restart_nx;
722*4882a593Smuzhiyun 		} else {
723*4882a593Smuzhiyun 			fprintf(stderr, "cannot make progress; too many ");
724*4882a593Smuzhiyun 			fprintf(stderr, "page fault retries cc= %d\n", cc);
725*4882a593Smuzhiyun 			rc = -1;
726*4882a593Smuzhiyun 			goto err5;
727*4882a593Smuzhiyun 		}
728*4882a593Smuzhiyun 
729*4882a593Smuzhiyun 	case ERR_NX_DATA_LENGTH:
730*4882a593Smuzhiyun 
731*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
732*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "stream may have trailing data\n"));
733*4882a593Smuzhiyun 
734*4882a593Smuzhiyun 		/* Not an error in the most common case; it just says
735*4882a593Smuzhiyun 		 * there is trailing data that we must examine.
736*4882a593Smuzhiyun 		 *
737*4882a593Smuzhiyun 		 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
738*4882a593Smuzhiyun 		 * Fig.6-7 and Table 6-8.
739*4882a593Smuzhiyun 		 */
740*4882a593Smuzhiyun 		nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
741*4882a593Smuzhiyun 
742*4882a593Smuzhiyun 		if (!csb_ce_termination(nx_ce) &&
743*4882a593Smuzhiyun 		    csb_ce_partial_completion(nx_ce)) {
744*4882a593Smuzhiyun 			/* Check CPB for more information
745*4882a593Smuzhiyun 			 * spbc and tpbc are valid
746*4882a593Smuzhiyun 			 */
747*4882a593Smuzhiyun 			sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
748*4882a593Smuzhiyun 			subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
749*4882a593Smuzhiyun 			spbc = get32(cmdp->cpb, out_spbc_decomp);
750*4882a593Smuzhiyun 			tpbc = get32(cmdp->crb.csb, tpbc);
751*4882a593Smuzhiyun 			assert(target_max >= tpbc);
752*4882a593Smuzhiyun 
753*4882a593Smuzhiyun 			goto ok_cc3; /* not an error */
754*4882a593Smuzhiyun 		} else {
755*4882a593Smuzhiyun 			/* History length error when CE(1)=1 CE(0)=0. */
756*4882a593Smuzhiyun 			rc = -1;
757*4882a593Smuzhiyun 			fprintf(stderr, "history length error cc= %d\n", cc);
758*4882a593Smuzhiyun 			goto err5;
759*4882a593Smuzhiyun 		}
760*4882a593Smuzhiyun 
761*4882a593Smuzhiyun 	case ERR_NX_TARGET_SPACE:
762*4882a593Smuzhiyun 
763*4882a593Smuzhiyun 		/* Target buffer not large enough; retry smaller input
764*4882a593Smuzhiyun 		 * data; give at least 1 byte.  SPBC/TPBC are not valid.
765*4882a593Smuzhiyun 		 */
766*4882a593Smuzhiyun 		assert(source_sz > history_len);
767*4882a593Smuzhiyun 		source_sz = ((source_sz - history_len + 2) / 2) + history_len;
768*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
769*4882a593Smuzhiyun 		NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
770*4882a593Smuzhiyun 			      source_sz, history_len));
771*4882a593Smuzhiyun 		goto restart_nx;
772*4882a593Smuzhiyun 
773*4882a593Smuzhiyun 	case ERR_NX_OK:
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun 		/* This should not happen for gzip formatted data;
776*4882a593Smuzhiyun 		 * we need trailing crc and isize
777*4882a593Smuzhiyun 		 */
778*4882a593Smuzhiyun 		fprintf(stderr, "ERR_NX_OK\n");
779*4882a593Smuzhiyun 		spbc = get32(cmdp->cpb, out_spbc_decomp);
780*4882a593Smuzhiyun 		tpbc = get32(cmdp->crb.csb, tpbc);
781*4882a593Smuzhiyun 		assert(target_max >= tpbc);
782*4882a593Smuzhiyun 		assert(spbc >= history_len);
783*4882a593Smuzhiyun 		source_sz = spbc - history_len;
784*4882a593Smuzhiyun 		goto offsets_state;
785*4882a593Smuzhiyun 
786*4882a593Smuzhiyun 	default:
787*4882a593Smuzhiyun 		fprintf(stderr, "error: cc= %d\n", cc);
788*4882a593Smuzhiyun 		rc = -1;
789*4882a593Smuzhiyun 		goto err5;
790*4882a593Smuzhiyun 	}
791*4882a593Smuzhiyun 
792*4882a593Smuzhiyun ok_cc3:
793*4882a593Smuzhiyun 
794*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
795*4882a593Smuzhiyun 
796*4882a593Smuzhiyun 	assert(spbc > history_len);
797*4882a593Smuzhiyun 	source_sz = spbc - history_len;
798*4882a593Smuzhiyun 
799*4882a593Smuzhiyun 	/* Table 6-4: Source Final Block Type (SFBT) describes the
800*4882a593Smuzhiyun 	 * last processed deflate block and clues the software how to
801*4882a593Smuzhiyun 	 * resume the next job.  SUBC indicates how many input bits NX
802*4882a593Smuzhiyun 	 * consumed but did not process.  SPBC indicates how many
803*4882a593Smuzhiyun 	 * bytes of source were given to the accelerator including
804*4882a593Smuzhiyun 	 * history bytes.
805*4882a593Smuzhiyun 	 */
806*4882a593Smuzhiyun 
807*4882a593Smuzhiyun 	switch (sfbt) {
808*4882a593Smuzhiyun 		int dhtlen;
809*4882a593Smuzhiyun 
810*4882a593Smuzhiyun 	case 0x0: /* Deflate final EOB received */
811*4882a593Smuzhiyun 
812*4882a593Smuzhiyun 		/* Calculating the checksum start position. */
813*4882a593Smuzhiyun 
814*4882a593Smuzhiyun 		source_sz = source_sz - subc / 8;
815*4882a593Smuzhiyun 		is_final = 1;
816*4882a593Smuzhiyun 		break;
817*4882a593Smuzhiyun 
818*4882a593Smuzhiyun 		/* Resume decompression cases are below. Basically
819*4882a593Smuzhiyun 		 * indicates where NX has suspended and how to resume
820*4882a593Smuzhiyun 		 * the input stream.
821*4882a593Smuzhiyun 		 */
822*4882a593Smuzhiyun 
823*4882a593Smuzhiyun 	case 0x8: /* Within a literal block; use rembytecount */
824*4882a593Smuzhiyun 	case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
825*4882a593Smuzhiyun 
826*4882a593Smuzhiyun 		/* Supply the partially processed source byte again */
827*4882a593Smuzhiyun 		source_sz = source_sz - ((subc + 7) / 8);
828*4882a593Smuzhiyun 
829*4882a593Smuzhiyun 		/* SUBC LS 3bits: number of bits in the first source byte need
830*4882a593Smuzhiyun 		 * to be processed.
831*4882a593Smuzhiyun 		 * 000 means all 8 bits;  Table 6-3
832*4882a593Smuzhiyun 		 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
833*4882a593Smuzhiyun 		 */
834*4882a593Smuzhiyun 		cmdp->cpb.in_subc = 0;
835*4882a593Smuzhiyun 		cmdp->cpb.in_sfbt = 0;
836*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_subc, subc % 8);
837*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_sfbt, sfbt);
838*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
839*4882a593Smuzhiyun 						      out_rembytecnt));
840*4882a593Smuzhiyun 		break;
841*4882a593Smuzhiyun 
842*4882a593Smuzhiyun 	case 0xA: /* Within a FH block; */
843*4882a593Smuzhiyun 	case 0xB: /* Within a FH block; bfinal=1 */
844*4882a593Smuzhiyun 
845*4882a593Smuzhiyun 		source_sz = source_sz - ((subc + 7) / 8);
846*4882a593Smuzhiyun 
847*4882a593Smuzhiyun 		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
848*4882a593Smuzhiyun 		cmdp->cpb.in_subc = 0;
849*4882a593Smuzhiyun 		cmdp->cpb.in_sfbt = 0;
850*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_subc, subc % 8);
851*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_sfbt, sfbt);
852*4882a593Smuzhiyun 		break;
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun 	case 0xC: /* Within a DH block; */
855*4882a593Smuzhiyun 	case 0xD: /* Within a DH block; bfinal=1 */
856*4882a593Smuzhiyun 
857*4882a593Smuzhiyun 		source_sz = source_sz - ((subc + 7) / 8);
858*4882a593Smuzhiyun 
859*4882a593Smuzhiyun 		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
860*4882a593Smuzhiyun 		cmdp->cpb.in_subc = 0;
861*4882a593Smuzhiyun 		cmdp->cpb.in_sfbt = 0;
862*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_subc, subc % 8);
863*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_sfbt, sfbt);
864*4882a593Smuzhiyun 
865*4882a593Smuzhiyun 		dhtlen = getnn(cmdp->cpb, out_dhtlen);
866*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_dhtlen, dhtlen);
867*4882a593Smuzhiyun 		assert(dhtlen >= 42);
868*4882a593Smuzhiyun 
869*4882a593Smuzhiyun 		/* Round up to a qword */
870*4882a593Smuzhiyun 		dhtlen = (dhtlen + 127) / 128;
871*4882a593Smuzhiyun 
872*4882a593Smuzhiyun 		while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
873*4882a593Smuzhiyun 			--dhtlen;
874*4882a593Smuzhiyun 			cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
875*4882a593Smuzhiyun 		}
876*4882a593Smuzhiyun 		break;
877*4882a593Smuzhiyun 
878*4882a593Smuzhiyun 	case 0xE: /* Within a block header; bfinal=0; */
879*4882a593Smuzhiyun 		     /* Also given if source data exactly ends (SUBC=0) with
880*4882a593Smuzhiyun 		      * EOB code with BFINAL=0.  Means the next byte will
881*4882a593Smuzhiyun 		      * contain a block header.
882*4882a593Smuzhiyun 		      */
883*4882a593Smuzhiyun 	case 0xF: /* within a block header with BFINAL=1. */
884*4882a593Smuzhiyun 
885*4882a593Smuzhiyun 		source_sz = source_sz - ((subc + 7) / 8);
886*4882a593Smuzhiyun 
887*4882a593Smuzhiyun 		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
888*4882a593Smuzhiyun 		cmdp->cpb.in_subc = 0;
889*4882a593Smuzhiyun 		cmdp->cpb.in_sfbt = 0;
890*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_subc, subc % 8);
891*4882a593Smuzhiyun 		putnn(cmdp->cpb, in_sfbt, sfbt);
892*4882a593Smuzhiyun 
893*4882a593Smuzhiyun 		/* Engine did not process any data */
894*4882a593Smuzhiyun 		if (is_eof && (source_sz == 0))
895*4882a593Smuzhiyun 			is_final = 1;
896*4882a593Smuzhiyun 	}
897*4882a593Smuzhiyun 
898*4882a593Smuzhiyun offsets_state:
899*4882a593Smuzhiyun 
900*4882a593Smuzhiyun 	/* Adjust the source and target buffer offsets and lengths  */
901*4882a593Smuzhiyun 
902*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "offsets_state:\n"));
903*4882a593Smuzhiyun 
904*4882a593Smuzhiyun 	/* Delete input data from fifo_in */
905*4882a593Smuzhiyun 	used_in = used_in - source_sz;
906*4882a593Smuzhiyun 	cur_in = (cur_in + source_sz) % fifo_in_len;
907*4882a593Smuzhiyun 	input_file_offset = input_file_offset + source_sz;
908*4882a593Smuzhiyun 
909*4882a593Smuzhiyun 	/* Add output data to fifo_out */
910*4882a593Smuzhiyun 	used_out = used_out + tpbc;
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun 	assert(used_out <= fifo_out_len);
913*4882a593Smuzhiyun 
914*4882a593Smuzhiyun 	total_out = total_out + tpbc;
915*4882a593Smuzhiyun 
916*4882a593Smuzhiyun 	/* Deflate history is 32KB max.  No need to supply more
917*4882a593Smuzhiyun 	 * than 32KB on a resume.
918*4882a593Smuzhiyun 	 */
919*4882a593Smuzhiyun 	history_len = (total_out > window_max) ? window_max : total_out;
920*4882a593Smuzhiyun 
921*4882a593Smuzhiyun 	/* To estimate expected expansion in the next NX job; 500 means 50%.
922*4882a593Smuzhiyun 	 * Deflate best case is around 1 to 1000.
923*4882a593Smuzhiyun 	 */
924*4882a593Smuzhiyun 	last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
925*4882a593Smuzhiyun 			  / ((uint64_t)tpbc + 1);
926*4882a593Smuzhiyun 	last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
927*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
928*4882a593Smuzhiyun 		      last_comp_ratio, source_sz, spbc, tpbc));
929*4882a593Smuzhiyun 
930*4882a593Smuzhiyun 	resuming = 1;
931*4882a593Smuzhiyun 
932*4882a593Smuzhiyun finish_state:
933*4882a593Smuzhiyun 
934*4882a593Smuzhiyun 	NXPRT(fprintf(stderr, "finish_state:\n"));
935*4882a593Smuzhiyun 
936*4882a593Smuzhiyun 	if (is_final) {
937*4882a593Smuzhiyun 		if (used_out)
938*4882a593Smuzhiyun 			goto write_state; /* More data to write out */
939*4882a593Smuzhiyun 		else if (used_in < 8) {
940*4882a593Smuzhiyun 			/* Need at least 8 more bytes containing gzip crc
941*4882a593Smuzhiyun 			 * and isize.
942*4882a593Smuzhiyun 			 */
943*4882a593Smuzhiyun 			rc = -1;
944*4882a593Smuzhiyun 			goto err4;
945*4882a593Smuzhiyun 		} else {
946*4882a593Smuzhiyun 			/* Compare checksums and exit */
947*4882a593Smuzhiyun 			int i;
948*4882a593Smuzhiyun 			unsigned char tail[8];
949*4882a593Smuzhiyun 			uint32_t cksum, isize;
950*4882a593Smuzhiyun 
951*4882a593Smuzhiyun 			for (i = 0; i < 8; i++)
952*4882a593Smuzhiyun 				tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
953*4882a593Smuzhiyun 			fprintf(stderr, "computed checksum %08x isize %08x\n",
954*4882a593Smuzhiyun 				cmdp->cpb.out_crc, (uint32_t) (total_out
955*4882a593Smuzhiyun 				% (1ULL<<32)));
956*4882a593Smuzhiyun 			cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
957*4882a593Smuzhiyun 				 | (uint32_t) tail[2]<<16
958*4882a593Smuzhiyun 				 | (uint32_t) tail[3]<<24);
959*4882a593Smuzhiyun 			isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
960*4882a593Smuzhiyun 				 | (uint32_t) tail[6]<<16
961*4882a593Smuzhiyun 				 | (uint32_t) tail[7]<<24);
962*4882a593Smuzhiyun 			fprintf(stderr, "stored   checksum %08x isize %08x\n",
963*4882a593Smuzhiyun 				cksum, isize);
964*4882a593Smuzhiyun 
965*4882a593Smuzhiyun 			if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
966*4882a593Smuzhiyun 			    (total_out % (1ULL<<32))) {
967*4882a593Smuzhiyun 				rc = 0;	goto ok1;
968*4882a593Smuzhiyun 			} else {
969*4882a593Smuzhiyun 				rc = -1; goto err4;
970*4882a593Smuzhiyun 			}
971*4882a593Smuzhiyun 		}
972*4882a593Smuzhiyun 	} else
973*4882a593Smuzhiyun 		goto read_state;
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun 	return -1;
976*4882a593Smuzhiyun 
977*4882a593Smuzhiyun err1:
978*4882a593Smuzhiyun 	fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
979*4882a593Smuzhiyun 		expect, c);
980*4882a593Smuzhiyun 	return -1;
981*4882a593Smuzhiyun 
982*4882a593Smuzhiyun err2:
983*4882a593Smuzhiyun 	fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
984*4882a593Smuzhiyun 	return -1;
985*4882a593Smuzhiyun 
986*4882a593Smuzhiyun err3:
987*4882a593Smuzhiyun 	fprintf(stderr, "error: gzip header\n");
988*4882a593Smuzhiyun 	return -1;
989*4882a593Smuzhiyun 
990*4882a593Smuzhiyun err4:
991*4882a593Smuzhiyun 	fprintf(stderr, "error: checksum missing or mismatch\n");
992*4882a593Smuzhiyun 
993*4882a593Smuzhiyun err5:
994*4882a593Smuzhiyun ok1:
995*4882a593Smuzhiyun 	fprintf(stderr, "decomp is complete: fclose\n");
996*4882a593Smuzhiyun 	fclose(outf);
997*4882a593Smuzhiyun 
998*4882a593Smuzhiyun 	return rc;
999*4882a593Smuzhiyun }
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 
main(int argc,char ** argv)1002*4882a593Smuzhiyun int main(int argc, char **argv)
1003*4882a593Smuzhiyun {
1004*4882a593Smuzhiyun 	int rc;
1005*4882a593Smuzhiyun 	struct sigaction act;
1006*4882a593Smuzhiyun 	void *handle;
1007*4882a593Smuzhiyun 
1008*4882a593Smuzhiyun 	nx_dbg = 0;
1009*4882a593Smuzhiyun 	nx_gzip_log = NULL;
1010*4882a593Smuzhiyun 	act.sa_handler = 0;
1011*4882a593Smuzhiyun 	act.sa_sigaction = nxu_sigsegv_handler;
1012*4882a593Smuzhiyun 	act.sa_flags = SA_SIGINFO;
1013*4882a593Smuzhiyun 	act.sa_restorer = 0;
1014*4882a593Smuzhiyun 	sigemptyset(&act.sa_mask);
1015*4882a593Smuzhiyun 	sigaction(SIGSEGV, &act, NULL);
1016*4882a593Smuzhiyun 
1017*4882a593Smuzhiyun 	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
1018*4882a593Smuzhiyun 	if (!handle) {
1019*4882a593Smuzhiyun 		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
1020*4882a593Smuzhiyun 		exit(-1);
1021*4882a593Smuzhiyun 	}
1022*4882a593Smuzhiyun 
1023*4882a593Smuzhiyun 	rc = decompress_file(argc, argv, handle);
1024*4882a593Smuzhiyun 
1025*4882a593Smuzhiyun 	nx_function_end(handle);
1026*4882a593Smuzhiyun 
1027*4882a593Smuzhiyun 	return rc;
1028*4882a593Smuzhiyun }
1029