xref: /utopia/UTPA2-700.0.x/modules/ojpd_vdec_v1/api/jpeg/cmodel/src/jidctint.c (revision 53ee8cc121a030b8d368113ac3e966b4705770ef)
1*53ee8cc1Swenshuai.xi //<MStar Software>
2*53ee8cc1Swenshuai.xi //******************************************************************************
3*53ee8cc1Swenshuai.xi // MStar Software
4*53ee8cc1Swenshuai.xi // Copyright (c) 2010 - 2012 MStar Semiconductor, Inc. All rights reserved.
5*53ee8cc1Swenshuai.xi // All software, firmware and related documentation herein ("MStar Software") are
6*53ee8cc1Swenshuai.xi // intellectual property of MStar Semiconductor, Inc. ("MStar") and protected by
7*53ee8cc1Swenshuai.xi // law, including, but not limited to, copyright law and international treaties.
8*53ee8cc1Swenshuai.xi // Any use, modification, reproduction, retransmission, or republication of all
9*53ee8cc1Swenshuai.xi // or part of MStar Software is expressly prohibited, unless prior written
10*53ee8cc1Swenshuai.xi // permission has been granted by MStar.
11*53ee8cc1Swenshuai.xi //
12*53ee8cc1Swenshuai.xi // By accessing, browsing and/or using MStar Software, you acknowledge that you
13*53ee8cc1Swenshuai.xi // have read, understood, and agree, to be bound by below terms ("Terms") and to
14*53ee8cc1Swenshuai.xi // comply with all applicable laws and regulations:
15*53ee8cc1Swenshuai.xi //
16*53ee8cc1Swenshuai.xi // 1. MStar shall retain any and all right, ownership and interest to MStar
17*53ee8cc1Swenshuai.xi //    Software and any modification/derivatives thereof.
18*53ee8cc1Swenshuai.xi //    No right, ownership, or interest to MStar Software and any
19*53ee8cc1Swenshuai.xi //    modification/derivatives thereof is transferred to you under Terms.
20*53ee8cc1Swenshuai.xi //
21*53ee8cc1Swenshuai.xi // 2. You understand that MStar Software might include, incorporate or be
22*53ee8cc1Swenshuai.xi //    supplied together with third party`s software and the use of MStar
23*53ee8cc1Swenshuai.xi //    Software may require additional licenses from third parties.
24*53ee8cc1Swenshuai.xi //    Therefore, you hereby agree it is your sole responsibility to separately
25*53ee8cc1Swenshuai.xi //    obtain any and all third party right and license necessary for your use of
26*53ee8cc1Swenshuai.xi //    such third party`s software.
27*53ee8cc1Swenshuai.xi //
28*53ee8cc1Swenshuai.xi // 3. MStar Software and any modification/derivatives thereof shall be deemed as
29*53ee8cc1Swenshuai.xi //    MStar`s confidential information and you agree to keep MStar`s
30*53ee8cc1Swenshuai.xi //    confidential information in strictest confidence and not disclose to any
31*53ee8cc1Swenshuai.xi //    third party.
32*53ee8cc1Swenshuai.xi //
33*53ee8cc1Swenshuai.xi // 4. MStar Software is provided on an "AS IS" basis without warranties of any
34*53ee8cc1Swenshuai.xi //    kind. Any warranties are hereby expressly disclaimed by MStar, including
35*53ee8cc1Swenshuai.xi //    without limitation, any warranties of merchantability, non-infringement of
36*53ee8cc1Swenshuai.xi //    intellectual property rights, fitness for a particular purpose, error free
37*53ee8cc1Swenshuai.xi //    and in conformity with any international standard.  You agree to waive any
38*53ee8cc1Swenshuai.xi //    claim against MStar for any loss, damage, cost or expense that you may
39*53ee8cc1Swenshuai.xi //    incur related to your use of MStar Software.
40*53ee8cc1Swenshuai.xi //    In no event shall MStar be liable for any direct, indirect, incidental or
41*53ee8cc1Swenshuai.xi //    consequential damages, including without limitation, lost of profit or
42*53ee8cc1Swenshuai.xi //    revenues, lost or damage of data, and unauthorized system use.
43*53ee8cc1Swenshuai.xi //    You agree that this Section 4 shall still apply without being affected
44*53ee8cc1Swenshuai.xi //    even if MStar Software has been modified by MStar in accordance with your
45*53ee8cc1Swenshuai.xi //    request or instruction for your use, except otherwise agreed by both
46*53ee8cc1Swenshuai.xi //    parties in writing.
47*53ee8cc1Swenshuai.xi //
48*53ee8cc1Swenshuai.xi // 5. If requested, MStar may from time to time provide technical supports or
49*53ee8cc1Swenshuai.xi //    services in relation with MStar Software to you for your use of
50*53ee8cc1Swenshuai.xi //    MStar Software in conjunction with your or your customer`s product
51*53ee8cc1Swenshuai.xi //    ("Services").
52*53ee8cc1Swenshuai.xi //    You understand and agree that, except otherwise agreed by both parties in
53*53ee8cc1Swenshuai.xi //    writing, Services are provided on an "AS IS" basis and the warranty
54*53ee8cc1Swenshuai.xi //    disclaimer set forth in Section 4 above shall apply.
55*53ee8cc1Swenshuai.xi //
56*53ee8cc1Swenshuai.xi // 6. Nothing contained herein shall be construed as by implication, estoppels
57*53ee8cc1Swenshuai.xi //    or otherwise:
58*53ee8cc1Swenshuai.xi //    (a) conferring any license or right to use MStar name, trademark, service
59*53ee8cc1Swenshuai.xi //        mark, symbol or any other identification;
60*53ee8cc1Swenshuai.xi //    (b) obligating MStar or any of its affiliates to furnish any person,
61*53ee8cc1Swenshuai.xi //        including without limitation, you and your customers, any assistance
62*53ee8cc1Swenshuai.xi //        of any kind whatsoever, or any information; or
63*53ee8cc1Swenshuai.xi //    (c) conferring any license or right under any intellectual property right.
64*53ee8cc1Swenshuai.xi //
65*53ee8cc1Swenshuai.xi // 7. These terms shall be governed by and construed in accordance with the laws
66*53ee8cc1Swenshuai.xi //    of Taiwan, R.O.C., excluding its conflict of law rules.
67*53ee8cc1Swenshuai.xi //    Any and all dispute arising out hereof or related hereto shall be finally
68*53ee8cc1Swenshuai.xi //    settled by arbitration referred to the Chinese Arbitration Association,
69*53ee8cc1Swenshuai.xi //    Taipei in accordance with the ROC Arbitration Law and the Arbitration
70*53ee8cc1Swenshuai.xi //    Rules of the Association by three (3) arbitrators appointed in accordance
71*53ee8cc1Swenshuai.xi //    with the said Rules.
72*53ee8cc1Swenshuai.xi //    The place of arbitration shall be in Taipei, Taiwan and the language shall
73*53ee8cc1Swenshuai.xi //    be English.
74*53ee8cc1Swenshuai.xi //    The arbitration award shall be final and binding to both parties.
75*53ee8cc1Swenshuai.xi //
76*53ee8cc1Swenshuai.xi //******************************************************************************
77*53ee8cc1Swenshuai.xi //<MStar Software>
78*53ee8cc1Swenshuai.xi /*
79*53ee8cc1Swenshuai.xi  * jidctint.c
80*53ee8cc1Swenshuai.xi  *
81*53ee8cc1Swenshuai.xi  * Copyright (C) 1991-1998, Thomas G. Lane.
82*53ee8cc1Swenshuai.xi  * This file is part of the Independent JPEG Group's software.
83*53ee8cc1Swenshuai.xi  * For conditions of distribution and use, see the accompanying README file.
84*53ee8cc1Swenshuai.xi  *
85*53ee8cc1Swenshuai.xi  * This file contains a slow-but-accurate integer implementation of the
86*53ee8cc1Swenshuai.xi  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
87*53ee8cc1Swenshuai.xi  * must also perform dequantization of the input coefficients.
88*53ee8cc1Swenshuai.xi  *
89*53ee8cc1Swenshuai.xi  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
90*53ee8cc1Swenshuai.xi  * on each row (or vice versa, but it's more convenient to emit a row at
91*53ee8cc1Swenshuai.xi  * a time).  Direct algorithms are also available, but they are much more
92*53ee8cc1Swenshuai.xi  * complex and seem not to be any faster when reduced to code.
93*53ee8cc1Swenshuai.xi  *
94*53ee8cc1Swenshuai.xi  * This implementation is based on an algorithm described in
95*53ee8cc1Swenshuai.xi  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
96*53ee8cc1Swenshuai.xi  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
97*53ee8cc1Swenshuai.xi  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
98*53ee8cc1Swenshuai.xi  * The primary algorithm described there uses 11 multiplies and 29 adds.
99*53ee8cc1Swenshuai.xi  * We use their alternate method with 12 multiplies and 32 adds.
100*53ee8cc1Swenshuai.xi  * The advantage of this method is that no data path contains more than one
101*53ee8cc1Swenshuai.xi  * multiplication; this allows a very simple and accurate implementation in
102*53ee8cc1Swenshuai.xi  * scaled fixed-point arithmetic, with a minimal number of shifts.
103*53ee8cc1Swenshuai.xi  */
104*53ee8cc1Swenshuai.xi 
105*53ee8cc1Swenshuai.xi #include "jpegmain.h"
106*53ee8cc1Swenshuai.xi #include "apiJPEG.h"
107*53ee8cc1Swenshuai.xi ///#define JPEG_INTERNALS
108*53ee8cc1Swenshuai.xi ///#include "jinclude.h"
109*53ee8cc1Swenshuai.xi ///#include "jpeglib.h"
110*53ee8cc1Swenshuai.xi ///#include "jdct.h"        /* Private declarations for DCT subsystem */
111*53ee8cc1Swenshuai.xi 
112*53ee8cc1Swenshuai.xi #if 1///def DCT_ISLOW_SUPPORTED
113*53ee8cc1Swenshuai.xi 
114*53ee8cc1Swenshuai.xi 
/*
 * This module is specialized to the case DCTSIZE = 8: the butterfly network
 * below is written out for exactly eight inputs per row/column.
 */

#define DCTSIZE 8           /* samples per row and per column of a block */
#define BITS_IN_JSAMPLE 8   /* precision of one output sample, in bits */


/* Stop the build with a readable diagnostic if the block size is ever changed. */
#if DCTSIZE != 8
#error "jidctint.c only copes with 8x8 DCTs (DCTSIZE must be 8)"
#endif
126*53ee8cc1Swenshuai.xi 
127*53ee8cc1Swenshuai.xi 
128*53ee8cc1Swenshuai.xi /*
129*53ee8cc1Swenshuai.xi  * The poop on this scaling stuff is as follows:
130*53ee8cc1Swenshuai.xi  *
131*53ee8cc1Swenshuai.xi  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
132*53ee8cc1Swenshuai.xi  * larger than the true IDCT outputs.  The final outputs are therefore
133*53ee8cc1Swenshuai.xi  * a factor of N larger than desired; since N=8 this can be cured by
134*53ee8cc1Swenshuai.xi  * a simple right shift at the end of the algorithm.  The advantage of
135*53ee8cc1Swenshuai.xi  * this arrangement is that we save two multiplications per 1-D IDCT,
136*53ee8cc1Swenshuai.xi  * because the y0 and y4 inputs need not be divided by sqrt(N).
137*53ee8cc1Swenshuai.xi  *
138*53ee8cc1Swenshuai.xi  * We have to do addition and subtraction of the integer inputs, which
139*53ee8cc1Swenshuai.xi  * is no problem, and multiplication by fractional constants, which is
140*53ee8cc1Swenshuai.xi  * a problem to do in integer arithmetic.  We multiply all the constants
141*53ee8cc1Swenshuai.xi  * by CONST_SCALE and convert them to integer constants (thus retaining
142*53ee8cc1Swenshuai.xi  * CONST_BITS bits of precision in the constants).  After doing a
143*53ee8cc1Swenshuai.xi  * multiplication we have to divide the product by CONST_SCALE, with proper
144*53ee8cc1Swenshuai.xi  * rounding, to produce the correct output.  This division can be done
145*53ee8cc1Swenshuai.xi  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
146*53ee8cc1Swenshuai.xi  * as long as possible so that partial sums can be added together with
147*53ee8cc1Swenshuai.xi  * full fractional precision.
148*53ee8cc1Swenshuai.xi  *
149*53ee8cc1Swenshuai.xi  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
150*53ee8cc1Swenshuai.xi  * they are represented to better-than-integral precision.  These outputs
151*53ee8cc1Swenshuai.xi  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
152*53ee8cc1Swenshuai.xi  * with the recommended scaling.  (To scale up 12-bit sample data further, an
153*53ee8cc1Swenshuai.xi  * intermediate INT32 array would be needed.)
154*53ee8cc1Swenshuai.xi  *
155*53ee8cc1Swenshuai.xi  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
156*53ee8cc1Swenshuai.xi  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
157*53ee8cc1Swenshuai.xi  * shows that the values given below are the most effective.
158*53ee8cc1Swenshuai.xi  */
159*53ee8cc1Swenshuai.xi 
/* Fractional bits carried by the FIX_* multiplier constants; the same value
 * is used for every sample precision.
 */
#define CONST_BITS  13

/* Extra fractional bits kept in the pass-1 (column) results. */
#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1       /* lose a little precision to avoid overflow */
#endif
167*53ee8cc1Swenshuai.xi 
168*53ee8cc1Swenshuai.xi /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
169*53ee8cc1Swenshuai.xi  * causing a lot of useless floating-point operations at run time.
170*53ee8cc1Swenshuai.xi  * To get around this we use the following pre-calculated constants.
171*53ee8cc1Swenshuai.xi  * If you change CONST_BITS you may want to add appropriate values.
172*53ee8cc1Swenshuai.xi  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
173*53ee8cc1Swenshuai.xi  */
174*53ee8cc1Swenshuai.xi 
#if CONST_BITS == 13
/* Pre-computed values of FIX(x) = round(x * 2**13). */
#define FIX_0_298631336  ((INT32)  2446)    /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)    /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)    /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)    /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)    /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)    /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)   /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)   /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)   /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)   /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)   /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)   /* FIX(3.072711026) */
#else
/* FIX() is not defined anywhere in this file and the jdct.h include is
 * commented out, so supply it here unless an included header already did.
 * It converts a positive real constant to fixed point with CONST_BITS
 * fractional bits, rounding to nearest.
 */
#ifndef FIX
#define FIX(x)  ((INT32) ((x) * ((INT32) 1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
202*53ee8cc1Swenshuai.xi 
203*53ee8cc1Swenshuai.xi 
/* Product of an INT32 variable and one of the INT32 FIX_* constants.
 * With 8-bit samples and the recommended scaling both operands fit in
 * 16 bits, so a 16x16->32 multiply would be enough; this build nevertheless
 * always uses the ordinary C multiplication, which is also what 12-bit
 * samples would require.
 */
#define MULTIPLY(value,factor)  ((value) * (factor))
219*53ee8cc1Swenshuai.xi 
/* Dequantization hook.  In this build it is a pass-through: the macro
 * yields the coefficient unchanged and discards the quantization-table
 * argument without evaluating it, so no multiplier table is consulted.
 * ISLOW_MULT_TYPE names the type a multiplier-table entry would have.
 */

#define ISLOW_MULT_TYPE int
#define DEQUANTIZE(coef_value,table_entry)  (coef_value)
227*53ee8cc1Swenshuai.xi 
/* DESCALE(value,bits): drop `bits` fractional bits from `value`, rounding to
 * nearest by adding half of the divisor (2**(bits-1)) before the right shift.
 * `bits` must be at least 1.
 */
#define SCALEDONE ((int32) 1)
#define DESCALE(value,bits)  (((value) + (SCALEDONE << ((bits)-1))) >> (bits))
231*53ee8cc1Swenshuai.xi 
232*53ee8cc1Swenshuai.xi /*
233*53ee8cc1Swenshuai.xi  * Perform dequantization and inverse DCT on one block of coefficients.
234*53ee8cc1Swenshuai.xi  */
235*53ee8cc1Swenshuai.xi 
236*53ee8cc1Swenshuai.xi ///GLOBAL(void)
237*53ee8cc1Swenshuai.xi ///jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
238*53ee8cc1Swenshuai.xi ///      JCOEFPTR coef_block,
239*53ee8cc1Swenshuai.xi ///      JSAMPARRAY output_buf, JDIMENSION output_col)
240*53ee8cc1Swenshuai.xi #define clamp(i) if (i & 0xFF00) i = (((~i) >> 15) & 0xFF);
241*53ee8cc1Swenshuai.xi void jpeg_idct_islow( JPEG_BLOCK_TYPE *data, U8 *Pdst_ptr )
242*53ee8cc1Swenshuai.xi {
243*53ee8cc1Swenshuai.xi     #define INT32   S32
244*53ee8cc1Swenshuai.xi     #define DCTSIZE2 64
245*53ee8cc1Swenshuai.xi     #define DCTSIZE 8
246*53ee8cc1Swenshuai.xi 
247*53ee8cc1Swenshuai.xi     INT32 tmp0, tmp1, tmp2, tmp3;
248*53ee8cc1Swenshuai.xi     INT32 tmp10, tmp11, tmp12, tmp13;
249*53ee8cc1Swenshuai.xi     INT32 z1, z2, z3, z4, z5;
250*53ee8cc1Swenshuai.xi     ///JCOEFPTR inptr;
251*53ee8cc1Swenshuai.xi     register JPEG_BLOCK_TYPE *inptr;
252*53ee8cc1Swenshuai.xi     ///ISLOW_MULT_TYPE *quantptr;
253*53ee8cc1Swenshuai.xi     U8 *outptr = Pdst_ptr;
254*53ee8cc1Swenshuai.xi     ///JSAMPLE *range_limit = IDCT_range_limit(cinfo);
255*53ee8cc1Swenshuai.xi     int ctr;
256*53ee8cc1Swenshuai.xi     JPEG_BLOCK_TYPE workspace[DCTSIZE2]; /* buffers data between passes */
257*53ee8cc1Swenshuai.xi     JPEG_BLOCK_TYPE *wsptr;
258*53ee8cc1Swenshuai.xi     ///SHIFT_TEMPS
259*53ee8cc1Swenshuai.xi     S16 i;
260*53ee8cc1Swenshuai.xi //printf("Jidctint::jpeg_idct_islow\n");
261*53ee8cc1Swenshuai.xi     /* Pass 1: process columns from input, store into work array. */
262*53ee8cc1Swenshuai.xi     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
263*53ee8cc1Swenshuai.xi     /* furthermore, we scale the results by 2**PASS1_BITS. */
264*53ee8cc1Swenshuai.xi 
265*53ee8cc1Swenshuai.xi     inptr = data;
266*53ee8cc1Swenshuai.xi     ///quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
267*53ee8cc1Swenshuai.xi     wsptr = workspace;
268*53ee8cc1Swenshuai.xi     for ( ctr = DCTSIZE; ctr > 0; ctr-- )
269*53ee8cc1Swenshuai.xi     {
270*53ee8cc1Swenshuai.xi         /* Due to quantization, we will usually find that many of the input
271*53ee8cc1Swenshuai.xi          * coefficients are zero, especially the AC terms.  We can exploit this
272*53ee8cc1Swenshuai.xi          * by short-circuiting the IDCT calculation for any column in which all
273*53ee8cc1Swenshuai.xi          * the AC terms are zero.  In that case each output is equal to the
274*53ee8cc1Swenshuai.xi          * DC coefficient (with scale factor as needed).
275*53ee8cc1Swenshuai.xi          * With typical images and quantization tables, half or more of the
276*53ee8cc1Swenshuai.xi          * column DCT calculations can be simplified this way.
277*53ee8cc1Swenshuai.xi          */
278*53ee8cc1Swenshuai.xi 
279*53ee8cc1Swenshuai.xi         if ( ( inptr[DCTSIZE * 1] | inptr[DCTSIZE * 2] | inptr[DCTSIZE * 3] | inptr[DCTSIZE * 4] | inptr[DCTSIZE * 5] | inptr[DCTSIZE * 6] | inptr[DCTSIZE * 7] ) == 0 )
280*53ee8cc1Swenshuai.xi         {
281*53ee8cc1Swenshuai.xi             /* AC terms all zero */
282*53ee8cc1Swenshuai.xi             int dcval = DEQUANTIZE( inptr[DCTSIZE*0], quantptr[DCTSIZE*0] ) << PASS1_BITS;
283*53ee8cc1Swenshuai.xi 
284*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 0] = dcval;
285*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 1] = dcval;
286*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 2] = dcval;
287*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 3] = dcval;
288*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 4] = dcval;
289*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 5] = dcval;
290*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 6] = dcval;
291*53ee8cc1Swenshuai.xi             wsptr[DCTSIZE * 7] = dcval;
292*53ee8cc1Swenshuai.xi 
293*53ee8cc1Swenshuai.xi             inptr++;            /* advance pointers to next column */
294*53ee8cc1Swenshuai.xi             //quantptr++;
295*53ee8cc1Swenshuai.xi             wsptr++;
296*53ee8cc1Swenshuai.xi             continue;
297*53ee8cc1Swenshuai.xi         }
298*53ee8cc1Swenshuai.xi 
299*53ee8cc1Swenshuai.xi         /* Even part: reverse the even part of the forward DCT. */
300*53ee8cc1Swenshuai.xi         /* The rotator is sqrt(2)*c(-6). */
301*53ee8cc1Swenshuai.xi 
302*53ee8cc1Swenshuai.xi         z2 = DEQUANTIZE( inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2] );
303*53ee8cc1Swenshuai.xi         z3 = DEQUANTIZE( inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6] );
304*53ee8cc1Swenshuai.xi 
305*53ee8cc1Swenshuai.xi         z1 = MULTIPLY( z2 + z3, FIX_0_541196100 );
306*53ee8cc1Swenshuai.xi         tmp2 = z1 + MULTIPLY( z3, -FIX_1_847759065 );
307*53ee8cc1Swenshuai.xi         tmp3 = z1 + MULTIPLY( z2, FIX_0_765366865 );
308*53ee8cc1Swenshuai.xi 
309*53ee8cc1Swenshuai.xi         z2 = DEQUANTIZE( inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0] );
310*53ee8cc1Swenshuai.xi         z3 = DEQUANTIZE( inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4] );
311*53ee8cc1Swenshuai.xi 
312*53ee8cc1Swenshuai.xi         tmp0 = ( z2 + z3 ) << CONST_BITS;
313*53ee8cc1Swenshuai.xi         tmp1 = ( z2 - z3 ) << CONST_BITS;
314*53ee8cc1Swenshuai.xi 
315*53ee8cc1Swenshuai.xi         tmp10 = tmp0 + tmp3;
316*53ee8cc1Swenshuai.xi         tmp13 = tmp0 - tmp3;
317*53ee8cc1Swenshuai.xi         tmp11 = tmp1 + tmp2;
318*53ee8cc1Swenshuai.xi         tmp12 = tmp1 - tmp2;
319*53ee8cc1Swenshuai.xi 
320*53ee8cc1Swenshuai.xi         /* Odd part per figure 8; the matrix is unitary and hence its
321*53ee8cc1Swenshuai.xi          * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
322*53ee8cc1Swenshuai.xi          */
323*53ee8cc1Swenshuai.xi 
324*53ee8cc1Swenshuai.xi         tmp0 = DEQUANTIZE( inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7] );
325*53ee8cc1Swenshuai.xi         tmp1 = DEQUANTIZE( inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5] );
326*53ee8cc1Swenshuai.xi         tmp2 = DEQUANTIZE( inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3] );
327*53ee8cc1Swenshuai.xi         tmp3 = DEQUANTIZE( inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1] );
328*53ee8cc1Swenshuai.xi 
329*53ee8cc1Swenshuai.xi         z1 = tmp0 + tmp3;
330*53ee8cc1Swenshuai.xi         z2 = tmp1 + tmp2;
331*53ee8cc1Swenshuai.xi         z3 = tmp0 + tmp2;
332*53ee8cc1Swenshuai.xi         z4 = tmp1 + tmp3;
333*53ee8cc1Swenshuai.xi         z5 = MULTIPLY( z3 + z4, FIX_1_175875602 ); /* sqrt(2) * c3 */
334*53ee8cc1Swenshuai.xi 
335*53ee8cc1Swenshuai.xi         tmp0 = MULTIPLY( tmp0, FIX_0_298631336 ); /* sqrt(2) * (-c1+c3+c5-c7) */
336*53ee8cc1Swenshuai.xi         tmp1 = MULTIPLY( tmp1, FIX_2_053119869 ); /* sqrt(2) * ( c1+c3-c5+c7) */
337*53ee8cc1Swenshuai.xi         tmp2 = MULTIPLY( tmp2, FIX_3_072711026 ); /* sqrt(2) * ( c1+c3+c5-c7) */
338*53ee8cc1Swenshuai.xi         tmp3 = MULTIPLY( tmp3, FIX_1_501321110 ); /* sqrt(2) * ( c1+c3-c5-c7) */
339*53ee8cc1Swenshuai.xi         z1 = MULTIPLY( z1, -FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
340*53ee8cc1Swenshuai.xi         z2 = MULTIPLY( z2, -FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
341*53ee8cc1Swenshuai.xi         z3 = MULTIPLY( z3, -FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
342*53ee8cc1Swenshuai.xi         z4 = MULTIPLY( z4, -FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
343*53ee8cc1Swenshuai.xi 
344*53ee8cc1Swenshuai.xi         z3 += z5;
345*53ee8cc1Swenshuai.xi         z4 += z5;
346*53ee8cc1Swenshuai.xi 
347*53ee8cc1Swenshuai.xi         tmp0 += z1 + z3;
348*53ee8cc1Swenshuai.xi         tmp1 += z2 + z4;
349*53ee8cc1Swenshuai.xi         tmp2 += z2 + z3;
350*53ee8cc1Swenshuai.xi         tmp3 += z1 + z4;
351*53ee8cc1Swenshuai.xi 
352*53ee8cc1Swenshuai.xi         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
353*53ee8cc1Swenshuai.xi 
354*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 0] = ( int )DESCALE( tmp10 + tmp3, CONST_BITS - PASS1_BITS );
355*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 7] = ( int )DESCALE( tmp10 - tmp3, CONST_BITS - PASS1_BITS );
356*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 1] = ( int )DESCALE( tmp11 + tmp2, CONST_BITS - PASS1_BITS );
357*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 6] = ( int )DESCALE( tmp11 - tmp2, CONST_BITS - PASS1_BITS );
358*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 2] = ( int )DESCALE( tmp12 + tmp1, CONST_BITS - PASS1_BITS );
359*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 5] = ( int )DESCALE( tmp12 - tmp1, CONST_BITS - PASS1_BITS );
360*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 3] = ( int )DESCALE( tmp13 + tmp0, CONST_BITS - PASS1_BITS );
361*53ee8cc1Swenshuai.xi         wsptr[DCTSIZE * 4] = ( int )DESCALE( tmp13 - tmp0, CONST_BITS - PASS1_BITS );
362*53ee8cc1Swenshuai.xi 
363*53ee8cc1Swenshuai.xi         inptr++;            /* advance pointers to next column */
364*53ee8cc1Swenshuai.xi         //quantptr++;
365*53ee8cc1Swenshuai.xi         wsptr++;
366*53ee8cc1Swenshuai.xi     }
367*53ee8cc1Swenshuai.xi 
368*53ee8cc1Swenshuai.xi     /* Pass 2: process rows from work array, store into output array. */
369*53ee8cc1Swenshuai.xi     /* Note that we must descale the results by a factor of 8 == 2**3, */
370*53ee8cc1Swenshuai.xi     /* and also undo the PASS1_BITS scaling. */
371*53ee8cc1Swenshuai.xi 
372*53ee8cc1Swenshuai.xi     wsptr = workspace;
373*53ee8cc1Swenshuai.xi     for ( ctr = 0; ctr < DCTSIZE; ctr++ )
374*53ee8cc1Swenshuai.xi     {
375*53ee8cc1Swenshuai.xi         ///outptr = output_buf[ctr] + output_col;
376*53ee8cc1Swenshuai.xi         /* Rows of zeroes can be exploited in the same way as we did with columns.
377*53ee8cc1Swenshuai.xi          * However, the column calculation has created many nonzero AC terms, so
378*53ee8cc1Swenshuai.xi          * the simplification applies less often (typically 5% to 10% of the time).
379*53ee8cc1Swenshuai.xi          * On machines with very fast multiplication, it's possible that the
380*53ee8cc1Swenshuai.xi          * test takes more time than it's worth.  In that case this section
381*53ee8cc1Swenshuai.xi          * may be commented out.
382*53ee8cc1Swenshuai.xi          */
383*53ee8cc1Swenshuai.xi 
384*53ee8cc1Swenshuai.xi         #if 1///ndef NO_ZERO_ROW_TEST
385*53ee8cc1Swenshuai.xi         if ( ( wsptr[1] | wsptr[2] | wsptr[3] | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7] ) == 0 )
386*53ee8cc1Swenshuai.xi         {
387*53ee8cc1Swenshuai.xi             /* AC terms all zero */
388*53ee8cc1Swenshuai.xi             int dcval = ( int )DESCALE( ( INT32 )wsptr[DCTSIZE*0], PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) & RANGE_MASK];
389*53ee8cc1Swenshuai.xi             clamp( dcval )
390*53ee8cc1Swenshuai.xi             outptr[0] = dcval;
391*53ee8cc1Swenshuai.xi             outptr[1] = dcval;
392*53ee8cc1Swenshuai.xi             outptr[2] = dcval;
393*53ee8cc1Swenshuai.xi             outptr[3] = dcval;
394*53ee8cc1Swenshuai.xi             outptr[4] = dcval;
395*53ee8cc1Swenshuai.xi             outptr[5] = dcval;
396*53ee8cc1Swenshuai.xi             outptr[6] = dcval;
397*53ee8cc1Swenshuai.xi             outptr[7] = dcval;
398*53ee8cc1Swenshuai.xi 
399*53ee8cc1Swenshuai.xi             wsptr += DCTSIZE;       /* advance pointer to next row */
400*53ee8cc1Swenshuai.xi             outptr += DCTSIZE;
401*53ee8cc1Swenshuai.xi             continue;
402*53ee8cc1Swenshuai.xi         }
403*53ee8cc1Swenshuai.xi         #endif
404*53ee8cc1Swenshuai.xi 
405*53ee8cc1Swenshuai.xi         /* Even part: reverse the even part of the forward DCT. */
406*53ee8cc1Swenshuai.xi         /* The rotator is sqrt(2)*c(-6). */
407*53ee8cc1Swenshuai.xi 
408*53ee8cc1Swenshuai.xi         z2 = ( INT32 )wsptr[2];
409*53ee8cc1Swenshuai.xi         z3 = ( INT32 )wsptr[6];
410*53ee8cc1Swenshuai.xi 
411*53ee8cc1Swenshuai.xi         z1 = MULTIPLY( z2 + z3, FIX_0_541196100 );
412*53ee8cc1Swenshuai.xi         tmp2 = z1 + MULTIPLY( z3, -FIX_1_847759065 );
413*53ee8cc1Swenshuai.xi         tmp3 = z1 + MULTIPLY( z2, FIX_0_765366865 );
414*53ee8cc1Swenshuai.xi 
415*53ee8cc1Swenshuai.xi         tmp0 = ( ( INT32 )wsptr[0] + ( INT32 )wsptr[4] ) << CONST_BITS;
416*53ee8cc1Swenshuai.xi         tmp1 = ( ( INT32 )wsptr[0] - ( INT32 )wsptr[4] ) << CONST_BITS;
417*53ee8cc1Swenshuai.xi 
418*53ee8cc1Swenshuai.xi         tmp10 = tmp0 + tmp3;
419*53ee8cc1Swenshuai.xi         tmp13 = tmp0 - tmp3;
420*53ee8cc1Swenshuai.xi         tmp11 = tmp1 + tmp2;
421*53ee8cc1Swenshuai.xi         tmp12 = tmp1 - tmp2;
422*53ee8cc1Swenshuai.xi 
423*53ee8cc1Swenshuai.xi         /* Odd part per figure 8; the matrix is unitary and hence its
424*53ee8cc1Swenshuai.xi          * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
425*53ee8cc1Swenshuai.xi          */
426*53ee8cc1Swenshuai.xi 
427*53ee8cc1Swenshuai.xi         tmp0 = ( INT32 )wsptr[7];
428*53ee8cc1Swenshuai.xi         tmp1 = ( INT32 )wsptr[5];
429*53ee8cc1Swenshuai.xi         tmp2 = ( INT32 )wsptr[3];
430*53ee8cc1Swenshuai.xi         tmp3 = ( INT32 )wsptr[1];
431*53ee8cc1Swenshuai.xi 
432*53ee8cc1Swenshuai.xi         z1 = tmp0 + tmp3;
433*53ee8cc1Swenshuai.xi         z2 = tmp1 + tmp2;
434*53ee8cc1Swenshuai.xi         z3 = tmp0 + tmp2;
435*53ee8cc1Swenshuai.xi         z4 = tmp1 + tmp3;
436*53ee8cc1Swenshuai.xi         z5 = MULTIPLY( z3 + z4, FIX_1_175875602 ); /* sqrt(2) * c3 */
437*53ee8cc1Swenshuai.xi 
438*53ee8cc1Swenshuai.xi         tmp0 = MULTIPLY( tmp0, FIX_0_298631336 ); /* sqrt(2) * (-c1+c3+c5-c7) */
439*53ee8cc1Swenshuai.xi         tmp1 = MULTIPLY( tmp1, FIX_2_053119869 ); /* sqrt(2) * ( c1+c3-c5+c7) */
440*53ee8cc1Swenshuai.xi         tmp2 = MULTIPLY( tmp2, FIX_3_072711026 ); /* sqrt(2) * ( c1+c3+c5-c7) */
441*53ee8cc1Swenshuai.xi         tmp3 = MULTIPLY( tmp3, FIX_1_501321110 ); /* sqrt(2) * ( c1+c3-c5-c7) */
442*53ee8cc1Swenshuai.xi         z1 = MULTIPLY( z1, -FIX_0_899976223 ); /* sqrt(2) * (c7-c3) */
443*53ee8cc1Swenshuai.xi         z2 = MULTIPLY( z2, -FIX_2_562915447 ); /* sqrt(2) * (-c1-c3) */
444*53ee8cc1Swenshuai.xi         z3 = MULTIPLY( z3, -FIX_1_961570560 ); /* sqrt(2) * (-c3-c5) */
445*53ee8cc1Swenshuai.xi         z4 = MULTIPLY( z4, -FIX_0_390180644 ); /* sqrt(2) * (c5-c3) */
446*53ee8cc1Swenshuai.xi 
447*53ee8cc1Swenshuai.xi         z3 += z5;
448*53ee8cc1Swenshuai.xi         z4 += z5;
449*53ee8cc1Swenshuai.xi 
450*53ee8cc1Swenshuai.xi         tmp0 += z1 + z3;
451*53ee8cc1Swenshuai.xi         tmp1 += z2 + z4;
452*53ee8cc1Swenshuai.xi         tmp2 += z2 + z3;
453*53ee8cc1Swenshuai.xi         tmp3 += z1 + z4;
454*53ee8cc1Swenshuai.xi 
455*53ee8cc1Swenshuai.xi         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
456*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp10 + tmp3, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
457*53ee8cc1Swenshuai.xi         clamp( i )
458*53ee8cc1Swenshuai.xi         outptr[0] = ( U8 )i;
459*53ee8cc1Swenshuai.xi 
460*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp10 - tmp3, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
461*53ee8cc1Swenshuai.xi         clamp( i )
462*53ee8cc1Swenshuai.xi         outptr[7] = ( U8 )i;
463*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp11 + tmp2, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
464*53ee8cc1Swenshuai.xi         clamp( i )
465*53ee8cc1Swenshuai.xi         outptr[1] = ( U8 )i;
466*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp11 - tmp2, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
467*53ee8cc1Swenshuai.xi         clamp( i )
468*53ee8cc1Swenshuai.xi         outptr[6] = ( U8 )i;
469*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp12 + tmp1, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
470*53ee8cc1Swenshuai.xi         clamp( i )
471*53ee8cc1Swenshuai.xi         outptr[2] = ( U8 )i;
472*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp12 - tmp1, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
473*53ee8cc1Swenshuai.xi         clamp( i )
474*53ee8cc1Swenshuai.xi         outptr[5] = ( U8 )i;
475*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp13 + tmp0, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
476*53ee8cc1Swenshuai.xi         clamp( i )
477*53ee8cc1Swenshuai.xi         outptr[3] = ( U8 )i;
478*53ee8cc1Swenshuai.xi         i = ( int )DESCALE( tmp13 - tmp0, CONST_BITS + PASS1_BITS + 3 ) + 128; ///range_limit[(int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
479*53ee8cc1Swenshuai.xi         clamp( i )
480*53ee8cc1Swenshuai.xi         outptr[4] = ( U8 )i;
481*53ee8cc1Swenshuai.xi 
482*53ee8cc1Swenshuai.xi         wsptr += DCTSIZE;       /* advance pointer to next row */
483*53ee8cc1Swenshuai.xi         outptr += DCTSIZE;
484*53ee8cc1Swenshuai.xi     }
485*53ee8cc1Swenshuai.xi }
486*53ee8cc1Swenshuai.xi 
487*53ee8cc1Swenshuai.xi 
488*53ee8cc1Swenshuai.xi #endif /* DCT_ISLOW_SUPPORTED */
489*53ee8cc1Swenshuai.xi 
490