xref: /utopia/UTPA2-700.0.x/modules/vdec_v3/hal/maxim/vpu_v3/Film.c (revision 53ee8cc121a030b8d368113ac3e966b4705770ef)
1*53ee8cc1Swenshuai.xi #ifndef MSOS_TYPE_LINUX_KERNEL
2*53ee8cc1Swenshuai.xi #include <stdlib.h>
3*53ee8cc1Swenshuai.xi #include <stdio.h>
4*53ee8cc1Swenshuai.xi #include <string.h>
5*53ee8cc1Swenshuai.xi #else
6*53ee8cc1Swenshuai.xi #include <linux/string.h>
7*53ee8cc1Swenshuai.xi #endif
8*53ee8cc1Swenshuai.xi 
9*53ee8cc1Swenshuai.xi #include "Film.h"
10*53ee8cc1Swenshuai.xi #include "drvBDMA.h"
11*53ee8cc1Swenshuai.xi #define USE_NEON
12*53ee8cc1Swenshuai.xi #define REMOVE_T0B0
13*53ee8cc1Swenshuai.xi //#define REMOVE_T1B1
14*53ee8cc1Swenshuai.xi #define ONLY_Detect_Crop_Region
15*53ee8cc1Swenshuai.xi //W  x H                (x_start, x_end) (y_start, y_end)
16*53ee8cc1Swenshuai.xi //720x576   -> window = (  0,  720 ) x (  0, 576 )
17*53ee8cc1Swenshuai.xi //1920x1080 -> window = (288, 1632 ) x (192, 896 )
18*53ee8cc1Swenshuai.xi //#define FILE_DBG
19*53ee8cc1Swenshuai.xi 
20*53ee8cc1Swenshuai.xi #if (!defined(MSOS_TYPE_NUTTX) && !defined(MSOS_TYPE_OPTEE) && !defined(MSOS_TYPE_LINUX_KERNEL) && !defined(ANDROID)) || defined(SUPPORT_X_MODEL_FEATURE)
21*53ee8cc1Swenshuai.xi 
22*53ee8cc1Swenshuai.xi #ifdef USE_NEON
23*53ee8cc1Swenshuai.xi #ifdef __arm__
24*53ee8cc1Swenshuai.xi #include <arm_neon.h>
25*53ee8cc1Swenshuai.xi #else
26*53ee8cc1Swenshuai.xi #include "NEON_2_SSE.h"
27*53ee8cc1Swenshuai.xi #endif
28*53ee8cc1Swenshuai.xi #endif
29*53ee8cc1Swenshuai.xi 
30*53ee8cc1Swenshuai.xi static int g_Height = 0;
31*53ee8cc1Swenshuai.xi static int g_Width = 0;
32*53ee8cc1Swenshuai.xi static int g_Height_remain = 0;
33*53ee8cc1Swenshuai.xi static int g_Width_remain  = 0;
34*53ee8cc1Swenshuai.xi static int g_pitch         = 0;
35*53ee8cc1Swenshuai.xi static int g_tile_width    = 0;
36*53ee8cc1Swenshuai.xi static int g_tile_height   = 0;
37*53ee8cc1Swenshuai.xi static int g_tile_w_mod    = 0;
38*53ee8cc1Swenshuai.xi static int g_tile_w_shift  = 0;
39*53ee8cc1Swenshuai.xi static int g_fod_win_en    = 0;
40*53ee8cc1Swenshuai.xi static int g_base_pos      = 0;
41*53ee8cc1Swenshuai.xi static int g_sceneChangeTh = 800000;
42*53ee8cc1Swenshuai.xi 
43*53ee8cc1Swenshuai.xi #define X_START 288
44*53ee8cc1Swenshuai.xi #define X_END   1632
45*53ee8cc1Swenshuai.xi #define Y_START 192
46*53ee8cc1Swenshuai.xi #define Y_END   896
47*53ee8cc1Swenshuai.xi 
48*53ee8cc1Swenshuai.xi /*MPEG-2 tile:
49*53ee8cc1Swenshuai.xi 16x32, stripe align 16bytes
50*53ee8cc1Swenshuai.xi 32x32, stripe align 32bytes*/
51*53ee8cc1Swenshuai.xi /*
52*53ee8cc1Swenshuai.xi AVS/AVS+:
53*53ee8cc1Swenshuai.xi tile16x32, stripe align 16bytes.
54*53ee8cc1Swenshuai.xi */
55*53ee8cc1Swenshuai.xi 
56*53ee8cc1Swenshuai.xi //tile width is 16 or 32
57*53ee8cc1Swenshuai.xi #define TILE_Width1 16
58*53ee8cc1Swenshuai.xi #define TILE_Width2 32
59*53ee8cc1Swenshuai.xi #define W_shift1    4
60*53ee8cc1Swenshuai.xi #define W_shift2    5
61*53ee8cc1Swenshuai.xi #define W_Mod1      ((1<<W_shift1)- 1)
62*53ee8cc1Swenshuai.xi #define W_Mod2      ((1<<W_shift2)- 1)
63*53ee8cc1Swenshuai.xi 
64*53ee8cc1Swenshuai.xi //tile height is 32
65*53ee8cc1Swenshuai.xi #define TILE_Height 32
66*53ee8cc1Swenshuai.xi #define H_shift     5
67*53ee8cc1Swenshuai.xi #define H_Mod       ((1<<H_shift)- 1)
68*53ee8cc1Swenshuai.xi 
69*53ee8cc1Swenshuai.xi 
70*53ee8cc1Swenshuai.xi #define det_length 4
71*53ee8cc1Swenshuai.xi #define lowTh  196
72*53ee8cc1Swenshuai.xi #define highTh 320
73*53ee8cc1Swenshuai.xi #define MAXFRAME (det_length + 1) /* on purpose */
74*53ee8cc1Swenshuai.xi 
75*53ee8cc1Swenshuai.xi static int g_ary_idx = -1;
76*53ee8cc1Swenshuai.xi static int g_bInitPhase = 1;
77*53ee8cc1Swenshuai.xi static int ary_frame_motion_ratio[MAXFRAME];
78*53ee8cc1Swenshuai.xi static int ary_IsSceneChangeTh[MAXFRAME];
79*53ee8cc1Swenshuai.xi 
80*53ee8cc1Swenshuai.xi static int frame_motion_T1B0 = 0;
81*53ee8cc1Swenshuai.xi static int frame_motion_T0B1 = 0;
82*53ee8cc1Swenshuai.xi static int frame_motion_T0B0 = 0;
83*53ee8cc1Swenshuai.xi static int frame_motion_T1B1 = 0;
84*53ee8cc1Swenshuai.xi 
85*53ee8cc1Swenshuai.xi static int reg_fod_mot_diff_th = 0;
86*53ee8cc1Swenshuai.xi static int reg_fod_med_filter_th = 0;
87*53ee8cc1Swenshuai.xi static int reg_fod_mot_diff_fm_th = 0;
88*53ee8cc1Swenshuai.xi static int reg_fod_med_filter_fm_th = 0;
89*53ee8cc1Swenshuai.xi #define OneFrameStep 96
90*53ee8cc1Swenshuai.xi #define minmax(v,a,b)       (((v)<(a))? (a) : ((v)>(b)) ? (b) : (v))
91*53ee8cc1Swenshuai.xi //#define DEBUG_INFO
92*53ee8cc1Swenshuai.xi #ifdef DEBUG_INFO
93*53ee8cc1Swenshuai.xi static unsigned long g_checksum = 0;
94*53ee8cc1Swenshuai.xi #define BASE 65521      /* largest prime smaller than 65536 */
95*53ee8cc1Swenshuai.xi #define NMAX 5552
96*53ee8cc1Swenshuai.xi /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
97*53ee8cc1Swenshuai.xi 
98*53ee8cc1Swenshuai.xi #define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
99*53ee8cc1Swenshuai.xi #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
100*53ee8cc1Swenshuai.xi #define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
101*53ee8cc1Swenshuai.xi #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
102*53ee8cc1Swenshuai.xi #define DO16(buf)   DO8(buf,0); DO8(buf,8);
103*53ee8cc1Swenshuai.xi 
104*53ee8cc1Swenshuai.xi #  define MOD(a) a %= BASE
105*53ee8cc1Swenshuai.xi #  define MOD28(a) a %= BASE
106*53ee8cc1Swenshuai.xi #  define MOD63(a) a %= BASE
107*53ee8cc1Swenshuai.xi 
108*53ee8cc1Swenshuai.xi 
109*53ee8cc1Swenshuai.xi /* ========================================================================= */
/*
 * Fold `len` bytes from `buf` into a running Adler-32 checksum.
 *
 *   adler : previous checksum (low 16 bits = byte sum, high 16 bits = sum of
 *           the running byte sums); pass 1 to start a new checksum.
 *   buf   : bytes to add. A null `buf` returns the initial value 1, except
 *           when len == 1, where buf[0] is read before the null test.
 *   len   : number of bytes in `buf`.
 *
 * Returns the updated checksum, (high << 16) | low.
 */
unsigned long adler32( unsigned long adler, const unsigned char *buf, unsigned int len )
{
  /* Same values as the file-level BASE / NMAX macros: the modulus, and the
   * longest run that can be summed before a reduction is required. */
  enum { ADLER_PRIME = 65521, ADLER_CHUNK = 5552 };

  unsigned long low  = adler & 0xffff;
  unsigned long high = (adler >> 16) & 0xffff;
  unsigned int i;

  /* Single byte: one conditional subtraction per sum instead of a modulo. */
  if (len == 1) {
    low += buf[0];
    if (low >= ADLER_PRIME) {
      low -= ADLER_PRIME;
    }
    high += low;
    if (high >= ADLER_PRIME) {
      high -= ADLER_PRIME;
    }
    return low | (high << 16);
  }

  /* Null buffer: hand back the initial Adler-32 value. */
  if (buf == 0) {
    return 1L;
  }

  /* Short input: the low sum can exceed the modulus at most once. */
  if (len < 16) {
    for (i = 0; i < len; i++) {
      low  += buf[i];
      high += low;
    }
    if (low >= ADLER_PRIME) {
      low -= ADLER_PRIME;
    }
    high %= ADLER_PRIME;
    return low | (high << 16);
  }

  /* Full chunks: sum ADLER_CHUNK bytes, then reduce both sums once. */
  for (; len >= ADLER_CHUNK; len -= ADLER_CHUNK, buf += ADLER_CHUNK) {
    for (i = 0; i < ADLER_CHUNK; i++) {
      low  += buf[i];
      high += low;
    }
    low  %= ADLER_PRIME;
    high %= ADLER_PRIME;
  }

  /* Tail shorter than one chunk; skip the reductions when nothing is left. */
  if (len != 0) {
    for (i = 0; i < len; i++) {
      low  += buf[i];
      high += low;
    }
    low  %= ADLER_PRIME;
    high %= ADLER_PRIME;
  }

  return low | (high << 16);
}
176*53ee8cc1Swenshuai.xi #endif
177*53ee8cc1Swenshuai.xi 
InitFilmDetection(void)178*53ee8cc1Swenshuai.xi void InitFilmDetection(void)
179*53ee8cc1Swenshuai.xi {
180*53ee8cc1Swenshuai.xi   g_ary_idx = -1;
181*53ee8cc1Swenshuai.xi   g_bInitPhase = 1;
182*53ee8cc1Swenshuai.xi   memset((uint8_t*) ary_frame_motion_ratio, 0, sizeof(int)*MAXFRAME);
183*53ee8cc1Swenshuai.xi   memset((uint8_t*) ary_IsSceneChangeTh, 0, sizeof(int)*MAXFRAME);
184*53ee8cc1Swenshuai.xi   frame_motion_T1B0 = 0;
185*53ee8cc1Swenshuai.xi   frame_motion_T0B1 = 0;
186*53ee8cc1Swenshuai.xi   frame_motion_T0B0 = 0;
187*53ee8cc1Swenshuai.xi   frame_motion_T1B1 = 0;
188*53ee8cc1Swenshuai.xi 
189*53ee8cc1Swenshuai.xi   reg_fod_mot_diff_th = 0;
190*53ee8cc1Swenshuai.xi   reg_fod_med_filter_th = 0;
191*53ee8cc1Swenshuai.xi   reg_fod_mot_diff_fm_th = 0;
192*53ee8cc1Swenshuai.xi   reg_fod_med_filter_fm_th = 0;
193*53ee8cc1Swenshuai.xi   g_Height = g_Width = g_Height_remain = g_Width_remain = 0;
194*53ee8cc1Swenshuai.xi   g_pitch  = g_tile_width = g_tile_height = g_tile_w_mod = 0;
195*53ee8cc1Swenshuai.xi   g_tile_w_shift = g_fod_win_en = 0;
196*53ee8cc1Swenshuai.xi   g_sceneChangeTh = 800000;
197*53ee8cc1Swenshuai.xi   g_base_pos = 0;
198*53ee8cc1Swenshuai.xi }
199*53ee8cc1Swenshuai.xi 
200*53ee8cc1Swenshuai.xi extern void _MApi_PrintMem(MS_VIRT u32Addr, MS_U32 u32Size);
201*53ee8cc1Swenshuai.xi 
copyFrameBuffer(unsigned char * dst,unsigned char * src,int bufsize)202*53ee8cc1Swenshuai.xi void copyFrameBuffer(unsigned char *dst, unsigned char *src, int bufsize)
203*53ee8cc1Swenshuai.xi {
204*53ee8cc1Swenshuai.xi #ifdef DEBUG_INFO
205*53ee8cc1Swenshuai.xi   unsigned long curimg = 0;
206*53ee8cc1Swenshuai.xi   g_checksum = adler32(g_checksum, src, 0x100);
207*53ee8cc1Swenshuai.xi   curimg = adler32(curimg, src, 0x100);
208*53ee8cc1Swenshuai.xi   printf("[prv-chk=0x%lx][cur-chk=0x%lx][size=0x100][osize=%d]\n", g_checksum, curimg, bufsize);
209*53ee8cc1Swenshuai.xi 
210*53ee8cc1Swenshuai.xi //  unsigned long curimg = 0;
211*53ee8cc1Swenshuai.xi //  g_checksum = adler32(g_checksum, src, bufsize);
212*53ee8cc1Swenshuai.xi //  curimg = adler32(curimg, src, bufsize);
213*53ee8cc1Swenshuai.xi //  printf("[prv-chk=0x%lx][cur-chk=0x%lx][size=%d]\n", g_checksum, curimg, bufsize);
214*53ee8cc1Swenshuai.xi #endif
215*53ee8cc1Swenshuai.xi //                MS_U32 u32Timer= MsOS_GetSystemTime();
216*53ee8cc1Swenshuai.xi 
217*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
218*53ee8cc1Swenshuai.xi #ifdef ONLY_Detect_Crop_Region
219*53ee8cc1Swenshuai.xi   if (g_fod_win_en == 1) {
220*53ee8cc1Swenshuai.xi      int base_pos = g_base_pos;
221*53ee8cc1Swenshuai.xi      int y;
222*53ee8cc1Swenshuai.xi      for( y = Y_START ; y < Y_END ; y += TILE_Height ) {
223*53ee8cc1Swenshuai.xi        //memcpy(dst+base_pos, src+base_pos, (TILE_Height * (X_END - X_START)));
224*53ee8cc1Swenshuai.xi        MDrv_BDMA_MemCopy(MsOS_VA2PA((MS_VIRT)(src+base_pos)), MsOS_VA2PA((MS_VIRT)(dst+base_pos)), (TILE_Height * (X_END - X_START)));
225*53ee8cc1Swenshuai.xi 
226*53ee8cc1Swenshuai.xi        base_pos += (TILE_Height * g_pitch);
227*53ee8cc1Swenshuai.xi      }
228*53ee8cc1Swenshuai.xi   } else
229*53ee8cc1Swenshuai.xi #endif
230*53ee8cc1Swenshuai.xi #endif
231*53ee8cc1Swenshuai.xi   {
232*53ee8cc1Swenshuai.xi //    memcpy(dst, src, bufsize);
233*53ee8cc1Swenshuai.xi        MDrv_BDMA_MemCopy(MsOS_VA2PA((MS_VIRT)src), MsOS_VA2PA((MS_VIRT)dst), bufsize);
234*53ee8cc1Swenshuai.xi   }
235*53ee8cc1Swenshuai.xi //                printf("cp:[%dms]\n", MsOS_GetSystemTime()-u32Timer);
236*53ee8cc1Swenshuai.xi 
237*53ee8cc1Swenshuai.xi }
238*53ee8cc1Swenshuai.xi 
/*
 * Cache the frame and tile geometry described by `fod` in the file-scope
 * globals and derive the pitch, remainders and detection-window settings.
 *
 *   fod : frame description; height, width, tile_width, tile_height and
 *         pitch are read.
 *
 * Returns 0 when everything was accepted as given, 1 when a warning was
 * raised: tile height != TILE_Height, tile width not TILE_Width1 or
 * TILE_Width2, or the computed pitch differs from fod->pitch. Invalid tile
 * sizes are replaced by TILE_Height / TILE_Width1.
 *
 * The height remainder is computed only after the tile height has been
 * validated. It used to be taken modulo the raw fod->tile_height, which
 * divided by zero for tile_height == 0 and otherwise disagreed with the
 * corrected g_tile_height.
 */
int setHeight_Width(FODInfo *fod)
{
  int retval = 0;
  int pad_w;

  g_Height      = fod->height;
  g_Width       = fod->width;
  g_tile_width  = fod->tile_width;
  g_tile_height = fod->tile_height;

  if (g_tile_height != TILE_Height) {
    printf("[FOD][Warning] TILE Height is invalid\n");
    g_tile_height = TILE_Height;
    retval = 1;
  }

  if (g_tile_width == TILE_Width1) {
    g_tile_w_mod = W_Mod1;
    g_tile_w_shift = W_shift1;
  } else if (g_tile_width == TILE_Width2) {
    g_tile_w_mod = W_Mod2;
    g_tile_w_shift = W_shift2;
  } else {
    printf("[FOD][Warning] TILE Width is invalid\n");
    g_tile_w_mod = W_Mod1;
    g_tile_width = TILE_Width1;
    g_tile_w_shift = W_shift1;
    retval = 1;
  }

  /* Both divisors are now known-good, non-zero tile dimensions. */
  g_Height_remain = (g_Height % g_tile_height);
  g_Width_remain  = (g_Width  % g_tile_width);

  /* Pitch = width rounded up to a whole number of tiles. */
  pad_w = (g_Width_remain != 0) ? g_tile_width - g_Width_remain : 0;
  g_pitch = g_Width + pad_w;

#ifdef ONLY_Detect_Crop_Region
  if (g_Height >= 1080 && g_Width >= 1920) {
    /* Byte offset of (X_START, Y_START) in the tiled layout. */
    int base_y = g_tile_width*(Y_START & H_Mod) + (Y_START  >> H_shift) * (TILE_Height * g_pitch);
    int base_x =  (X_START >> g_tile_w_shift) * (g_tile_width * TILE_Height);
    int x_pos  =  (X_START & g_tile_w_mod) + base_x;
    g_fod_win_en = 1;
    g_base_pos = base_y + x_pos;
    g_sceneChangeTh = 200000;

  } else
#endif
  {
    g_fod_win_en = 0;
    g_sceneChangeTh = 800000;
  }

  if (g_pitch != fod->pitch) {
    printf("[FOD][Warning] pitch mismatch: h=%d/w=%d/p=%d/Tw=%d/Th=%d\n",
            g_Height,
            g_Width,
            g_pitch,
            fod->tile_width,
            fod->tile_height);
    retval = 1;
  }
  return retval;
}
298*53ee8cc1Swenshuai.xi 
getTileWidth(void)299*53ee8cc1Swenshuai.xi int  getTileWidth(void)
300*53ee8cc1Swenshuai.xi {
301*53ee8cc1Swenshuai.xi   return g_tile_width;
302*53ee8cc1Swenshuai.xi }
getTileHeight(void)303*53ee8cc1Swenshuai.xi int  getTileHeight(void)
304*53ee8cc1Swenshuai.xi {
305*53ee8cc1Swenshuai.xi   return g_tile_height;
306*53ee8cc1Swenshuai.xi }
307*53ee8cc1Swenshuai.xi 
getFodWinEnable(void)308*53ee8cc1Swenshuai.xi int getFodWinEnable(void)
309*53ee8cc1Swenshuai.xi {
310*53ee8cc1Swenshuai.xi   return g_fod_win_en;
311*53ee8cc1Swenshuai.xi }
312*53ee8cc1Swenshuai.xi 
detFieldOrderFW(void)313*53ee8cc1Swenshuai.xi int detFieldOrderFW(void)
314*53ee8cc1Swenshuai.xi {
315*53ee8cc1Swenshuai.xi   int inv_det = 1;
316*53ee8cc1Swenshuai.xi   int ratio_sum2 = 0;
317*53ee8cc1Swenshuai.xi   int i;
318*53ee8cc1Swenshuai.xi 
319*53ee8cc1Swenshuai.xi   if ( g_bInitPhase == 1 && g_ary_idx < det_length) {
320*53ee8cc1Swenshuai.xi     return 0;
321*53ee8cc1Swenshuai.xi   }
322*53ee8cc1Swenshuai.xi   g_bInitPhase = 0;
323*53ee8cc1Swenshuai.xi 
324*53ee8cc1Swenshuai.xi     for (i = 0; i < det_length; i++)
325*53ee8cc1Swenshuai.xi     {
326*53ee8cc1Swenshuai.xi       int ary_idx = ((g_ary_idx-i) + MAXFRAME) % MAXFRAME;
327*53ee8cc1Swenshuai.xi 
328*53ee8cc1Swenshuai.xi       if(ary_IsSceneChangeTh[ary_idx] == 1) {
329*53ee8cc1Swenshuai.xi #ifdef DEBUG_INFO
330*53ee8cc1Swenshuai.xi         printf("[FOD] ret=0 scene changed\n");
331*53ee8cc1Swenshuai.xi #endif
332*53ee8cc1Swenshuai.xi         return 0;
333*53ee8cc1Swenshuai.xi       }
334*53ee8cc1Swenshuai.xi 
335*53ee8cc1Swenshuai.xi       ratio_sum2 += minmax( (256 - ary_frame_motion_ratio[ary_idx]), 0, OneFrameStep);
336*53ee8cc1Swenshuai.xi      }
337*53ee8cc1Swenshuai.xi 
338*53ee8cc1Swenshuai.xi      ratio_sum2 = minmax( ratio_sum2, 0, 255 );
339*53ee8cc1Swenshuai.xi 
340*53ee8cc1Swenshuai.xi     if ( ratio_sum2 > lowTh ) {
341*53ee8cc1Swenshuai.xi       inv_det = 1;
342*53ee8cc1Swenshuai.xi     } else {
343*53ee8cc1Swenshuai.xi       inv_det = 0;
344*53ee8cc1Swenshuai.xi     }
345*53ee8cc1Swenshuai.xi #ifdef DEBUG_INFO
346*53ee8cc1Swenshuai.xi     printf("[FOD] ret=%d\n", inv_det);
347*53ee8cc1Swenshuai.xi #endif
348*53ee8cc1Swenshuai.xi     return inv_det;
349*53ee8cc1Swenshuai.xi   }
350*53ee8cc1Swenshuai.xi 
351*53ee8cc1Swenshuai.xi 
352*53ee8cc1Swenshuai.xi #ifdef USE_NEON
/* Lane-wise median of three unsigned 8-bit vectors. */
static uint8x8_t vmed3_i8 (uint8x8_t A, uint8x8_t B, uint8x8_t C)
{
  const uint8x8_t smallerAB = vmin_u8(A, B);
  const uint8x8_t largerAB  = vmax_u8(A, B);

  /* median = max(min(A,B), min(max(A,B), C)) */
  return vmax_u8(smallerAB, vmin_u8(largerAB, C));
}
357*53ee8cc1Swenshuai.xi 
/*
 * Allocate at least `size` usable bytes starting on a 16-byte boundary.
 *
 *   org  : out-parameter that receives the raw malloc() pointer. The caller
 *          owns it and must pass *org (not the returned pointer) to free().
 *          Set to NULL on failure.
 *   size : number of usable bytes required; must not be negative.
 *
 * Returns the aligned pointer inside the allocation, or NULL when `org` is
 * NULL, `size` is negative or the allocation fails.
 */
uint8_t* Create16ByteAligned(uint8_t** org, int size)
{
    uint8_t *raw;

    if (org == NULL) {
        return NULL;
    }
    *org = NULL;

    if (size < 0) {
        return NULL;
    }

    /* Pad in size_t so that size + 15 cannot overflow a signed int. */
    raw = (uint8_t *) malloc((size_t)size + 15u);
    if (raw == NULL) {
        return NULL;
    }

    *org = raw;
    /* Round up to the next multiple of 16 (at most 15 bytes forward). */
    return (uint8_t *) (((uintptr_t)raw + 15u) & ~ (uintptr_t)0x0F);
}
363*53ee8cc1Swenshuai.xi #ifdef FILE_DBG2
/*
 * Debug helper: append the eight lanes of `rst` (in hex) to "debug.txt",
 * tagged with `myname` and the (x, y) position.
 *
 * Silently does nothing when the file cannot be opened, rather than passing
 * a NULL stream to fprintf()/fclose().
 */
static inline void myprintf(uint8x8_t rst, int x, int y, char* myname)
{
  FILE *fptr = fopen("debug.txt","a");

  if (fptr == NULL) {
    return;
  }

  fprintf(fptr, "%s[x=%d, y=%d] %x %x %x %x %x %x %x %x\n",
           myname, x, y,
           vget_lane_u8(rst, 0),
           vget_lane_u8(rst, 1),
           vget_lane_u8(rst, 2),
           vget_lane_u8(rst, 3),
           vget_lane_u8(rst, 4),
           vget_lane_u8(rst, 5),
           vget_lane_u8(rst, 6),
           vget_lane_u8(rst, 7));
  fclose(fptr);
}
379*53ee8cc1Swenshuai.xi #endif
380*53ee8cc1Swenshuai.xi 
detFieldOrderBot(unsigned char * preField,unsigned char * curField,unsigned char * nxtField,unsigned char * nx2Field,int CurIsBot)381*53ee8cc1Swenshuai.xi void detFieldOrderBot( unsigned char*preField, unsigned char*curField, unsigned char*nxtField, unsigned char*nx2Field, int CurIsBot)
382*53ee8cc1Swenshuai.xi {
383*53ee8cc1Swenshuai.xi   uint8x8_t mot_diff_th      = vdup_n_u8(2);    //o_reg_fod_mot_diff_th 2
384*53ee8cc1Swenshuai.xi   uint8x8_t med_filter_th    = vdup_n_u8(2);    //o_reg_fod_med_filter_th 2
385*53ee8cc1Swenshuai.xi   uint8x8_t mot_diff_fm_th   = vdup_n_u8(2);    //o_reg_fod_mot_diff_fm_th 2
386*53ee8cc1Swenshuai.xi   uint8x8_t med_filter_fm_th = vdup_n_u8(2);    //o_reg_fod_med_filter_fm_th 2
387*53ee8cc1Swenshuai.xi 
388*53ee8cc1Swenshuai.xi 
389*53ee8cc1Swenshuai.xi   uint8x8_t filter1, filter2, diff2;
390*53ee8cc1Swenshuai.xi   uint8x8_t filter4;
391*53ee8cc1Swenshuai.xi   uint8x8_t rst;
392*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
393*53ee8cc1Swenshuai.xi   uint8x8_t filter3;
394*53ee8cc1Swenshuai.xi #endif
395*53ee8cc1Swenshuai.xi 
396*53ee8cc1Swenshuai.xi   int yu, yd, y;
397*53ee8cc1Swenshuai.xi 
398*53ee8cc1Swenshuai.xi   int x_start = (g_fod_win_en == 1) ? X_START : 0;
399*53ee8cc1Swenshuai.xi   int x_end = (g_fod_win_en == 1) ? X_END : g_Width;
400*53ee8cc1Swenshuai.xi   int y_start = (g_fod_win_en == 1) ? Y_START : 0;
401*53ee8cc1Swenshuai.xi   int y_end = (g_fod_win_en == 1) ? Y_END : g_Height;
402*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
403*53ee8cc1Swenshuai.xi   int stride = g_tile_width;
404*53ee8cc1Swenshuai.xi   int base_x = 0;
405*53ee8cc1Swenshuai.xi #else
406*53ee8cc1Swenshuai.xi   int stride = g_Width;
407*53ee8cc1Swenshuai.xi #endif
408*53ee8cc1Swenshuai.xi 
409*53ee8cc1Swenshuai.xi   frame_motion_T1B0 = 0;
410*53ee8cc1Swenshuai.xi   frame_motion_T0B1 = 0;
411*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
412*53ee8cc1Swenshuai.xi   frame_motion_T0B0 = 0; //keep T1B1, remove T0B0
413*53ee8cc1Swenshuai.xi #endif
414*53ee8cc1Swenshuai.xi   frame_motion_T1B1 = 0;
415*53ee8cc1Swenshuai.xi 
416*53ee8cc1Swenshuai.xi   for( y = y_start ; y < y_end ; y+=2 )
417*53ee8cc1Swenshuai.xi   {
418*53ee8cc1Swenshuai.xi     int base_yu, base_y, base_yd, x;
419*53ee8cc1Swenshuai.xi     yu = y - 2;
420*53ee8cc1Swenshuai.xi     if (yu <= y_start)
421*53ee8cc1Swenshuai.xi       yu = y;
422*53ee8cc1Swenshuai.xi 
423*53ee8cc1Swenshuai.xi     yd = y + 2;
424*53ee8cc1Swenshuai.xi     if (yd >= y_end)
425*53ee8cc1Swenshuai.xi       yd = y;
426*53ee8cc1Swenshuai.xi 
427*53ee8cc1Swenshuai.xi 
428*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
429*53ee8cc1Swenshuai.xi     base_yu = stride*(yu & H_Mod) + (yu >> H_shift) * (TILE_Height * g_pitch);
430*53ee8cc1Swenshuai.xi     base_y  = stride*(y  & H_Mod) + (y  >> H_shift) * (TILE_Height * g_pitch);
431*53ee8cc1Swenshuai.xi     base_yd = stride*(yd & H_Mod)+  (yd >> H_shift) * (TILE_Height * g_pitch);
432*53ee8cc1Swenshuai.xi #else
433*53ee8cc1Swenshuai.xi     base_yu = stride*yu;
434*53ee8cc1Swenshuai.xi     base_y  = stride*y;
435*53ee8cc1Swenshuai.xi     base_yd = stride*yd;
436*53ee8cc1Swenshuai.xi #endif
437*53ee8cc1Swenshuai.xi 
438*53ee8cc1Swenshuai.xi     for( x = x_start ; x < x_end; x+=8 )
439*53ee8cc1Swenshuai.xi     {
440*53ee8cc1Swenshuai.xi       uint8x8_t f0_y0, f1_y0, f2_y0, f3_y0;
441*53ee8cc1Swenshuai.xi       uint8x8_t f0_y1, f2_y1;
442*53ee8cc1Swenshuai.xi       uint8x8_t r_filter1, r_filter2, r_filter4;
443*53ee8cc1Swenshuai.xi       uint8x8_t r_diff2;
444*53ee8cc1Swenshuai.xi       int x_pos;
445*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
446*53ee8cc1Swenshuai.xi       uint8x8_t  r_filter3;
447*53ee8cc1Swenshuai.xi #endif
448*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
449*53ee8cc1Swenshuai.xi       base_x = (x >> g_tile_w_shift) * (g_tile_width * TILE_Height);
450*53ee8cc1Swenshuai.xi       x_pos =  (x & g_tile_w_mod) + base_x;
451*53ee8cc1Swenshuai.xi #else
452*53ee8cc1Swenshuai.xi       x_pos = x;
453*53ee8cc1Swenshuai.xi #endif
454*53ee8cc1Swenshuai.xi 
455*53ee8cc1Swenshuai.xi       /*
456*53ee8cc1Swenshuai.xi       int test_f1_yu = curField.Y(x, yu)>>5;
457*53ee8cc1Swenshuai.xi       int test_f3_yu = nx2Field.Y(x, yu)>>5;
458*53ee8cc1Swenshuai.xi       int test_f0_y0 = preField.Y(x, y)>>5;
459*53ee8cc1Swenshuai.xi       int test_f1_y0 = curField.Y(x, y)>>5;
460*53ee8cc1Swenshuai.xi       int test_f2_y0 = nxtField.Y(x, y)>>5;
461*53ee8cc1Swenshuai.xi       int test_f3_y0 = nx2Field.Y(x, y)>>5;
462*53ee8cc1Swenshuai.xi       int test_f0_y1 = preField.Y(x, yd)>>5;
463*53ee8cc1Swenshuai.xi       int test_f2_y1 = nxtField.Y(x, yd)>>5;
464*53ee8cc1Swenshuai.xi       */
465*53ee8cc1Swenshuai.xi 
466*53ee8cc1Swenshuai.xi       f0_y0 = vshr_n_u8(vld1_u8(preField + base_y + x_pos ), 3);
467*53ee8cc1Swenshuai.xi       f1_y0 = vshr_n_u8(vld1_u8(curField + base_y + x_pos ), 3);
468*53ee8cc1Swenshuai.xi       f2_y0 = vshr_n_u8(vld1_u8(nxtField + base_y + x_pos ), 3);
469*53ee8cc1Swenshuai.xi       f3_y0 = vshr_n_u8(vld1_u8(nx2Field + base_y + x_pos ), 3);
470*53ee8cc1Swenshuai.xi       f0_y1 = vshr_n_u8(vld1_u8(preField + base_yd + x_pos), 3);
471*53ee8cc1Swenshuai.xi       f2_y1 = vshr_n_u8(vld1_u8(nxtField + base_yd + x_pos), 3);
472*53ee8cc1Swenshuai.xi       //myprintf(f0_y0, x, y, "f0_y0");
473*53ee8cc1Swenshuai.xi 
474*53ee8cc1Swenshuai.xi       /*
475*53ee8cc1Swenshuai.xi       f1_yu = vld1_u8(p_curField+base_yu);
476*53ee8cc1Swenshuai.xi       f3_yu = vld1_u8(p_nx2Field+base_yu);
477*53ee8cc1Swenshuai.xi       f0_y0 = vld1_u8(p_preField+base_y );
478*53ee8cc1Swenshuai.xi       f1_y0 = vld1_u8(p_curField+base_y );
479*53ee8cc1Swenshuai.xi       f2_y0 = vld1_u8(p_nxtField+base_y );
480*53ee8cc1Swenshuai.xi       f3_y0 = vld1_u8(p_nx2Field+base_y );
481*53ee8cc1Swenshuai.xi       f0_y1 = vld1_u8(p_preField+base_yd);
482*53ee8cc1Swenshuai.xi       f2_y1 = vld1_u8(p_nxtField+base_yd);
483*53ee8cc1Swenshuai.xi 
484*53ee8cc1Swenshuai.xi       f1_yu = vshr_n_u8(f1_yu, 5);
485*53ee8cc1Swenshuai.xi       f3_yu = vshr_n_u8(f3_yu, 5);
486*53ee8cc1Swenshuai.xi       f0_y0 = vshr_n_u8(f0_y0, 5);
487*53ee8cc1Swenshuai.xi       f1_y0 = vshr_n_u8(f1_y0, 5);
488*53ee8cc1Swenshuai.xi       f2_y0 = vshr_n_u8(f2_y0, 5);
489*53ee8cc1Swenshuai.xi       f3_y0 = vshr_n_u8(f3_y0, 5);
490*53ee8cc1Swenshuai.xi       f0_y1 = vshr_n_u8(f0_y1, 5);
491*53ee8cc1Swenshuai.xi       f2_y1 = vshr_n_u8(f2_y1, 5);*/
492*53ee8cc1Swenshuai.xi 
493*53ee8cc1Swenshuai.xi 
494*53ee8cc1Swenshuai.xi       /*
495*53ee8cc1Swenshuai.xi       diff1 = abs(f0_y0 - f2_y0);
496*53ee8cc1Swenshuai.xi       if (diff1 > mot_diff_th)
497*53ee8cc1Swenshuai.xi       {
498*53ee8cc1Swenshuai.xi       filter1 = abs(med3(f1_yu, f2_y0, f1_y0) - f2_y0);
499*53ee8cc1Swenshuai.xi       if ( (filter1 > med_filter_th) )
500*53ee8cc1Swenshuai.xi       frame_motion_T1B0++;
501*53ee8cc1Swenshuai.xi 
502*53ee8cc1Swenshuai.xi       filter2 = abs(med3(f3_yu, f0_y0, f3_y0) - f0_y0);
503*53ee8cc1Swenshuai.xi       if ( (filter2 > med_filter_th) )
504*53ee8cc1Swenshuai.xi       frame_motion_T0B1++;
505*53ee8cc1Swenshuai.xi       }
506*53ee8cc1Swenshuai.xi       */
507*53ee8cc1Swenshuai.xi 
508*53ee8cc1Swenshuai.xi       //if(realline_cycl==1)
509*53ee8cc1Swenshuai.xi       /*
510*53ee8cc1Swenshuai.xi       diff2 = abs(f1_y0 - f3_y0);
511*53ee8cc1Swenshuai.xi       if (diff2 > mot_diff_th)
512*53ee8cc1Swenshuai.xi       {
513*53ee8cc1Swenshuai.xi         filter1 = abs(med3(f2_y0, f1_y0, f2_y1) - f1_y0);
514*53ee8cc1Swenshuai.xi         if ( (filter1 > med_filter_th) )
515*53ee8cc1Swenshuai.xi           frame_motion_T1B0++;
516*53ee8cc1Swenshuai.xi 
517*53ee8cc1Swenshuai.xi         filter2 = abs(med3(f0_y0, f3_y0, f0_y1) - f3_y0);
518*53ee8cc1Swenshuai.xi         if ( (filter2 > med_filter_th) )
519*53ee8cc1Swenshuai.xi           frame_motion_T0B1++;
520*53ee8cc1Swenshuai.xi       }*/
521*53ee8cc1Swenshuai.xi 
522*53ee8cc1Swenshuai.xi       diff2   = vabd_u8(f1_y0, f3_y0);                                   //diff2 = abs(f1_y0 - f3_y0);
523*53ee8cc1Swenshuai.xi       r_diff2 = vcgt_u8(diff2, mot_diff_th);                             //if (diff2 > mot_diff_th)
524*53ee8cc1Swenshuai.xi       filter1 = vabd_u8(vmed3_i8(f2_y0, f1_y0, f2_y1), f1_y0);           //filter1 = abs(med3(f2_y0, f1_y0, f2_y1) - f1_y0);
525*53ee8cc1Swenshuai.xi       r_filter1 = vcgt_u8(filter1, med_filter_th);                       //if ( (filter1 > med_filter_th) )
526*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff2, r_filter1);
527*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
528*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 0);               //frame_motion_T1B0++;
529*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 1);
530*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 2);
531*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 3);
532*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 4);
533*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 5);
534*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 6);
535*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 7);
536*53ee8cc1Swenshuai.xi 
537*53ee8cc1Swenshuai.xi       filter2 = vabd_u8(vmed3_i8(f0_y0, f3_y0, f0_y1), f3_y0);           //filter2 = abs(med3(f0_y0, f3_y0, f0_y1) - f3_y0);
538*53ee8cc1Swenshuai.xi       r_filter2 = vcgt_u8(filter2, med_filter_th);                       //if ( (filter2 > med_filter_th) )
539*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff2, r_filter2);
540*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
541*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 0);               //frame_motion_T0B1++;
542*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 1);
543*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 2);
544*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 3);
545*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 4);
546*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 5);
547*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 6);
548*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 7);
549*53ee8cc1Swenshuai.xi       /*
550*53ee8cc1Swenshuai.xi       if (diff2 > mot_diff_fm_th)
551*53ee8cc1Swenshuai.xi       {
552*53ee8cc1Swenshuai.xi         filter3 = abs(med3(f0_y0, f1_y0, f0_y1) - f1_y0);
553*53ee8cc1Swenshuai.xi         if ( (filter3 > med_filter_fm_th) )
554*53ee8cc1Swenshuai.xi           frame_motion_T0B0++;
555*53ee8cc1Swenshuai.xi 
556*53ee8cc1Swenshuai.xi         filter4 = abs(med3(f2_y0, f3_y0, f2_y1) - f3_y0);
557*53ee8cc1Swenshuai.xi         if ( (filter4 > med_filter_fm_th) )
558*53ee8cc1Swenshuai.xi           frame_motion_T1B1++;
559*53ee8cc1Swenshuai.xi       }*/
560*53ee8cc1Swenshuai.xi       r_diff2 = vcgt_u8(diff2, mot_diff_fm_th);                          //if (diff2 > mot_diff_fm_th)
561*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
562*53ee8cc1Swenshuai.xi       filter3 = vabd_u8(vmed3_i8(f0_y0, f1_y0, f0_y1), f1_y0);           //filter3 = abs(med3(f0_y0, f1_y0, f0_y1) - f1_y0);
563*53ee8cc1Swenshuai.xi       r_filter3 = vcgt_u8(filter3, med_filter_fm_th);                    //if ( (filter3 > med_filter_fm_th) )
564*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff2, r_filter3);
565*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
566*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 0);               //frame_motion_T0B0++;
567*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 1);
568*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 2);
569*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 3);
570*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 4);
571*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 5);
572*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 6);
573*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 7);
574*53ee8cc1Swenshuai.xi #endif
575*53ee8cc1Swenshuai.xi #ifndef REMOVE_T1B1
576*53ee8cc1Swenshuai.xi       filter4 = vabd_u8(vmed3_i8(f2_y0, f3_y0, f2_y1), f3_y0);           //filter4 = abs(med3(f2_y0, f3_y0, f2_y1) - f3_y0);
577*53ee8cc1Swenshuai.xi       r_filter4 = vcgt_u8(filter4, med_filter_fm_th);                    //if ( (filter4 > med_filter_fm_th) )
578*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff2, r_filter4);
579*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
580*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 0);               //frame_motion_T1B1++;
581*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 1);
582*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 2);
583*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 3);
584*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 4);
585*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 5);
586*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 6);
587*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 7);
588*53ee8cc1Swenshuai.xi #endif
589*53ee8cc1Swenshuai.xi     }
590*53ee8cc1Swenshuai.xi   }
591*53ee8cc1Swenshuai.xi }
detFieldOrderTop(unsigned char * preField,unsigned char * curField,unsigned char * nxtField,unsigned char * nx2Field,int CurIsBot)592*53ee8cc1Swenshuai.xi void detFieldOrderTop( unsigned char*preField, unsigned char*curField, unsigned char*nxtField, unsigned char*nx2Field, int CurIsBot )
593*53ee8cc1Swenshuai.xi {
594*53ee8cc1Swenshuai.xi   uint8x8_t mot_diff_th      = vdup_n_u8(2);    //o_reg_fod_mot_diff_th 2
595*53ee8cc1Swenshuai.xi   uint8x8_t med_filter_th    = vdup_n_u8(2);    //o_reg_fod_med_filter_th 2
596*53ee8cc1Swenshuai.xi   uint8x8_t mot_diff_fm_th   = vdup_n_u8(2);    //o_reg_fod_mot_diff_fm_th 2
597*53ee8cc1Swenshuai.xi   uint8x8_t med_filter_fm_th = vdup_n_u8(2);    //o_reg_fod_med_filter_fm_th 2
598*53ee8cc1Swenshuai.xi 
599*53ee8cc1Swenshuai.xi   uint8x8_t filter1, filter2;
600*53ee8cc1Swenshuai.xi   uint8x8_t filter4;
601*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
602*53ee8cc1Swenshuai.xi   uint8x8_t filter3;
603*53ee8cc1Swenshuai.xi #endif
604*53ee8cc1Swenshuai.xi 
605*53ee8cc1Swenshuai.xi   int yu, yd, y;
606*53ee8cc1Swenshuai.xi 
607*53ee8cc1Swenshuai.xi   int x_start = (g_fod_win_en == 1) ? X_START : 0;
608*53ee8cc1Swenshuai.xi   int x_end = (g_fod_win_en == 1) ? X_END : g_Width;
609*53ee8cc1Swenshuai.xi   int y_start = (g_fod_win_en == 1) ? Y_START : 0;
610*53ee8cc1Swenshuai.xi   int y_end = (g_fod_win_en == 1) ? Y_END : g_Height;
611*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
612*53ee8cc1Swenshuai.xi   int stride = g_tile_width;
613*53ee8cc1Swenshuai.xi   int base_x = 0;
614*53ee8cc1Swenshuai.xi #else
615*53ee8cc1Swenshuai.xi   int stride = g_Width;
616*53ee8cc1Swenshuai.xi #endif
617*53ee8cc1Swenshuai.xi 
618*53ee8cc1Swenshuai.xi   frame_motion_T1B0 = 0;
619*53ee8cc1Swenshuai.xi   frame_motion_T0B1 = 0;
620*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
621*53ee8cc1Swenshuai.xi   frame_motion_T0B0 = 0;
622*53ee8cc1Swenshuai.xi #endif
623*53ee8cc1Swenshuai.xi   frame_motion_T1B1 = 0;
624*53ee8cc1Swenshuai.xi 
625*53ee8cc1Swenshuai.xi   for( y = y_start ; y < y_end ; y+=2 )
626*53ee8cc1Swenshuai.xi   {
627*53ee8cc1Swenshuai.xi     int base_yu, base_y, base_yd, x;
628*53ee8cc1Swenshuai.xi     yu = y - 2;
629*53ee8cc1Swenshuai.xi     if (yu < y_start)
630*53ee8cc1Swenshuai.xi       yu = y;
631*53ee8cc1Swenshuai.xi 
632*53ee8cc1Swenshuai.xi     yd = y + 2;
633*53ee8cc1Swenshuai.xi     if (yd >= y_end)
634*53ee8cc1Swenshuai.xi       yd = y;
635*53ee8cc1Swenshuai.xi 
636*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
637*53ee8cc1Swenshuai.xi     base_yu = stride*(yu & H_Mod) + (yu >> H_shift) * (TILE_Height * g_pitch);
638*53ee8cc1Swenshuai.xi     base_y  = stride*(y  & H_Mod) + (y  >> H_shift) * (TILE_Height * g_pitch);
639*53ee8cc1Swenshuai.xi     base_yd = stride*(yd & H_Mod)+  (yd >> H_shift) * (TILE_Height * g_pitch);
640*53ee8cc1Swenshuai.xi #else
641*53ee8cc1Swenshuai.xi     base_yu = stride*yu;
642*53ee8cc1Swenshuai.xi     base_y  = stride*y;
643*53ee8cc1Swenshuai.xi     base_yd = stride*yd;
644*53ee8cc1Swenshuai.xi #endif
645*53ee8cc1Swenshuai.xi 
646*53ee8cc1Swenshuai.xi 
647*53ee8cc1Swenshuai.xi 
648*53ee8cc1Swenshuai.xi     for( x = x_start ; x < x_end; x+=8 )
649*53ee8cc1Swenshuai.xi     {
650*53ee8cc1Swenshuai.xi       uint8x8_t f0_y0, f1_y0, f2_y0, f3_y0, f2_yu, f0_yu;
651*53ee8cc1Swenshuai.xi       uint8x8_t  r_filter1, r_filter2, r_filter4;
652*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
653*53ee8cc1Swenshuai.xi       uint8x8_t r_filter3;
654*53ee8cc1Swenshuai.xi       uint8x8_t diff3;
655*53ee8cc1Swenshuai.xi #endif
656*53ee8cc1Swenshuai.xi       uint8x8_t rst;
657*53ee8cc1Swenshuai.xi 
658*53ee8cc1Swenshuai.xi       uint8x8_t diff1, r_diff1;
659*53ee8cc1Swenshuai.xi       uint8x8_t diff2, r_diff2;
660*53ee8cc1Swenshuai.xi       uint8x8_t r_diff3;
661*53ee8cc1Swenshuai.xi       int x_pos;
662*53ee8cc1Swenshuai.xi 
663*53ee8cc1Swenshuai.xi #ifdef SUPPORT_TILE
664*53ee8cc1Swenshuai.xi       base_x = (x >> g_tile_w_shift) * (g_tile_width * TILE_Height);
665*53ee8cc1Swenshuai.xi       x_pos =  (x & g_tile_w_mod) + base_x;
666*53ee8cc1Swenshuai.xi #else
667*53ee8cc1Swenshuai.xi       x_pos = x;
668*53ee8cc1Swenshuai.xi #endif
669*53ee8cc1Swenshuai.xi 
670*53ee8cc1Swenshuai.xi 
671*53ee8cc1Swenshuai.xi 
672*53ee8cc1Swenshuai.xi       f0_y0 = vshr_n_u8(vld1_u8(preField + base_y + x_pos ), 3);
673*53ee8cc1Swenshuai.xi       f0_yu = vshr_n_u8(vld1_u8(preField + base_yu + x_pos ), 3);
674*53ee8cc1Swenshuai.xi       f1_y0 = vshr_n_u8(vld1_u8(curField + base_y + x_pos ), 3);
675*53ee8cc1Swenshuai.xi       f2_y0 = vshr_n_u8(vld1_u8(nxtField + base_y + x_pos ), 3);
676*53ee8cc1Swenshuai.xi       f2_yu = vshr_n_u8(vld1_u8(nxtField + base_yu + x_pos ), 3);
677*53ee8cc1Swenshuai.xi       f3_y0 = vshr_n_u8(vld1_u8(nx2Field + base_y + x_pos ), 3);
678*53ee8cc1Swenshuai.xi       /*
679*53ee8cc1Swenshuai.xi       //if(realline_cycl==0)
680*53ee8cc1Swenshuai.xi       diff = abs((f0_y0>>5) - (f2_y0>>5) );
681*53ee8cc1Swenshuai.xi       diff = (diff > mot_diff_th);
682*53ee8cc1Swenshuai.xi       filter1 = abs(med3( (f1_y0>>5), (f2_y0>>5), (f1_yd>>5) ) - (f2_y0>>5) );
683*53ee8cc1Swenshuai.xi       if ( diff && (filter1 > med_filter_th) )
684*53ee8cc1Swenshuai.xi         frame_motion_T1B0++;*/
685*53ee8cc1Swenshuai.xi 
686*53ee8cc1Swenshuai.xi       /*
687*53ee8cc1Swenshuai.xi       //if(realline_cycl==1)
688*53ee8cc1Swenshuai.xi       diff = abs((f1_y0>>5) - (f3_y0>>5) );
689*53ee8cc1Swenshuai.xi       diff = (diff > mot_diff_th);
690*53ee8cc1Swenshuai.xi       filter1 = abs(med3( (f2_yu>>5), (f1_y0>>5), (f2_yd>>5) ) - (f1_y0>>5) );
691*53ee8cc1Swenshuai.xi       if (diff && (filter1 > med_filter_th) )
692*53ee8cc1Swenshuai.xi         frame_motion_T1B0++;
693*53ee8cc1Swenshuai.xi       */
694*53ee8cc1Swenshuai.xi 
695*53ee8cc1Swenshuai.xi       diff1   = vabd_u8(f1_y0, f3_y0);
696*53ee8cc1Swenshuai.xi       r_diff1 = vcgt_u8(diff1, mot_diff_th);
697*53ee8cc1Swenshuai.xi       filter1 = vabd_u8(vmed3_i8(f2_yu, f1_y0, f2_y0), f1_y0);
698*53ee8cc1Swenshuai.xi       r_filter1 = vcgt_u8(filter1, med_filter_th);
699*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff1, r_filter1);
700*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
701*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 0);
702*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 1);
703*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 2);
704*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 3);
705*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 4);
706*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 5);
707*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 6);
708*53ee8cc1Swenshuai.xi       frame_motion_T1B0  += vget_lane_u8(rst, 7);
709*53ee8cc1Swenshuai.xi       /*
710*53ee8cc1Swenshuai.xi       filter2 = abs(med3( (f0_yu>>5), (f3_y0>>5), (f0_yd>>5) ) - (f3_y0>>5) );
711*53ee8cc1Swenshuai.xi       if ( diff && (filter2 > med_filter_th) )
712*53ee8cc1Swenshuai.xi         frame_motion_T0B1++;*/
713*53ee8cc1Swenshuai.xi 
714*53ee8cc1Swenshuai.xi       diff2   = vabd_u8(f3_y0, f1_y0);
715*53ee8cc1Swenshuai.xi       r_diff2 = vcgt_u8(diff2, mot_diff_th);
716*53ee8cc1Swenshuai.xi       filter2 = vabd_u8(vmed3_i8(f0_yu, f3_y0, f0_y0), f3_y0);
717*53ee8cc1Swenshuai.xi       r_filter2 = vcgt_u8(filter2, med_filter_th);
718*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff2, r_filter2);
719*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
720*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 0);
721*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 1);
722*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 2);
723*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 3);
724*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 4);
725*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 5);
726*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 6);
727*53ee8cc1Swenshuai.xi       frame_motion_T0B1  += vget_lane_u8(rst, 7);
728*53ee8cc1Swenshuai.xi 
729*53ee8cc1Swenshuai.xi       r_diff3 = vcgt_u8(diff1, mot_diff_fm_th);
730*53ee8cc1Swenshuai.xi #ifndef REMOVE_T0B0
731*53ee8cc1Swenshuai.xi       /*
732*53ee8cc1Swenshuai.xi       filter3 = abs(med3( (f0_yu>>5), (f1_y0>>5), (f0_y0>>5) ) - (f1_y0>>5) );
733*53ee8cc1Swenshuai.xi       if ( diff && (filter3 > med_filter_fm_th) )
734*53ee8cc1Swenshuai.xi         frame_motion_T0B0++;*/
735*53ee8cc1Swenshuai.xi       filter3 = vabd_u8(vmed3_i8(f0_yu, f1_y0, f0_y0), f1_y0);
736*53ee8cc1Swenshuai.xi       r_filter3 = vcgt_u8(filter3, med_filter_fm_th);
737*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff3, r_filter3);
738*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
739*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 0);
740*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 1);
741*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 2);
742*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 3);
743*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 4);
744*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 5);
745*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 6);
746*53ee8cc1Swenshuai.xi       frame_motion_T0B0  += vget_lane_u8(rst, 7);
747*53ee8cc1Swenshuai.xi #endif
748*53ee8cc1Swenshuai.xi #ifndef REMOVE_T1B1
749*53ee8cc1Swenshuai.xi       /*
750*53ee8cc1Swenshuai.xi       filter4 = abs(med3( (f2_yu>>5), (f3_y0>>5), (f2_yd>>5) ) - (f3_y0>>5) );
751*53ee8cc1Swenshuai.xi       if ( diff && (filter4 > med_filter_fm_th) )
752*53ee8cc1Swenshuai.xi         frame_motion_T1B1++;*/
753*53ee8cc1Swenshuai.xi       filter4 = vabd_u8(vmed3_i8(f2_yu, f3_y0, f2_y0), f3_y0);
754*53ee8cc1Swenshuai.xi       r_filter4 = vcgt_u8(filter4, med_filter_fm_th);
755*53ee8cc1Swenshuai.xi       rst = vtst_u8(r_diff3, r_filter4);
756*53ee8cc1Swenshuai.xi       rst = vshr_n_u8(rst, 7);
757*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 0);
758*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 1);
759*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 2);
760*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 3);
761*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 4);
762*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 5);
763*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 6);
764*53ee8cc1Swenshuai.xi       frame_motion_T1B1  += vget_lane_u8(rst, 7);
765*53ee8cc1Swenshuai.xi #endif
766*53ee8cc1Swenshuai.xi     }
767*53ee8cc1Swenshuai.xi   }
768*53ee8cc1Swenshuai.xi 
769*53ee8cc1Swenshuai.xi }
770*53ee8cc1Swenshuai.xi #endif
// Larger of two values. Function-like macro: the winning argument is
// evaluated twice, so do not pass expressions with side effects.
#define mymax(a,b)            (((b) < (a)) ? (a) : (b))
/*
 * Return the absolute value of v as an unsigned int.
 *
 * The previous branch-free version built its sign mask as
 * "(v >> sizeof(int)) * CHAR_BIT - 1" instead of
 * "v >> (sizeof(int) * CHAR_BIT - 1)", which produced wrong results for
 * small positive inputs (e.g. 5 came back as 0xFFFFFFFB). The negation is
 * now done in unsigned arithmetic, which is fully defined for every int,
 * including INT_MIN, and does not rely on right-shifting a negative value.
 */
static unsigned int fastabs(int v)
{
  unsigned int magnitude = (unsigned int)v;

  if (v < 0)
    magnitude = 0u - magnitude;

  return magnitude;
}
dumpDetFieldOrder_status(int CurIsBot)779*53ee8cc1Swenshuai.xi void dumpDetFieldOrder_status(int CurIsBot)
780*53ee8cc1Swenshuai.xi {
781*53ee8cc1Swenshuai.xi   float ratio;
782*53ee8cc1Swenshuai.xi   int motion_T0B1, motion_T1B0;
783*53ee8cc1Swenshuai.xi   int bT1B0, bT0B1, bRatio;
784*53ee8cc1Swenshuai.xi 
785*53ee8cc1Swenshuai.xi #ifdef FILE_DBG
786*53ee8cc1Swenshuai.xi     char ofile[128] = "DUMP_DetFieldOrder.txt";
787*53ee8cc1Swenshuai.xi     FILE *fptr = fopen(ofile, "a");
788*53ee8cc1Swenshuai.xi     if( fptr == NULL ) return;
789*53ee8cc1Swenshuai.xi #endif
790*53ee8cc1Swenshuai.xi     g_ary_idx++;
791*53ee8cc1Swenshuai.xi     if (g_ary_idx == MAXFRAME)
792*53ee8cc1Swenshuai.xi       g_ary_idx = 0;
793*53ee8cc1Swenshuai.xi #ifdef FILE_DBG
794*53ee8cc1Swenshuai.xi     if(g_ary_idx == 0 && g_bInitPhase == 1)
795*53ee8cc1Swenshuai.xi     {
796*53ee8cc1Swenshuai.xi       fprintf(fptr, "// motionT0B1(TFF), motionT1B0(BFF), motionT0B0, curField(Top0/Bot1)\n");
797*53ee8cc1Swenshuai.xi     }
798*53ee8cc1Swenshuai.xi #endif
799*53ee8cc1Swenshuai.xi     //dump1 << "// " << dec << item_idx++ << " Motion_cnt_all_status_pre32 " << _dumpPnrStatus.Motion_cnt_all_status_pre32 << "\n";
800*53ee8cc1Swenshuai.xi     //dump1 << hex << setw(5) << frame_motion_T0B1[idx] << ", " << frame_motion_T1B0[idx] << "\n";
801*53ee8cc1Swenshuai.xi     motion_T0B1 = mymax(frame_motion_T0B1, 1);
802*53ee8cc1Swenshuai.xi     motion_T1B0 = mymax(frame_motion_T1B0, 1);
803*53ee8cc1Swenshuai.xi 
804*53ee8cc1Swenshuai.xi     bT1B0 = frame_motion_T1B0 > g_sceneChangeTh;
805*53ee8cc1Swenshuai.xi     bT0B1 = frame_motion_T0B1 > g_sceneChangeTh;
806*53ee8cc1Swenshuai.xi 
807*53ee8cc1Swenshuai.xi     ratio = (float)motion_T0B1/(float)motion_T1B0;
808*53ee8cc1Swenshuai.xi 
809*53ee8cc1Swenshuai.xi     ary_frame_motion_ratio[g_ary_idx] = (int)(ratio*256.0);
810*53ee8cc1Swenshuai.xi     bRatio = (fastabs(ary_frame_motion_ratio[g_ary_idx] - 256) < 16);
811*53ee8cc1Swenshuai.xi     ary_IsSceneChangeTh[g_ary_idx] = (bT0B1 && bT1B0 && bRatio);
812*53ee8cc1Swenshuai.xi #ifdef FILE_DBG
813*53ee8cc1Swenshuai.xi     fprintf(fptr, "%05x_%05x_%05x_%d, ratio=%d, bT0B1=%d bT1B0=%d bRatio=%d\n",
814*53ee8cc1Swenshuai.xi                   frame_motion_T0B1,
815*53ee8cc1Swenshuai.xi                   frame_motion_T1B0,
816*53ee8cc1Swenshuai.xi                   frame_motion_T1B1,
817*53ee8cc1Swenshuai.xi                   CurIsBot, ary_frame_motion_ratio[g_ary_idx], bT0B1, bT1B0, bRatio);
818*53ee8cc1Swenshuai.xi     fclose(fptr);
819*53ee8cc1Swenshuai.xi #endif
820*53ee8cc1Swenshuai.xi 
821*53ee8cc1Swenshuai.xi #ifdef DEBUG_INFO
822*53ee8cc1Swenshuai.xi     printf("[FOD] %05x_%05x_%05x_%d, ratio=%d, bT0B1=%d bT1B0=%d bRatio=%d\n",
823*53ee8cc1Swenshuai.xi       frame_motion_T0B1,
824*53ee8cc1Swenshuai.xi       frame_motion_T1B0,
825*53ee8cc1Swenshuai.xi       frame_motion_T1B1,
826*53ee8cc1Swenshuai.xi       CurIsBot, ary_frame_motion_ratio[g_ary_idx], bT0B1, bT1B0, bRatio);
827*53ee8cc1Swenshuai.xi #endif
828*53ee8cc1Swenshuai.xi 
829*53ee8cc1Swenshuai.xi }
830*53ee8cc1Swenshuai.xi #endif