diff --git a/Makefile b/Makefile index f3a78b2de3..49a92e15d5 100644 --- a/Makefile +++ b/Makefile @@ -92,7 +92,6 @@ SRCS_COMMON = asxparser.c \ libmpcodecs/dec_video.c \ libmpcodecs/img_format.c \ libmpcodecs/mp_image.c \ - libmpcodecs/native/nuppelvideo.c \ libmpcodecs/native/rtjpegn.c \ libmpcodecs/native/xa_gsm.c \ libmpcodecs/pullup.c \ @@ -102,7 +101,6 @@ SRCS_COMMON = asxparser.c \ libmpcodecs/vd_mpegpes.c \ libmpcodecs/vd_mtga.c \ libmpcodecs/vd_null.c \ - libmpcodecs/vd_nuv.c \ libmpcodecs/vd_raw.c \ libmpcodecs/vd_sgi.c \ libmpcodecs/vf.c \ diff --git a/etc/codecs.conf b/etc/codecs.conf index f9dcf8fc76..c824ab9903 100644 --- a/etc/codecs.conf +++ b/etc/codecs.conf @@ -319,14 +319,6 @@ videocodec ffnuv dll nuv out I420 -videocodec nuv - info "NuppelVideo" - status working - fourcc NUV1 ; NUV1 is an internal MPlayer FOURCC - fourcc RJPG - driver nuv - out I420,IYUV - videocodec ffbmp info "FFmpeg BMP" status working diff --git a/libmpcodecs/native/nuppelvideo.c b/libmpcodecs/native/nuppelvideo.c deleted file mode 100644 index 8128f85c47..0000000000 --- a/libmpcodecs/native/nuppelvideo.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * NuppelVideo 0.05 file parser - * for MPlayer - * by Panagiotis Issaris - * - * Reworked by alex - */ - -#include -#include -#include -#include - -#include "config.h" -#include "mp_msg.h" -#include "mpbswap.h" - -#include "libvo/fastmemcpy.h" - -#include "libmpdemux/nuppelvideo.h" -#include "rtjpegn.h" -#include "libavutil/lzo.h" - -#define KEEP_BUFFER - -void decode_nuv( unsigned char *encoded, int encoded_size, - unsigned char *decoded, int width, int height) -{ - int r; - unsigned int out_len = width * height + ( width * height ) / 2; - struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded; - static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */ -#ifdef KEEP_BUFFER - static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */ -#endif - -// printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n", -// encodedh->frametype, encodedh->comptype, encoded_size, width, height); - - le2me_rtframeheader(encodedh); - switch(encodedh->frametype) - { - case 'D': /* additional data for compressors */ - { - /* tables are in encoded */ - if (encodedh->comptype == 'R') - { - RTjpeg_init_decompress ( (unsigned long *)(encoded+12), width, height ); - mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n", - encoded_size-12, width, height); - } - break; - } - case 'V': - { - int in_len = encodedh->packetlength; -#ifdef KEEP_BUFFER - if (!previous_buffer) - previous_buffer = ( unsigned char * ) malloc ( out_len + AV_LZO_OUTPUT_PADDING ); -#endif - - switch(encodedh->comptype) - { - case '0': /* raw YUV420 */ - fast_memcpy(decoded, encoded + 12, out_len); - break; - case '1': /* RTJpeg */ - RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded ); - break; - case '2': /* RTJpeg with LZO */ - if (!buffer) - buffer = ( unsigned char * ) malloc ( out_len + AV_LZO_OUTPUT_PADDING ); - if (!buffer) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - r = av_lzo1x_decode ( buffer, &out_len, encoded + 12, &in_len ); - if ( r ) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded ); - break; - case '3': /* raw YUV420 with LZO */ - r = av_lzo1x_decode ( decoded, &out_len, encoded + 12, &in_len ); - if ( r ) - { - mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n"); - break; - } - break; - case 'N': /* black frame */ - memset ( decoded, 0, width * height ); - memset ( decoded + width * height, 127, width * height / 2); - break; - case 'L': /* copy last frame */ -#ifdef KEEP_BUFFER - fast_memcpy ( decoded, previous_buffer, width*height*3/2); -#endif - break; - } - -#ifdef KEEP_BUFFER - fast_memcpy(previous_buffer, decoded, width*height*3/2); -#endif - break; - } - default: - mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n", - encodedh->frametype); - } -} diff --git a/libmpcodecs/native/rtjpegn.c b/libmpcodecs/native/rtjpegn.c index 3aa1faac74..0eea073b61 100644 --- a/libmpcodecs/native/rtjpegn.c +++ b/libmpcodecs/native/rtjpegn.c @@ -293,152 +293,6 @@ fprintf(stdout, "\n\n"); return (int)co; } -/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* Stream to Block (decoding) */ -/* */ - -static int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) -{ - int ci; - register int co; - register int i; - register unsigned char bitten; - register unsigned char bitoff; - - /* first byte always read */ - i=RTjpeg_ZZ[0]; - data[i]=((__u8)strm[0])*qtbl[i]; - - /* we start at the behind */ - - bitten = ((unsigned char)strm[1]) >> 2; - co = 63; - for(; co > bitten; co--) { - - data[RTjpeg_ZZ[co]] = 0; - - } - - if (co==0) { - ci = 2; - goto AUTOBAHN; - } - - /* we have to read the last 2 bits of the second byte */ - ci=1; - bitoff = 0; - - for(; co>0; co--) { - - bitten = ((unsigned char)strm[ci]) >> bitoff; - bitten &= 0x03; - - i=RTjpeg_ZZ[co]; - - switch( bitten ) { - case 0x03: - data[i]= -qtbl[i]; - break; - case 0x02: - goto FUSSWEG; - break; - case 0x01: - data[i]= qtbl[i]; - break; - case 0x00: - data[i]= 0; - break; - default: - break; - } - - if( bitoff == 0 ) { - bitoff = 8; - ci++; - } - bitoff -= 2; - } - /* co is 0 now */ - /* data is written properly */ - - /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */ - if (bitoff!=6) ci++; - - goto AUTOBAHN; - - -FUSSWEG: -/* correct bitoff to nibble */ - switch(bitoff){ - case 4: - case 6: - bitoff = 0; - break; - case 2: - case 0: - /* we have to read from the next byte */ - ci++; - bitoff = 4; - break; - default: - break; - } - - for(; co>0; co--) { - - bitten = ((unsigned char)strm[ci]) >> bitoff; - bitten &= 0x0f; - - i=RTjpeg_ZZ[co]; - - if( bitten == 0x08 ) { - goto STRASSE; - } - - /* the compiler cannot do sign extension for signed nibbles */ - if( bitten & 0x08 ) { - bitten |= 0xf0; - } - /* the unsigned char bitten now is a valid signed char */ - - data[i]=((signed char)bitten)*qtbl[i]; - - if( bitoff == 0 ) { - bitoff = 8; - ci++; - } - bitoff -= 4; - } - /* co is 0 */ - - /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */ - if (bitoff!=4) ci++; - - goto AUTOBAHN; - -STRASSE: - ci++; - - for(; co>0; co--) { - i=RTjpeg_ZZ[co]; - data[i]=strm[ci++]*qtbl[i]; - } - - /* ci now is the count, because it points to next element => no incrementing */ - -AUTOBAHN: - -#ifdef SHOWBLOCK -fprintf(stdout, "\nci = '%d'\n", ci); - for (i=0; i < 64; i++) { - fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]); - } -fprintf(stdout, "\n\n"); -#endif - - return ci; -} - #else static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) @@ -1536,1148 +1390,6 @@ static void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip) #endif } -#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */ - -#define DESCALE(x) (__s16)( ((x)+4) >> 3) - -/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */ - -#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x)) -#define MULTIPLY(var,const) (((__s32) ((var) * (const)) + 128)>>8) - -static void RTjpeg_idct_init(void) -{ - int i; - - for(i=0; i<64; i++) - { - RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32; - RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32; - } -} - -static void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip) -{ -#if HAVE_MMX - -static mmx_t fix_141 = {0x5a825a825a825a82LL}; -static mmx_t fix_184n261 = {0xcf04cf04cf04cf04LL}; -static mmx_t fix_184 = {0x7641764176417641LL}; -static mmx_t fix_n184 = {0x896f896f896f896fLL}; -static mmx_t fix_108n184 = {0xcf04cf04cf04cf04LL}; - - mmx_t workspace[64]; - mmx_t *wsptr = workspace; - register mmx_t *dataptr = (mmx_t *)odata; - mmx_t *idata = (mmx_t *)data; - - rskip = rskip>>3; -/* - * Perform inverse DCT on one block of coefficients. - */ - - /* Odd part */ - - movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] - - movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] - - movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] - - movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ - - movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] - - paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; - - psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 - - psllw_i2r(2, mm2); // shift z10 - movq_r2r(mm2, mm0); // copy z10 - - pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm3, mm5); // copy tmp4 - - pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ - paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; - - movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ - psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; - - psubw_r2r(mm1, mm6); // z11-z13 - psllw_i2r(2, mm5); // shift z12 - - movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part - movq_r2r(mm5, mm7); // copy z12 - - pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part - paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; - - //ok - - /* Even part */ - pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ - psllw_i2r(2, mm6); - - movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] - - paddw_r2r(mm5, mm0); // tmp10 - - paddw_r2r(mm7, mm2); // tmp12 - - pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 - - movq_r2r(mm1, mm5); // copy tmp1 - paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ - - psubw_r2r(mm4, mm5); // tmp1-tmp3 - psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; - - movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace - psllw_i2r(2, mm5); // shift tmp1-tmp3 - - movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] - - pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; - - movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] - - psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - - movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace - movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ - - movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace - psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; - - paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; - movq_r2r(mm1, mm5); // copy tmp11 - - paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; - movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ - - paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; - - psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; - movq_r2r(mm7, mm0); // copy tmp0 - - psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; - paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - - psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - - movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] - movq_r2r(mm1, mm3); // copy tmp1 - - movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] - paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - - psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - - movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] - movq_r2r(mm4, mm1); // copy tmp3 - - movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] - - paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - - psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - - movq_r2m(mm4, *(wsptr+8)); - movq_r2r(mm5, mm7); // copy tmp2 - - paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) - - movq_r2m(mm1, *(wsptr+6)); - psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - - movq_r2m(mm5, *(wsptr+4)); - - movq_r2m(mm7, *(wsptr+10)); - - //ok - - -/*****************************************************************/ - - idata++; - wsptr++; - -/*****************************************************************/ - - movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5] - - movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3] - - movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1] - movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */ - - movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7] - paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5; - - psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5 - - psllw_i2r(2, mm2); // shift z10 - movq_r2r(mm2, mm0); // copy z10 - - pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm3, mm5); // copy tmp4 - - pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ - paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7; - - movq_r2r(mm3, mm6); // copy z11 /* phase 5 */ - psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7; - - psubw_r2r(mm1, mm6); // z11-z13 - psllw_i2r(2, mm5); // shift z12 - - movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part - movq_r2r(mm5, mm7); // copy z12 - - pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part - paddw_r2r(mm1, mm3); // tmp7 = z11 + z13; - - //ok - - /* Even part */ - pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ - psllw_i2r(2, mm6); - - movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2] - - paddw_r2r(mm5, mm0); // tmp10 - - paddw_r2r(mm7, mm2); // tmp12 - - pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7 - - movq_r2r(mm1, mm5); // copy tmp1 - paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */ - - psubw_r2r(mm4, mm5); // tmp1-tmp3 - psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6; - - movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace - psllw_i2r(2, mm5); // shift tmp1-tmp3 - - movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0] - paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5; - - pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - - movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4] - - psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - - movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace - movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */ - - movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace - psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2; - - paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2; - movq_r2r(mm1, mm5); // copy tmp11 - - paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12; - movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */ - - paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13; - - psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13; - movq_r2r(mm7, mm0); // copy tmp0 - - psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12; - paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - - psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - - movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0] - movq_r2r(mm1, mm3); // copy tmp1 - - movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7] - paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - - psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - - movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1] - movq_r2r(mm4, mm1); // copy tmp3 - - movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6] - - paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - - psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - - movq_r2m(mm4, *(wsptr+8)); - movq_r2r(mm5, mm7); // copy tmp2 - - paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) - - movq_r2m(mm1, *(wsptr+6)); - psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - - movq_r2m(mm5, *(wsptr+4)); - - movq_r2m(mm7, *(wsptr+10)); - -/*****************************************************************/ - - /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3, */ - /* and also undo the PASS1_BITS scaling. */ - -/*****************************************************************/ - /* Even part */ - - wsptr--; - -// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); -// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); -// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); -// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); - movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] - - movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] - movq_r2r(mm0, mm2); - - movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] - paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] - - movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] - psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] - - movq_r2r(mm0, mm6); - movq_r2r(mm3, mm5); - - paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] - movq_r2r(mm2, mm1); - - psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] - punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] - - movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] - punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] - - movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] - - punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] - movq_r2r(mm3, mm4); - - movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] - punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] - - movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - - paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] - movq_r2r(mm6, mm2); - - psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] - paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] - - movq_r2r(mm3, mm5); - punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] - - psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] - punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] - - movq_r2r(mm4, mm7); - punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] - - punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] - - punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] - - punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] - movq_r2r(mm1, mm6); - - //ok - -// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] -// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - - movq_r2r(mm0, mm2); - punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] - - punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] - psllw_i2r(2, mm6); - - pmulhw_m2r(fix_141, mm6); - punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] - - punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] - movq_r2r(mm0, mm7); - -// tmp0 = tmp10 + tmp13; -// tmp3 = tmp10 - tmp13; - paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] - psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] - -// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; - psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] -// tmp1 = tmp11 + tmp12; -// tmp2 = tmp11 - tmp12; - movq_r2r(mm1, mm5); - - //OK - - /* Odd part */ - -// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; -// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; -// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; -// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] - paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] - - movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] - psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] - - movq_r2r(mm3, mm6); - punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] - - punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] - movq_r2r(mm3, mm2); - -//Save tmp0 and tmp1 in wsptr - movq_r2m(mm0, *(wsptr)); // save tmp0 - paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] - - -//Continue with z10 --- z13 - movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] - psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] - - movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] - movq_r2r(mm6, mm4); - - movq_r2m(mm1, *(wsptr+1)); // save tmp1 - punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] - - punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] - movq_r2r(mm6, mm1); - -//Save tmp2 and tmp3 in wsptr - paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] - movq_r2r(mm2, mm4); - -//Continue with z10 --- z13 - movq_r2m(mm5, *(wsptr+2)); // save tmp2 - punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] - - psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] - punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] - - movq_r2r(mm3, mm0); - punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] - - movq_r2m(mm7, *(wsptr+3)); // save tmp3 - punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] - - movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] - - movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] - - movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] - movq_r2r(mm6, mm4); - - punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] - movq_r2r(mm1, mm5); - - punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] - movq_r2r(mm6, mm2); - - movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] - paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] - - psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] - punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] - - punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] - movq_r2r(mm1, mm7); - - paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] - psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] - - movq_r2r(mm6, mm5); - punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] - - punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] - movq_r2r(mm2, mm4); - - punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] - - punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] - - punpckhdq_r2r(mm6, mm4); /// wsptr[2,z10],[3,z10],[2,z11],[3,z11] - - punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] - movq_r2r(mm0, mm5); - - punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] - - punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] - movq_r2r(mm3, mm4); - - punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] - movq_r2r(mm5, mm1); - - punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] -// tmp7 = z11 + z13; /* phase 5 */ -// tmp8 = z11 - z13; /* phase 5 */ - psubw_r2r(mm4, mm1); // tmp8 - - paddw_r2r(mm4, mm5); // tmp7 -// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ - psllw_i2r(2, mm1); - - psllw_i2r(2, mm0); - - pmulhw_m2r(fix_141, mm1); // tmp21 -// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ -// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ - psllw_i2r(2, mm3); - movq_r2r(mm0, mm7); - - pmulhw_m2r(fix_n184, mm7); - movq_r2r(mm3, mm6); - - movq_m2r(*(wsptr), mm2); // tmp0,final1 - - pmulhw_m2r(fix_108n184, mm6); -// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ -// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm2, mm4); // final1 - - pmulhw_m2r(fix_184n261, mm0); - paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 - - pmulhw_m2r(fix_184, mm3); - psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 - -// tmp6 = tmp22 - tmp7; /* phase 2 */ - psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 - - paddw_r2r(mm6, mm7); // tmp20 - psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 - - paddw_r2r(mm0, mm3); // tmp22 - -// tmp5 = tmp21 - tmp6; - psubw_r2r(mm5, mm3); // tmp6 - -// tmp4 = tmp20 + tmp5; - movq_m2r(*(wsptr+1), mm0); // tmp1,final2 - psubw_r2r(mm3, mm1); // tmp5 - - movq_r2r(mm0, mm6); // final2 - paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 - - /* Final output stage: scale down by a factor of 8 and range-limit */ - - -// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) -// & RANGE_MASK]; final1 - - -// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) -// & RANGE_MASK]; final2 - psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 - psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] - - psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] - - packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] - - movq_m2r(*(wsptr+2), mm5); // tmp2,final3 - packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] - -// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) -// & RANGE_MASK]; final3 - paddw_r2r(mm1, mm7); // tmp4 - movq_r2r(mm5, mm3); - - paddw_r2r(mm1, mm5); // tmp2+tmp5 - psubw_r2r(mm1, mm3); // tmp2-tmp5 - - psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] - - movq_m2r(*(wsptr+3), mm4); // tmp3,final4 - psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] - - - -// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) -// & RANGE_MASK]; final4 - movq_r2r(mm4, mm6); - paddw_r2r(mm7, mm4); // tmp3+tmp4 - - psubw_r2r(mm7, mm6); // tmp3-tmp4 - psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] - - // mov ecx, [dataptr] - - psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] - - packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] - - packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] - movq_r2r(mm2, mm4); - - movq_r2r(mm5, mm7); - punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] - - punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] - movq_r2r(mm2, mm1); - - punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] - - // add dataptr, 4 - - punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] - - punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] - - // add ecx, output_col - - movq_r2r(mm7, mm6); - punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] - - movq_r2r(mm2, mm0); - punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] - - // mov idata, [dataptr] - - punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] - - // add dataptr, 4 - - movq_r2r(mm1, mm3); - - // add idata, output_col - - punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] - - movq_r2m(mm2, *(dataptr)); - - punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] - - dataptr += rskip; - movq_r2m(mm0, *(dataptr)); - - punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm1, *(dataptr)); - - dataptr += rskip; - movq_r2m(mm3, *(dataptr)); - -/*******************************************************************/ - - wsptr += 8; - -/*******************************************************************/ - -// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); -// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); -// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); -// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); - movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3] - - movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7] - movq_r2r(mm0, mm2); - - movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3] - paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] - - movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7] - psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] - - movq_r2r(mm0, mm6); - movq_r2r(mm3, mm5); - - paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] - movq_r2r(mm2, mm1); - - psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] - punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] - - movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7] - punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] - - movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] - - punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] - movq_r2r(mm3, mm4); - - movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3] - punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] - - movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] - movq_r2r(mm6, mm2); - - psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] - paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] - - movq_r2r(mm3, mm5); - punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] - - psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] - punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] - - movq_r2r(mm4, mm7); - punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] - - punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] - - punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] - - punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] - movq_r2r(mm1, mm6); - - //OK - -// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] -// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] - - movq_r2r(mm0, mm2); - punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] - - punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] - psllw_i2r(2, mm6); - - pmulhw_m2r(fix_141, mm6); - punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] - - punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] - movq_r2r(mm0, mm7); - -// tmp0 = tmp10 + tmp13; -// tmp3 = tmp10 - tmp13; - paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] - psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] - -// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; - psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] -// tmp1 = tmp11 + tmp12; -// tmp2 = tmp11 - tmp12; - movq_r2r(mm1, mm5); - - //OK - - - /* Odd part */ - -// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; -// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; -// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; -// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3] - paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] - - movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7] - psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] - - movq_r2r(mm3, mm6); - punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5] - - punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3] - movq_r2r(mm3, mm2); - -//Save tmp0 and tmp1 in wsptr - movq_r2m(mm0, *(wsptr)); // save tmp0 - paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13] - - -//Continue with z10 --- z13 - movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3] - psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10] - - movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7] - movq_r2r(mm6, mm4); - - movq_r2m(mm1, *(wsptr+1)); // save tmp1 - punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5] - - punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3] - movq_r2r(mm6, mm1); - -//Save tmp2 and tmp3 in wsptr - paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13] - movq_r2r(mm2, mm4); - -//Continue with z10 --- z13 - movq_r2m(mm5, *(wsptr+2)); // save tmp2 - punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11] - - psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10] - punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13] - - movq_r2r(mm3, mm0); - punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12] - - movq_r2m(mm7, *(wsptr+3)); // save tmp3 - punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10] - - movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3] - punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11] - - movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7] - punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13] - - movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3] - movq_r2r(mm6, mm4); - - punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5] - movq_r2r(mm1, mm5); - - punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3] - movq_r2r(mm6, mm2); - - movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7] - paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13] - - psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10] - punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5] - - punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3] - movq_r2r(mm1, mm7); - - paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13] - psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10] - - movq_r2r(mm6, mm5); - punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11] - - punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13] - movq_r2r(mm2, mm4); - - punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12] - - punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10] - - punpckhdq_r2r(mm6, mm4); // wsptr[2,z10],[3,z10],[2,z11],[3,z11] - - punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13] - movq_r2r(mm0, mm5); - - punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10] - - punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11] - movq_r2r(mm3, mm4); - - punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13] - movq_r2r(mm5, mm1); - - punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12] -// tmp7 = z11 + z13; /* phase 5 */ -// tmp8 = z11 - z13; /* phase 5 */ - psubw_r2r(mm4, mm1); // tmp8 - - paddw_r2r(mm4, mm5); // tmp7 -// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ - psllw_i2r(2, mm1); - - psllw_i2r(2, mm0); - - pmulhw_m2r(fix_141, mm1); // tmp21 -// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */ -// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ - psllw_i2r(2, mm3); - movq_r2r(mm0, mm7); - - pmulhw_m2r(fix_n184, mm7); - movq_r2r(mm3, mm6); - - movq_m2r(*(wsptr), mm2); // tmp0,final1 - - pmulhw_m2r(fix_108n184, mm6); -// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */ -// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ - movq_r2r(mm2, mm4); // final1 - - pmulhw_m2r(fix_184n261, mm0); - paddw_r2r(mm5, mm2); // tmp0+tmp7,final1 - - pmulhw_m2r(fix_184, mm3); - psubw_r2r(mm5, mm4); // tmp0-tmp7,final1 - -// tmp6 = tmp22 - tmp7; /* phase 2 */ - psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1 - - paddw_r2r(mm6, mm7); // tmp20 - psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1 - - paddw_r2r(mm0, mm3); // tmp22 - -// tmp5 = tmp21 - tmp6; - psubw_r2r(mm5, mm3); // tmp6 - -// tmp4 = tmp20 + tmp5; - movq_m2r(*(wsptr+1), mm0); // tmp1,final2 - psubw_r2r(mm3, mm1); // tmp5 - - movq_r2r(mm0, mm6); // final2 - paddw_r2r(mm3, mm0); // tmp1+tmp6,final2 - - /* Final output stage: scale down by a factor of 8 and range-limit */ - -// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) -// & RANGE_MASK]; final1 - - -// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) -// & RANGE_MASK]; final2 - psubw_r2r(mm3, mm6); // tmp1-tmp6,final2 - psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1] - - psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6] - - packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] - - movq_m2r(*(wsptr+2), mm5); // tmp2,final3 - packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] - -// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) -// & RANGE_MASK]; final3 - paddw_r2r(mm1, mm7); // tmp4 - movq_r2r(mm5, mm3); - - paddw_r2r(mm1, mm5); // tmp2+tmp5 - psubw_r2r(mm1, mm3); // tmp2-tmp5 - - psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2] - - movq_m2r(*(wsptr+3), mm4); // tmp3,final4 - psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5] - - - -// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) -// & RANGE_MASK]; -// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) -// & RANGE_MASK]; final4 - movq_r2r(mm4, mm6); - paddw_r2r(mm7, mm4); // tmp3+tmp4 - - psubw_r2r(mm7, mm6); // tmp3-tmp4 - psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4] - - psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3] - - /* - movq_r2m(mm4, *dummy); - fprintf(stderr, "3-4 %016llx\n", dummy); - movq_r2m(mm4, *dummy); - fprintf(stderr, "3+4 %016llx\n", dummy); - */ - - - packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] - - packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] - movq_r2r(mm2, mm4); - - movq_r2r(mm5, mm7); - punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] - - punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] - movq_r2r(mm2, mm1); - - punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] - - punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] - - punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] - - movq_r2r(mm7, mm6); - punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] - - movq_r2r(mm2, mm0); - punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] - - punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] - - movq_r2r(mm1, mm3); - - punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm2, *(dataptr)); - - punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] - - dataptr += rskip; - movq_r2m(mm0, *(dataptr)); - - punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] - - punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] - - dataptr += rskip; - movq_r2m(mm1, *(dataptr)); - - dataptr += rskip; - movq_r2m(mm3, *(dataptr)); - -#else - __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - __s32 tmp10, tmp11, tmp12, tmp13; - __s32 z5, z10, z11, z12, z13; - __s16 *inptr; - __s32 *wsptr; - __u8 *outptr; - int ctr; - __s32 dcval; - __s32 workspace[64]; - - inptr = data; - wsptr = workspace; - for (ctr = 8; ctr > 0; ctr--) { - - if ((inptr[8] | inptr[16] | inptr[24] | - inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) { - dcval = inptr[0]; - wsptr[0] = dcval; - wsptr[8] = dcval; - wsptr[16] = dcval; - wsptr[24] = dcval; - wsptr[32] = dcval; - wsptr[40] = dcval; - wsptr[48] = dcval; - wsptr[56] = dcval; - - inptr++; - wsptr++; - continue; - } - - tmp0 = inptr[0]; - tmp1 = inptr[16]; - tmp2 = inptr[32]; - tmp3 = inptr[48]; - - tmp10 = tmp0 + tmp2; - tmp11 = tmp0 - tmp2; - - tmp13 = tmp1 + tmp3; - tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; - - tmp0 = tmp10 + tmp13; - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - tmp4 = inptr[8]; - tmp5 = inptr[24]; - tmp6 = inptr[40]; - tmp7 = inptr[56]; - - z13 = tmp6 + tmp5; - z10 = tmp6 - tmp5; - z11 = tmp4 + tmp7; - z12 = tmp4 - tmp7; - - tmp7 = z11 + z13; - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; - - tmp6 = tmp12 - tmp7; - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - wsptr[0] = (__s32) (tmp0 + tmp7); - wsptr[56] = (__s32) (tmp0 - tmp7); - wsptr[8] = (__s32) (tmp1 + tmp6); - wsptr[48] = (__s32) (tmp1 - tmp6); - wsptr[16] = (__s32) (tmp2 + tmp5); - wsptr[40] = (__s32) (tmp2 - tmp5); - wsptr[32] = (__s32) (tmp3 + tmp4); - wsptr[24] = (__s32) (tmp3 - tmp4); - - inptr++; - wsptr++; - } - - wsptr = workspace; - for (ctr = 0; ctr < 8; ctr++) { - outptr = &(odata[ctr*rskip]); - - tmp10 = wsptr[0] + wsptr[4]; - tmp11 = wsptr[0] - wsptr[4]; - - tmp13 = wsptr[2] + wsptr[6]; - tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13; - - tmp0 = tmp10 + tmp13; - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - z13 = wsptr[5] + wsptr[3]; - z10 = wsptr[5] - wsptr[3]; - z11 = wsptr[1] + wsptr[7]; - z12 = wsptr[1] - wsptr[7]; - - tmp7 = z11 + z13; - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; - - tmp6 = tmp12 - tmp7; - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - outptr[0] = RL(DESCALE(tmp0 + tmp7)); - outptr[7] = RL(DESCALE(tmp0 - tmp7)); - outptr[1] = RL(DESCALE(tmp1 + tmp6)); - outptr[6] = RL(DESCALE(tmp1 - tmp6)); - outptr[2] = RL(DESCALE(tmp2 + tmp5)); - outptr[5] = RL(DESCALE(tmp2 - tmp5)); - outptr[4] = RL(DESCALE(tmp3 + tmp4)); - outptr[3] = RL(DESCALE(tmp3 - tmp4)); - - wsptr += 8; - } -#endif -} /* Main Routines @@ -2754,7 +1466,6 @@ static void RTjpeg_init_Q(__u8 Q) RTjpeg_cb8--; RTjpeg_dct_init(); - RTjpeg_idct_init(); RTjpeg_quant_init(); } @@ -2816,35 +1527,6 @@ void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q) buf[64+i]=le2me_32(RTjpeg_ciqt[i]); } -void RTjpeg_init_decompress(__u32 *buf, int width, int height) -{ - int i; - - RTjpeg_init_data(); - - RTjpeg_width=width; - RTjpeg_height=height; - RTjpeg_Ywidth = RTjpeg_width>>3; - RTjpeg_Ysize=width * height; - RTjpeg_Cwidth = RTjpeg_width>>4; - RTjpeg_Csize= (width>>1) * height; - - for(i=0; i<64; i++) - { - RTjpeg_liqt[i]=le2me_32(buf[i]); - RTjpeg_ciqt[i]=le2me_32(buf[i+64]); - } - - RTjpeg_lb8=0; - while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); - RTjpeg_lb8--; - RTjpeg_cb8=0; - while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); - RTjpeg_cb8--; - - RTjpeg_idct_init(); -} - int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) { __s8 * sb; @@ -2899,68 +1581,6 @@ int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) return (sp-sb); } -void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp) -{ - register __s8 * bp1 = bp + (RTjpeg_width<<3); - register __s8 * bp2 = bp + RTjpeg_Ysize; - register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); - int i, j,k; - -#if HAVE_MMX - emms(); -#endif - -/* Y */ - for(i=RTjpeg_height>>1; i; i-=8) - { - for(k=0, j=0; j>1); - } - if(*sp==-1)sp++; - else - { - sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt); - RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1); - } - } - bp+=RTjpeg_width<<4; - bp1+=RTjpeg_width<<4; - bp2+=RTjpeg_width<<2; - bp3+=RTjpeg_width<<2; - } -#if HAVE_MMX - emms(); -#endif -} - /* External Function diff --git a/libmpcodecs/native/rtjpegn.h b/libmpcodecs/native/rtjpegn.h index 4ffcdfa39d..859b8238f8 100644 --- a/libmpcodecs/native/rtjpegn.h +++ b/libmpcodecs/native/rtjpegn.h @@ -36,9 +36,7 @@ #define __s64 int64_t void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q); -void RTjpeg_init_decompress(__u32 *buf, int width, int height); int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp); -void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp); void RTjpeg_init_mcompress(void); int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask); diff --git a/libmpcodecs/vd.c b/libmpcodecs/vd.c index b47f85357b..fa2f74ef00 100644 --- a/libmpcodecs/vd.c +++ b/libmpcodecs/vd.c @@ -35,7 +35,6 @@ extern vd_functions_t mpcodecs_vd_vfwex; extern vd_functions_t mpcodecs_vd_raw; extern vd_functions_t mpcodecs_vd_hmblck; extern vd_functions_t mpcodecs_vd_xanim; -extern vd_functions_t mpcodecs_vd_nuv; extern vd_functions_t mpcodecs_vd_mpng; extern vd_functions_t mpcodecs_vd_ijpg; extern vd_functions_t mpcodecs_vd_mtga; @@ -70,7 +69,6 @@ const vd_functions_t * const mpcodecs_vd_drivers[] = { &mpcodecs_vd_lzo, &mpcodecs_vd_raw, &mpcodecs_vd_hmblck, - &mpcodecs_vd_nuv, #ifdef CONFIG_XANIM &mpcodecs_vd_xanim, #endif diff --git a/libmpcodecs/vd_nuv.c b/libmpcodecs/vd_nuv.c deleted file mode 100644 index cb89ddb391..0000000000 --- a/libmpcodecs/vd_nuv.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include - -#include "config.h" -#include "mp_msg.h" - -#include "vd_internal.h" - -static vd_info_t info = { - "NuppelVideo decoder", - "nuv", - "A'rpi", - "Alex & Panagiotis Issaris ", - "native codecs" -}; - -LIBVD_EXTERN(nuv) - -// to set/get/query special features/parameters -static int control(sh_video_t *sh,int cmd,void* arg,...){ - return CONTROL_UNKNOWN; -} - -// init driver -static int init(sh_video_t *sh){ - return mpcodecs_config_vo(sh,sh->disp_w,sh->disp_h,IMGFMT_I420); -} - -// uninit driver -static void uninit(sh_video_t *sh){ -} - -//mp_image_t* mpcodecs_get_image(sh_video_t *sh, int mp_imgtype, int mp_imgflag, int w, int h); - -void decode_nuv( - unsigned char *encoded, - int encoded_size, - unsigned char *decoded, - int width, - int height); - -// decode a frame -static mp_image_t* decode(sh_video_t *sh,void* data,int len,int flags){ - mp_image_t* mpi; - if(len<=0) return NULL; // skipped frame - - mpi=mpcodecs_get_image(sh, MP_IMGTYPE_TEMP, 0, - sh->disp_w, sh->disp_h); - if(!mpi) return NULL; - - decode_nuv(data, len, mpi->planes[0], sh->disp_w, sh->disp_h); - - return mpi; -}