mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-02 13:02:13 +00:00
15/16 bit dithering in C (5% slower, can be disabled by commenting #define DITHER1XBPP out)
x/yInc bugfix (bug was introduced during the global var removal); more emms & no messing with ARCH_X86 (=workaround against not cleared mmx state somewhere); sharpening filter (-ssf cs=<int> & -ssf ls=<int>). Originally committed as revision 4298 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
fe8054c0ff
commit
5521b193d6
@ -161,6 +161,8 @@ float sws_lum_gblur= 0.0;
|
||||
float sws_chr_gblur= 0.0;
|
||||
int sws_chr_vshift= 0;
|
||||
int sws_chr_hshift= 0;
|
||||
float sws_chr_sharpen= 0.0;
|
||||
float sws_lum_sharpen= 0.0;
|
||||
|
||||
/* cpuCaps combined from cpudetect and whats actually compiled in
|
||||
(if there is no support for something compiled in it wont appear here) */
|
||||
@ -298,6 +300,28 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
else if(dstFormat==IMGFMT_BGR16)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<13;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<13;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<13;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<13;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<(dstW>>1); i++){
|
||||
int j;
|
||||
int Y1=0;
|
||||
@ -325,19 +349,41 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
Cr= clip_yuvtab_3343[V+ 256];
|
||||
|
||||
((uint16_t*)dest)[2*i] =
|
||||
clip_table16b[(Y1 + Cb) >>13] |
|
||||
clip_table16g[(Y1 + Cg) >>13] |
|
||||
clip_table16r[(Y1 + Cr) >>13];
|
||||
clip_table16b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table16g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table16r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[2*i+1] =
|
||||
clip_table16b[(Y2 + Cb) >>13] |
|
||||
clip_table16g[(Y2 + Cg) >>13] |
|
||||
clip_table16r[(Y2 + Cr) >>13];
|
||||
clip_table16b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table16g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table16r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
else if(dstFormat==IMGFMT_BGR15)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<14;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<14;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<14;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<14;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<(dstW>>1); i++){
|
||||
int j;
|
||||
int Y1=0;
|
||||
@ -365,14 +411,14 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
Cr= clip_yuvtab_3343[V+ 256];
|
||||
|
||||
((uint16_t*)dest)[2*i] =
|
||||
clip_table15b[(Y1 + Cb) >>13] |
|
||||
clip_table15g[(Y1 + Cg) >>13] |
|
||||
clip_table15r[(Y1 + Cr) >>13];
|
||||
clip_table15b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table15g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table15r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[2*i+1] =
|
||||
clip_table15b[(Y2 + Cb) >>13] |
|
||||
clip_table15g[(Y2 + Cg) >>13] |
|
||||
clip_table15r[(Y2 + Cr) >>13];
|
||||
clip_table15b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table15g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table15r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -402,13 +448,11 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_3DNOW
|
||||
#undef ARCH_X86
|
||||
|
||||
#ifdef COMPILE_C
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_3DNOW
|
||||
#undef ARCH_X86
|
||||
#define RENAME(a) a ## _C
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
@ -431,7 +475,6 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
#define HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_3DNOW
|
||||
#define ARCH_X86
|
||||
#define RENAME(a) a ## _MMX
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
@ -442,7 +485,6 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
#define HAVE_MMX
|
||||
#define HAVE_MMX2
|
||||
#undef HAVE_3DNOW
|
||||
#define ARCH_X86
|
||||
#define RENAME(a) a ## _MMX2
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
@ -453,7 +495,6 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
|
||||
#define HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#define HAVE_3DNOW
|
||||
#define ARCH_X86
|
||||
#define RENAME(a) a ## _3DNow
|
||||
#include "swscale_template.c"
|
||||
#endif
|
||||
@ -477,6 +518,10 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
|
||||
|
||||
if(firstTime)
|
||||
{
|
||||
#ifdef ARCH_X86
|
||||
if(gCpuCaps.hasMMX)
|
||||
asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
|
||||
#endif
|
||||
flags= SWS_PRINT_INFO;
|
||||
firstTime=0;
|
||||
|
||||
@ -501,12 +546,41 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
|
||||
src_filter.chrV= getIdentityVec();
|
||||
}
|
||||
|
||||
if(sws_chr_sharpen!=0.0){
|
||||
SwsVector *g= getConstVec(-1.0, 3);
|
||||
SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1);
|
||||
g->coeff[1]=2.0;
|
||||
addVec(id, g);
|
||||
convVec(src_filter.chrH, id);
|
||||
convVec(src_filter.chrV, id);
|
||||
freeVec(g);
|
||||
freeVec(id);
|
||||
}
|
||||
|
||||
if(sws_lum_sharpen!=0.0){
|
||||
SwsVector *g= getConstVec(-1.0, 3);
|
||||
SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1);
|
||||
g->coeff[1]=2.0;
|
||||
addVec(id, g);
|
||||
convVec(src_filter.lumH, id);
|
||||
convVec(src_filter.lumV, id);
|
||||
freeVec(g);
|
||||
freeVec(id);
|
||||
}
|
||||
|
||||
if(sws_chr_hshift)
|
||||
shiftVec(src_filter.chrH, sws_chr_hshift);
|
||||
|
||||
if(sws_chr_vshift)
|
||||
shiftVec(src_filter.chrV, sws_chr_vshift);
|
||||
|
||||
normalizeVec(src_filter.chrH, 1.0);
|
||||
normalizeVec(src_filter.chrV, 1.0);
|
||||
normalizeVec(src_filter.lumH, 1.0);
|
||||
normalizeVec(src_filter.lumV, 1.0);
|
||||
|
||||
if(verbose > 1) printVec(src_filter.chrH);
|
||||
if(verbose > 1) printVec(src_filter.lumH);
|
||||
}
|
||||
|
||||
switch(dstbpp)
|
||||
@ -551,7 +625,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
|
||||
#endif
|
||||
|
||||
*filterPos = (int16_t*)memalign(8, dstW*sizeof(int16_t));
|
||||
|
||||
if(ABS(xInc - 0x10000) <10) // unscaled
|
||||
{
|
||||
int i;
|
||||
@ -1002,8 +1075,8 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
|
||||
c->srcH= srcH;
|
||||
c->dstW= dstW;
|
||||
c->dstH= dstH;
|
||||
c->lumXInc= ((srcW<<16) + (1<<15))/dstW;
|
||||
c->lumYInc= ((srcH<<16) + (1<<15))/dstH;
|
||||
c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
|
||||
c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
|
||||
c->flags= flags;
|
||||
c->dstFormat= dstFormat;
|
||||
c->srcFormat= srcFormat;
|
||||
@ -1120,7 +1193,9 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
|
||||
if(flags&SWS_PRINT_INFO)
|
||||
{
|
||||
#ifdef DITHER1XBPP
|
||||
char *dither= cpuCaps.hasMMX ? " dithered" : "";
|
||||
char *dither= " dithered";
|
||||
#else
|
||||
char *dither= "";
|
||||
#endif
|
||||
if(flags&SWS_FAST_BILINEAR)
|
||||
fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler ");
|
||||
@ -1244,6 +1319,21 @@ SwsVector *getGaussianVec(double variance, double quality){
|
||||
return vec;
|
||||
}
|
||||
|
||||
SwsVector *getConstVec(double c, int length){
|
||||
int i;
|
||||
double *coeff= memalign(sizeof(double), length*sizeof(double));
|
||||
SwsVector *vec= malloc(sizeof(SwsVector));
|
||||
|
||||
vec->coeff= coeff;
|
||||
vec->length= length;
|
||||
|
||||
for(i=0; i<length; i++)
|
||||
coeff[i]= c;
|
||||
|
||||
return vec;
|
||||
}
|
||||
|
||||
|
||||
SwsVector *getIdentityVec(void){
|
||||
double *coeff= memalign(sizeof(double), sizeof(double));
|
||||
SwsVector *vec= malloc(sizeof(SwsVector));
|
||||
|
@ -108,6 +108,7 @@ extern void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int
|
||||
int srcSliceH, uint8_t* dst[], int dstStride[]);
|
||||
|
||||
SwsVector *getGaussianVec(double variance, double quality);
|
||||
SwsVector *getConstVec(double c, int length);
|
||||
SwsVector *getIdentityVec(void);
|
||||
void scaleVec(SwsVector *a, double scalar);
|
||||
void normalizeVec(SwsVector *a, double height);
|
||||
|
@ -1163,6 +1163,28 @@ FULL_YSCALEYUV2RGB
|
||||
else if(dstFormat==IMGFMT_BGR16)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<13;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<13;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<13;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<13;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<dstW-1; i+=2){
|
||||
// vertical linear interpolation && yuv2rgb in a single step:
|
||||
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
|
||||
@ -1175,19 +1197,41 @@ FULL_YSCALEYUV2RGB
|
||||
int Cr= yuvtab_3343[V];
|
||||
|
||||
((uint16_t*)dest)[i] =
|
||||
clip_table16b[(Y1 + Cb) >>13] |
|
||||
clip_table16g[(Y1 + Cg) >>13] |
|
||||
clip_table16r[(Y1 + Cr) >>13];
|
||||
clip_table16b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table16g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table16r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[i+1] =
|
||||
clip_table16b[(Y2 + Cb) >>13] |
|
||||
clip_table16g[(Y2 + Cg) >>13] |
|
||||
clip_table16r[(Y2 + Cr) >>13];
|
||||
clip_table16b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table16g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table16r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
else if(dstFormat==IMGFMT_BGR15)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<14;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<14;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<14;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<14;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<dstW-1; i+=2){
|
||||
// vertical linear interpolation && yuv2rgb in a single step:
|
||||
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
|
||||
@ -1200,14 +1244,14 @@ FULL_YSCALEYUV2RGB
|
||||
int Cr= yuvtab_3343[V];
|
||||
|
||||
((uint16_t*)dest)[i] =
|
||||
clip_table15b[(Y1 + Cb) >>13] |
|
||||
clip_table15g[(Y1 + Cg) >>13] |
|
||||
clip_table15r[(Y1 + Cr) >>13];
|
||||
clip_table15b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table15g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table15r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[i+1] =
|
||||
clip_table15b[(Y2 + Cb) >>13] |
|
||||
clip_table15g[(Y2 + Cg) >>13] |
|
||||
clip_table15r[(Y2 + Cr) >>13];
|
||||
clip_table15b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table15g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table15r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -1397,6 +1441,28 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
else if(dstFormat==IMGFMT_BGR16)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<13;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<13;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<13;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<13;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<dstW-1; i+=2){
|
||||
// vertical linear interpolation && yuv2rgb in a single step:
|
||||
int Y1=yuvtab_2568[buf0[i]>>7];
|
||||
@ -1409,19 +1475,41 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
int Cr= yuvtab_3343[V];
|
||||
|
||||
((uint16_t*)dest)[i] =
|
||||
clip_table16b[(Y1 + Cb) >>13] |
|
||||
clip_table16g[(Y1 + Cg) >>13] |
|
||||
clip_table16r[(Y1 + Cr) >>13];
|
||||
clip_table16b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table16g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table16r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[i+1] =
|
||||
clip_table16b[(Y2 + Cb) >>13] |
|
||||
clip_table16g[(Y2 + Cg) >>13] |
|
||||
clip_table16r[(Y2 + Cr) >>13];
|
||||
clip_table16b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table16g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table16r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
else if(dstFormat==IMGFMT_BGR15)
|
||||
{
|
||||
int i;
|
||||
#ifdef DITHER1XBPP
|
||||
static int ditherb1=1<<14;
|
||||
static int ditherg1=1<<14;
|
||||
static int ditherr1=2<<14;
|
||||
static int ditherb2=3<<14;
|
||||
static int ditherg2=3<<14;
|
||||
static int ditherr2=0<<14;
|
||||
|
||||
ditherb1 ^= (1^2)<<14;
|
||||
ditherg1 ^= (1^2)<<14;
|
||||
ditherr1 ^= (1^2)<<14;
|
||||
ditherb2 ^= (3^0)<<14;
|
||||
ditherg2 ^= (3^0)<<14;
|
||||
ditherr2 ^= (3^0)<<14;
|
||||
#else
|
||||
const int ditherb1=0;
|
||||
const int ditherg1=0;
|
||||
const int ditherr1=0;
|
||||
const int ditherb2=0;
|
||||
const int ditherg2=0;
|
||||
const int ditherr2=0;
|
||||
#endif
|
||||
for(i=0; i<dstW-1; i+=2){
|
||||
// vertical linear interpolation && yuv2rgb in a single step:
|
||||
int Y1=yuvtab_2568[buf0[i]>>7];
|
||||
@ -1434,14 +1522,14 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
||||
int Cr= yuvtab_3343[V];
|
||||
|
||||
((uint16_t*)dest)[i] =
|
||||
clip_table15b[(Y1 + Cb) >>13] |
|
||||
clip_table15g[(Y1 + Cg) >>13] |
|
||||
clip_table15r[(Y1 + Cr) >>13];
|
||||
clip_table15b[(Y1 + Cb + ditherb1) >>13] |
|
||||
clip_table15g[(Y1 + Cg + ditherg1) >>13] |
|
||||
clip_table15r[(Y1 + Cr + ditherr1) >>13];
|
||||
|
||||
((uint16_t*)dest)[i+1] =
|
||||
clip_table15b[(Y2 + Cb) >>13] |
|
||||
clip_table15g[(Y2 + Cg) >>13] |
|
||||
clip_table15r[(Y2 + Cr) >>13];
|
||||
clip_table15b[(Y2 + Cb + ditherb2) >>13] |
|
||||
clip_table15g[(Y2 + Cg + ditherg2) >>13] |
|
||||
clip_table15r[(Y2 + Cr + ditherr2) >>13];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user