From dc179ec81902e3c9d327f9e818454f2849308000 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 16 Jun 2011 12:04:24 -0400 Subject: [PATCH] swscale: split yuv2packedX_altivec into smaller functions. This will likely lead to a considerable performance boost, since it removes a branch from the inner loop. Part of the Great Evil Plan to simplify swscale. --- libswscale/ppc/swscale_altivec.c | 14 +++++++++----- libswscale/ppc/yuv2rgb_altivec.c | 28 ++++++++++++++++++++++++---- libswscale/ppc/yuv2rgb_altivec.h | 18 +++++++++++++----- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 47fe54c088..7161fe7963 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -414,10 +414,14 @@ void ff_sws_init_swScale_altivec(SwsContext *c) /* The following list of supported dstFormat values should * match what's found in the body of ff_yuv2packedX_altivec() */ - if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf && - (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA || - c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || - c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) { - c->yuv2packedX = ff_yuv2packedX_altivec; + if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) { + switch (c->dstFormat) { + case PIX_FMT_ABGR: c->yuv2packedX = ff_yuv2abgr_X_altivec; break; + case PIX_FMT_BGRA: c->yuv2packedX = ff_yuv2bgra_X_altivec; break; + case PIX_FMT_ARGB: c->yuv2packedX = ff_yuv2argb_X_altivec; break; + case PIX_FMT_RGBA: c->yuv2packedX = ff_yuv2rgba_X_altivec; break; + case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break; + case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break; } + } } diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 476db22489..73c02e9494 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ 
b/libswscale/ppc/yuv2rgb_altivec.c @@ -626,13 +626,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b } -void +static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, - int dstW, int dstY) + int dstW, int dstY, enum PixelFormat target) { int i,j; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; @@ -706,7 +706,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); - switch(c->dstFormat) { + switch(target) { case PIX_FMT_ABGR: out_abgr (R,G,B,out); break; case PIX_FMT_BGRA: out_bgra (R,G,B,out); break; case PIX_FMT_RGBA: out_rgba (R,G,B,out); break; @@ -785,7 +785,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, B = vec_packclp (B0,B1); nout = (vector unsigned char *)scratch; - switch(c->dstFormat) { + switch(target) { case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break; case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break; case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break; @@ -803,3 +803,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, } } + +#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \ +void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \ + const int16_t **lumSrc, int lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, int chrFilterSize, \ + const int16_t **alpSrc, uint8_t *dest, \ + int dstW, int dstY) \ +{ \ + ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, dest, dstW, dstY, pixfmt); \ +} + +YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR); +YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA); +YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB); +YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA); 
+YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24); +YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24); diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index b54a856905..b809fe13fe 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -24,11 +24,19 @@ #ifndef PPC_YUV2RGB_ALTIVEC_H #define PPC_YUV2RGB_ALTIVEC_H 1 -void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, +#define YUV2PACKEDX_HEADER(suffix) \ +void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \ + const int16_t **lumSrc, int lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, int chrFilterSize, \ + const int16_t **alpSrc, uint8_t *dest, \ int dstW, int dstY); +YUV2PACKEDX_HEADER(abgr); +YUV2PACKEDX_HEADER(bgra); +YUV2PACKEDX_HEADER(argb); +YUV2PACKEDX_HEADER(rgba); +YUV2PACKEDX_HEADER(rgb24); +YUV2PACKEDX_HEADER(bgr24); + #endif /* PPC_YUV2RGB_ALTIVEC_H */