From f8547089776096b3a0c6a3de8227d8b5d7a820b0 Mon Sep 17 00:00:00 2001 From: ivo Date: Tue, 17 Apr 2007 20:38:17 +0000 Subject: [PATCH] slightly faster rgb32tobgr32; avoid one add and one cmp git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@23012 b3059339-0415-0410-9bf9-f77b7e298cf2 --- libswscale/rgb2rgb_template.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 7147855fed..2053a6edbd 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -1364,21 +1364,22 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) { - uint8_t *d = dst, *s = (uint8_t *) src; - const uint8_t *end = s + src_size; + long idx = 15 - src_size; + uint8_t *s = (uint8_t *) src-idx, *d = dst-idx; #ifdef HAVE_MMX __asm __volatile( - " "PREFETCH" (%1) \n" + " test %0, %0 \n" + " jns 2f \n" + " "PREFETCH" (%1, %0) \n" " movq %3, %%mm7 \n" " pxor %4, %%mm7 \n" " movq %%mm7, %%mm6 \n" " pxor %5, %%mm7 \n" - " jmp 2f \n" ASMALIGN(4) "1: \n" - " "PREFETCH" 32(%1) \n" - " movq (%1), %%mm0 \n" - " movq 8(%1), %%mm1 \n" + " "PREFETCH" 32(%1, %0) \n" + " movq (%1, %0), %%mm0 \n" + " movq 8(%1, %0), %%mm1 \n" # ifdef HAVE_MMX2 " pshufw $177, %%mm0, %%mm3 \n" " pshufw $177, %%mm1, %%mm5 \n" @@ -1406,23 +1407,21 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s " por %%mm3, %%mm0 \n" " por %%mm5, %%mm1 \n" # endif - " "MOVNTQ" %%mm0, (%0) \n" - " "MOVNTQ" %%mm1, 8(%0) \n" + " "MOVNTQ" %%mm0, (%2, %0) \n" + " "MOVNTQ" %%mm1, 8(%2, %0) \n" " add $16, %0 \n" - " add $16, %1 \n" - "2: \n" - " cmp %1, %2 \n" - " ja 1b \n" + " js 1b \n" " "SFENCE" \n" " "EMMS" \n" - : "+r"(d), "+r"(s) - : "r" (end-15), "m" (mask32b), "m" (mask32r), "m" (mmx_one) + "2: \n" + : "+&r"(idx) + : "r" (s), "r" (d), "m" (mask32b), "m" 
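(mask32r), "m" (mmx_one) : "memory"); #endif - for (; s<end; s+=4, d+=4) { - int v = *(uint32_t *)s, g = v & 0xff00ff00; + for (; idx<15; idx+=4) { + register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00; v &= 0xff00ff; - *(uint32_t *)d = (v>>16) + g + (v<<16); + *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); } }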