Merge commit '62844c3fd66940c7747e9b2bb7804e265319f43f'

* commit '62844c3fd66940c7747e9b2bb7804e265319f43f':
  h264: Integrate clear_blocks calls with IDCT

Conflicts:
	libavcodec/arm/h264idct_neon.S
	libavcodec/h264idct_template.c
	libavcodec/x86/h264_idct.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2013-04-11 11:53:19 +02:00 · 2013-04-11 11:53:19 +02:00 · 0724b4a16d
parent 944ad46182 62844c3fd6
commit 0724b4a16d
3 changed files with 15 additions and 15 deletions
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -187,8 +187,8 @@ endfunc
        vshr.s16        q2,  q10, #1
        vadd.i16        q0,  q8,  q12
        vld1.16         {q14-q15},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
        vsub.i16        q1,  q8,  q12
        vshr.s16        q3,  q14, #1
        vsub.i16        q2,  q2,  q14
@@ -267,16 +267,16 @@ endfunc
 .endm

 function ff_h264_idct8_add_neon, export=1
-        vmov.i16        q7,       #0
+        vmov.i16        q3,       #0
        vld1.16         {q8-q9},  [r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
        vld1.16         {q10-q11},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
        vld1.16         {q12-q13},[r1,:128]
-        vst1.16         {q7},     [r1,:128]!
-        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!
+        vst1.16         {q3},     [r1,:128]!

        idct8x8_cols    0
        idct8x8_cols    1
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -145,7 +145,7 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
    pixel *dst = (pixel*)_dst;
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
-    stride >>= sizeof(pixel)-1;
+    stride /= sizeof(pixel);
    block[0] = 0;
    for( j = 0; j < 4; j++ )
    {
@@ -161,7 +161,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
    dctcoef *block = (dctcoef*)_block;
    int dc = (block[0] + 32) >> 6;
    block[0] = 0;
-    stride >>= sizeof(pixel)-1;
+    stride /= sizeof(pixel);
    for( j = 0; j < 8; j++ )
    {
        for( i = 0; i < 8; i++ )
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -312,7 +312,7 @@ INIT_MMX mmxext
 %if ARCH_X86_64
 cglobal h264_idct_dc_add_8, 3, 4, 0
    movsx        r3, word [r1]
-    mov   word [r1], 0
+    mov  dword [r1], 0
    DC_ADD_MMXEXT_INIT r3, r2
    DC_ADD_MMXEXT_OP movh, r0, r2, r3
    RET
@@ -320,7 +320,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
 ; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_dc_add_8, 3, 4, 0
    movsx        r3, word [r1]
-    mov   word [r1], 0
+    mov  dword [r1], 0
    DC_ADD_MMXEXT_INIT r3, r2
    DC_ADD_MMXEXT_OP mova, r0, r2, r3
    lea          r0, [r0+r2*4]
@@ -329,7 +329,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
 %else
 cglobal h264_idct_dc_add_8, 2, 3, 0
    movsx        r2, word [r1]
-    mov   word [r1], 0
+    mov  dword [r1], 0
    mov          r1, r2m
    DC_ADD_MMXEXT_INIT r2, r1
    DC_ADD_MMXEXT_OP movh, r0, r1, r2
@@ -338,7 +338,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
 ; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_dc_add_8, 2, 3, 0
    movsx        r2, word [r1]
-    mov   word [r1], 0
+    mov  dword [r1], 0
    mov          r1, r2m
    DC_ADD_MMXEXT_INIT r2, r1
    DC_ADD_MMXEXT_OP mova, r0, r1, r2