avcodec/x86/hpeldsp: fix crash on AMD K6-3+

There are two instructions, pavgb and pavgusb. Both perform the same operation, but they have different encodings. pavgb exists in the SSE (or MMXEXT) instruction set and pavgusb exists in the 3DNow! instruction set. libavcodec uses the macro PAVGB to select the proper instruction. However, the function avg_pixels8_xy2 doesn't use this macro; it uses pavgb directly. As a consequence, the function avg_pixels8_xy2 crashes on AMD K6-2 and K6-3 processors, because they have pavgusb, but not pavgb. This bug seems to have been introduced by commit 71155d7b41, "dsputil: x86: Convert mpeg4 qpel and dsputil avg to yasm" Signed-off-by: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2013-11-03 18:00:17 +01:00 · 2013-11-03 18:00:17 +01:00 · 074155360d
parent 535d58959d
commit 074155360d
1 changed files with 13 additions and 13 deletions
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -423,30 +423,30 @@ cglobal avg_pixels8_xy2, 4,5
    mova         m6, [pb_1]
    lea          r4, [r2*2]
    mova         m0, [r1]
-    pavgb        m0, [r1+1]
+    PAVGB        m0, [r1+1]
 .loop:
    mova         m2, [r1+r4]
    mova         m1, [r1+r2]
    psubusb      m2, m6
-    pavgb        m1, [r1+r2+1]
-    pavgb        m2, [r1+r4+1]
+    PAVGB        m1, [r1+r2+1]
+    PAVGB        m2, [r1+r4+1]
    add          r1, r4
-    pavgb        m0, m1
-    pavgb        m1, m2
-    pavgb        m0, [r0]
-    pavgb        m1, [r0+r2]
+    PAVGB        m0, m1
+    PAVGB        m1, m2
+    PAVGB        m0, [r0]
+    PAVGB        m1, [r0+r2]
    mova       [r0], m0
    mova    [r0+r2], m1
    mova         m1, [r1+r2]
    mova         m0, [r1+r4]
-    pavgb        m1, [r1+r2+1]
-    pavgb        m0, [r1+r4+1]
+    PAVGB        m1, [r1+r2+1]
+    PAVGB        m0, [r1+r4+1]
    add          r0, r4
    add          r1, r4
-    pavgb        m2, m1
-    pavgb        m1, m0
-    pavgb        m2, [r0]
-    pavgb        m1, [r0+r2]
+    PAVGB        m2, m1
+    PAVGB        m1, m0
+    PAVGB        m2, [r0]
+    PAVGB        m1, [r0+r2]
    mova       [r0], m2
    mova    [r0+r2], m1
    add          r0, r4