diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp_yasm.asm index d12fc64dd5..3e9765b42d 100644 --- a/libavcodec/x86/diracdsp_yasm.asm +++ b/libavcodec/x86/diracdsp_yasm.asm @@ -136,6 +136,8 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, and wd, ~(mmsize-1) %if ARCH_X86_64 + movsxd dst_strideq, dst_strided + movsxd src_strideq, src_strided mov r7d, r5m mov r8d, wd %define wspill r8d @@ -177,6 +179,8 @@ cglobal add_rect_clamped_%1, 7,9,3, dst, src, stride, idwt, idwt_stride, w, h and wd, ~(mmsize-1) %if ARCH_X86_64 + movsxd strideq, strided + movsxd idwt_strideq, idwt_strided mov r8d, wd %define wspill r8d %else diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm index f6280d1a45..5253abc6c8 100644 --- a/libavcodec/x86/dwt_yasm.asm +++ b/libavcodec/x86/dwt_yasm.asm @@ -64,6 +64,9 @@ section .text ; int width) cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width mova m2, [pw_2] +%if ARCH_X86_64 + mov widthd, widthd +%endif .loop: sub widthq, mmsize/2 mova m1, [b0q+2*widthq] @@ -77,6 +80,9 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width ; int width) cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width mova m1, [pw_1] +%if ARCH_X86_64 + mov widthd, widthd +%endif .loop: sub widthq, mmsize/2 mova m0, [b0q+2*widthq] @@ -93,6 +99,9 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width mova m3, [pw_8] mova m4, [pw_1991] +%if ARCH_X86_64 + mov widthd, widthd +%endif .loop: sub widthq, mmsize/2 mova m0, [b0q+2*widthq] @@ -107,6 +116,9 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width mova m3, [pw_16] mova m4, [pw_1991] +%if ARCH_X86_64 + mov widthd, widthd +%endif .loop: sub widthq, mmsize/2 mova m0, [b0q+2*widthq] @@ -131,6 +143,9 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width ; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width) cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width mova m3, [pw_1] +%if ARCH_X86_64 + mov widthd, widthd +%endif .loop: sub widthq, mmsize/2 mova m1, [b1q+2*widthq]