From 6892df9294d93322d43255ada299507465bc93c8 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Tue, 23 Aug 2016 23:19:30 +0200 Subject: [PATCH] vp3: Change type of stride parameters to ptrdiff_t This avoids SIMD-optimized functions having to sign-extend their stride argument manually to be able to do pointer arithmetic. Also adjust parameter names to be "stride" everywhere. --- libavcodec/arm/vp3dsp_init_arm.c | 6 +++--- libavcodec/ppc/vp3dsp_altivec.c | 4 ++-- libavcodec/vp3dsp.c | 20 ++++++++++---------- libavcodec/vp3dsp.h | 10 +++++----- libavcodec/x86/vp3dsp.asm | 11 ----------- libavcodec/x86/vp3dsp_init.c | 15 +++++++-------- 6 files changed, 27 insertions(+), 39 deletions(-) diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c index 11e1f1ca11..1c914343d3 100644 --- a/libavcodec/arm/vp3dsp_init_arm.c +++ b/libavcodec/arm/vp3dsp_init_arm.c @@ -23,9 +23,9 @@ #include "libavutil/arm/cpu.h" #include "libavcodec/vp3dsp.h" -void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); -void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); -void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, int16_t *data); +void ff_vp3_idct_put_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); +void ff_vp3_idct_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); +void ff_vp3_idct_dc_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data); void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c index 68e7102a53..1d907d7d2e 100644 --- a/libavcodec/ppc/vp3dsp_altivec.c +++ b/libavcodec/ppc/vp3dsp_altivec.c @@ -114,7 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C) #define ADD8(a) vec_add(a, eight) #define SHIFT4(a) vec_sra(a, four) -static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64]) +static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) { vec_u8 t; IDCT_START @@ -143,7 +143,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64]) memset(block, 0, sizeof(*block) * 64); } -static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64]) +static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) { LOAD_ZERO; vec_u8 t, vdst; diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index ab65f2b763..459441ea07 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -44,7 +44,7 @@ #define M(a, b) (((a) * (b)) >> 16) -static av_always_inline void idct(uint8_t *dst, int stride, +static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, int16_t *input, int type) { int16_t *ip = input; @@ -195,21 +195,21 @@ static av_always_inline void idct(uint8_t *dst, int stride, } } -static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size, +static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, int16_t *block /* align 16 */) { - idct(dest, line_size, block, 1); + idct(dest, stride, block, 1); memset(block, 0, sizeof(*block) * 64); } -static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size, +static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, int16_t *block /* align 16 */) { - idct(dest, line_size, block, 2); + idct(dest, stride, block, 2); memset(block, 0, sizeof(*block) * 64); } -static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size, +static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, int16_t *block /* align 16 */) { int i, dc = (block[0] + 15) >> 5; @@ -223,17 +223,17 @@ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size, dest[5] = av_clip_uint8(dest[5] + dc); dest[6] = av_clip_uint8(dest[6] + dc); dest[7] = av_clip_uint8(dest[7] + dc); - dest += line_size; + dest += stride; } block[0] = 0; } -static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, +static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values) { unsigned char *end; int filter_value; - const int nstride = -stride; + const ptrdiff_t nstride = -stride; for (end = first_pixel + 8; first_pixel < end; first_pixel++) { filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + @@ -245,7 +245,7 @@ static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, } } -static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, +static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values) { unsigned char *end; diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h index 3099a7e13c..010f905fce 100644 --- a/libavcodec/vp3dsp.h +++ b/libavcodec/vp3dsp.h @@ -38,11 +38,11 @@ typedef struct VP3DSPContext { const uint8_t *b, ptrdiff_t stride, int h); - void (*idct_put)(uint8_t *dest, int line_size, int16_t *block); - void (*idct_add)(uint8_t *dest, int line_size, int16_t *block); - void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block); - void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); - void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); + void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); + void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); } VP3DSPContext; void ff_vp3dsp_init(VP3DSPContext *c, int flags); diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index fc8a047224..8587741f95 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -104,9 +104,6 @@ SECTION .text INIT_MMX mmxext cglobal vp3_v_loop_filter, 3, 4 -%if ARCH_X86_64 - movsxd r1, r1d -%endif mov r3, r1 neg r1 movq m6, [r0+r1*2] @@ -121,9 +118,6 @@ cglobal vp3_v_loop_filter, 3, 4 RET cglobal vp3_h_loop_filter, 3, 4 -%if ARCH_X86_64 - movsxd r1, r1d -%endif lea r3, [r1*3] movd m6, [r0 -2] @@ -525,7 +519,6 @@ cglobal vp3_h_loop_filter, 3, 4 cglobal vp3_idct_put, 3, 4, 9 VP3_IDCT r2 - movsxdifnidn r1, r1d mova m4, [pb_80] lea r3, [r1*3] %assign %%i 0 @@ -582,7 +575,6 @@ cglobal vp3_idct_put, 3, 4, 9 cglobal vp3_idct_add, 3, 4, 9 VP3_IDCT r2 - movsxdifnidn r1, r1d lea r3, [r1*3] pxor m4, m4 %if mmsize == 16 @@ -689,9 +681,6 @@ vp3_idct_funcs INIT_MMX mmxext cglobal vp3_idct_dc_add, 3, 4 -%if ARCH_X86_64 - movsxd r1, r1d -%endif movsx r3, word [r2] mov word [r2], 0 lea r2, [r1*3] diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c index b320dc5db9..043e10f720 100644 --- a/libavcodec/x86/vp3dsp_init.c +++ b/libavcodec/x86/vp3dsp_init.c @@ -25,18 +25,17 @@ #include "libavcodec/vp3dsp.h" #include "config.h" -void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block); -void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block); +void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); +void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); -void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, int16_t *block); -void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, int16_t *block); +void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); +void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); -void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size, - int16_t *block); +void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, ptrdiff_t stride, int16_t *block); -void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride, +void ff_vp3_v_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride, int *bounding_values); -void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride, +void ff_vp3_h_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride, int *bounding_values); av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)