mirror of https://git.ffmpeg.org/ffmpeg.git
vp3: Change type of stride parameters to ptrdiff_t
This spares SIMD-optimized functions from having to sign-extend their stride argument manually before using it in pointer arithmetic. Also rename the parameters to "stride" everywhere.
This commit is contained in:
parent
963b3ab11f
commit
6892df9294
|
@ -23,9 +23,9 @@
|
|||
#include "libavutil/arm/cpu.h"
|
||||
#include "libavcodec/vp3dsp.h"
|
||||
|
||||
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
|
||||
void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
|
||||
void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, int16_t *data);
|
||||
void ff_vp3_idct_put_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
|
||||
void ff_vp3_idct_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
|
||||
void ff_vp3_idct_dc_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
|
||||
|
||||
void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
|
||||
void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
|
||||
|
|
|
@ -114,7 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C)
|
|||
#define ADD8(a) vec_add(a, eight)
|
||||
#define SHIFT4(a) vec_sra(a, four)
|
||||
|
||||
static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
|
||||
static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64])
|
||||
{
|
||||
vec_u8 t;
|
||||
IDCT_START
|
||||
|
@ -143,7 +143,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
|
|||
memset(block, 0, sizeof(*block) * 64);
|
||||
}
|
||||
|
||||
static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
|
||||
static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64])
|
||||
{
|
||||
LOAD_ZERO;
|
||||
vec_u8 t, vdst;
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
#define M(a, b) (((a) * (b)) >> 16)
|
||||
|
||||
static av_always_inline void idct(uint8_t *dst, int stride,
|
||||
static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
|
||||
int16_t *input, int type)
|
||||
{
|
||||
int16_t *ip = input;
|
||||
|
@ -195,21 +195,21 @@ static av_always_inline void idct(uint8_t *dst, int stride,
|
|||
}
|
||||
}
|
||||
|
||||
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
|
||||
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
|
||||
int16_t *block /* align 16 */)
|
||||
{
|
||||
idct(dest, line_size, block, 1);
|
||||
idct(dest, stride, block, 1);
|
||||
memset(block, 0, sizeof(*block) * 64);
|
||||
}
|
||||
|
||||
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
|
||||
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
|
||||
int16_t *block /* align 16 */)
|
||||
{
|
||||
idct(dest, line_size, block, 2);
|
||||
idct(dest, stride, block, 2);
|
||||
memset(block, 0, sizeof(*block) * 64);
|
||||
}
|
||||
|
||||
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
|
||||
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
|
||||
int16_t *block /* align 16 */)
|
||||
{
|
||||
int i, dc = (block[0] + 15) >> 5;
|
||||
|
@ -223,17 +223,17 @@ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
|
|||
dest[5] = av_clip_uint8(dest[5] + dc);
|
||||
dest[6] = av_clip_uint8(dest[6] + dc);
|
||||
dest[7] = av_clip_uint8(dest[7] + dc);
|
||||
dest += line_size;
|
||||
dest += stride;
|
||||
}
|
||||
block[0] = 0;
|
||||
}
|
||||
|
||||
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
|
||||
static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
|
||||
int *bounding_values)
|
||||
{
|
||||
unsigned char *end;
|
||||
int filter_value;
|
||||
const int nstride = -stride;
|
||||
const ptrdiff_t nstride = -stride;
|
||||
|
||||
for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
|
||||
filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
|
||||
|
@ -245,7 +245,7 @@ static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
|
|||
}
|
||||
}
|
||||
|
||||
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
|
||||
static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
|
||||
int *bounding_values)
|
||||
{
|
||||
unsigned char *end;
|
||||
|
|
|
@ -38,11 +38,11 @@ typedef struct VP3DSPContext {
|
|||
const uint8_t *b,
|
||||
ptrdiff_t stride, int h);
|
||||
|
||||
void (*idct_put)(uint8_t *dest, int line_size, int16_t *block);
|
||||
void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
|
||||
void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block);
|
||||
void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
||||
void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
||||
void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
|
||||
void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
|
||||
} VP3DSPContext;
|
||||
|
||||
void ff_vp3dsp_init(VP3DSPContext *c, int flags);
|
||||
|
|
|
@ -104,9 +104,6 @@ SECTION .text
|
|||
|
||||
INIT_MMX mmxext
|
||||
cglobal vp3_v_loop_filter, 3, 4
|
||||
%if ARCH_X86_64
|
||||
movsxd r1, r1d
|
||||
%endif
|
||||
mov r3, r1
|
||||
neg r1
|
||||
movq m6, [r0+r1*2]
|
||||
|
@ -121,9 +118,6 @@ cglobal vp3_v_loop_filter, 3, 4
|
|||
RET
|
||||
|
||||
cglobal vp3_h_loop_filter, 3, 4
|
||||
%if ARCH_X86_64
|
||||
movsxd r1, r1d
|
||||
%endif
|
||||
lea r3, [r1*3]
|
||||
|
||||
movd m6, [r0 -2]
|
||||
|
@ -525,7 +519,6 @@ cglobal vp3_h_loop_filter, 3, 4
|
|||
cglobal vp3_idct_put, 3, 4, 9
|
||||
VP3_IDCT r2
|
||||
|
||||
movsxdifnidn r1, r1d
|
||||
mova m4, [pb_80]
|
||||
lea r3, [r1*3]
|
||||
%assign %%i 0
|
||||
|
@ -582,7 +575,6 @@ cglobal vp3_idct_put, 3, 4, 9
|
|||
cglobal vp3_idct_add, 3, 4, 9
|
||||
VP3_IDCT r2
|
||||
|
||||
movsxdifnidn r1, r1d
|
||||
lea r3, [r1*3]
|
||||
pxor m4, m4
|
||||
%if mmsize == 16
|
||||
|
@ -689,9 +681,6 @@ vp3_idct_funcs
|
|||
|
||||
INIT_MMX mmxext
|
||||
cglobal vp3_idct_dc_add, 3, 4
|
||||
%if ARCH_X86_64
|
||||
movsxd r1, r1d
|
||||
%endif
|
||||
movsx r3, word [r2]
|
||||
mov word [r2], 0
|
||||
lea r2, [r1*3]
|
||||
|
|
|
@ -25,18 +25,17 @@
|
|||
#include "libavcodec/vp3dsp.h"
|
||||
#include "config.h"
|
||||
|
||||
void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
|
||||
void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
|
||||
void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
|
||||
void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, int16_t *block);
|
||||
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, int16_t *block);
|
||||
void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
|
||||
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
|
||||
int16_t *block);
|
||||
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
|
||||
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
|
||||
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride,
|
||||
int *bounding_values);
|
||||
void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride,
|
||||
void ff_vp3_h_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride,
|
||||
int *bounding_values);
|
||||
|
||||
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
|
||||
|
|
Loading…
Reference in New Issue