mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-02 21:12:12 +00:00
ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions.
This commit is contained in:
parent
8a8d0ce208
commit
6054cd25b4
@ -2676,6 +2676,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Clip each element of an int32_t array to the range [min, max]
 * (C implementation installed as DSPContext.vector_clip_int32).
 *
 * Elements are processed eight at a time; any remainder is handled one
 * element at a time, so len == 0 and lengths that are not a multiple of 8
 * are processed without running past the end of the arrays.
 *
 * @param dst destination array, receives len clipped values
 * @param src source array, len values are read
 * @param min lower bound passed to av_clip()
 * @param max upper bound passed to av_clip()
 * @param len number of elements to process
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    /* Main loop, unrolled by 8. The counter is tested before each pass:
     * len is unsigned, so a do/while that subtracts 8 first would wrap
     * around for len == 0 or len % 8 != 0 and keep writing. */
    while (len >= 8) {
        dst[0] = av_clip(src[0], min, max);
        dst[1] = av_clip(src[1], min, max);
        dst[2] = av_clip(src[2], min, max);
        dst[3] = av_clip(src[3], min, max);
        dst[4] = av_clip(src[4], min, max);
        dst[5] = av_clip(src[5], min, max);
        dst[6] = av_clip(src[6], min, max);
        dst[7] = av_clip(src[7], min, max);
        dst += 8;
        src += 8;
        len -= 8;
    }

    /* Tail: at most 7 leftover elements. */
    for (; len > 0; len--) {
        *dst++ = av_clip(*src++, min, max);
    }
}
|
||||
|
||||
#define W0 2048
|
||||
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
|
||||
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
|
||||
@ -3122,6 +3138,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->scalarproduct_int16 = scalarproduct_int16_c;
|
||||
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
|
||||
c->apply_window_int16 = apply_window_int16_c;
|
||||
c->vector_clip_int32 = vector_clip_int32_c;
|
||||
c->scalarproduct_float = scalarproduct_float_c;
|
||||
c->butterflies_float = butterflies_float_c;
|
||||
c->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||
|
@ -555,6 +555,22 @@ typedef struct DSPContext {
|
||||
void (*apply_window_int16)(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
|
||||
/**
|
||||
* Clip each element in an array of int32_t to a given minimum and maximum value.
|
||||
* @param dst destination array
|
||||
* constraints: 16-byte aligned
|
||||
* @param src source array
|
||||
* constraints: 16-byte aligned
|
||||
* @param min minimum value
|
||||
* constraints: must be in the range [-(1<<24), 1<<24]
|
||||
* @param max maximum value
|
||||
* constraints: must be in the range [-(1<<24), 1<<24]
|
||||
* @param len number of elements in the array
|
||||
* constraints: multiple of 32 greater than zero
|
||||
*/
|
||||
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
/* rv30 functions */
|
||||
qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
|
||||
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
|
||||
|
@ -2429,6 +2429,15 @@ int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
|
||||
|
||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||
|
||||
/* Prototypes of the x86 vector_clip_int32 implementations. Each one has the
 * same signature as DSPContext.vector_clip_int32 and is assigned to that
 * pointer in dsputil_init_mmx() according to the detected CPU flags. */
void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
@ -2570,6 +2579,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
|
||||
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
|
||||
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
|
||||
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
|
||||
#endif
|
||||
|
||||
if (mm_flags & AV_CPU_FLAG_MMX2) {
|
||||
@ -2855,6 +2866,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
#if HAVE_YASM
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
|
||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
|
||||
if (mm_flags & AV_CPU_FLAG_ATOM) {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2_int;
|
||||
} else {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
|
||||
}
|
||||
if (avctx->flags & CODEC_FLAG_BITEXACT) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
|
||||
} else {
|
||||
@ -2880,6 +2896,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
|
||||
#if HAVE_YASM
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse41;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_AVX && HAVE_YASM
|
||||
if (mm_flags & AV_CPU_FLAG_AVX) {
|
||||
if (bit_depth == 10) {
|
||||
|
@ -1048,3 +1048,118 @@ emu_edge sse
|
||||
%ifdef ARCH_X86_32
|
||||
emu_edge mmx
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
||||
; int32_t max, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Per-dword signed minimum without a native pminsd: dst = min(dst, src).
; tmp is overwritten with the comparison mask.
%macro PMINSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %2 ; tmp = src
|
||||
pcmpgtd %3, %1 ; tmp = all-ones in each dword where src > dst
|
||||
pxor %1, %2 ; dst = dst ^ src
|
||||
pand %1, %3 ; keep (dst ^ src) only where src > dst, else 0
|
||||
pxor %1, %2 ; xor src back: original dst where src > dst, else src
|
||||
%endmacro
|
||||
|
||||
; Per-dword signed maximum without a native pmaxsd: dst = max(dst, src).
; tmp is overwritten.
%macro PMAXSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %1 ; tmp = dst
|
||||
pcmpgtd %3, %2 ; tmp = all-ones in each dword where dst > src
|
||||
pand %1, %3 ; dst kept only where dst > src
|
||||
pandn %3, %2 ; tmp = src only where dst <= src
|
||||
por %1, %3 ; merge the two selections
|
||||
%endmacro
|
||||
|
||||
; Clip packed signed dwords with integer compares: first limit the value to
; max (%3), then raise it to min (%2).
; NOTE(review): the parameter count is declared as 3-4, but %4 is always
; forwarded as the scratch register, so callers need to supply it.
%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
|
||||
PMINSD_MMX %1, %3, %4
|
||||
PMAXSD_MMX %1, %2, %4
|
||||
%endmacro
|
||||
|
||||
; Clip packed dwords by going through single-precision float: convert to
; float, apply minps/maxps against the float max/min vectors, convert back.
; min and max must already hold float values; %4 is not used.
; NOTE(review): the float round trip is presumably why min/max are documented
; as limited to [-(1<<24), 1<<24] -- confirm against the DSPContext docs.
%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
|
||||
cvtdq2ps %1, %1 ; int32 -> float
|
||||
minps %1, %3 ; limit to max
|
||||
maxps %1, %2 ; raise to min
|
||||
cvtps2dq %1, %1 ; float -> int32
|
||||
%endmacro
|
||||
|
||||
; Clip packed signed dwords using pminsd/pmaxsd directly; %4 is not used.
%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
|
||||
pminsd %1, %3 ; limit to max
|
||||
pmaxsd %1, %2 ; raise to min
|
||||
%endmacro
|
||||
|
||||
; Broadcast the low dword of a 64-bit MMX register to both of its dwords.
%macro SPLATD_MMX 1
|
||||
punpckldq %1, %1 ; interleave the low dword with itself
|
||||
%endmacro
|
||||
|
||||
; Broadcast dword 0 of an XMM register to all four of its dwords.
%macro SPLATD_SSE2 1
|
||||
pshufd %1, %1, 0 ; shuffle selector 0 picks dword 0 for every lane
|
||||
%endmacro
|
||||
|
||||
; VECTOR_CLIP_INT32 suffix, xmm_regs, unroll, wide
;   %1 suffix   : appended to the function name (vector_clip_int32_<suffix>)
;   %2 xmm_regs : vector register count handed to cglobal
;   %3 unroll   : number of load/clip/store groups emitted per loop iteration
;   %4 wide     : 0 = 4 vectors per group (m0-m3)
;                 1 = 8 vectors per group (m0-m3 and m7-m10)
;
; SPLATD and CLIPD must be %define'd to the desired variants before the macro
; is instantiated. m4 holds the broadcast min, m5 the broadcast max, and m6 is
; passed to CLIPD as scratch. The loop body runs before the remaining length
; is tested, so len must be a positive multiple of the number of elements
; handled per iteration (mmsize * %3 * (1 + %4)).
%macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2
    ; the sse2 variant clips in float (CLIPD_SSE2), so convert the bounds
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4
    SPLATD    m5
.loop:
    ; %%i is the index of the first vector of the current group. It has to
    ; advance additively by the group size; scaling each offset by a 1-based
    ; repeat counter makes every group after the first overlap the previous
    ; one and skip vectors whenever %3 > 1.
%assign %%i 0
%rep %3
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %4
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD     m0,  m4, m5, m6
    CLIPD     m1,  m4, m5, m6
    CLIPD     m2,  m4, m5, m6
    CLIPD     m3,  m4, m5, m6
%if %4
    CLIPD     m7,  m4, m5, m6
    CLIPD     m8,  m4, m5, m6
    CLIPD     m9,  m4, m5, m6
    CLIPD     m10, m4, m5, m6
%endif
    mova      [dstq+mmsize*(0+%%i)], m0
    mova      [dstq+mmsize*(1+%%i)], m1
    mova      [dstq+mmsize*(2+%%i)], m2
    mova      [dstq+mmsize*(3+%%i)], m3
%if %4
    mova      [dstq+mmsize*(4+%%i)], m7
    mova      [dstq+mmsize*(5+%%i)], m8
    mova      [dstq+mmsize*(6+%%i)], m9
    mova      [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(1+%4)
%endrep
    ; %3 groups of 4*(1+%4) vectors were processed: advance the pointers by
    ; that many bytes and the counter by that many int32 elements.
    add       srcq, mmsize*4*%3*(1+%4)
    add       dstq, mmsize*4*%3*(1+%4)
    sub       lend, mmsize*%3*(1+%4)
    jg .loop
    REP_RET
%endmacro
|
||||
|
||||
; Instantiate vector_clip_int32 once per instruction set. SPLATD/CLIPD are
; re-%define'd between instantiations to select the matching helper macros.
INIT_MMX
%define SPLATD SPLATD_MMX
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 mmx, 0, 1, 0
INIT_XMM
%define SPLATD SPLATD_SSE2
; Integer SSE2 version: CLIPD is still CLIPD_MMX at this point, and that
; macro writes to its scratch argument (m6). Seven vector registers (m0-m6)
; are therefore declared so that cglobal accounts for xmm6 as well.
VECTOR_CLIP_INT32 sse2_int, 7, 1, 0
; Float SSE2 version: CLIPD_SSE2 ignores the scratch argument, so m0-m5
; suffice; the body is unrolled twice.
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 sse2, 6, 2, 0
%define CLIPD CLIPD_SSE41
; When m8 is defined (more than eight vector registers available), use the
; wide variant that also works on m7-m10.
%ifdef m8
VECTOR_CLIP_INT32 sse41, 11, 1, 1
%else
VECTOR_CLIP_INT32 sse41, 6, 1, 0
%endif
|
||||
|
Loading…
Reference in New Issue
Block a user