mirror of https://git.ffmpeg.org/ffmpeg.git
x86: af_volume: add SSE2-optimized s16 volume scaling
This commit is contained in:
parent
b384e031da
commit
f96f1e06a4
|
@ -213,6 +213,9 @@ static void volume_init(VolumeContext *vol)
|
|||
vol->samples_align = 8;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_volume_init_x86(vol);
|
||||
}
|
||||
|
||||
static int config_output(AVFilterLink *outlink)
|
||||
|
|
|
@ -50,4 +50,6 @@ typedef struct VolumeContext {
|
|||
int samples_align;
|
||||
} VolumeContext;
|
||||
|
||||
/**
 * x86-specific initialisation hook for the volume filter.
 *
 * Called from volume_init() when ARCH_X86 is set; may replace function
 * pointers / alignment settings in @p vol with optimized variants.
 *
 * @param vol volume filter context to update in place
 */
void ff_volume_init_x86(VolumeContext *vol);
|
||||
|
||||
#endif /* AVFILTER_AF_VOLUME_H */
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/gradfun.o
|
||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
||||
OBJS-$(CONFIG_YADIF_FILTER) += x86/yadif.o
|
||||
|
||||
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/hqdn3d.o
|
||||
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
;*****************************************************************************
|
||||
;* x86-optimized functions for volume filter
|
||||
;* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
|
||||
;*
|
||||
;* This file is part of Libav.
|
||||
;*
|
||||
;* Libav is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* Libav is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with Libav; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86inc.asm"
|
||||
|
||||
SECTION_RODATA 32
|
||||
|
||||
pw_1: times 8 dw 1
|
||||
pw_128: times 8 dw 128
|
||||
|
||||
SECTION_TEXT
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_scale_samples_s16(uint8_t *dst, const uint8_t *src, int len,
|
||||
; int volume)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; SSE2 implementation of 16-bit sample scaling.
;
; Computes, for each sample, the formula quoted inside the loop below:
;     dst[i] = av_clip_int16((src[i] * volume + 128) >> 8)
; Eight samples (one XMM register) are handled per iteration, walking the
; buffers from the last vector back to the first.
;
; NOTE(review): the loop body always runs at least once and uses aligned
; moves (mova), so this presumably requires len to be a positive multiple
; of 8 and src/dst to be 16-byte aligned -- confirm against the caller.
; NOTE(review): volume is used as a signed 16-bit word by pmaddwd; the C
; init code in this commit only selects this routine when volume_i < 32768.
INIT_XMM sse2
|
||||
; 4 arguments, 4 general-purpose registers, 4 XMM registers (m0-m3)
cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
|
||||
; m0 = volume in the low dword
movd m0, volumem
|
||||
; replicate the low word of volume into the four low words of m0
pshuflw m0, m0, 0
|
||||
; interleave with words of 1: m0 = { volume, 1, volume, 1, ... }
punpcklwd m0, [pw_1]
|
||||
; m1 = eight words of 128 (the rounding term)
mova m1, [pw_128]
|
||||
; lenq = byte offset of the last 16-byte vector (2 bytes per sample)
lea lenq, [lend*2-mmsize]
|
||||
.loop:
|
||||
; dst[i] = av_clip_int16((src[i] * volume + 128) >> 8);
|
||||
; load 8 source samples
mova m2, [srcq+lenq]
|
||||
; m3 = { s0, 128, s1, 128, s2, 128, s3, 128 } (low four samples)
punpcklwd m3, m2, m1
|
||||
; m2 = { s4, 128, s5, 128, s6, 128, s7, 128 } (high four samples)
punpckhwd m2, m1
|
||||
; each dword = sample * volume + 128 * 1
pmaddwd m3, m0
|
||||
pmaddwd m2, m0
|
||||
; arithmetic shift right by 8 on each 32-bit result
psrad m3, 8
|
||||
psrad m2, 8
|
||||
; pack back to 16 bits with signed saturation (the clip to int16 range)
packssdw m3, m2
|
||||
; store 8 scaled samples
mova [dstq+lenq], m3
|
||||
; step back one vector; continue while the offset is still >= 0
sub lenq, mmsize
|
||||
jge .loop
|
||||
REP_RET
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/samplefmt.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/af_volume.h"
|
||||
|
||||
/*
 * SSE2 sample-scaling routine for signed 16-bit audio.
 * Presumably the symbol emitted by the `cglobal scale_samples_s16`
 * definition (under INIT_XMM sse2) in x86/af_volume.asm -- verify against
 * the x86inc name-mangling rules.
 */
void ff_scale_samples_s16_sse2(uint8_t *dst, const uint8_t *src, int len,
|
||||
int volume);
|
||||
|
||||
/**
 * Select x86-optimized sample-scaling code for the volume filter.
 *
 * If the context's sample format maps to packed S16, the CPU reports SSE2
 * (external/assembly variant) and the integer volume is below 32768, the
 * SSE2 routine is installed in vol->scale_samples and vol->samples_align is
 * set to 8. Otherwise @p vol is left untouched.
 *
 * @param vol volume filter context; reads sample_fmt and volume_i, may
 *            write scale_samples and samples_align
 */
void ff_volume_init_x86(VolumeContext *vol)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
/* compare against the packed equivalent of the configured format */
enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt);
|
||||
|
||||
if (sample_fmt == AV_SAMPLE_FMT_S16) {
|
||||
/* NOTE(review): the volume_i < 32768 limit presumably exists because the
 * SSE2 routine multiplies with pmaddwd, which treats the volume as a
 * signed 16-bit word -- confirm. */
if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) {
|
||||
vol->scale_samples = ff_scale_samples_s16_sse2;
|
||||
/* the SSE2 loop consumes 8 s16 samples (one 16-byte vector) per step */
vol->samples_align = 8;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue