mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-29 02:42:21 +00:00
3e892b2bcd
In some cases, 2 or 3 calls are performed to functions for unusual widths. Instead, perform 2 calls for different widths to split the workload. The 8+16 and 4+8 width splits (for 8-bit and for more-than-8-bit content, respectively) can't be processed that way without modifications: some calls use unaligned buffers, and adding branches to handle this yielded no micro-benchmark benefit. For block_w == 12 (around 1% of the pixels of the sequence): Before: 12758 decicycles in epel_uni, 4093 runs, 3 skips; 19389 decicycles in qpel_uni, 8187 runs, 5 skips; 22699 decicycles in epel_bi, 32743 runs, 25 skips; 34736 decicycles in qpel_bi, 32733 runs, 35 skips. After: 11929 decicycles in epel_uni, 4096 runs, 0 skips; 18131 decicycles in qpel_uni, 8184 runs, 8 skips; 20065 decicycles in epel_bi, 32750 runs, 18 skips; 31458 decicycles in qpel_bi, 32753 runs, 15 skips. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> |
||
---|---|---|
.. | ||
ac3dsp_init.c | ||
ac3dsp.asm | ||
audiodsp_init.c | ||
audiodsp.asm | ||
blockdsp_init.c | ||
blockdsp.asm | ||
bswapdsp_init.c | ||
bswapdsp.asm | ||
cabac.h | ||
cavsdsp.c | ||
constants.c | ||
constants.h | ||
dcadsp_init.c | ||
dcadsp.asm | ||
dct32.asm | ||
dct_init.c | ||
dct-test.c | ||
deinterlace.asm | ||
dirac_dwt.c | ||
dirac_dwt.h | ||
diracdsp_mmx.c | ||
diracdsp_mmx.h | ||
diracdsp_yasm.asm | ||
dnxhdenc_init.c | ||
dnxhdenc.asm | ||
dwt_yasm.asm | ||
fdct.c | ||
fdct.h | ||
fdctdsp_init.c | ||
fft_init.c | ||
fft.asm | ||
fft.h | ||
flac_dsp_gpl.asm | ||
flacdsp_init.c | ||
flacdsp.asm | ||
fmtconvert_init.c | ||
fmtconvert.asm | ||
fpel.asm | ||
fpel.h | ||
h263_loopfilter.asm | ||
h263dsp_init.c | ||
h264_chromamc_10bit.asm | ||
h264_chromamc.asm | ||
h264_deblock_10bit.asm | ||
h264_deblock.asm | ||
h264_i386.h | ||
h264_idct_10bit.asm | ||
h264_idct.asm | ||
h264_intrapred_10bit.asm | ||
h264_intrapred_init.c | ||
h264_intrapred.asm | ||
h264_qpel_8bit.asm | ||
h264_qpel_10bit.asm | ||
h264_qpel.c | ||
h264_weight_10bit.asm | ||
h264_weight.asm | ||
h264chroma_init.c | ||
h264dsp_init.c | ||
hevc_deblock.asm | ||
hevc_idct.asm | ||
hevc_mc.asm | ||
hevc_res_add.asm | ||
hevcdsp_init.c | ||
hevcdsp.h | ||
hpeldsp_init.c | ||
hpeldsp_rnd_template.c | ||
hpeldsp.asm | ||
hpeldsp.h | ||
huffyuvdsp_init.c | ||
huffyuvdsp.asm | ||
huffyuvencdsp_mmx.c | ||
idct_mmx_xvid.c | ||
idct_sse2_xvid.c | ||
idct_xvid.h | ||
idctdsp_init.c | ||
idctdsp_mmx.c | ||
idctdsp.asm | ||
idctdsp.h | ||
imdct36.asm | ||
inline_asm.h | ||
lossless_audiodsp_init.c | ||
lossless_audiodsp.asm | ||
lossless_videodsp_init.c | ||
lossless_videodsp.asm | ||
lpc.c | ||
Makefile | ||
mathops.h | ||
me_cmp_init.c | ||
me_cmp.asm | ||
mlpdsp.c | ||
mpegaudiodsp.c | ||
mpegvideo.c | ||
mpegvideodsp.c | ||
mpegvideoenc_qns_template.c | ||
mpegvideoenc_template.c | ||
mpegvideoenc.c | ||
mpegvideoencdsp_init.c | ||
mpegvideoencdsp.asm | ||
pixblockdsp_init.c | ||
pixblockdsp.asm | ||
pngdsp_init.c | ||
pngdsp.asm | ||
proresdsp_init.c | ||
proresdsp.asm | ||
qpel.asm | ||
qpeldsp_init.c | ||
qpeldsp.asm | ||
rnd_template.c | ||
rv34dsp_init.c | ||
rv34dsp.asm | ||
rv40dsp_init.c | ||
rv40dsp.asm | ||
sbrdsp_init.c | ||
sbrdsp.asm | ||
simple_idct.c | ||
simple_idct.h | ||
snowdsp.c | ||
svq1enc_init.c | ||
svq1enc.asm | ||
ttadsp_init.c | ||
ttadsp.asm | ||
v210-init.c | ||
v210.asm | ||
vc1dsp_init.c | ||
vc1dsp_mmx.c | ||
vc1dsp.asm | ||
vc1dsp.h | ||
videodsp_init.c | ||
videodsp.asm | ||
vorbisdsp_init.c | ||
vorbisdsp.asm | ||
vp3dsp_init.c | ||
vp3dsp.asm | ||
vp6dsp_init.c | ||
vp6dsp.asm | ||
vp8dsp_init.c | ||
vp8dsp_loopfilter.asm | ||
vp8dsp.asm | ||
vp9dsp_init.c | ||
vp9intrapred.asm | ||
vp9itxfm.asm | ||
vp9lpf.asm | ||
vp9mc.asm | ||
vp56_arith.h | ||
w64xmmtest.c | ||
xvididct_init.c |