diff --git a/configure b/configure index 3c13e4f95e..49804ffca2 100755 --- a/configure +++ b/configure @@ -144,6 +144,7 @@ Component options: --disable-mdct disable MDCT code --disable-rdft disable RDFT code --disable-fft disable FFT code + --disable-pixelutils disable pixel utils in libavutil Hardware accelerators: --disable-dxva2 disable DXVA2 code [autodetect] @@ -1451,6 +1452,7 @@ SUBSYSTEM_LIST=" lsp lzo mdct + pixelutils network rdft " diff --git a/doc/APIchanges b/doc/APIchanges index 387a2f985a..8a78529bbb 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2012-10-22 API changes, most recent first: +2014-08-02 - xxxxxxx - lavu 52.98.100 - pixelutils.h + Add pixelutils API with SAD functions + 2014-08-xx - xxxxxxx - lavu 53.22.0 - pixfmt.h Add AV_PIX_FMT_YA16 pixel format for 16 bit packed gray with alpha. diff --git a/libavutil/Makefile b/libavutil/Makefile index 91751dc4c3..d57a741a0a 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -44,6 +44,7 @@ HEADERS = adler32.h \ opt.h \ parseutils.h \ pixdesc.h \ + pixelutils.h \ pixfmt.h \ random_seed.h \ replaygain.h \ @@ -113,6 +114,7 @@ OBJS = adler32.o \ opt.o \ parseutils.o \ pixdesc.o \ + pixelutils.o \ random_seed.o \ rational.o \ rc4.o \ @@ -170,6 +172,7 @@ TESTPROGS = adler32 \ pca \ parseutils \ pixdesc \ + pixelutils \ random_seed \ rational \ ripemd \ diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c new file mode 100644 index 0000000000..cfdd35afc2 --- /dev/null +++ b/libavutil/pixelutils.c @@ -0,0 +1,153 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "common.h" +#include "pixelutils.h" + +#if CONFIG_PIXELUTILS + +#include "x86/pixelutils.h" + +static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2, + int w, int h) +{ + int x, y, sum = 0; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) + sum += abs(src1[x] - src2[x]); + src1 += stride1; + src2 += stride2; + } + return sum; +} + +#define DECLARE_BLOCK_FUNCTIONS(size) \ +static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1, \ + const uint8_t *src2, ptrdiff_t stride2) \ +{ \ + return sad_wxh(src1, stride1, src2, stride2, size, size); \ +} + +DECLARE_BLOCK_FUNCTIONS(2) +DECLARE_BLOCK_FUNCTIONS(4) +DECLARE_BLOCK_FUNCTIONS(8) +DECLARE_BLOCK_FUNCTIONS(16) + +static const av_pixelutils_sad_fn sad_c[] = { + block_sad_2x2_c, + block_sad_4x4_c, + block_sad_8x8_c, + block_sad_16x16_c, +}; + +#endif /* CONFIG_PIXELUTILS */ + +av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx) +{ +#if !CONFIG_PIXELUTILS + av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required " + "but libavutil is not compiled with it\n"); + return NULL; +#else + av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)]; + + memcpy(sad, sad_c, sizeof(sad)); + + if (w_bits < 1 || w_bits > FF_ARRAY_ELEMS(sad) || + h_bits < 1 || h_bits > FF_ARRAY_ELEMS(sad)) + return NULL; + if (w_bits != h_bits) // only squared sad for now + return NULL; + +#if ARCH_X86 + 
ff_pixelutils_sad_init_x86(sad, aligned); +#endif + + return sad[w_bits - 1]; +#endif +} + +#ifdef TEST +#define W1 320 +#define H1 240 +#define W2 640 +#define H2 480 + +static int run_test(const char *test, + const uint32_t *b1, const uint32_t *b2) +{ + int i, a, ret = 0; + + for (a = 0; a < 3; a++) { + const uint8_t *block1 = (const uint8_t *)b1; + const uint8_t *block2 = (const uint8_t *)b2; + + switch (a) { + case 0: block1++; block2++; break; + case 1: block2++; break; + case 2: break; + } + for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) { + av_pixelutils_sad_fn f_ref = sad_c[i - 1]; + av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL); + const int out = f_out(block1, W1, block2, W2); + const int ref = f_ref(block1, W1, block2, W2); + printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n", + out == ref ? "OK" : "FAIL", + a ? 'A' : 'U', a == 2 ? 'A' : 'U', + test, 1< +#include +#include "common.h" + +/** + * Sum of abs(src1[x] - src2[x]) + */ +typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +/** + * Get a potentially optimized pointer to a Sum-of-absolute-differences + * function (see the av_pixelutils_sad_fn prototype). + * + * @param w_bits 1< +;* Copyright (C) 2014 Clément Bœsch +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86util.asm" + +SECTION_TEXT + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmx +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m7, m7 ; m7 = 0, zero source for the byte->word unpacks below + pxor m6, m6 ; m6 = accumulated word sums +%rep 4 ; each repetition handles two rows + mova m0, [src1q] + mova m2, [src1q + stride1q] + mova m1, [src2q] + mova m3, [src2q + stride2q] + psubusb m4, m0, m1 + psubusb m5, m2, m3 + psubusb m1, m0 + psubusb m3, m2 + por m1, m4 ; OR of both saturating differences = |src1 - src2| (first row) + por m3, m5 ; same for the second row + punpcklbw m0, m1, m7 + punpcklbw m2, m3, m7 + punpckhbw m1, m7 + punpckhbw m3, m7 + paddw m0, m1 + paddw m2, m3 + paddw m0, m2 + paddw m6, m0 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + psrlq m0, m6, 32 ; fold the four word sums of m6 into its low word + paddw m6, m0 + psrlq m0, m6, 16 + paddw m6, m0 + movd eax, m6 + movzx eax, ax ; only the low word holds the total + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 ; m2 = running sum +%rep 4 ; each repetition handles two rows + mova m0, [src1q] + mova m1, [src1q + stride1q] + psadbw m0, [src2q] + psadbw m1, [src2q + stride2q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int
ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 ; m2 = running sum +%rep 16 ; one row per repetition, as two 8-byte halves + mova m0, [src1q] + mova m1, [src1q + 8] + psadbw m0, [src2q] + psadbw m1, [src2q + 8] + paddw m2, m0 + paddw m2, m1 + add src1q, stride1q + add src2q, stride2q +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 + pxor m4, m4 ; m4 = running sums +%rep 8 ; each repetition handles two rows + movu m0, [src1q] + movu m1, [src1q + stride1q] + movu m2, [src2q] + movu m3, [src2q + stride2q] + psadbw m0, m2 + psadbw m1, m3 + paddw m4, m0 + paddw m4, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m4 ; add the upper-half partial sum to the lower one + paddw m4, m0 + movd eax, m4 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_XMM_16x16 1 +INIT_XMM sse2 +cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2 + pxor m2, m2 ; m2 = running sums +%rep 8 ; each repetition handles two rows + mov%1 m0, [src2q] ; %1 (a or u) picks mova or movu for the src2 loads + mov%1 m1, [src2q + stride2q] + psadbw m0, [src1q] ; src1 is read directly as a memory operand + psadbw m1, [src1q + stride1q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m2 ; add the upper-half partial sum to the lower one + paddw m2, m0 + movd eax, m2 + RET +%endmacro + +SAD_XMM_16x16 a +SAD_XMM_16x16 u diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h new file mode 100644
index 0000000000..876cf46053 --- /dev/null +++ b/libavutil/x86/pixelutils.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_PIXELUTILS_H +#define AVUTIL_X86_PIXELUTILS_H + +#include "libavutil/pixelutils.h" + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned); + +#endif /* AVUTIL_X86_PIXELUTILS_H */ diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c new file mode 100644 index 0000000000..d60051067a --- /dev/null +++ b/libavutil/x86/pixelutils_init.c @@ -0,0 +1,58 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "pixelutils.h" +#include "cpu.h" + +int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) +{ + int cpu_flags = av_get_cpu_flags(); /* the checks below run in order, so a later match overwrites an earlier one */ + + if (EXTERNAL_MMX(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmx; /* sad[2] receives the 8x8 routine */ + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmxext; + sad[3] = ff_pixelutils_sad_16x16_mmxext; /* sad[3] receives the 16x16 routine */ + } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { /* values other than 0, 1 and 2 leave sad[3] unchanged */ + case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned + } + } +} diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak index de070ae115..9f676a83ad 100644 --- a/tests/fate/libavutil.mak +++ b/tests/fate/libavutil.mak @@ -73,6 +73,10 @@ FATE_LIBAVUTIL += fate-parseutils fate-parseutils: libavutil/parseutils-test$(EXESUF) fate-parseutils: CMD = run libavutil/parseutils-test
+FATE_LIBAVUTIL-$(CONFIG_PIXELUTILS) += fate-pixelutils +fate-pixelutils: libavutil/pixelutils-test$(EXESUF) +fate-pixelutils: CMD = run libavutil/pixelutils-test + FATE_LIBAVUTIL += fate-random_seed fate-random_seed: libavutil/random_seed-test$(EXESUF) fate-random_seed: CMD = run libavutil/random_seed-test @@ -98,5 +102,6 @@ FATE_LIBAVUTIL += fate-xtea fate-xtea: libavutil/xtea-test$(EXESUF) fate-xtea: CMD = run libavutil/xtea-test +FATE_LIBAVUTIL += $(FATE_LIBAVUTIL-yes) FATE-$(CONFIG_AVUTIL) += $(FATE_LIBAVUTIL) fate-libavutil: $(FATE_LIBAVUTIL) diff --git a/tests/ref/fate/pixelutils b/tests/ref/fate/pixelutils new file mode 100644 index 0000000000..f7f507a5c0 --- /dev/null +++ b/tests/ref/fate/pixelutils @@ -0,0 +1,36 @@ +[OK] [UU] SAD [random] 2x2=314 ref=314 +[OK] [UU] SAD [random] 4x4=1129 ref=1129 +[OK] [UU] SAD [random] 8x8=4936 ref=4936 +[OK] [UU] SAD [random] 16x16=20704 ref=20704 +[OK] [AU] SAD [random] 2x2=440 ref=440 +[OK] [AU] SAD [random] 4x4=1317 ref=1317 +[OK] [AU] SAD [random] 8x8=5262 ref=5262 +[OK] [AU] SAD [random] 16x16=21040 ref=21040 +[OK] [AA] SAD [random] 2x2=196 ref=196 +[OK] [AA] SAD [random] 4x4=1225 ref=1225 +[OK] [AA] SAD [random] 8x8=4712 ref=4712 +[OK] [AA] SAD [random] 16x16=21184 ref=21184 +[OK] [UU] SAD [max] 2x2=1020 ref=1020 +[OK] [UU] SAD [max] 4x4=4080 ref=4080 +[OK] [UU] SAD [max] 8x8=16320 ref=16320 +[OK] [UU] SAD [max] 16x16=65280 ref=65280 +[OK] [AU] SAD [max] 2x2=1020 ref=1020 +[OK] [AU] SAD [max] 4x4=4080 ref=4080 +[OK] [AU] SAD [max] 8x8=16320 ref=16320 +[OK] [AU] SAD [max] 16x16=65280 ref=65280 +[OK] [AA] SAD [max] 2x2=1020 ref=1020 +[OK] [AA] SAD [max] 4x4=4080 ref=4080 +[OK] [AA] SAD [max] 8x8=16320 ref=16320 +[OK] [AA] SAD [max] 16x16=65280 ref=65280 +[OK] [UU] SAD [min] 2x2=0 ref=0 +[OK] [UU] SAD [min] 4x4=0 ref=0 +[OK] [UU] SAD [min] 8x8=0 ref=0 +[OK] [UU] SAD [min] 16x16=0 ref=0 +[OK] [AU] SAD [min] 2x2=0 ref=0 +[OK] [AU] SAD [min] 4x4=0 ref=0 +[OK] [AU] SAD [min] 8x8=0 ref=0 +[OK] [AU] SAD [min] 16x16=0 ref=0 
+[OK] [AA] SAD [min] 2x2=0 ref=0 +[OK] [AA] SAD [min] 4x4=0 ref=0 +[OK] [AA] SAD [min] 8x8=0 ref=0 +[OK] [AA] SAD [min] 16x16=0 ref=0