From fab9df63a3156ffe1f9490aafaea41e03ef60ddf Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 24 Jan 2014 01:41:12 -0800 Subject: [PATCH] dsputil: Split off global motion compensation bits into a separate context --- libavcodec/Makefile | 4 +- libavcodec/dsputil.c | 91 ----------- libavcodec/dsputil.h | 16 -- libavcodec/mpegvideo.c | 1 + libavcodec/mpegvideo.h | 2 + libavcodec/mpegvideo_motion.c | 67 ++++---- libavcodec/mpegvideodsp.c | 119 ++++++++++++++ libavcodec/mpegvideodsp.h | 47 ++++++ libavcodec/ppc/Makefile | 4 +- libavcodec/ppc/dsputil_altivec.h | 3 +- libavcodec/ppc/dsputil_ppc.c | 2 - .../ppc/{gmc_altivec.c => mpegvideodsp.c} | 15 +- libavcodec/x86/Makefile | 3 +- libavcodec/x86/dsputil_init.c | 2 - libavcodec/x86/dsputil_mmx.c | 114 ------------- libavcodec/x86/dsputil_x86.h | 5 - libavcodec/x86/mpegvideodsp.c | 153 ++++++++++++++++++ 17 files changed, 374 insertions(+), 274 deletions(-) create mode 100644 libavcodec/mpegvideodsp.c create mode 100644 libavcodec/mpegvideodsp.h rename libavcodec/ppc/{gmc_altivec.c => mpegvideodsp.c} (92%) create mode 100644 libavcodec/x86/mpegvideodsp.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 90c88c37e4..1881b0b963 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -63,8 +63,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ mpegaudiodsp_data.o \ mpegaudiodsp_fixed.o \ mpegaudiodsp_float.o -OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o \ - mpegutils.o +OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ + mpegvideo_motion.o mpegutils.o OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index eb86c0c908..5d706bd6e7 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3,8 +3,6 @@ * Copyright (c) 2000, 2001 Fabrice Bellard * Copyright (c) 2002-2004 Michael Niedermayer * - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer - * * This file is part of Libav. * * Libav is free software; you can redistribute it and/or @@ -352,92 +350,6 @@ static int sum_abs_dctelem_c(int16_t *block) #define avg2(a, b) ((a + b + 1) >> 1) #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) -static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, - int x16, int y16, int rounder) -{ - const int A = (16 - x16) * (16 - y16); - const int B = (x16) * (16 - y16); - const int C = (16 - x16) * (y16); - const int D = (x16) * (y16); - int i; - - for (i = 0; i < h; i++) { - dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8; - dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8; - dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8; - dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8; - dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8; - dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8; - dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8; - dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8; - dst += stride; - src += stride; - } -} - -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, - int width, int height) -{ - int y, vx, vy; - const int s = 1 << shift; - - width--; - height--; - - for (y = 0; y < h; y++) { - int x; - - vx = ox; - vy = oy; - for (x = 0; x < 8; x++) { // FIXME: optimize - int index; - int src_x = vx >> 16; - int src_y = vy >> 16; - int frac_x = src_x & (s - 1); - int frac_y = src_y & (s - 1); - - src_x >>= shift; - src_y >>= shift; - - if ((unsigned) src_x < width) { - if ((unsigned) src_y < height) { - index = src_x + src_y * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_x) + - src[index + 1] * frac_x) * (s - frac_y) + - (src[index + stride] * (s - frac_x) + - src[index + stride + 1] * frac_x) * frac_y + - r) >> (shift * 2); - } else { - index = src_x + av_clip(src_y, 0, height) * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_x) + - src[index + 1] * frac_x) * s + - r) >> (shift * 2); - } - } else { - if ((unsigned) src_y < height) { - index = av_clip(src_x, 0, width) + src_y * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_y) + - src[index + stride] * frac_y) * s + - r) >> (shift * 2); - } else { - index = av_clip(src_x, 0, width) + - av_clip(src_y, 0, height) * stride; - dst[y * stride + x] = src[index]; - } - } - - vx += dxx; - vy += dyx; - } - ox += dxy; - oy += dyy; - } -} - static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { @@ -1346,9 +1258,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->sum_abs_dctelem = sum_abs_dctelem_c; - c->gmc1 = gmc1_c; - c->gmc = ff_gmc_c; - c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index a7dc3a2d66..1dbc784fbb 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -34,10 +34,6 @@ extern uint32_t ff_square_tab[512]; -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, - int width, int height); - struct MpegEncContext; /* Motion estimation: * h is limited to { width / 2, width, 2 * width }, @@ -84,18 +80,6 @@ typedef struct DSPContext { uint8_t *pixels /* align 8 */, int line_size); int (*sum_abs_dctelem)(int16_t *block /* align 16 */); - /** - * translational global motion compensation. - */ - void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, - int srcStride, int h, int x16, int y16, int rounder); - /** - * global motion compensation. - */ - void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height); int (*pix_sum)(uint8_t *pix, int line_size); int (*pix_norm1)(uint8_t *pix, int line_size); diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index f602dd6f23..fb63d6afda 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -380,6 +380,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s) ff_blockdsp_init(&s->bdsp, s->avctx); ff_dsputil_init(&s->dsp, s->avctx); ff_hpeldsp_init(&s->hdsp, s->avctx->flags); + ff_mpegvideodsp_init(&s->mdsp); ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 6409a86b1e..6fe6a4345b 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -35,6 +35,7 @@ #include "get_bits.h" #include "h263dsp.h" #include "hpeldsp.h" +#include "mpegvideodsp.h" #include "put_bits.h" #include "ratecontrol.h" #include "parser.h" @@ -351,6 +352,7 @@ typedef struct MpegEncContext { BlockDSPContext bdsp; DSPContext dsp; ///< pointers for accelerated dsp functions HpelDSPContext hdsp; + MpegVideoDSPContext mdsp; QpelDSPContext qdsp; VideoDSPContext vdsp; H263DSPContext h263dsp; diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c index c57a26d5f8..b399db844b 100644 --- a/libavcodec/mpegvideo_motion.c +++ b/libavcodec/mpegvideo_motion.c @@ -25,7 +25,6 @@ #include "libavutil/internal.h" #include "avcodec.h" -#include "dsputil.h" #include "h261.h" #include "mpegutils.h" #include "mpegvideo.h" @@ -72,10 +71,10 @@ static void gmc1_motion(MpegEncContext *s, } if ((motion_x | motion_y) & 7) { - s->dsp.gmc1(dest_y, ptr, linesize, 16, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); - s->dsp.gmc1(dest_y + 8, ptr + 8, linesize, 16, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_y, ptr, linesize, 16, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_y + 8, ptr + 8, linesize, 16, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); } else { int dxy; @@ -115,8 +114,8 @@ static void gmc1_motion(MpegEncContext *s, ptr = s->edge_emu_buffer; emu = 1; } - s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_cb, ptr, uvlinesize, 8, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); ptr = ref_picture[2] + offset; if (emu) { @@ -127,8 +126,8 @@ static void gmc1_motion(MpegEncContext *s, s->h_edge_pos >> 1, s->v_edge_pos >> 1); ptr = s->edge_emu_buffer; } - s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_cr, ptr, uvlinesize, 8, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); } static void gmc_motion(MpegEncContext *s, @@ -150,19 +149,19 @@ static void gmc_motion(MpegEncContext *s, oy = s->sprite_offset[0][1] + s->sprite_delta[1][0] * s->mb_x * 16 + s->sprite_delta[1][1] * s->mb_y * 16; - s->dsp.gmc(dest_y, ptr, linesize, 16, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos, s->v_edge_pos); - s->dsp.gmc(dest_y + 8, ptr, linesize, 16, - ox + s->sprite_delta[0][0] * 8, - oy + s->sprite_delta[1][0] * 8, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos, s->v_edge_pos); + s->mdsp.gmc(dest_y, ptr, linesize, 16, + ox, oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); + s->mdsp.gmc(dest_y + 8, ptr, linesize, 16, + ox + s->sprite_delta[0][0] * 8, + oy + s->sprite_delta[1][0] * 8, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) return; @@ -173,20 +172,20 @@ static void gmc_motion(MpegEncContext *s, s->sprite_delta[1][1] * s->mb_y * 8; ptr = ref_picture[1]; - s->dsp.gmc(dest_cb, ptr, uvlinesize, 8, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos >> 1, s->v_edge_pos >> 1); + s->mdsp.gmc(dest_cb, ptr, uvlinesize, 8, + ox, oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos >> 1, s->v_edge_pos >> 1); ptr = ref_picture[2]; - s->dsp.gmc(dest_cr, ptr, uvlinesize, 8, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos >> 1, s->v_edge_pos >> 1); + s->mdsp.gmc(dest_cr, ptr, uvlinesize, 8, + ox, oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos >> 1, s->v_edge_pos >> 1); } static inline int hpel_motion(MpegEncContext *s, diff --git a/libavcodec/mpegvideodsp.c b/libavcodec/mpegvideodsp.c new file mode 100644 index 0000000000..915a844d6b --- /dev/null +++ b/libavcodec/mpegvideodsp.c @@ -0,0 +1,119 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "mpegvideodsp.h" + +static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, + int x16, int y16, int rounder) +{ + const int A = (16 - x16) * (16 - y16); + const int B = (x16) * (16 - y16); + const int C = (16 - x16) * (y16); + const int D = (x16) * (y16); + int i; + + for (i = 0; i < h; i++) { + dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8; + dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8; + dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8; + dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8; + dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8; + dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8; + dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8; + dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8; + dst += stride; + src += stride; + } +} + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, + int width, int height) +{ + int y, vx, vy; + const int s = 1 << shift; + + width--; + height--; + + for (y = 0; y < h; y++) { + int x; + + vx = ox; + vy = oy; + for (x = 0; x < 8; x++) { // FIXME: optimize + int index; + int src_x = vx >> 16; + int src_y = vy >> 16; + int frac_x = src_x & (s - 1); + int frac_y = src_y & (s - 1); + + src_x >>= shift; + src_y >>= shift; + + if ((unsigned) src_x < width) { + if ((unsigned) src_y < height) { + index = src_x + src_y * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_x) + + src[index + 1] * frac_x) * (s - frac_y) + + (src[index + stride] * (s - frac_x) + + src[index + stride + 1] * frac_x) * frac_y + + r) >> (shift * 2); + } else { + index = src_x + av_clip(src_y, 0, height) * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_x) + + src[index + 1] * frac_x) * s + + r) >> (shift * 2); + } + } else { + if ((unsigned) src_y < height) { + index = av_clip(src_x, 0, width) + src_y * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_y) + + src[index + stride] * frac_y) * s + + r) >> (shift * 2); + } else { + index = av_clip(src_x, 0, width) + + av_clip(src_y, 0, height) * stride; + dst[y * stride + x] = src[index]; + } + } + + vx += dxx; + vy += dyx; + } + ox += dxy; + oy += dyy; + } +} + +av_cold void ff_mpegvideodsp_init(MpegVideoDSPContext *c) +{ + c->gmc1 = gmc1_c; + c->gmc = ff_gmc_c; + + if (ARCH_PPC) + ff_mpegvideodsp_init_ppc(c); + if (ARCH_X86) + ff_mpegvideodsp_init_x86(c); +} diff --git a/libavcodec/mpegvideodsp.h b/libavcodec/mpegvideodsp.h new file mode 100644 index 0000000000..b0f45db0bb --- /dev/null +++ b/libavcodec/mpegvideodsp.h @@ -0,0 +1,47 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGVIDEODSP_H +#define AVCODEC_MPEGVIDEODSP_H + +#include + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, + int width, int height); + +typedef struct MpegVideoDSPContext { + /** + * translational global motion compensation. + */ + void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int srcStride, int h, int x16, int y16, int rounder); + /** + * global motion compensation. + */ + void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height); +} MpegVideoDSPContext; + +void ff_mpegvideodsp_init(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c); + +#endif /* AVCODEC_MPEGVIDEODSP_H */ diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 8a4a789037..c6c0bcb241 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -10,7 +10,8 @@ OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o -OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o +OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ + ppc/mpegvideodsp.o OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o @@ -23,7 +24,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ ppc/fdct_altivec.o \ - ppc/gmc_altivec.o \ ppc/idct_altivec.o \ FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 2ad4910bb0..42da933dfa 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -28,8 +28,7 @@ #include "libavcodec/dsputil.h" void ff_fdct_altivec(int16_t *block); -void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, - int x16, int y16, int rounder); + void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index fb1ee4a940..778d3e1247 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -35,8 +35,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, if (PPC_ALTIVEC(av_get_cpu_flags())) { ff_dsputil_init_altivec(c, avctx, high_bit_depth); - c->gmc1 = ff_gmc1_altivec; - if (!high_bit_depth) { #if CONFIG_ENCODERS if (avctx->dct_algo == FF_DCT_AUTO || diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/mpegvideodsp.c similarity index 92% rename from libavcodec/ppc/gmc_altivec.c rename to libavcodec/ppc/mpegvideodsp.c index ef35f9d46d..2bdf909e4a 100644 --- a/libavcodec/ppc/gmc_altivec.c +++ b/libavcodec/ppc/mpegvideodsp.c @@ -23,12 +23,13 @@ #include "libavutil/mem.h" #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" -#include "dsputil_altivec.h" +#include "libavcodec/mpegvideodsp.h" +#if HAVE_ALTIVEC /* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8 * to preserve proper dst alignment. */ -void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, - int stride, int h, int x16, int y16, int rounder) +static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, + int stride, int h, int x16, int y16, int rounder) { int i; const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder; @@ -122,3 +123,11 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, src += stride; } } +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c) +{ +#if HAVE_ALTIVEC + c->gmc1 = gmc1_altivec; +#endif /* HAVE_ALTIVEC */ +} diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 587ff39bf4..13f9affdb2 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -20,7 +20,8 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o OBJS-$(CONFIG_LPC) += x86/lpc.o OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o -OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o +OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ + x86/mpegvideodsp.o OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index e69db8e9f0..90e62715db 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -52,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, break; } } - - c->gmc = ff_gmc_mmx; #endif /* HAVE_MMX_INLINE */ } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index fe43804428..5fa047da7b 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -260,118 +260,4 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, } } -void ff_gmc_mmx(uint8_t *dst, uint8_t *src, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height) -{ - const int w = 8; - const int ix = ox >> (16 + shift); - const int iy = oy >> (16 + shift); - const int oxs = ox >> 4; - const int oys = oy >> 4; - const int dxxs = dxx >> 4; - const int dxys = dxy >> 4; - const int dyxs = dyx >> 4; - const int dyys = dyy >> 4; - const uint16_t r4[4] = { r, r, r, r }; - const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys }; - const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys }; - const uint64_t shift2 = 2 * shift; - int x, y; - - const int dxw = (dxx - (1 << (16 + shift))) * (w - 1); - const int dyh = (dyy - (1 << (16 + shift))) * (h - 1); - const int dxh = dxy * (h - 1); - const int dyw = dyx * (w - 1); - - if ( // non-constant fullpel offset (3% of blocks) - ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) | - (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) || - // uses more than 16 bits of subpel mv (only at huge resolution) - (dxx | dxy | dyx | dyy) & 15 || - (unsigned) ix >= width - w || - (unsigned) iy >= height - h) { - // FIXME could still use mmx for some of the rows - ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, - shift, r, width, height); - return; - } - - src += ix + iy * stride; - - __asm__ volatile ( - "movd %0, %%mm6 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "punpcklwd %%mm6, %%mm6 \n\t" - "punpcklwd %%mm6, %%mm6 \n\t" - :: "r" (1 << shift)); - - for (x = 0; x < w; x += 4) { - uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0), - oxs - dxys + dxxs * (x + 1), - oxs - dxys + dxxs * (x + 2), - oxs - dxys + dxxs * (x + 3) }; - uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0), - oys - dyys + dyxs * (x + 1), - oys - dyys + dyxs * (x + 2), - oys - dyys + dyxs * (x + 3) }; - - for (y = 0; y < h; y++) { - __asm__ volatile ( - "movq %0, %%mm4 \n\t" - "movq %1, %%mm5 \n\t" - "paddw %2, %%mm4 \n\t" - "paddw %3, %%mm5 \n\t" - "movq %%mm4, %0 \n\t" - "movq %%mm5, %1 \n\t" - "psrlw $12, %%mm4 \n\t" - "psrlw $12, %%mm5 \n\t" - : "+m" (*dx4), "+m" (*dy4) - : "m" (*dxy4), "m" (*dyy4)); - - __asm__ volatile ( - "movq %%mm6, %%mm2 \n\t" - "movq %%mm6, %%mm1 \n\t" - "psubw %%mm4, %%mm2 \n\t" - "psubw %%mm5, %%mm1 \n\t" - "movq %%mm2, %%mm0 \n\t" - "movq %%mm4, %%mm3 \n\t" - "pmullw %%mm1, %%mm0 \n\t" // (s - dx) * (s - dy) - "pmullw %%mm5, %%mm3 \n\t" // dx * dy - "pmullw %%mm5, %%mm2 \n\t" // (s - dx) * dy - "pmullw %%mm4, %%mm1 \n\t" // dx * (s - dy) - - "movd %4, %%mm5 \n\t" - "movd %3, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "pmullw %%mm5, %%mm3 \n\t" // src[1, 1] * dx * dy - "pmullw %%mm4, %%mm2 \n\t" // src[0, 1] * (s - dx) * dy - - "movd %2, %%mm5 \n\t" - "movd %1, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "pmullw %%mm5, %%mm1 \n\t" // src[1, 0] * dx * (s - dy) - "pmullw %%mm4, %%mm0 \n\t" // src[0, 0] * (s - dx) * (s - dy) - "paddw %5, %%mm1 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm2, %%mm0 \n\t" - - "psrlw %6, %%mm0 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "movd %%mm0, %0 \n\t" - - : "=m" (dst[x + y * stride]) - : "m" (src[0]), "m" (src[1]), - "m" (src[stride]), "m" (src[stride + 1]), - "m" (*r4), "m" (shift2)); - src += stride; - } - src += 4 - h * stride; - } -} - #endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index eeb9ca6114..4beb6c11ca 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -41,9 +41,4 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides); -void ff_gmc_mmx(uint8_t *dst, uint8_t *src, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height); - #endif /* AVCODEC_X86_DSPUTIL_X86_H */ diff --git a/libavcodec/x86/mpegvideodsp.c b/libavcodec/x86/mpegvideodsp.c new file mode 100644 index 0000000000..0e5dd0f153 --- /dev/null +++ b/libavcodec/x86/mpegvideodsp.c @@ -0,0 +1,153 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/mpegvideodsp.h" + +#if HAVE_INLINE_ASM + +static void gmc_mmx(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height) +{ + const int w = 8; + const int ix = ox >> (16 + shift); + const int iy = oy >> (16 + shift); + const int oxs = ox >> 4; + const int oys = oy >> 4; + const int dxxs = dxx >> 4; + const int dxys = dxy >> 4; + const int dyxs = dyx >> 4; + const int dyys = dyy >> 4; + const uint16_t r4[4] = { r, r, r, r }; + const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys }; + const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys }; + const uint64_t shift2 = 2 * shift; + int x, y; + + const int dxw = (dxx - (1 << (16 + shift))) * (w - 1); + const int dyh = (dyy - (1 << (16 + shift))) * (h - 1); + const int dxh = dxy * (h - 1); + const int dyw = dyx * (w - 1); + + if ( // non-constant fullpel offset (3% of blocks) + ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) | + (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) || + // uses more than 16 bits of subpel mv (only at huge resolution) + (dxx | dxy | dyx | dyy) & 15 || + (unsigned) ix >= width - w || + (unsigned) iy >= height - h) { + // FIXME could still use mmx for some of the rows + ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, + shift, r, width, height); + return; + } + + src += ix + iy * stride; + + __asm__ volatile ( + "movd %0, %%mm6 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + :: "r" (1 << shift)); + + for (x = 0; x < w; x += 4) { + uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0), + oxs - dxys + dxxs * (x + 1), + oxs - dxys + dxxs * (x + 2), + oxs - dxys + dxxs * (x + 3) }; + uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0), + oys - dyys + dyxs * (x + 1), + oys - dyys + dyxs * (x + 2), + oys - dyys + dyxs * (x + 3) }; + + for (y = 0; y < h; y++) { + __asm__ volatile ( + "movq %0, %%mm4 \n\t" + "movq %1, %%mm5 \n\t" + "paddw %2, %%mm4 \n\t" + "paddw %3, %%mm5 \n\t" + "movq %%mm4, %0 \n\t" + "movq %%mm5, %1 \n\t" + "psrlw $12, %%mm4 \n\t" + "psrlw $12, %%mm5 \n\t" + : "+m" (*dx4), "+m" (*dy4) + : "m" (*dxy4), "m" (*dyy4)); + + __asm__ volatile ( + "movq %%mm6, %%mm2 \n\t" + "movq %%mm6, %%mm1 \n\t" + "psubw %%mm4, %%mm2 \n\t" + "psubw %%mm5, %%mm1 \n\t" + "movq %%mm2, %%mm0 \n\t" + "movq %%mm4, %%mm3 \n\t" + "pmullw %%mm1, %%mm0 \n\t" // (s - dx) * (s - dy) + "pmullw %%mm5, %%mm3 \n\t" // dx * dy + "pmullw %%mm5, %%mm2 \n\t" // (s - dx) * dy + "pmullw %%mm4, %%mm1 \n\t" // dx * (s - dy) + + "movd %4, %%mm5 \n\t" + "movd %3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm3 \n\t" // src[1, 1] * dx * dy + "pmullw %%mm4, %%mm2 \n\t" // src[0, 1] * (s - dx) * dy + + "movd %2, %%mm5 \n\t" + "movd %1, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm1 \n\t" // src[1, 0] * dx * (s - dy) + "pmullw %%mm4, %%mm0 \n\t" // src[0, 0] * (s - dx) * (s - dy) + "paddw %5, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm0 \n\t" + + "psrlw %6, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "movd %%mm0, %0 \n\t" + + : "=m" (dst[x + y * stride]) + : "m" (src[0]), "m" (src[1]), + "m" (src[stride]), "m" (src[stride + 1]), + "m" (*r4), "m" (shift2)); + src += stride; + } + src += 4 - h * stride; + } +} + +#endif /* HAVE_INLINE_ASM */ + +av_cold void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) + c->gmc = gmc_mmx; +#endif /* HAVE_INLINE_ASM */ +} +