From d69f4be1c6c6e305b4f32235bcab35dcc8c9c98b Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Tue, 9 Feb 2021 19:17:27 +0100 Subject: [PATCH] avcodec/cfhdenc: refactor DSP code for CFHD encoder This is needed to implement x86 SIMD. --- libavcodec/Makefile | 2 +- libavcodec/cfhdenc.c | 123 +++++++++++----------------------------- libavcodec/cfhdencdsp.c | 76 +++++++++++++++++++++++++ libavcodec/cfhdencdsp.h | 41 ++++++++++++++ 4 files changed, 151 insertions(+), 91 deletions(-) create mode 100644 libavcodec/cfhdencdsp.c create mode 100644 libavcodec/cfhdencdsp.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 431e05b642..6dda8cbb1f 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -258,7 +258,7 @@ OBJS-$(CONFIG_CDGRAPHICS_DECODER) += cdgraphics.o OBJS-$(CONFIG_CDTOONS_DECODER) += cdtoons.o OBJS-$(CONFIG_CDXL_DECODER) += cdxl.o OBJS-$(CONFIG_CFHD_DECODER) += cfhd.o cfhddata.o cfhddsp.o -OBJS-$(CONFIG_CFHD_ENCODER) += cfhdenc.o cfhddata.o +OBJS-$(CONFIG_CFHD_ENCODER) += cfhdenc.o cfhddata.o cfhdencdsp.o OBJS-$(CONFIG_CINEPAK_DECODER) += cinepak.o OBJS-$(CONFIG_CINEPAK_ENCODER) += cinepakenc.o elbg.o OBJS-$(CONFIG_CLEARVIDEO_DECODER) += clearvideo.o diff --git a/libavcodec/cfhdenc.c b/libavcodec/cfhdenc.c index 7b6b3c92f2..370b1e5063 100644 --- a/libavcodec/cfhdenc.c +++ b/libavcodec/cfhdenc.c @@ -33,6 +33,7 @@ #include "avcodec.h" #include "bytestream.h" #include "cfhd.h" +#include "cfhdencdsp.h" #include "put_bits.h" #include "internal.h" #include "thread.h" @@ -239,6 +240,8 @@ typedef struct CFHDEncContext { Runbook rb[321]; Codebook cb[513]; int16_t *alpha; + + CFHDEncDSPContext dsp; } CFHDEncContext; static av_cold int cfhd_encode_init(AVCodecContext *avctx) @@ -359,6 +362,8 @@ static av_cold int cfhd_encode_init(AVCodecContext *avctx) s->lut[i] = last; } + ff_cfhdencdsp_init(&s->dsp); + if (s->planes != 4) return 0; @@ -369,42 +374,6 @@ static av_cold int cfhd_encode_init(AVCodecContext *avctx) return 0; } -static av_always_inline void 
filter(int16_t *input, ptrdiff_t in_stride, - int16_t *low, ptrdiff_t low_stride, - int16_t *high, ptrdiff_t high_stride, - int len) -{ - low[(0>>1) * low_stride] = av_clip_int16(input[0*in_stride] + input[1*in_stride]); - high[(0>>1) * high_stride] = av_clip_int16((5 * input[0*in_stride] - 11 * input[1*in_stride] + - 4 * input[2*in_stride] + 4 * input[3*in_stride] - - 1 * input[4*in_stride] - 1 * input[5*in_stride] + 4) >> 3); - - for (int i = 2; i < len - 2; i += 2) { - low[(i>>1) * low_stride] = av_clip_int16(input[i*in_stride] + input[(i+1)*in_stride]); - high[(i>>1) * high_stride] = av_clip_int16(((-input[(i-2)*in_stride] - input[(i-1)*in_stride] + - input[(i+2)*in_stride] + input[(i+3)*in_stride] + 4) >> 3) + - input[(i+0)*in_stride] - input[(i+1)*in_stride]); - } - - low[((len-2)>>1) * low_stride] = av_clip_int16(input[((len-2)+0)*in_stride] + input[((len-2)+1)*in_stride]); - high[((len-2)>>1) * high_stride] = av_clip_int16((11* input[((len-2)+0)*in_stride] - 5 * input[((len-2)+1)*in_stride] - - 4 * input[((len-2)-1)*in_stride] - 4 * input[((len-2)-2)*in_stride] + - 1 * input[((len-2)-3)*in_stride] + 1 * input[((len-2)-4)*in_stride] + 4) >> 3); -} - -static void horiz_filter(int16_t *input, int16_t *low, int16_t *high, - int width) -{ - filter(input, 1, low, 1, high, 1, width); -} - -static void vert_filter(int16_t *input, ptrdiff_t in_stride, - int16_t *low, ptrdiff_t low_stride, - int16_t *high, ptrdiff_t high_stride, int len) -{ - filter(input, in_stride, low, low_stride, high, high_stride, len); -} - static void quantize_band(int16_t *input, int width, int a_width, int height, unsigned quantization) { @@ -454,6 +423,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet) { CFHDEncContext *s = avctx->priv_data; + CFHDEncDSPContext *dsp = &s->dsp; PutByteContext *pby = &s->pby; PutBitContext *pb = &s->pb; const Codebook *const cb = s->cb; @@ -480,12 +450,9 @@ static int cfhd_encode_frame(AVCodecContext 
*avctx, AVPacket *pkt, in_stride = avctx->width; } - for (int i = 0; i < height * 2; i++) { - horiz_filter(input, low, high, width * 2); - input += in_stride; - low += a_width; - high += a_width; - } + dsp->horiz_filter(input, low, high, + in_stride, a_width, a_width, + width * 2, height * 2); input = s->plane[plane].l_h[7]; low = s->plane[plane].subband[7]; @@ -493,23 +460,17 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt, high = s->plane[plane].subband[9]; high_stride = s->plane[plane].band[2][0].a_width; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); input = s->plane[plane].l_h[6]; low = s->plane[plane].l_h[7]; high = s->plane[plane].subband[8]; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); a_width = s->plane[plane].band[1][0].a_width; width = s->plane[plane].band[1][0].width; @@ -527,34 +488,25 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt, } input = s->plane[plane].l_h[7]; - for (int i = 0; i < height * 2; i++) { - horiz_filter(input, low, high, width * 2); - input += a_width * 2; - low += low_stride; - high += high_stride; - } + dsp->horiz_filter(input, low, high, + a_width * 2, low_stride, high_stride, + width * 2, height * 2); input = s->plane[plane].l_h[4]; low = s->plane[plane].subband[4]; high = s->plane[plane].subband[6]; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); input = s->plane[plane].l_h[3]; low = s->plane[plane].l_h[4]; 
high = s->plane[plane].subband[5]; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); a_width = s->plane[plane].band[0][0].a_width; width = s->plane[plane].band[0][0].width; @@ -574,34 +526,25 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt, } input = s->plane[plane].l_h[4]; - for (int i = 0; i < height * 2; i++) { - horiz_filter(input, low, high, width * 2); - input += a_width * 2; - low += low_stride; - high += high_stride; - } + dsp->horiz_filter(input, low, high, + a_width * 2, low_stride, high_stride, + width * 2, height * 2); low = s->plane[plane].subband[1]; high = s->plane[plane].subband[3]; input = s->plane[plane].l_h[1]; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); low = s->plane[plane].subband[0]; high = s->plane[plane].subband[2]; input = s->plane[plane].l_h[0]; - for (int i = 0; i < width; i++) { - vert_filter(input, a_width, low, low_stride, high, high_stride, height * 2); - input++; - low++; - high++; - } + dsp->vert_filter(input, low, high, + a_width, low_stride, high_stride, + width, height * 2); } ret = ff_alloc_packet2(avctx, pkt, 64LL + s->planes * (2LL * avctx->width * avctx->height + 1000LL), 0); diff --git a/libavcodec/cfhdencdsp.c b/libavcodec/cfhdencdsp.c new file mode 100644 index 0000000000..0becb76d1d --- /dev/null +++ b/libavcodec/cfhdencdsp.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015-2016 Kieran Kunhya + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "libavutil/avassert.h" + +#include "cfhdencdsp.h" + +static av_always_inline void filter(int16_t *input, ptrdiff_t in_stride, + int16_t *low, ptrdiff_t low_stride, + int16_t *high, ptrdiff_t high_stride, + int len) +{ + low[(0>>1) * low_stride] = av_clip_int16(input[0*in_stride] + input[1*in_stride]); + high[(0>>1) * high_stride] = av_clip_int16((5 * input[0*in_stride] - 11 * input[1*in_stride] + + 4 * input[2*in_stride] + 4 * input[3*in_stride] - + 1 * input[4*in_stride] - 1 * input[5*in_stride] + 4) >> 3); + + for (int i = 2; i < len - 2; i += 2) { + low[(i>>1) * low_stride] = av_clip_int16(input[i*in_stride] + input[(i+1)*in_stride]); + high[(i>>1) * high_stride] = av_clip_int16(((-input[(i-2)*in_stride] - input[(i-1)*in_stride] + + input[(i+2)*in_stride] + input[(i+3)*in_stride] + 4) >> 3) + + input[(i+0)*in_stride] - input[(i+1)*in_stride]); + } + + low[((len-2)>>1) * low_stride] = av_clip_int16(input[((len-2)+0)*in_stride] + input[((len-2)+1)*in_stride]); + high[((len-2)>>1) * high_stride] = av_clip_int16((11* input[((len-2)+0)*in_stride] - 5 * input[((len-2)+1)*in_stride] - + 4 * input[((len-2)-1)*in_stride] - 4 * input[((len-2)-2)*in_stride] + + 1 * 
input[((len-2)-3)*in_stride] + 1 * input[((len-2)-4)*in_stride] + 4) >> 3); +} + +static void horiz_filter(int16_t *input, int16_t *low, int16_t *high, + ptrdiff_t in_stride, ptrdiff_t low_stride, + ptrdiff_t high_stride, + int width, int height) +{ + for (int i = 0; i < height; i++) { + filter(input, 1, low, 1, high, 1, width); + input += in_stride; + low += low_stride; + high += high_stride; + } +} + +static void vert_filter(int16_t *input, int16_t *low, int16_t *high, + ptrdiff_t in_stride, ptrdiff_t low_stride, + ptrdiff_t high_stride, + int width, int height) +{ + for (int i = 0; i < width; i++) + filter(&input[i], in_stride, &low[i], low_stride, &high[i], high_stride, height); +} + +av_cold void ff_cfhdencdsp_init(CFHDEncDSPContext *c) +{ + c->horiz_filter = horiz_filter; + c->vert_filter = vert_filter; +} diff --git a/libavcodec/cfhdencdsp.h b/libavcodec/cfhdencdsp.h new file mode 100644 index 0000000000..b3aac8d0a7 --- /dev/null +++ b/libavcodec/cfhdencdsp.h @@ -0,0 +1,41 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_CFHDENCDSP_H +#define AVCODEC_CFHDENCDSP_H + +#include <stddef.h> +#include <stdint.h> + +typedef struct CFHDEncDSPContext { + void (*horiz_filter)(int16_t *input, int16_t *low, int16_t *high, + ptrdiff_t in_stride, ptrdiff_t low_stride, + ptrdiff_t high_stride, + int width, int height); + + void (*vert_filter)(int16_t *input, int16_t *low, int16_t *high, + ptrdiff_t in_stride, ptrdiff_t low_stride, + ptrdiff_t high_stride, + int width, int height); +} CFHDEncDSPContext; + +void ff_cfhdencdsp_init(CFHDEncDSPContext *c); + +void ff_cfhdencdsp_init_x86(CFHDEncDSPContext *c); + +#endif /* AVCODEC_CFHDENCDSP_H */