diff --git a/libswscale/Makefile b/libswscale/Makefile index 4c950e6c43..757997b401 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -9,6 +9,7 @@ OBJS = alphablend.o \ hscale.o \ hscale_fast_bilinear.o \ gamma.o \ + half2float.o \ input.o \ options.o \ output.o \ diff --git a/libswscale/half2float.c b/libswscale/half2float.c new file mode 100644 index 0000000000..1b023f96a5 --- /dev/null +++ b/libswscale/half2float.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/half2float.c" diff --git a/libswscale/input.c b/libswscale/input.c index 36ef1e43ac..1077d01e91 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1124,6 +1124,112 @@ static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, rgbf32_planar_funcs_endian(le, 0) rgbf32_planar_funcs_endian(be, 1) +#define rdpx(src) av_int2float(half2float(is_be ? AV_RB16(&src) : AV_RL16(&src), h2f_tbl)) + +static av_always_inline void rgbaf16ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+4]), 0.0f, 65535.0f))) >> 1; + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+5]), 0.0f, 65535.0f))) >> 1; + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+6]), 0.0f, 65535.0f))) >> 1; + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToY_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToA_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, Half2FloatTables *h2f_tbl) +{ + int i; + for (i=0; isrcFormat; @@ -1388,6 +1494,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_half_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_half_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = rgbaf16leToUV_half_c; + break; } } else { switch (srcFormat) { @@ -1475,6 +1587,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = rgbaf16leToUV_c; + break; } } @@ -1763,6 +1881,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->lumToYV12 = bgr30leToY_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->lumToYV12 = rgbaf16beToY_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->lumToYV12 = rgbaf16leToY_c; + break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1782,6 +1906,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->alpToYV12 = rgbaf16beToA_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->alpToYV12 = rgbaf16leToA_c; + break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/slice.c b/libswscale/slice.c index b3ee06d632..db1c696727 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -282,7 +282,13 @@ int ff_init_filters(SwsContext * c) c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0); c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0); - + if (isFloat16(c->srcFormat)) { + c->h2f_tables = av_malloc(sizeof(*c->h2f_tables)); + if (!c->h2f_tables) + return AVERROR(ENOMEM); + ff_init_half2float_tables(c->h2f_tables); + c->input_opaque = c->h2f_tables; + } c->desc = av_calloc(c->numDesc, sizeof(*c->desc)); if (!c->desc) @@ -393,5 +399,6 @@ int ff_free_filters(SwsContext *c) free_slice(&c->slice[i]); av_freep(&c->slice); } + av_freep(&c->h2f_tables); return 0; } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9ab542933f..6c14ce8536 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -35,6 +35,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/slicethread.h" #include "libavutil/ppc/util_altivec.h" +#include "libavutil/half2float.h" #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long @@ -679,6 +680,8 @@ typedef struct SwsContext { unsigned int dst_slice_align; atomic_int stride_unaligned_warned; atomic_int data_unaligned_warned; + + Half2FloatTables *h2f_tables; } SwsContext; //FIXME check init (where 0) @@ -840,6 +843,13 @@ static av_always_inline int isFloat(enum AVPixelFormat pix_fmt) return desc->flags & AV_PIX_FMT_FLAG_FLOAT; } +static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + return (desc->flags & AV_PIX_FMT_FLAG_FLOAT) && desc->comp[0].depth == 16; +} + static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); diff --git a/libswscale/utils.c b/libswscale/utils.c index baa1791ebe..9ef157c006 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -259,6 +259,8 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_P416LE] = { 1, 1 }, [AV_PIX_FMT_NV16] = { 1, 1 }, [AV_PIX_FMT_VUYA] = { 1, 1 }, + [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, + [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, }; int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, diff --git a/libswscale/version.h b/libswscale/version.h index 3193562d18..d8694bb5c0 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 102 +#define LIBSWSCALE_VERSION_MICRO 103 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \