From fba894615d694584057adb0ddb4d609486cad807 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Wed, 8 Oct 2014 02:05:54 +0200 Subject: [PATCH] swscale: support internal scaler cascades Fixes Ticket3170 Signed-off-by: Michael Niedermayer --- libswscale/swscale.c | 13 +++++++++ libswscale/swscale_internal.h | 10 +++++++ libswscale/utils.c | 54 ++++++++++++++++++++++++++++------- libswscale/version.h | 2 +- 4 files changed, 67 insertions(+), 12 deletions(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index e54d4486e9..16a31cee40 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -27,6 +27,7 @@ #include "libavutil/avutil.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" +#include "libavutil/imgutils.h" #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" #include "libavutil/pixdesc.h" @@ -899,6 +900,18 @@ int attribute_align_arg sws_scale(struct SwsContext *c, av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n"); return 0; } + if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH) { + ret = sws_scale(c->cascaded_context[0], + srcSlice, srcStride, srcSliceY, srcSliceH, + c->cascaded_tmp, c->cascaded_tmpStride); + if (ret < 0) + return ret; + ret = sws_scale(c->cascaded_context[1], + (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride, 0, c->cascaded_context[0]->dstH, + dst, dstStride); + return ret; + } + memcpy(src2, srcSlice, sizeof(src2)); memcpy(dst2, dst, sizeof(dst2)); diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 1e53690908..63b4eca829 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -61,6 +61,8 @@ # define APCK_SIZE 16 #endif +#define RETCODE_USE_CASCADE -12345 + struct SwsContext; typedef enum SwsDither { @@ -301,6 +303,14 @@ typedef struct SwsContext { int sliceDir; ///< Direction that slices are fed to the scaler (1 = 
top-to-bottom, -1 = bottom-to-top). double param[2]; ///< Input parameters for scaling algorithms that need them. + /* The cascaded_* fields allow splitting a scaler task into multiple + * sequential steps; this is for example used to limit the maximum + * downscaling factor that needs to be supported in one scaler. + */ + struct SwsContext *cascaded_context[2]; + int cascaded_tmpStride[4]; + uint8_t *cascaded_tmp[4]; + uint32_t pal_yuv[256]; uint32_t pal_rgb[256]; diff --git a/libswscale/utils.c b/libswscale/utils.c index dfdeb38479..5904ef85fe 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -42,6 +42,7 @@ #include "libavutil/avutil.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" +#include "libavutil/imgutils.h" #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" #include "libavutil/opt.h" @@ -571,8 +572,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, goto fail; if (filterSize >= MAX_FILTER_SIZE * 16 / ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) { - av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or set --sws-max-filter-size and recompile\n", - FF_CEIL_RSHIFT((filterSize+1) * ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16), 4)); + ret = RETCODE_USE_CASCADE; goto fail; } *outFilterSize = filterSize; @@ -664,7 +664,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos, fail: if(ret < 0) - av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n"); + av_log(NULL, ret == RETCODE_USE_CASCADE ?
AV_LOG_DEBUG : AV_LOG_ERROR, "sws: initFilter failed\n"); av_free(filter); av_free(filter2); return ret; @@ -959,6 +959,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, enum AVPixelFormat dstFormat = c->dstFormat; const AVPixFmtDescriptor *desc_src; const AVPixFmtDescriptor *desc_dst; + int ret = 0; cpu_flags = av_get_cpu_flags(); flags = c->flags; @@ -1284,23 +1285,23 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, const int filterAlign = X86_MMX(cpu_flags) ? 4 : PPC_ALTIVEC(cpu_flags) ? 8 : 1; - if (initFilter(&c->hLumFilter, &c->hLumFilterPos, + if ((ret = initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, srcW, dstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumH, dstFilter->lumH, c->param, get_local_pos(c, 0, 0, 0), - get_local_pos(c, 0, 0, 0)) < 0) + get_local_pos(c, 0, 0, 0))) < 0) goto fail; - if (initFilter(&c->hChrFilter, &c->hChrFilterPos, + if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrH, dstFilter->chrH, c->param, get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0), - get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0) + get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0) goto fail; } } // initialize horizontal stuff @@ -1310,22 +1311,22 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, const int filterAlign = X86_MMX(cpu_flags) ? 2 : PPC_ALTIVEC(cpu_flags) ? 8 : 1; - if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, + if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH, dstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? 
(flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, c->param, get_local_pos(c, 0, 0, 1), - get_local_pos(c, 0, 0, 1)) < 0) + get_local_pos(c, 0, 0, 1))) < 0) goto fail; - if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, + if ((ret = initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrV, dstFilter->chrV, c->param, get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1), - get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0) + get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1))) < 0) goto fail; @@ -1479,6 +1480,32 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->swscale = ff_getSwsFunc(c); return 0; fail: // FIXME replace things by appropriate error codes + if (ret == RETCODE_USE_CASCADE) { + int tmpW = sqrt(srcW * (int64_t)dstW); + int tmpH = sqrt(srcH * (int64_t)dstH); + enum AVPixelFormat tmpFormat = AV_PIX_FMT_YUV420P; + + if (srcW*(int64_t)srcH <= 4LL*dstW*dstH) + return AVERROR(EINVAL); + + ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride, + tmpW, tmpH, tmpFormat, 64); + if (ret < 0) + return ret; + + c->cascaded_context[0] = sws_getContext(srcW, srcH, srcFormat, + tmpW, tmpH, tmpFormat, + flags, srcFilter, NULL, c->param); + if (!c->cascaded_context[0]) + return -1; + + c->cascaded_context[1] = sws_getContext(tmpW, tmpH, tmpFormat, + dstW, dstH, dstFormat, + flags, NULL, dstFilter, c->param); + if (!c->cascaded_context[1]) + return -1; + return 0; + } return -1; } @@ -1890,6 +1917,11 @@ void sws_freeContext(SwsContext *c) av_freep(&c->yuvTable); av_freep(&c->formatConvBuffer); + sws_freeContext(c->cascaded_context[0]); + sws_freeContext(c->cascaded_context[1]); + memset(c->cascaded_context, 0, sizeof(c->cascaded_context)); + av_freep(&c->cascaded_tmp[0]); + av_free(c); } diff --git a/libswscale/version.h 
b/libswscale/version.h index ff7de293f5..228c5770eb 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -28,7 +28,7 @@ #define LIBSWSCALE_VERSION_MAJOR 3 #define LIBSWSCALE_VERSION_MINOR 1 -#define LIBSWSCALE_VERSION_MICRO 100 +#define LIBSWSCALE_VERSION_MICRO 101 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \