diff --git a/libavfilter/version.h b/libavfilter/version.h index f191d98883..f84dec4805 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -32,7 +32,7 @@ #include "version_major.h" #define LIBAVFILTER_VERSION_MINOR 6 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 54a340949d..eb8beee771 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -407,7 +407,7 @@ fail: } static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, - CUtexObject src_tex[4], int src_width, int src_height, + CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height, AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch) { CUDAScaleContext *s = ctx->priv; @@ -422,7 +422,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3], &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3], &dst_width, &dst_height, &dst_pitch, - &src_width, &src_height, &s->param + &src_left, &src_top, &src_width, &src_height, &s->param }; return CHECK_CU(cu->cuLaunchKernel(func, @@ -440,6 +440,9 @@ static int scalecuda_resize(AVFilterContext *ctx, CUtexObject tex[4] = { 0, 0, 0, 0 }; + int crop_width = (in->width - in->crop_right) - in->crop_left; + int crop_height = (in->height - in->crop_bottom) - in->crop_top; + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; @@ -477,7 +480,7 @@ static int scalecuda_resize(AVFilterContext *ctx, // scale primary plane(s). Usually Y (and A), or single plane of RGB frames. ret = call_resize_kernel(ctx, s->cu_func, - tex, in->width, in->height, + tex, in->crop_left, in->crop_top, crop_width, crop_height, out, out->width, out->height, out->linesize[0]); if (ret < 0) goto exit; @@ -485,8 +488,10 @@ static int scalecuda_resize(AVFilterContext *ctx, if (s->out_planes > 1) { // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane. ret = call_resize_kernel(ctx, s->cu_func_uv, tex, - AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), - AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), + AV_CEIL_RSHIFT(in->crop_left, s->in_desc->log2_chroma_w), + AV_CEIL_RSHIFT(in->crop_top, s->in_desc->log2_chroma_h), + AV_CEIL_RSHIFT(crop_width, s->in_desc->log2_chroma_w), + AV_CEIL_RSHIFT(crop_height, s->in_desc->log2_chroma_h), out, AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index de06ba9433..271b55cd5d 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -26,6 +26,7 @@ template using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param); @@ -64,11 +65,12 @@ static inline __device__ ushort conv_16to10(ushort in) subsample_function_t subsample_func_uv> \ __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \ int dst_width, int dst_height, int dst_pitch, \ - int src_width, int src_height, float param) + int src_left, int src_top, int src_width, int src_height, float param) #define SUB_F(m, plane) \ subsample_func_##m(src_tex[plane], xo, yo, \ dst_width, dst_height, \ + src_left, src_top, \ src_width, src_height, \ in_bit_depth, param) @@ -1063,13 +1065,14 @@ template __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale; - float yi = (yo + 0.5f) * vscale; + float xi = (xo + 0.5f) * hscale + src_left; + float yi = (yo + 0.5f) * vscale + src_top; return tex2D(tex, xi, yi); } @@ -1078,13 +1081,14 @@ template __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale; - float yi = (yo + 0.5f) * vscale; + float xi = (xo + 0.5f) * hscale + src_left; + float yi = (yo + 0.5f) * vscale + src_top; // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv} float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f); float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f); @@ -1109,13 +1113,14 @@ template __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale - 0.5f; - float yi = (yo + 0.5f) * vscale - 0.5f; + float xi = (xo + 0.5f) * hscale - 0.5f + src_left; + float yi = (yo + 0.5f) * vscale - 0.5f + src_top; float px = floor(xi); float py = floor(yi); float fx = xi - px; @@ -1147,7 +1152,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \ T *dst_0, T *dst_1, T *dst_2, T *dst_3, \ int dst_width, int dst_height, int dst_pitch, \ - int src_width, int src_height, float param + int src_left, int src_top, int src_width, int src_height, float param #define SUBSAMPLE(Convert, T) \ cudaTextureObject_t src_tex[4] = \ @@ -1159,6 +1164,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, Convert( \ src_tex, dst, xo, yo, \ dst_width, dst_height, dst_pitch, \ + src_left, src_top, \ src_width, src_height, param); extern "C" {