diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index bf98370eee..82c51fc503 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -703,8 +703,8 @@ Video mechanism in the opengl output path. To use this deinterlacing you must pass the option: ``vd-lavc-o=deint=[weave|bob|adaptive]``. Pass ``weave`` to not attempt any deinterlacing. - 10bit HEVC is available if the hardware supports it but it will be - rounded down to 8 bits. + 10 and 12bit HEVC are available if the hardware supports them and a + sufficiently new driver (> 375.xx) is used. ``cuda-copy`` has the same behaviour as ``cuda`` - including the ability to deinterlace inside the decoder. However, traditional deinterlacing diff --git a/video/decode/cuda.c b/video/decode/cuda.c index f9dd418fd5..b606315906 100644 --- a/video/decode/cuda.c +++ b/video/decode/cuda.c @@ -21,6 +21,7 @@ #include #include "common/av_common.h" +#include "video/fmt-conversion.h" #include "video/decode/lavc.h" typedef struct CUVIDContext { @@ -114,7 +115,7 @@ static void uninit(struct lavc_ctx *ctx) static struct mp_image *process_image(struct lavc_ctx *ctx, struct mp_image *img) { if (img->imgfmt == IMGFMT_CUDA) - img->params.hw_subfmt = IMGFMT_NV12; + img->params.hw_subfmt = pixfmt2imgfmt(ctx->avctx->sw_pix_fmt); return img; } diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c index 32330ddeb1..8b991c5d19 100644 --- a/video/fmt-conversion.c +++ b/video/fmt-conversion.c @@ -112,6 +112,9 @@ static const struct { #ifdef AV_PIX_FMT_P010 {IMGFMT_P010, AV_PIX_FMT_P010}, #endif +#ifdef AV_PIX_FMT_P016 + {IMGFMT_P016, AV_PIX_FMT_P016}, +#endif {0, AV_PIX_FMT_NONE} }; diff --git a/video/img_format.h b/video/img_format.h index a91dcf865c..ee731aa51c 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -151,8 +151,10 @@ enum mp_imgfmt { IMGFMT_NV12, IMGFMT_NV21, - // Like IMGFMT_NV12, but with 16 bits per component + // Like IMGFMT_NV12, but with 10 bits per component (and 6 bits of padding) IMGFMT_P010, + // 
Like IMGFMT_NV12, but with 16 bits per component + IMGFMT_P016, // RGB/BGR Formats diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index 539acbd4ba..4dc842706c 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -42,7 +42,7 @@ struct priv { GLuint gl_textures[2]; CUgraphicsResource cu_res[2]; CUarray cu_array[2]; - bool mapped; + int sample_width; CUcontext cuda_ctx; }; @@ -81,7 +81,21 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx, if (hw_image->imgfmt != IMGFMT_CUDA) return NULL; - struct mp_image *out = mp_image_pool_get(swpool, IMGFMT_NV12, + int sample_width; + switch (hw_image->params.hw_subfmt) { + case IMGFMT_NV12: + sample_width = 1; + break; + case IMGFMT_P010: + case IMGFMT_P016: + sample_width = 2; + break; + default: + return NULL; + } + + struct mp_image *out = mp_image_pool_get(swpool, + hw_image->params.hw_subfmt, hw_image->w, hw_image->h); if (!out) return NULL; @@ -101,7 +115,8 @@ static struct mp_image *cuda_download_image(struct mp_hwdec_ctx *ctx, .dstHost = out->planes[n], .srcPitch = hw_image->stride[n], .dstPitch = out->stride[n], - .WidthInBytes = mp_image_plane_w(out, n) * (n + 1), + .WidthInBytes = mp_image_plane_w(out, n) * + (n + 1) * sample_width, .Height = mp_image_plane_h(out, n), }; @@ -176,11 +191,32 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) int ret = 0, eret = 0; assert(params->imgfmt == hw->driver->imgfmt); - params->imgfmt = IMGFMT_NV12; + params->imgfmt = params->hw_subfmt; params->hw_subfmt = 0; mp_image_set_params(&p->layout, params); + GLint luma_format, chroma_format; + GLenum type; + switch (params->imgfmt) { + case IMGFMT_NV12: + luma_format = GL_R8; + chroma_format = GL_RG8; + type = GL_UNSIGNED_BYTE; + p->sample_width = 1; + break; + case IMGFMT_P010: + case IMGFMT_P016: + luma_format = GL_R16; + chroma_format = GL_RG16; + type = GL_UNSIGNED_SHORT; + p->sample_width = 2; + break; + default: + MP_ERR(hw, 
"Unsupported format: %s\n", mp_imgfmt_to_name(params->imgfmt)); + return -1; + } + ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx)); if (ret < 0) return ret; @@ -193,10 +229,10 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? GL_R8 : GL_RG8, + gl->TexImage2D(GL_TEXTURE_2D, 0, n == 0 ? luma_format : chroma_format, mp_image_plane_w(&p->layout, n), mp_image_plane_h(&p->layout, n), - 0, n == 0 ? GL_RED : GL_RG, GL_UNSIGNED_BYTE, NULL); + 0, n == 0 ? GL_RED : GL_RG, type, NULL); gl->BindTexture(GL_TEXTURE_2D, 0); ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], p->gl_textures[n], @@ -261,7 +297,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, for (int n = 0; n < 2; n++) { // widthInBytes must account for the chroma plane - // elements being two bytes wide. + // elements being two samples wide. CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_ARRAY, @@ -269,7 +305,8 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, .srcPitch = hw_image->stride[n], .srcY = 0, .dstArray = p->cu_array[n], - .WidthInBytes = mp_image_plane_w(&p->layout, n) * (n + 1), + .WidthInBytes = mp_image_plane_w(&p->layout, n) * + (n + 1) * p->sample_width, .Height = mp_image_plane_h(&p->layout, n), }; ret = CHECK_CU(cuMemcpy2D(&cpy));