From e7bf5576e599593b1bba5bbf2a7cd6d4270c7809 Mon Sep 17 00:00:00 2001 From: James Ross-Gowan Date: Wed, 1 Nov 2017 22:38:41 +1100 Subject: [PATCH] vo_gpu: hwdec_d3d11va: allow zero-copy video decoding Like the manual says, this is technically undefined behaviour. See: https://msdn.microsoft.com/en-us/library/windows/desktop/ff476085.aspx In particular, MSDN says texture arrays created with the BIND_DECODER flag cannot be used with CreateShaderResourceView, which means they can't be sampled through SRVs like normal Direct3D textures. However, some programs (Google Chrome included) do this anyway for performance and power-usage reasons, and it appears to work with most drivers. Older AMD drivers had a "bug" with zero-copy decoding, but this appears to have been fixed. See #3255, #3464 and http://crbug.com/623029. --- DOCS/man/options.rst | 12 +++ options/options.c | 4 + options/options.h | 1 + video/out/d3d11/hwdec_d3d11va.c | 152 ++++++++++++++++++++++---------- video/out/d3d11/ra_d3d11.c | 64 +++++++++++--- video/out/d3d11/ra_d3d11.h | 3 +- 6 files changed, 175 insertions(+), 61 deletions(-) diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index d6dfc48535..7dd1dc6af9 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -4297,6 +4297,18 @@ The following video options are currently all specific to ``--vo=gpu`` and Schedule each frame to be presented for this number of VBlank intervals. (default: 1) Setting to 1 will enable VSync, setting to 0 will disable it. +``--d3d11va-zero-copy=<yes|no>`` + By default, when using hardware decoding with ``--gpu-api=d3d11``, the + video image will be copied (GPU-to-GPU) from the decoder surface to a + shader resource. Set this option to avoid that copy by sampling directly + from the decoder image.
This may increase performance and reduce power + usage, but can cause the image to be sampled incorrectly on the bottom and + right edges due to padding, and may invoke driver bugs, since Direct3D 11 + technically does not allow sampling from a decoder surface (though most + drivers support it.) + + Currently only relevant for ``--gpu-api=d3d11``. + ``--spirv-compiler=`` Controls which compiler is used to translate GLSL to SPIR-V. This is (currently) only relevant for ``--gpu-api=vulkan``. The possible choices diff --git a/options/options.c b/options/options.c index 3bf4ee1108..e295afa9bc 100644 --- a/options/options.c +++ b/options/options.c @@ -91,6 +91,7 @@ extern const struct m_sub_options opengl_conf; extern const struct m_sub_options vulkan_conf; extern const struct m_sub_options spirv_conf; extern const struct m_sub_options d3d11_conf; +extern const struct m_sub_options d3d11va_conf; extern const struct m_sub_options angle_conf; extern const struct m_sub_options cocoa_conf; @@ -702,6 +703,9 @@ const m_option_t mp_opts[] = { #if HAVE_D3D11 OPT_SUBSTRUCT("", d3d11_opts, d3d11_conf, 0), +#if HAVE_D3D_HWACCEL + OPT_SUBSTRUCT("", d3d11va_opts, d3d11va_conf, 0), +#endif #endif #if HAVE_EGL_ANGLE_WIN32 diff --git a/options/options.h b/options/options.h index 47a4622430..170de9cddc 100644 --- a/options/options.h +++ b/options/options.h @@ -333,6 +333,7 @@ typedef struct MPOpts { struct vulkan_opts *vulkan_opts; struct spirv_opts *spirv_opts; struct d3d11_opts *d3d11_opts; + struct d3d11va_opts *d3d11va_opts; struct cocoa_opts *cocoa_opts; struct dvd_opts *dvd_opts; diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c index f179298ac1..7de24dde28 100644 --- a/video/out/d3d11/hwdec_d3d11va.c +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -22,21 +22,45 @@ #include "config.h" #include "common/common.h" +#include "options/m_config.h" #include "osdep/windows_utils.h" #include "video/hwdec.h" #include "video/decode/d3d.h" #include 
"video/out/d3d11/ra_d3d11.h" #include "video/out/gpu/hwdec.h" +struct d3d11va_opts { + int zero_copy; +}; + +#define OPT_BASE_STRUCT struct d3d11va_opts +const struct m_sub_options d3d11va_conf = { + .opts = (const struct m_option[]) { + OPT_FLAG("d3d11va-zero-copy", zero_copy, 0), + {0} + }, + .defaults = &(const struct d3d11va_opts) { + .zero_copy = 0, + }, + .size = sizeof(struct d3d11va_opts) +}; + struct priv_owner { + struct d3d11va_opts *opts; + struct mp_hwdec_ctx hwctx; ID3D11Device *device; ID3D11Device1 *device1; }; struct priv { + // 1-copy path ID3D11DeviceContext1 *ctx; ID3D11Texture2D *copy_tex; + + // zero-copy path + int num_planes; + const struct ra_format *fmt[4]; }; static void uninit(struct ra_hwdec *hw) @@ -59,6 +83,8 @@ static int init(struct ra_hwdec *hw) if (!p->device) return -1; + p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf); + // D3D11VA requires Direct3D 11.1, so this should always succeed hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, (void**)&p->device1); @@ -109,52 +135,56 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) mapper->dst_params.hw_subfmt = 0; struct ra_imgfmt_desc desc = {0}; - struct mp_image layout = {0}; if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) return -1; - mp_image_set_params(&layout, &mapper->dst_params); + if (o->opts->zero_copy) { + // In the zero-copy path, we create the ra_tex objects in the map + // operation, so we just need to store the format of each plane + p->num_planes = desc.num_planes; + for (int i = 0; i < desc.num_planes; i++) + p->fmt[i] = desc.planes[i]; + } else { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &mapper->dst_params); - DXGI_FORMAT copy_fmt; - switch (mapper->dst_params.imgfmt) { - case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; - case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; - default: return -1; - } + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case 
IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } - // We copy decoder images to an intermediate texture. This is slower than - // the zero-copy path, but according to MSDN, decoder textures should not - // be bound to SRVs, so it is technically correct, and it works around some - // driver "bugs" that can happen with the zero-copy path. It also allows - // samplers to work correctly when the decoder image includes padding. - D3D11_TEXTURE2D_DESC copy_desc = { - .Width = mapper->dst_params.w, - .Height = mapper->dst_params.h, - .MipLevels = 1, - .ArraySize = 1, - .SampleDesc.Count = 1, - .Format = copy_fmt, - .BindFlags = D3D11_BIND_SHADER_RESOURCE, - }; - hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, &p->copy_tex); - if (FAILED(hr)) { - MP_FATAL(mapper, "Could not create shader resource texture\n"); - return -1; - } - - for (int i = 0; i < desc.num_planes; i++) { - mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, - mp_image_plane_w(&layout, i), - mp_image_plane_h(&layout, i), - desc.planes[i]); - if (!mapper->tex[i]) { - MP_FATAL(mapper, "Could not create RA texture view\n"); + D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, + &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); return -1; } - } - ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0, + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + // A ref to
the immediate context is needed for CopySubresourceRegion + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + } return 0; } @@ -165,20 +195,47 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; int subresource = (intptr_t)mapper->src->planes[1]; - ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, - (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, - (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { - .left = 0, - .top = 0, - .front = 0, - .right = mapper->dst_params.w, - .bottom = mapper->dst_params.h, - .back = 1, - }), D3D11_COPY_DISCARD); + if (p->copy_tex) { + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, + .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + } else { + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex, &desc2d); + + for (int i = 0; i < p->num_planes; i++) { + // The video decode texture may include padding, so the size of the + // ra_tex needs to be determined by the actual size of the Tex2D + bool chroma = i >= 1; + int w = desc2d.Width / (chroma ? 2 : 1); + int h = desc2d.Height / (chroma ? 
2 : 1); + + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex, + w, h, subresource, p->fmt[i]); + if (!mapper->tex[i]) + return -1; + } + } return 0; } +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + if (p->copy_tex) + return; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); +} + const struct ra_hwdec_driver ra_hwdec_d3d11va = { .name = "d3d11va", .priv_size = sizeof(struct priv_owner), @@ -191,5 +248,6 @@ const struct ra_hwdec_driver ra_hwdec_d3d11va = { .init = mapper_init, .uninit = mapper_uninit, .map = mapper_map, + .unmap = mapper_unmap, }, }; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c index 394f99c70b..3037966f23 100644 --- a/video/out/d3d11/ra_d3d11.c +++ b/video/out/d3d11/ra_d3d11.c @@ -75,6 +75,7 @@ struct d3d_tex { ID3D11Texture1D *tex1d; ID3D11Texture2D *tex2d; ID3D11Texture3D *tex3d; + int array_slice; ID3D11ShaderResourceView *srv; ID3D11RenderTargetView *rtv; @@ -259,14 +260,29 @@ static bool tex_init(struct ra *ra, struct ra_tex *tex) }; switch (params->dimensions) { case 1: - srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; - srvdesc.Texture1D.MipLevels = 1; + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + srvdesc.Texture1DArray.MipLevels = 1; + srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture1DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + } break; case 2: - srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srvdesc.Texture2D.MipLevels = 1; + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvdesc.Texture2DArray.MipLevels = 1; + srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture2DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + 
} break; case 3: + // D3D11 does not have Texture3D arrays srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srvdesc.Texture3D.MipLevels = 1; break; @@ -442,6 +458,8 @@ static struct ra_tex *tex_create(struct ra *ra, abort(); } + tex_p->array_slice = -1; + if (!tex_init(ra, tex)) goto error; @@ -478,10 +496,18 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) D3D11_TEXTURE2D_DESC desc2d; ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); - if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1) + if (desc2d.MipLevels != 1) { + MP_ERR(ra, "Mipmapped textures not supported for wrapping\n"); goto error; - if (desc2d.SampleDesc.Count != 1) + } + if (desc2d.ArraySize != 1) { + MP_ERR(ra, "Texture arrays not supported for wrapping\n"); goto error; + } + if (desc2d.SampleDesc.Count != 1) { + MP_ERR(ra, "Multisampled textures not supported for wrapping\n"); + goto error; + } params->dimensions = 2; params->w = desc2d.Width; @@ -522,6 +548,8 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) goto error; } + tex_p->array_slice = -1; + if (!tex_init(ra, tex)) goto error; @@ -532,7 +560,7 @@ error: } struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, - int w, int h, + int w, int h, int array_slice, const struct ra_format *fmt) { struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); @@ -559,6 +587,12 @@ struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, // fmt can be different to the texture format for planar video textures params->format = fmt; + if (desc2d.ArraySize > 1) { + tex_p->array_slice = array_slice; + } else { + tex_p->array_slice = -1; + } + if (!tex_init(ra, tex)) goto error; @@ -611,12 +645,14 @@ static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) } } + int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0; if (p->ctx1) { - ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, 0, rc, - src, stride, pitch, invalidate ? 
D3D11_COPY_DISCARD : 0); + ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, + subresource, rc, src, stride, pitch, + invalidate ? D3D11_COPY_DISCARD : 0); } else { - ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, 0, rc, - src, stride, pitch); + ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource, + rc, src, stride, pitch); } return true; @@ -1174,8 +1210,10 @@ static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, { blit_rpass(ra, dst, src, &dst_rc, &src_rc); } else { - ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, 0, - dst_rc.x0, dst_rc.y0, 0, src_p->res, 0, (&(D3D11_BOX) { + int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0; + int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0; + ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr, + dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) { .left = src_rc.x0, .top = src_rc.y0, .front = 0, diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h index eeadc7994e..54033b6cee 100644 --- a/video/out/d3d11/ra_d3d11.h +++ b/video/out/d3d11/ra_d3d11.h @@ -22,8 +22,9 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res); // As above, but for a D3D11VA video resource. The fmt parameter selects which // plane of a planar format will be mapped when the RA texture is used. +// array_slice should be set for texture arrays and is ignored for non-arrays. struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, - int w, int h, + int w, int h, int array_slice, const struct ra_format *fmt); // Get the underlying D3D11 device from an RA instance. The returned device is