mirror of
https://github.com/mpv-player/mpv
synced 2025-01-03 05:22:23 +00:00
vo_gpu: hwdec_d3d11va: allow zero-copy video decoding
As the manual says, this is technically undefined behaviour; see https://msdn.microsoft.com/en-us/library/windows/desktop/ff476085.aspx for details. In particular, MSDN says texture arrays created with the BIND_DECODER flag cannot be used with CreateShaderResourceView, which means they can't be sampled through SRVs like normal Direct3D textures. However, some programs (Google Chrome included) do this anyway for performance and power-usage reasons, and it appears to work with most drivers. Older AMD drivers had a "bug" with zero-copy decoding, but this appears to have been fixed. See #3255, #3464 and http://crbug.com/623029.
This commit is contained in:
parent
b258d82d6e
commit
e7bf5576e5
@ -4297,6 +4297,18 @@ The following video options are currently all specific to ``--vo=gpu`` and
|
||||
Schedule each frame to be presented for this number of VBlank intervals.
|
||||
(default: 1) Setting to 1 will enable VSync, setting to 0 will disable it.
|
||||
|
||||
``--d3d11va-zero-copy=<yes|no>``
|
||||
By default, when using hardware decoding with ``--gpu-api=d3d11``, the
|
||||
video image will be copied (GPU-to-GPU) from the decoder surface to a
|
||||
shader resource. Set this option to avoid that copy by sampling directly
|
||||
from the decoder image. This may increase performance and reduce power
|
||||
usage, but can cause the image to be sampled incorrectly on the bottom and
|
||||
right edges due to padding, and may invoke driver bugs, since Direct3D 11
|
||||
technically does not allow sampling from a decoder surface (though most
|
||||
drivers support it).
|
||||
|
||||
Currently only relevant for ``--gpu-api=d3d11``.
|
||||
|
||||
``--spirv-compiler=<compiler>``
|
||||
Controls which compiler is used to translate GLSL to SPIR-V. This is
|
||||
(currently) only relevant for ``--gpu-api=vulkan``. The possible choices
|
||||
|
@ -91,6 +91,7 @@ extern const struct m_sub_options opengl_conf;
|
||||
extern const struct m_sub_options vulkan_conf;
|
||||
extern const struct m_sub_options spirv_conf;
|
||||
extern const struct m_sub_options d3d11_conf;
|
||||
extern const struct m_sub_options d3d11va_conf;
|
||||
extern const struct m_sub_options angle_conf;
|
||||
extern const struct m_sub_options cocoa_conf;
|
||||
|
||||
@ -702,6 +703,9 @@ const m_option_t mp_opts[] = {
|
||||
|
||||
#if HAVE_D3D11
|
||||
OPT_SUBSTRUCT("", d3d11_opts, d3d11_conf, 0),
|
||||
#if HAVE_D3D_HWACCEL
|
||||
OPT_SUBSTRUCT("", d3d11va_opts, d3d11va_conf, 0),
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_EGL_ANGLE_WIN32
|
||||
|
@ -333,6 +333,7 @@ typedef struct MPOpts {
|
||||
struct vulkan_opts *vulkan_opts;
|
||||
struct spirv_opts *spirv_opts;
|
||||
struct d3d11_opts *d3d11_opts;
|
||||
struct d3d11va_opts *d3d11va_opts;
|
||||
struct cocoa_opts *cocoa_opts;
|
||||
struct dvd_opts *dvd_opts;
|
||||
|
||||
|
@ -22,21 +22,45 @@
|
||||
#include "config.h"
|
||||
|
||||
#include "common/common.h"
|
||||
#include "options/m_config.h"
|
||||
#include "osdep/windows_utils.h"
|
||||
#include "video/hwdec.h"
|
||||
#include "video/decode/d3d.h"
|
||||
#include "video/out/d3d11/ra_d3d11.h"
|
||||
#include "video/out/gpu/hwdec.h"
|
||||
|
||||
// Options for the d3d11va hwdec interop; exposed through d3d11va_conf.
struct d3d11va_opts {
|
||||
// Backing storage for the "d3d11va-zero-copy" flag. When non-zero,
// mapper_init only records the per-plane formats instead of creating the
// intermediate copy texture.
int zero_copy;
|
||||
};
|
||||
|
||||
#define OPT_BASE_STRUCT struct d3d11va_opts
|
||||
// Option group for the d3d11va hwdec interop. It declares a single flag
// option, "d3d11va-zero-copy", stored in d3d11va_opts.zero_copy.
const struct m_sub_options d3d11va_conf = {
|
||||
.opts = (const struct m_option[]) {
|
||||
OPT_FLAG("d3d11va-zero-copy", zero_copy, 0),
|
||||
// Zeroed entry ends the option list
{0}
|
||||
},
|
||||
.defaults = &(const struct d3d11va_opts) {
|
||||
// Zero-copy is off by default, i.e. the copy path is used
.zero_copy = 0,
|
||||
},
|
||||
.size = sizeof(struct d3d11va_opts)
|
||||
};
|
||||
|
||||
// Private state of the hwdec driver itself (ra_hwdec_d3d11va.priv_size is
// sizeof(struct priv_owner)).
struct priv_owner {
|
||||
// Option values; assigned in init() from
// mp_get_config_group(..., &d3d11va_conf)
struct d3d11va_opts *opts;
|
||||

|
||||
struct mp_hwdec_ctx hwctx;
|
||||
// init() fails with -1 when this is NULL
ID3D11Device *device;
|
||||
// ID3D11Device1 interface of the device, obtained with QueryInterface in
// init(); mapper_init uses it to get the immediate context
ID3D11Device1 *device1;
|
||||
};
|
||||
|
||||
// Private per-mapper state (accessed as mapper->priv). Which group of fields
// is filled in depends on whether the zero-copy option is enabled.
struct priv {
|
||||
// 1-copy path
|
||||
// Immediate context, fetched in mapper_init with GetImmediateContext1 and
// used by mapper_map for CopySubresourceRegion1
ID3D11DeviceContext1 *ctx;
|
||||
// Intermediate shader-resource texture that decoder images are copied into.
// It is only created in the 1-copy path; mapper_map and mapper_unmap test
// this pointer to decide which path they are on.
ID3D11Texture2D *copy_tex;
|
||||

|
||||
// zero-copy path
|
||||
// Plane count and per-plane formats taken from the ra_imgfmt_desc in
// mapper_init; mapper_map passes fmt[i] to ra_d3d11_wrap_tex_video when it
// wraps the decoder texture
int num_planes;
|
||||
const struct ra_format *fmt[4];
|
||||
};
|
||||
|
||||
static void uninit(struct ra_hwdec *hw)
|
||||
@ -59,6 +83,8 @@ static int init(struct ra_hwdec *hw)
|
||||
if (!p->device)
|
||||
return -1;
|
||||
|
||||
p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf);
|
||||
|
||||
// D3D11VA requires Direct3D 11.1, so this should always succeed
|
||||
hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1,
|
||||
(void**)&p->device1);
|
||||
@ -109,11 +135,18 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
|
||||
mapper->dst_params.hw_subfmt = 0;
|
||||
|
||||
struct ra_imgfmt_desc desc = {0};
|
||||
struct mp_image layout = {0};
|
||||
|
||||
if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
|
||||
return -1;
|
||||
|
||||
if (o->opts->zero_copy) {
|
||||
// In the zero-copy path, we create the ra_tex objects in the map
|
||||
// operation, so we just need to store the format of each plane
|
||||
p->num_planes = desc.num_planes;
|
||||
for (int i = 0; i < desc.num_planes; i++)
|
||||
p->fmt[i] = desc.planes[i];
|
||||
} else {
|
||||
struct mp_image layout = {0};
|
||||
mp_image_set_params(&layout, &mapper->dst_params);
|
||||
|
||||
DXGI_FORMAT copy_fmt;
|
||||
@ -123,11 +156,6 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
|
||||
default: return -1;
|
||||
}
|
||||
|
||||
// We copy decoder images to an intermediate texture. This is slower than
|
||||
// the zero-copy path, but according to MSDN, decoder textures should not
|
||||
// be bound to SRVs, so it is technically correct, and it works around some
|
||||
// driver "bugs" that can happen with the zero-copy path. It also allows
|
||||
// samplers to work correctly when the decoder image includes padding.
|
||||
D3D11_TEXTURE2D_DESC copy_desc = {
|
||||
.Width = mapper->dst_params.w,
|
||||
.Height = mapper->dst_params.h,
|
||||
@ -137,7 +165,8 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
|
||||
.Format = copy_fmt,
|
||||
.BindFlags = D3D11_BIND_SHADER_RESOURCE,
|
||||
};
|
||||
hr = ID3D11Device_CreateTexture2D(o->device, ©_desc, NULL, &p->copy_tex);
|
||||
hr = ID3D11Device_CreateTexture2D(o->device, ©_desc, NULL,
|
||||
&p->copy_tex);
|
||||
if (FAILED(hr)) {
|
||||
MP_FATAL(mapper, "Could not create shader resource texture\n");
|
||||
return -1;
|
||||
@ -145,8 +174,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
|
||||
|
||||
for (int i = 0; i < desc.num_planes; i++) {
|
||||
mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex,
|
||||
mp_image_plane_w(&layout, i),
|
||||
mp_image_plane_h(&layout, i),
|
||||
mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0,
|
||||
desc.planes[i]);
|
||||
if (!mapper->tex[i]) {
|
||||
MP_FATAL(mapper, "Could not create RA texture view\n");
|
||||
@ -154,7 +182,9 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
|
||||
}
|
||||
}
|
||||
|
||||
// A ref to the immediate context is needed for CopySubresourceRegion
|
||||
ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -165,6 +195,7 @@ static int mapper_map(struct ra_hwdec_mapper *mapper)
|
||||
ID3D11Texture2D *tex = (void *)mapper->src->planes[0];
|
||||
int subresource = (intptr_t)mapper->src->planes[1];
|
||||
|
||||
if (p->copy_tex) {
|
||||
ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx,
|
||||
(ID3D11Resource *)p->copy_tex, 0, 0, 0, 0,
|
||||
(ID3D11Resource *)tex, subresource, (&(D3D11_BOX) {
|
||||
@ -175,10 +206,36 @@ static int mapper_map(struct ra_hwdec_mapper *mapper)
|
||||
.bottom = mapper->dst_params.h,
|
||||
.back = 1,
|
||||
}), D3D11_COPY_DISCARD);
|
||||
} else {
|
||||
D3D11_TEXTURE2D_DESC desc2d;
|
||||
ID3D11Texture2D_GetDesc(tex, &desc2d);
|
||||
|
||||
for (int i = 0; i < p->num_planes; i++) {
|
||||
// The video decode texture may include padding, so the size of the
|
||||
// ra_tex needs to be determined by the actual size of the Tex2D
|
||||
bool chroma = i >= 1;
|
||||
int w = desc2d.Width / (chroma ? 2 : 1);
|
||||
int h = desc2d.Height / (chroma ? 2 : 1);
|
||||
|
||||
mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex,
|
||||
w, h, subresource, p->fmt[i]);
|
||||
if (!mapper->tex[i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Release the textures created by mapper_map. Only the zero-copy path wraps
// the decoder texture in new ra_tex objects on every map; in the 1-copy path
// the ra_tex objects are created once in mapper_init, so nothing is freed here.
static void mapper_unmap(struct ra_hwdec_mapper *mapper)
|
||||
{
|
||||
struct priv *p = mapper->priv;
|
||||
// copy_tex set means 1-copy path: keep mapper->tex[] for the next map
if (p->copy_tex)
|
||||
return;
|
||||
// All 4 slots are passed to ra_tex_free regardless of num_planes.
// NOTE(review): presumably ra_tex_free tolerates empty slots - confirm.
for (int i = 0; i < 4; i++)
|
||||
ra_tex_free(mapper->ra, &mapper->tex[i]);
|
||||
}
|
||||
|
||||
const struct ra_hwdec_driver ra_hwdec_d3d11va = {
|
||||
.name = "d3d11va",
|
||||
.priv_size = sizeof(struct priv_owner),
|
||||
@ -191,5 +248,6 @@ const struct ra_hwdec_driver ra_hwdec_d3d11va = {
|
||||
.init = mapper_init,
|
||||
.uninit = mapper_uninit,
|
||||
.map = mapper_map,
|
||||
.unmap = mapper_unmap,
|
||||
},
|
||||
};
|
||||
|
@ -75,6 +75,7 @@ struct d3d_tex {
|
||||
ID3D11Texture1D *tex1d;
|
||||
ID3D11Texture2D *tex2d;
|
||||
ID3D11Texture3D *tex3d;
|
||||
int array_slice;
|
||||
|
||||
ID3D11ShaderResourceView *srv;
|
||||
ID3D11RenderTargetView *rtv;
|
||||
@ -259,14 +260,29 @@ static bool tex_init(struct ra *ra, struct ra_tex *tex)
|
||||
};
|
||||
switch (params->dimensions) {
|
||||
case 1:
|
||||
if (tex_p->array_slice >= 0) {
|
||||
srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
|
||||
srvdesc.Texture1DArray.MipLevels = 1;
|
||||
srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
|
||||
srvdesc.Texture1DArray.ArraySize = 1;
|
||||
} else {
|
||||
srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
|
||||
srvdesc.Texture1D.MipLevels = 1;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (tex_p->array_slice >= 0) {
|
||||
srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
|
||||
srvdesc.Texture2DArray.MipLevels = 1;
|
||||
srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
|
||||
srvdesc.Texture2DArray.ArraySize = 1;
|
||||
} else {
|
||||
srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
|
||||
srvdesc.Texture2D.MipLevels = 1;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// D3D11 does not have Texture3D arrays
|
||||
srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
|
||||
srvdesc.Texture3D.MipLevels = 1;
|
||||
break;
|
||||
@ -442,6 +458,8 @@ static struct ra_tex *tex_create(struct ra *ra,
|
||||
abort();
|
||||
}
|
||||
|
||||
tex_p->array_slice = -1;
|
||||
|
||||
if (!tex_init(ra, tex))
|
||||
goto error;
|
||||
|
||||
@ -478,10 +496,18 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
|
||||
|
||||
D3D11_TEXTURE2D_DESC desc2d;
|
||||
ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
|
||||
if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1)
|
||||
if (desc2d.MipLevels != 1) {
|
||||
MP_ERR(ra, "Mipmapped textures not supported for wrapping\n");
|
||||
goto error;
|
||||
if (desc2d.SampleDesc.Count != 1)
|
||||
}
|
||||
if (desc2d.ArraySize != 1) {
|
||||
MP_ERR(ra, "Texture arrays not supported for wrapping\n");
|
||||
goto error;
|
||||
}
|
||||
if (desc2d.SampleDesc.Count != 1) {
|
||||
MP_ERR(ra, "Multisampled textures not supported for wrapping\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
params->dimensions = 2;
|
||||
params->w = desc2d.Width;
|
||||
@ -522,6 +548,8 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
|
||||
goto error;
|
||||
}
|
||||
|
||||
tex_p->array_slice = -1;
|
||||
|
||||
if (!tex_init(ra, tex))
|
||||
goto error;
|
||||
|
||||
@ -532,7 +560,7 @@ error:
|
||||
}
|
||||
|
||||
struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
|
||||
int w, int h,
|
||||
int w, int h, int array_slice,
|
||||
const struct ra_format *fmt)
|
||||
{
|
||||
struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
|
||||
@ -559,6 +587,12 @@ struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
|
||||
// fmt can be different to the texture format for planar video textures
|
||||
params->format = fmt;
|
||||
|
||||
if (desc2d.ArraySize > 1) {
|
||||
tex_p->array_slice = array_slice;
|
||||
} else {
|
||||
tex_p->array_slice = -1;
|
||||
}
|
||||
|
||||
if (!tex_init(ra, tex))
|
||||
goto error;
|
||||
|
||||
@ -611,12 +645,14 @@ static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
|
||||
}
|
||||
}
|
||||
|
||||
int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0;
|
||||
if (p->ctx1) {
|
||||
ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, 0, rc,
|
||||
src, stride, pitch, invalidate ? D3D11_COPY_DISCARD : 0);
|
||||
ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res,
|
||||
subresource, rc, src, stride, pitch,
|
||||
invalidate ? D3D11_COPY_DISCARD : 0);
|
||||
} else {
|
||||
ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, 0, rc,
|
||||
src, stride, pitch);
|
||||
ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource,
|
||||
rc, src, stride, pitch);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -1174,8 +1210,10 @@ static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
|
||||
{
|
||||
blit_rpass(ra, dst, src, &dst_rc, &src_rc);
|
||||
} else {
|
||||
ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, 0,
|
||||
dst_rc.x0, dst_rc.y0, 0, src_p->res, 0, (&(D3D11_BOX) {
|
||||
int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0;
|
||||
int src_sr = src_p->array_slice >= 0 ? src_p->array_slice : 0;
|
||||
ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr,
|
||||
dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) {
|
||||
.left = src_rc.x0,
|
||||
.top = src_rc.y0,
|
||||
.front = 0,
|
||||
|
@ -22,8 +22,9 @@ struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res);
|
||||
|
||||
// As above, but for a D3D11VA video resource. The fmt parameter selects which
|
||||
// plane of a planar format will be mapped when the RA texture is used.
|
||||
// array_slice should be set for texture arrays and is ignored for non-arrays.
|
||||
struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
|
||||
int w, int h,
|
||||
int w, int h, int array_slice,
|
||||
const struct ra_format *fmt);
|
||||
|
||||
// Get the underlying D3D11 device from an RA instance. The returned device is
|
||||
|
Loading…
Reference in New Issue
Block a user