mirror of https://github.com/mpv-player/mpv
video: drop old D3D11/DXVA2 support
Now you need FFmpeg git, or something. This also gets rid of the last real use of gpu_memcpy(). libavutil does that itself. (vaapi.c still used it, but it was essentially unused, because the code path isn't really in use anymore. It wasn't even included due to the d3d-hwaccel dependency in wscript.)
This commit is contained in:
parent
5d57e9b1be
commit
ae7db6503b
|
@ -128,6 +128,7 @@ FFmpeg dependencies:
|
|||
- libx264/libmp3lame/libfdk-aac if you want to use encoding (have to be
|
||||
explicitly enabled when compiling FFmpeg)
|
||||
- Libav also works, but some features will not work. (See section below.)
|
||||
- FFmpeg/Libav git for Windows/D3D11 and CUDA decoding.
|
||||
|
||||
Most of the above libraries are available in suitable versions on normal
|
||||
Linux distributions. However, FFmpeg is an exception (distro versions may be
|
||||
|
|
|
@ -17,9 +17,16 @@
|
|||
|
||||
#include <pthread.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <libavcodec/avcodec.h>
|
||||
|
||||
#include "config.h"
|
||||
#include <libavutil/hwcontext.h>
|
||||
#include <libavutil/hwcontext_d3d11va.h>
|
||||
|
||||
#if HAVE_D3D9_HWACCEL
|
||||
#include <libavutil/hwcontext_dxva2.h>
|
||||
#endif
|
||||
|
||||
#include "lavc.h"
|
||||
#include "common/common.h"
|
||||
|
@ -31,81 +38,6 @@
|
|||
|
||||
#include "d3d.h"
|
||||
|
||||
#if !HAVE_D3D_HWACCEL_NEW
|
||||
|
||||
// define all the GUIDs used directly here, to avoid problems with inconsistent
|
||||
// dxva2api.h versions in mingw-w64 and different MSVC version
|
||||
#include <guiddef.h>
|
||||
#include <cguid.h>
|
||||
// MPEG-1/2 decoder profiles
DEFINE_GUID(DXVA2_ModeMPEG2_VLD,
            0xee27417f, 0x5e28, 0x4e65,
            0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9);
DEFINE_GUID(DXVA2_ModeMPEG2and1_VLD,
            0x86695f12, 0x340e, 0x4f04,
            0x9f, 0xd3, 0x92, 0x53, 0xdd, 0x32, 0x74, 0x60);

// H.264 decoder profiles
DEFINE_GUID(DXVA2_ModeH264_E,
            0x1b81be68, 0xa0c7, 0x11d3,
            0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
DEFINE_GUID(DXVA2_ModeH264_F,
            0x1b81be69, 0xa0c7, 0x11d3,
            0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
DEFINE_GUID(DXVA_ModeH264_VLD_WithFMOASO_NoFGT,
            0xd5f04ff9, 0x3418, 0x45d8,
            0x95, 0x61, 0x32, 0xa7, 0x6a, 0xae, 0x2d, 0xdd);
DEFINE_GUID(DXVA_Intel_H264_NoFGT_ClearVideo,
            0x604F8E68, 0x4951, 0x4c54,
            0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
DEFINE_GUID(DXVA_ModeH264_VLD_NoFGT_Flash,
            0x4245F676, 0x2BBC, 0x4166,
            0xa0, 0xBB, 0x54, 0xE7, 0xB8, 0x49, 0xC3, 0x80);

// VC-1 decoder profiles
DEFINE_GUID(DXVA2_ModeVC1_D,
            0x1b81beA3, 0xa0c7, 0x11d3,
            0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
// August 2010 update
DEFINE_GUID(DXVA2_ModeVC1_D2010,
            0x1b81beA4, 0xa0c7, 0x11d3,
            0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);

// HEVC decoder profiles
DEFINE_GUID(DXVA2_ModeHEVC_VLD_Main,
            0x5b11d51b, 0x2f4c, 0x4452,
            0xbc, 0xc3, 0x09, 0xf2, 0xa1, 0x16, 0x0c, 0xc0);
DEFINE_GUID(DXVA2_ModeHEVC_VLD_Main10,
            0x107af0e0, 0xef1a, 0x4d19,
            0xab, 0xa8, 0x67, 0xa1, 0x63, 0x07, 0x3d, 0x13);

// VP9 decoder profile
DEFINE_GUID(DXVA2_ModeVP9_VLD_Profile0,
            0x463707f8, 0xa1d0, 0x4585,
            0x87, 0x6d, 0x83, 0xaa, 0x6d, 0x60, 0xb8, 0x9e);

// Compared against a decoder config's bitstream encryption GUID when
// scoring configurations.
DEFINE_GUID(DXVA2_NoEncrypt,
            0x1b81beD0, 0xa0c7, 0x11d3,
            0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
|
||||
|
||||
static const int PROF_MPEG2_MAIN[] = {FF_PROFILE_MPEG2_SIMPLE,
|
||||
FF_PROFILE_MPEG2_MAIN, 0};
|
||||
static const int PROF_H264_HIGH[] = {FF_PROFILE_H264_CONSTRAINED_BASELINE,
|
||||
FF_PROFILE_H264_MAIN,
|
||||
FF_PROFILE_H264_HIGH, 0};
|
||||
static const int PROF_HEVC_MAIN[] = {FF_PROFILE_HEVC_MAIN, 0};
|
||||
static const int PROF_HEVC_MAIN10[] = {FF_PROFILE_HEVC_MAIN,
|
||||
FF_PROFILE_HEVC_MAIN_10, 0};
|
||||
|
||||
struct d3dva_mode {
|
||||
const GUID *guid;
|
||||
const char *name;
|
||||
enum AVCodecID codec;
|
||||
const int *profiles; // NULL or ends with 0
|
||||
};
|
||||
|
||||
#define MODE2(id) &MP_CONCAT(DXVA2_Mode, id), # id
|
||||
#define MODE(id) &MP_CONCAT(DXVA_, id), # id
|
||||
// Preferred modes must come first
|
||||
static const struct d3dva_mode d3dva_modes[] = {
|
||||
// MPEG-1/2
|
||||
{MODE2(MPEG2_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN},
|
||||
{MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN},
|
||||
{MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG1VIDEO},
|
||||
|
||||
// H.264
|
||||
{MODE2(H264_F), AV_CODEC_ID_H264, PROF_H264_HIGH},
|
||||
{MODE2(H264_E), AV_CODEC_ID_H264, PROF_H264_HIGH},
|
||||
{MODE (Intel_H264_NoFGT_ClearVideo), AV_CODEC_ID_H264, PROF_H264_HIGH},
|
||||
{MODE (ModeH264_VLD_WithFMOASO_NoFGT), AV_CODEC_ID_H264, PROF_H264_HIGH},
|
||||
{MODE (ModeH264_VLD_NoFGT_Flash), AV_CODEC_ID_H264, PROF_H264_HIGH},
|
||||
|
||||
// VC-1 / WMV3
|
||||
{MODE2(VC1_D), AV_CODEC_ID_VC1},
|
||||
{MODE2(VC1_D), AV_CODEC_ID_WMV3},
|
||||
{MODE2(VC1_D2010), AV_CODEC_ID_VC1},
|
||||
{MODE2(VC1_D2010), AV_CODEC_ID_WMV3},
|
||||
|
||||
// HEVC
|
||||
{MODE2(HEVC_VLD_Main), AV_CODEC_ID_HEVC, PROF_HEVC_MAIN},
|
||||
{MODE2(HEVC_VLD_Main10), AV_CODEC_ID_HEVC, PROF_HEVC_MAIN10},
|
||||
|
||||
// VP9
|
||||
{MODE2(VP9_VLD_Profile0), AV_CODEC_ID_VP9},
|
||||
};
|
||||
#undef MODE
|
||||
#undef MODE2
|
||||
|
||||
#endif
|
||||
|
||||
HMODULE d3d11_dll, d3d9_dll, dxva2_dll;
|
||||
PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice;
|
||||
|
||||
|
@ -136,7 +68,6 @@ void d3d_load_dlls(void)
|
|||
pthread_once(&d3d_load_once, d3d_do_load);
|
||||
}
|
||||
|
||||
|
||||
// Test if Direct3D11 can be used by us. Basically, this prevents trying to use
|
||||
// D3D11 on Win7, and then failing somewhere in the process.
|
||||
bool d3d11_check_decoding(ID3D11Device *dev)
|
||||
|
@ -149,254 +80,6 @@ bool d3d11_check_decoding(ID3D11Device *dev)
|
|||
return !FAILED(hr) && (supported & D3D11_BIND_DECODER);
|
||||
}
|
||||
|
||||
#if !HAVE_D3D_HWACCEL_NEW
|
||||
|
||||
int d3d_probe_codec(const char *codec)
|
||||
{
|
||||
enum AVCodecID codecid = mp_codec_to_av_codec_id(codec);
|
||||
for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) {
|
||||
const struct d3dva_mode *mode = &d3dva_modes[i];
|
||||
if (mode->codec == codecid)
|
||||
return 0;
|
||||
}
|
||||
return HWDEC_ERR_NO_CODEC;
|
||||
}
|
||||
|
||||
static bool profile_compatible(const struct d3dva_mode *mode, int profile)
|
||||
{
|
||||
if (!mode->profiles)
|
||||
return true;
|
||||
|
||||
for (int i = 0; mode->profiles[i]; i++){
|
||||
if(mode->profiles[i] == profile)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool mode_supported(const struct d3dva_mode *mode,
|
||||
const GUID *device_modes, UINT n_modes)
|
||||
{
|
||||
for (int i = 0; i < n_modes; i++) {
|
||||
if (IsEqualGUID(mode->guid, &device_modes[i]))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pick the first entry of the decoder mode table that matches the codec and
// profile of s->avctx, is reported by the device (device_guids), and for which
// some output format in `formats` is deep enough and accepted by test_fmt_cb.
//
// Returns the chosen GUID/format pair. On failure the result has
// guid == &GUID_NULL and format == NULL.
struct d3d_decoder_fmt d3d_select_decoder_mode(
    struct lavc_ctx *s, const GUID *device_guids, UINT n_guids,
    const struct d3d_decoded_format *formats, int n_formats,
    bool (*test_fmt_cb)(struct lavc_ctx *s, const GUID *guid,
                        const struct d3d_decoded_format *fmt))
{
    struct d3d_decoder_fmt result = {
        .guid = &GUID_NULL,
        .format = NULL,
    };

    // The software format carries the right bit depth, but is unfortunately
    // not the native surface format.
    int sw_img_fmt = pixfmt2imgfmt(s->avctx->sw_pix_fmt);
    if (sw_img_fmt == IMGFMT_NONE)
        return result;

    const int depth = IMGFMT_RGB_DEPTH(sw_img_fmt);

    for (int m = 0; m < MP_ARRAY_SIZE(d3dva_modes); m++) {
        const struct d3dva_mode *cand = &d3dva_modes[m];

        if (cand->codec != s->avctx->codec_id)
            continue;
        if (!profile_compatible(cand, s->avctx->profile))
            continue;
        if (!mode_supported(cand, device_guids, n_guids))
            continue;

        for (int f = 0; f < n_formats; f++) {
            const struct d3d_decoded_format *out = &formats[f];

            // Too shallow formats are skipped without querying the callback.
            if (depth > out->depth || !test_fmt_cb(s, cand->guid, out))
                continue;

            MP_VERBOSE(s, "Selecting %s ",
                       d3d_decoder_guid_to_desc(cand->guid));
            if (out->dxfmt >= (1 << 16)) {
                MP_VERBOSE(s, "%s\n", mp_tag_str(out->dxfmt));
            } else {
                MP_VERBOSE(s, "%d\n", (int)out->dxfmt);
            }

            result.guid = cand->guid;
            result.format = out;
            return result;
        }
    }

    return result;
}
|
||||
|
||||
char *d3d_decoder_guid_to_desc_buf(char *buf, size_t buf_size,
|
||||
const GUID *mode_guid)
|
||||
{
|
||||
const char *name = "<unknown>";
|
||||
for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) {
|
||||
const struct d3dva_mode *mode = &d3dva_modes[i];
|
||||
if (IsEqualGUID(mode->guid, mode_guid)) {
|
||||
name = mode->name;
|
||||
break;
|
||||
}
|
||||
}
|
||||
snprintf(buf, buf_size, "%s %s", mp_GUID_to_str(mode_guid), name);
|
||||
return buf;
|
||||
}
|
||||
|
||||
void d3d_surface_align(struct lavc_ctx *s, int *w, int *h)
|
||||
{
|
||||
int alignment = 16;
|
||||
switch (s->avctx->codec_id) {
|
||||
// decoding MPEG-2 requires additional alignment on some Intel GPUs, but it
|
||||
// causes issues for H.264 on certain AMD GPUs.....
|
||||
case AV_CODEC_ID_MPEG2VIDEO:
|
||||
alignment = 32;
|
||||
break;
|
||||
// the HEVC DXVA2 spec asks for 128 pixel aligned surfaces to ensure
|
||||
// all coding features have enough room to work with
|
||||
case AV_CODEC_ID_HEVC:
|
||||
alignment = 128;
|
||||
break;
|
||||
}
|
||||
*w = FFALIGN(*w, alignment);
|
||||
*h = FFALIGN(*h, alignment);
|
||||
}
|
||||
|
||||
unsigned d3d_decoder_config_score(struct lavc_ctx *s,
|
||||
GUID *guidConfigBitstreamEncryption,
|
||||
UINT ConfigBitstreamRaw)
|
||||
{
|
||||
unsigned score = 0;
|
||||
if (ConfigBitstreamRaw == 1) {
|
||||
score = 1;
|
||||
} else if (s->avctx->codec_id == AV_CODEC_ID_H264
|
||||
&& ConfigBitstreamRaw == 2) {
|
||||
score = 2;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (IsEqualGUID(guidConfigBitstreamEncryption, &DXVA2_NoEncrypt))
|
||||
score += 16;
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
BOOL is_clearvideo(const GUID *mode_guid)
|
||||
{
|
||||
return IsEqualGUID(mode_guid, &DXVA_Intel_H264_NoFGT_ClearVideo);
|
||||
}
|
||||
|
||||
void copy_nv12(struct mp_image *dest, uint8_t *src_bits,
|
||||
unsigned src_pitch, unsigned surf_height)
|
||||
{
|
||||
struct mp_image buf = {0};
|
||||
mp_image_setfmt(&buf, dest->imgfmt);
|
||||
mp_image_set_size(&buf, dest->w, dest->h);
|
||||
|
||||
buf.planes[0] = src_bits;
|
||||
buf.stride[0] = src_pitch;
|
||||
buf.planes[1] = src_bits + src_pitch * surf_height;
|
||||
buf.stride[1] = src_pitch;
|
||||
mp_image_copy_gpu(dest, &buf);
|
||||
}
|
||||
|
||||
// Translate a DXGI texture format into the mpv image format used for its
// software copy. Returns 0 for formats that are not handled.
// Note that P016 is mapped to IMGFMT_P010, same as P010.
static int get_dxgi_mpfmt(DWORD dxgi_fmt)
{
    if (dxgi_fmt == DXGI_FORMAT_NV12)
        return IMGFMT_NV12;
    if (dxgi_fmt == DXGI_FORMAT_P010 || dxgi_fmt == DXGI_FORMAT_P016)
        return IMGFMT_P010;
    return 0;
}
|
||||
|
||||
struct mp_image *d3d11_download_image(struct mp_hwdec_ctx *ctx,
|
||||
struct mp_image *mpi,
|
||||
struct mp_image_pool *swpool)
|
||||
{
|
||||
HRESULT hr;
|
||||
ID3D11Device *device = ctx->ctx;
|
||||
|
||||
if (mpi->imgfmt != IMGFMT_D3D11VA && mpi->imgfmt != IMGFMT_D3D11NV12)
|
||||
return NULL;
|
||||
|
||||
ID3D11Texture2D *texture = (void *)mpi->planes[0];
|
||||
int subindex = (intptr_t)mpi->planes[1];
|
||||
if (!texture)
|
||||
return NULL;
|
||||
|
||||
D3D11_TEXTURE2D_DESC tex_desc;
|
||||
ID3D11Texture2D_GetDesc(texture, &tex_desc);
|
||||
int mpfmt = get_dxgi_mpfmt(tex_desc.Format);
|
||||
if (!mpfmt)
|
||||
return NULL;
|
||||
|
||||
// create staging texture shared with the CPU with mostly the same
|
||||
// parameters as the source texture
|
||||
tex_desc.MipLevels = 1;
|
||||
tex_desc.MiscFlags = 0;
|
||||
tex_desc.ArraySize = 1;
|
||||
tex_desc.Usage = D3D11_USAGE_STAGING;
|
||||
tex_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
tex_desc.BindFlags = 0;
|
||||
ID3D11Texture2D *staging = NULL;
|
||||
hr = ID3D11Device_CreateTexture2D(device, &tex_desc, NULL, &staging);
|
||||
if (FAILED(hr))
|
||||
return NULL;
|
||||
|
||||
bool ok = false;
|
||||
struct mp_image *sw_img = NULL;
|
||||
ID3D11DeviceContext *device_ctx = NULL;
|
||||
ID3D11Device_GetImmediateContext(device, &device_ctx);
|
||||
|
||||
// copy to the staging texture
|
||||
ID3D11DeviceContext_CopySubresourceRegion(
|
||||
device_ctx,
|
||||
(ID3D11Resource *)staging, 0, 0, 0, 0,
|
||||
(ID3D11Resource *)texture, subindex, NULL);
|
||||
|
||||
sw_img = mp_image_pool_get(swpool, mpfmt, tex_desc.Width, tex_desc.Height);
|
||||
if (!sw_img)
|
||||
goto done;
|
||||
|
||||
// copy staging texture to the cpu mp_image
|
||||
D3D11_MAPPED_SUBRESOURCE lock;
|
||||
hr = ID3D11DeviceContext_Map(device_ctx, (ID3D11Resource *)staging,
|
||||
0, D3D11_MAP_READ, 0, &lock);
|
||||
if (FAILED(hr))
|
||||
goto done;
|
||||
copy_nv12(sw_img, lock.pData, lock.RowPitch, tex_desc.Height);
|
||||
ID3D11DeviceContext_Unmap(device_ctx, (ID3D11Resource *)staging, 0);
|
||||
|
||||
mp_image_set_size(sw_img, mpi->w, mpi->h);
|
||||
mp_image_copy_attributes(sw_img, mpi);
|
||||
ok = true;
|
||||
|
||||
done:
|
||||
ID3D11Texture2D_Release(staging);
|
||||
ID3D11DeviceContext_Release(device_ctx);
|
||||
if (!ok)
|
||||
mp_image_unrefp(&sw_img);
|
||||
return sw_img;
|
||||
}
|
||||
|
||||
// Dummies for simpler compat.
|
||||
// Stub used when the new hwaccel API is not compiled in: never wraps the
// device and always returns NULL.
AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device)
{
    return NULL;
}
|
||||
// Stub used when the new hwaccel API is not compiled in: never wraps the
// device and always returns NULL.
AVBufferRef *d3d9_wrap_device_ref(struct IDirect3DDevice9 *device)
{
    return NULL;
}
|
||||
|
||||
#else /* !HAVE_D3D_HWACCEL_NEW */
|
||||
|
||||
#include <libavutil/hwcontext.h>
|
||||
#include <libavutil/hwcontext_d3d11va.h>
|
||||
|
||||
#if HAVE_D3D9_HWACCEL
|
||||
#include <libavutil/hwcontext_dxva2.h>
|
||||
#endif
|
||||
|
||||
void d3d_hwframes_refine(struct lavc_ctx *ctx, AVBufferRef *hw_frames_ctx)
|
||||
{
|
||||
AVHWFramesContext *fctx = (void *)hw_frames_ctx->data;
|
||||
|
@ -449,13 +132,3 @@ AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device)
|
|||
|
||||
return device_ref;
|
||||
}
|
||||
|
||||
// Dummy for simpler compat.
|
||||
// Stub for builds that use the new hwaccel API: no download is performed and
// NULL is always returned.
struct mp_image *d3d11_download_image(struct mp_hwdec_ctx *ctx,
                                      struct mp_image *mpi,
                                      struct mp_image_pool *swpool)
{
    struct mp_image *none = NULL;
    return none;
}
|
||||
|
||||
#endif /* else !HAVE_D3D_HWACCEL_NEW */
|
||||
|
|
|
@ -24,21 +24,8 @@
|
|||
#include <stdbool.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
struct mp_image;
|
||||
struct lavc_ctx;
|
||||
|
||||
struct d3d_decoded_format {
|
||||
DWORD dxfmt; // D3DFORMAT or DXGI_FORMAT
|
||||
const char *name; // informational string repr. of dxfmt_decoded
|
||||
int depth; // significant bits (not full size)
|
||||
int mpfmt; // IMGFMT_ with compatible memory layout and semantics
|
||||
};
|
||||
|
||||
struct d3d_decoder_fmt {
|
||||
const GUID *guid;
|
||||
const struct d3d_decoded_format *format;
|
||||
};
|
||||
|
||||
// Must call d3d_load_dlls() before accessing. Once this is done, the DLLs
|
||||
// remain loaded forever.
|
||||
extern HMODULE d3d11_dll, d3d9_dll, dxva2_dll;
|
||||
|
@ -46,32 +33,8 @@ extern PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice;
|
|||
|
||||
void d3d_load_dlls(void);
|
||||
|
||||
int d3d_probe_codec(const char *codec);
|
||||
|
||||
struct d3d_decoder_fmt d3d_select_decoder_mode(
|
||||
struct lavc_ctx *s, const GUID *device_guids, UINT n_guids,
|
||||
const struct d3d_decoded_format *formats, int n_formats,
|
||||
bool (*test_fmt_cb)(struct lavc_ctx *s, const GUID *guid,
|
||||
const struct d3d_decoded_format *fmt));
|
||||
|
||||
char *d3d_decoder_guid_to_desc_buf(char *buf, size_t buf_size,
|
||||
const GUID *mode_guid);
|
||||
#define d3d_decoder_guid_to_desc(guid) d3d_decoder_guid_to_desc_buf((char[256]){0}, 256, (guid))
|
||||
|
||||
void d3d_surface_align(struct lavc_ctx *s, int *w, int *h);
|
||||
unsigned d3d_decoder_config_score(struct lavc_ctx *s,
|
||||
GUID *guidConfigBitstreamEncryption,
|
||||
UINT ConfigBitstreamRaw);
|
||||
BOOL is_clearvideo(const GUID *mode_guid);
|
||||
void copy_nv12(struct mp_image *dest, uint8_t *src_bits,
|
||||
unsigned src_pitch, unsigned surf_height);
|
||||
|
||||
bool d3d11_check_decoding(ID3D11Device *dev);
|
||||
|
||||
struct mp_image *d3d11_download_image(struct mp_hwdec_ctx *ctx,
|
||||
struct mp_image *mpi,
|
||||
struct mp_image_pool *swpool);
|
||||
|
||||
struct AVBufferRef;
|
||||
struct IDirect3DDevice9;
|
||||
|
||||
|
|
|
@ -30,566 +30,6 @@
|
|||
|
||||
#include "d3d.h"
|
||||
|
||||
#if !HAVE_D3D_HWACCEL_NEW
|
||||
|
||||
#define ADDITIONAL_SURFACES HWDEC_EXTRA_SURFACES
|
||||
|
||||
struct d3d11va_decoder {
|
||||
ID3D11VideoDecoder *decoder;
|
||||
struct mp_image_pool *pool;
|
||||
ID3D11Texture2D *staging;
|
||||
int mpfmt_decoded;
|
||||
};
|
||||
|
||||
struct priv {
|
||||
struct mp_log *log;
|
||||
|
||||
ID3D11Device *device;
|
||||
ID3D11DeviceContext *device_ctx;
|
||||
ID3D11VideoDevice *video_dev;
|
||||
ID3D11VideoContext *video_ctx;
|
||||
|
||||
struct d3d11va_decoder *decoder;
|
||||
struct mp_image_pool *sw_pool;
|
||||
};
|
||||
|
||||
struct d3d11va_surface {
|
||||
ID3D11Texture2D *texture;
|
||||
ID3D11VideoDecoderOutputView *surface;
|
||||
};
|
||||
|
||||
static void d3d11va_release_img(void *arg)
|
||||
{
|
||||
struct d3d11va_surface *surface = arg;
|
||||
if (surface->surface)
|
||||
ID3D11VideoDecoderOutputView_Release(surface->surface);
|
||||
|
||||
if (surface->texture)
|
||||
ID3D11Texture2D_Release(surface->texture);
|
||||
|
||||
talloc_free(surface);
|
||||
}
|
||||
|
||||
// Wrap a decoder output view in an IMGFMT_D3D11VA mp_image of size w x h.
// The image takes its own reference on the view and on the underlying texture;
// both are dropped by d3d11va_release_img() when the image is freed.
// Plane layout: [0] texture, [1] array slice, [2] NULL, [3] view.
// Returns NULL if view is NULL; aborts if the mp_image cannot be created.
static struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view,
                                        int w, int h)
{
    if (!view)
        return NULL;

    struct d3d11va_surface *ref = talloc_zero(NULL, struct d3d11va_surface);
    ref->surface = view;
    ID3D11VideoDecoderOutputView_AddRef(view);
    ID3D11VideoDecoderOutputView_GetResource(
        view, (ID3D11Resource **)&ref->texture);

    D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC view_desc;
    ID3D11VideoDecoderOutputView_GetDesc(view, &view_desc);

    struct mp_image *mpi =
        mp_image_new_custom_ref(NULL, ref, d3d11va_release_img);
    if (!mpi)
        abort();

    mp_image_setfmt(mpi, IMGFMT_D3D11VA);
    mp_image_set_size(mpi, w, h);

    mpi->planes[0] = (void *)ref->texture;
    mpi->planes[1] = (void *)(intptr_t)view_desc.Texture2D.ArraySlice;
    mpi->planes[2] = NULL;
    mpi->planes[3] = (void *)ref->surface;
    return mpi;
}
|
||||
|
||||
static struct mp_image *d3d11va_allocate_image(struct lavc_ctx *s, int w, int h)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
struct mp_image *img = mp_image_pool_get_no_alloc(p->decoder->pool,
|
||||
IMGFMT_D3D11VA, w, h);
|
||||
if (!img)
|
||||
MP_ERR(p, "Failed to get free D3D11VA surface\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
static struct mp_image *d3d11va_retrieve_image(struct lavc_ctx *s,
|
||||
struct mp_image *img)
|
||||
{
|
||||
HRESULT hr;
|
||||
struct priv *p = s->hwdec_priv;
|
||||
ID3D11Texture2D *staging = p->decoder->staging;
|
||||
|
||||
if (img->imgfmt != IMGFMT_D3D11VA)
|
||||
return img;
|
||||
|
||||
ID3D11Texture2D *texture = (void *)img->planes[0];
|
||||
int subindex = (intptr_t)img->planes[1];
|
||||
|
||||
if (!texture) {
|
||||
MP_ERR(p, "Failed to get Direct3D texture and surface from mp_image\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
D3D11_TEXTURE2D_DESC texture_desc;
|
||||
ID3D11Texture2D_GetDesc(texture, &texture_desc);
|
||||
if (texture_desc.Width < img->w || texture_desc.Height < img->h) {
|
||||
MP_ERR(p, "Direct3D11 texture smaller than mp_image dimensions\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
// copy to the staging texture
|
||||
ID3D11DeviceContext_CopySubresourceRegion(
|
||||
p->device_ctx,
|
||||
(ID3D11Resource *)staging, 0, 0, 0, 0,
|
||||
(ID3D11Resource *)texture, subindex, NULL);
|
||||
|
||||
struct mp_image *sw_img = mp_image_pool_get(p->sw_pool,
|
||||
p->decoder->mpfmt_decoded,
|
||||
texture_desc.Width,
|
||||
texture_desc.Height);
|
||||
if (!sw_img) {
|
||||
MP_ERR(p, "Failed to get %s surface from CPU pool\n",
|
||||
mp_imgfmt_to_name(p->decoder->mpfmt_decoded));
|
||||
return img;
|
||||
}
|
||||
|
||||
// copy staging texture to the cpu mp_image
|
||||
D3D11_MAPPED_SUBRESOURCE lock;
|
||||
hr = ID3D11DeviceContext_Map(p->device_ctx, (ID3D11Resource *)staging,
|
||||
0, D3D11_MAP_READ, 0, &lock);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to map D3D11 surface: %s\n", mp_HRESULT_to_str(hr));
|
||||
talloc_free(sw_img);
|
||||
return img;
|
||||
}
|
||||
copy_nv12(sw_img, lock.pData, lock.RowPitch, texture_desc.Height);
|
||||
ID3D11DeviceContext_Unmap(p->device_ctx, (ID3D11Resource *)staging, 0);
|
||||
|
||||
mp_image_set_size(sw_img, img->w, img->h);
|
||||
mp_image_copy_attributes(sw_img, img);
|
||||
talloc_free(img);
|
||||
return sw_img;
|
||||
}
|
||||
|
||||
#define DFMT(name) MP_CONCAT(DXGI_FORMAT_, name), # name
|
||||
static const struct d3d_decoded_format d3d11_formats[] = {
|
||||
{DFMT(NV12), 8, IMGFMT_NV12},
|
||||
{DFMT(P010), 10, IMGFMT_P010},
|
||||
{DFMT(P016), 16, IMGFMT_P010},
|
||||
};
|
||||
#undef DFMT
|
||||
|
||||
// Update hw_subfmt to the underlying format. Needed because AVFrame does not
|
||||
// have such an attribute, so it can't be passed through, and is updated here
|
||||
// instead. (But in the future, AVHWFramesContext could be used.)
|
||||
static struct mp_image *d3d11va_update_image_attribs(struct lavc_ctx *s,
|
||||
struct mp_image *img)
|
||||
{
|
||||
ID3D11Texture2D *texture = (void *)img->planes[0];
|
||||
|
||||
if (!texture)
|
||||
return img;
|
||||
|
||||
D3D11_TEXTURE2D_DESC texture_desc;
|
||||
ID3D11Texture2D_GetDesc(texture, &texture_desc);
|
||||
for (int n = 0; n < MP_ARRAY_SIZE(d3d11_formats); n++) {
|
||||
if (d3d11_formats[n].dxfmt == texture_desc.Format) {
|
||||
img->params.hw_subfmt = d3d11_formats[n].mpfmt;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (img->params.hw_subfmt == IMGFMT_NV12)
|
||||
mp_image_setfmt(img, IMGFMT_D3D11NV12);
|
||||
|
||||
return img;
|
||||
}
|
||||
|
||||
static bool d3d11_format_supported(struct lavc_ctx *s, const GUID *guid,
|
||||
const struct d3d_decoded_format *format)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
BOOL is_supported = FALSE;
|
||||
HRESULT hr = ID3D11VideoDevice_CheckVideoDecoderFormat(
|
||||
p->video_dev, guid, format->dxfmt, &is_supported);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Check decoder output format %s for decoder %s: %s\n",
|
||||
format->name, d3d_decoder_guid_to_desc(guid),
|
||||
mp_HRESULT_to_str(hr));
|
||||
}
|
||||
return is_supported;
|
||||
}
|
||||
|
||||
static void dump_decoder_info(struct lavc_ctx *s, const GUID *guid)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
char fmts[256] = {0};
|
||||
for (int i = 0; i < MP_ARRAY_SIZE(d3d11_formats); i++) {
|
||||
const struct d3d_decoded_format *format = &d3d11_formats[i];
|
||||
if (d3d11_format_supported(s, guid, format))
|
||||
mp_snprintf_cat(fmts, sizeof(fmts), " %s", format->name);
|
||||
}
|
||||
MP_VERBOSE(p, "%s %s\n", d3d_decoder_guid_to_desc(guid), fmts);
|
||||
}
|
||||
|
||||
static void d3d11va_destroy_decoder(void *arg)
|
||||
{
|
||||
struct d3d11va_decoder *decoder = arg;
|
||||
|
||||
if (decoder->decoder)
|
||||
ID3D11VideoDecoder_Release(decoder->decoder);
|
||||
|
||||
if (decoder->staging)
|
||||
ID3D11Texture2D_Release(decoder->staging);
|
||||
}
|
||||
|
||||
static int d3d11va_init_decoder(struct lavc_ctx *s, int w, int h)
|
||||
{
|
||||
HRESULT hr;
|
||||
int ret = -1;
|
||||
struct priv *p = s->hwdec_priv;
|
||||
TA_FREEP(&p->decoder);
|
||||
|
||||
ID3D11Texture2D *texture = NULL;
|
||||
void *tmp = talloc_new(NULL);
|
||||
|
||||
UINT n_guids = ID3D11VideoDevice_GetVideoDecoderProfileCount(p->video_dev);
|
||||
GUID *device_guids = talloc_array(tmp, GUID, n_guids);
|
||||
for (UINT i = 0; i < n_guids; i++) {
|
||||
GUID *guid = &device_guids[i];
|
||||
hr = ID3D11VideoDevice_GetVideoDecoderProfile(p->video_dev, i, guid);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get VideoDecoderProfile %d: %s\n",
|
||||
i, mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
dump_decoder_info(s, guid);
|
||||
}
|
||||
|
||||
struct d3d_decoder_fmt fmt =
|
||||
d3d_select_decoder_mode(s, device_guids, n_guids,
|
||||
d3d11_formats, MP_ARRAY_SIZE(d3d11_formats),
|
||||
d3d11_format_supported);
|
||||
if (!fmt.format) {
|
||||
MP_ERR(p, "Failed to find a suitable decoder\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
struct d3d11va_decoder *decoder = talloc_zero(tmp, struct d3d11va_decoder);
|
||||
talloc_set_destructor(decoder, d3d11va_destroy_decoder);
|
||||
decoder->mpfmt_decoded = fmt.format->mpfmt;
|
||||
|
||||
int n_surfaces = hwdec_get_max_refs(s) + ADDITIONAL_SURFACES;
|
||||
int w_align = w, h_align = h;
|
||||
d3d_surface_align(s, &w_align, &h_align);
|
||||
|
||||
D3D11_TEXTURE2D_DESC tex_desc = {
|
||||
.Width = w_align,
|
||||
.Height = h_align,
|
||||
.MipLevels = 1,
|
||||
.Format = fmt.format->dxfmt,
|
||||
.SampleDesc.Count = 1,
|
||||
.MiscFlags = 0,
|
||||
.ArraySize = n_surfaces,
|
||||
.Usage = D3D11_USAGE_DEFAULT,
|
||||
.BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE,
|
||||
.CPUAccessFlags = 0,
|
||||
};
|
||||
hr = ID3D11Device_CreateTexture2D(p->device, &tex_desc, NULL, &texture);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create Direct3D11 texture with %d surfaces: %s\n",
|
||||
n_surfaces, mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (s->hwdec->type == HWDEC_D3D11VA_COPY) {
|
||||
// create staging texture shared with the CPU with mostly the same
|
||||
// parameters as the above decoder-bound texture
|
||||
ID3D11Texture2D_GetDesc(texture, &tex_desc);
|
||||
tex_desc.MipLevels = 1;
|
||||
tex_desc.MiscFlags = 0;
|
||||
tex_desc.ArraySize = 1;
|
||||
tex_desc.Usage = D3D11_USAGE_STAGING;
|
||||
tex_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
tex_desc.BindFlags = 0;
|
||||
hr = ID3D11Device_CreateTexture2D(p->device, &tex_desc, NULL,
|
||||
&decoder->staging);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create staging texture: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
// pool to hold the mp_image wrapped surfaces
|
||||
decoder->pool = talloc_steal(decoder, mp_image_pool_new(n_surfaces));
|
||||
// array of the same surfaces (needed by ffmpeg)
|
||||
ID3D11VideoDecoderOutputView **surfaces =
|
||||
talloc_array_ptrtype(decoder->pool, surfaces, n_surfaces);
|
||||
|
||||
D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC view_desc = {
|
||||
.DecodeProfile = *fmt.guid,
|
||||
.ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D,
|
||||
};
|
||||
for (int i = 0; i < n_surfaces; i++) {
|
||||
ID3D11VideoDecoderOutputView **surface = &surfaces[i];
|
||||
view_desc.Texture2D.ArraySlice = i;
|
||||
hr = ID3D11VideoDevice_CreateVideoDecoderOutputView(
|
||||
p->video_dev, (ID3D11Resource *)texture, &view_desc, surface);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed getting decoder output view %d: %s\n",
|
||||
i, mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
struct mp_image *img = d3d11va_new_ref(*surface, w, h);
|
||||
ID3D11VideoDecoderOutputView_Release(*surface); // transferred to img
|
||||
if (!img) {
|
||||
MP_ERR(p, "Failed to create D3D11VA image %d\n", i);
|
||||
goto done;
|
||||
}
|
||||
mp_image_pool_add(decoder->pool, img); // transferred to pool
|
||||
}
|
||||
|
||||
D3D11_VIDEO_DECODER_DESC decoder_desc = {
|
||||
.Guid = *fmt.guid,
|
||||
.SampleWidth = w,
|
||||
.SampleHeight = h,
|
||||
.OutputFormat = fmt.format->dxfmt,
|
||||
};
|
||||
UINT n_cfg;
|
||||
hr = ID3D11VideoDevice_GetVideoDecoderConfigCount(p->video_dev,
|
||||
&decoder_desc, &n_cfg);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get number of decoder configurations: %s)",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
// pick the config with the highest score
|
||||
D3D11_VIDEO_DECODER_CONFIG *decoder_config =
|
||||
talloc_zero(decoder, D3D11_VIDEO_DECODER_CONFIG);
|
||||
unsigned max_score = 0;
|
||||
for (UINT i = 0; i < n_cfg; i++) {
|
||||
D3D11_VIDEO_DECODER_CONFIG cfg;
|
||||
hr = ID3D11VideoDevice_GetVideoDecoderConfig(p->video_dev,
|
||||
&decoder_desc,
|
||||
i, &cfg);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get decoder config %d: %s\n",
|
||||
i, mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
unsigned score = d3d_decoder_config_score(
|
||||
s, &cfg.guidConfigBitstreamEncryption, cfg.ConfigBitstreamRaw);
|
||||
if (score > max_score) {
|
||||
max_score = score;
|
||||
*decoder_config = cfg;
|
||||
}
|
||||
}
|
||||
if (!max_score) {
|
||||
MP_ERR(p, "Failed to find a suitable decoder configuration\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
hr = ID3D11VideoDevice_CreateVideoDecoder(p->video_dev, &decoder_desc,
|
||||
decoder_config,
|
||||
&decoder->decoder);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create video decoder: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
struct AVD3D11VAContext *avd3d11va_ctx = s->avctx->hwaccel_context;
|
||||
avd3d11va_ctx->decoder = decoder->decoder;
|
||||
avd3d11va_ctx->video_context = p->video_ctx;
|
||||
avd3d11va_ctx->cfg = decoder_config;
|
||||
avd3d11va_ctx->surface_count = n_surfaces;
|
||||
avd3d11va_ctx->surface = surfaces;
|
||||
avd3d11va_ctx->workaround = is_clearvideo(fmt.guid) ?
|
||||
FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO : 0;
|
||||
|
||||
p->decoder = talloc_steal(NULL, decoder);
|
||||
ret = 0;
|
||||
done:
|
||||
// still referenced by pool images / surfaces
|
||||
if (texture)
|
||||
ID3D11Texture2D_Release(texture);
|
||||
|
||||
talloc_free(tmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void destroy_device(struct lavc_ctx *s)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
|
||||
if (p->device)
|
||||
ID3D11Device_Release(p->device);
|
||||
|
||||
if (p->device_ctx)
|
||||
ID3D11DeviceContext_Release(p->device_ctx);
|
||||
}
|
||||
|
||||
// Create our own hardware D3D11 device with video support and store it, with
// its immediate context, in priv. thread_safe is passed on to
// ID3D10Multithread::SetMultithreadProtected.
//
// Returns false (after logging) if d3d11.dll is not loaded, the
// D3D11CreateDevice symbol cannot be resolved, device creation fails, or the
// multithread interface is unavailable. A device that was already created at
// that point is left in priv.
static bool create_device(struct lavc_ctx *s, BOOL thread_safe)
{
    struct priv *p = s->hwdec_priv;
    HRESULT hr;

    if (!d3d11_dll) {
        MP_ERR(p, "Failed to load D3D11 library\n");
        return false;
    }

    PFN_D3D11_CREATE_DEVICE create_fn =
        (void *)GetProcAddress(d3d11_dll, "D3D11CreateDevice");
    if (!create_fn) {
        MP_ERR(p, "Failed to get D3D11CreateDevice symbol from DLL: %s\n",
               mp_LastError_to_str());
        return false;
    }

    hr = create_fn(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL,
                   D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0,
                   D3D11_SDK_VERSION, &p->device, NULL, &p->device_ctx);
    if (FAILED(hr)) {
        MP_ERR(p, "Failed to create D3D11 Device: %s\n",
               mp_HRESULT_to_str(hr));
        return false;
    }

    ID3D10Multithread *mt;
    hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread,
                                     (void **)&mt);
    if (FAILED(hr)) {
        MP_ERR(p, "Failed to get Multithread interface: %s\n",
               mp_HRESULT_to_str(hr));
        return false;
    }

    ID3D10Multithread_SetMultithreadProtected(mt, thread_safe);
    ID3D10Multithread_Release(mt);
    return true;
}
|
||||
|
||||
static void d3d11va_uninit(struct lavc_ctx *s)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
talloc_free(p->decoder);
|
||||
av_freep(&s->avctx->hwaccel_context);
|
||||
|
||||
if (p->video_dev)
|
||||
ID3D11VideoDevice_Release(p->video_dev);
|
||||
|
||||
if (p->video_ctx)
|
||||
ID3D11VideoContext_Release(p->video_ctx);
|
||||
|
||||
destroy_device(s);
|
||||
|
||||
TA_FREEP(&s->hwdec_priv);
|
||||
}
|
||||
|
||||
static int d3d11va_init(struct lavc_ctx *s)
|
||||
{
|
||||
HRESULT hr;
|
||||
struct priv *p = talloc_zero(NULL, struct priv);
|
||||
if (!p)
|
||||
return -1;
|
||||
|
||||
// Unconditionally load Direct3D DLLs, even when using a VO-supplied D3D11
|
||||
// device. This prevents a crash that occurs at least with NVIDIA drivers,
|
||||
// where D3D objects are accessed after ANGLE unloads d3d11.dll.
|
||||
d3d_load_dlls();
|
||||
|
||||
s->hwdec_priv = p;
|
||||
p->log = mp_log_new(s, s->log, "d3d11va");
|
||||
if (s->hwdec->type == HWDEC_D3D11VA_COPY) {
|
||||
mp_check_gpu_memcpy(p->log, NULL);
|
||||
p->sw_pool = talloc_steal(p, mp_image_pool_new(17));
|
||||
}
|
||||
|
||||
p->device = hwdec_devices_load(s->hwdec_devs, s->hwdec->type);
|
||||
if (p->device) {
|
||||
ID3D11Device_AddRef(p->device);
|
||||
ID3D11Device_GetImmediateContext(p->device, &p->device_ctx);
|
||||
if (!p->device_ctx)
|
||||
goto fail;
|
||||
MP_VERBOSE(p, "Using VO-supplied device %p.\n", p->device);
|
||||
} else if (s->hwdec->type == HWDEC_D3D11VA) {
|
||||
MP_ERR(p, "No Direct3D device provided for native d3d11 decoding\n");
|
||||
goto fail;
|
||||
} else {
|
||||
if (!create_device(s, FALSE))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hr = ID3D11DeviceContext_QueryInterface(p->device_ctx,
|
||||
&IID_ID3D11VideoContext,
|
||||
(void **)&p->video_ctx);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get VideoContext interface: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hr = ID3D11Device_QueryInterface(p->device,
|
||||
&IID_ID3D11VideoDevice,
|
||||
(void **)&p->video_dev);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get VideoDevice interface. %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
s->avctx->hwaccel_context = av_d3d11va_alloc_context();
|
||||
if (!s->avctx->hwaccel_context) {
|
||||
MP_ERR(p, "Failed to allocate hwaccel_context\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
d3d11va_uninit(s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int d3d11va_probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
|
||||
const char *codec)
|
||||
{
|
||||
// d3d11va-copy can do without external context; dxva2 requires it.
|
||||
if (hwdec->type != HWDEC_D3D11VA_COPY) {
|
||||
if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_D3D11VA))
|
||||
return HWDEC_ERR_NO_CTX;
|
||||
}
|
||||
return d3d_probe_codec(codec);
|
||||
}
|
||||
|
||||
const struct vd_lavc_hwdec mp_vd_lavc_d3d11va = {
|
||||
.type = HWDEC_D3D11VA,
|
||||
.image_format = IMGFMT_D3D11VA,
|
||||
.probe = d3d11va_probe,
|
||||
.init = d3d11va_init,
|
||||
.uninit = d3d11va_uninit,
|
||||
.init_decoder = d3d11va_init_decoder,
|
||||
.allocate_image = d3d11va_allocate_image,
|
||||
.process_image = d3d11va_update_image_attribs,
|
||||
};
|
||||
|
||||
const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy = {
|
||||
.type = HWDEC_D3D11VA_COPY,
|
||||
.copying = true,
|
||||
.image_format = IMGFMT_D3D11VA,
|
||||
.probe = d3d11va_probe,
|
||||
.init = d3d11va_init,
|
||||
.uninit = d3d11va_uninit,
|
||||
.init_decoder = d3d11va_init_decoder,
|
||||
.allocate_image = d3d11va_allocate_image,
|
||||
.process_image = d3d11va_retrieve_image,
|
||||
.delay_queue = HWDEC_DELAY_QUEUE_COUNT,
|
||||
};
|
||||
|
||||
#else /* !HAVE_D3D_HWACCEL_NEW */
|
||||
|
||||
#include <libavutil/hwcontext.h>
|
||||
#include <libavutil/hwcontext_d3d11va.h>
|
||||
|
||||
|
@ -680,5 +120,3 @@ const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy = {
|
|||
},
|
||||
.delay_queue = HWDEC_DELAY_QUEUE_COUNT,
|
||||
};
|
||||
|
||||
#endif /* else !HAVE_D3D_HWACCEL_NEW */
|
||||
|
|
|
@ -35,528 +35,6 @@
|
|||
|
||||
#include "d3d.h"
|
||||
|
||||
#if !HAVE_D3D_HWACCEL_NEW
|
||||
|
||||
#define ADDITIONAL_SURFACES HWDEC_EXTRA_SURFACES
|
||||
|
||||
struct priv {
|
||||
struct mp_log *log;
|
||||
|
||||
IDirect3D9 *d3d9;
|
||||
IDirect3DDevice9 *device;
|
||||
HANDLE device_handle;
|
||||
IDirect3DDeviceManager9 *device_manager;
|
||||
IDirectXVideoDecoderService *decoder_service;
|
||||
|
||||
struct mp_image_pool *decoder_pool;
|
||||
struct mp_image_pool *sw_pool;
|
||||
int mpfmt_decoded;
|
||||
};
|
||||
|
||||
struct dxva2_surface {
|
||||
IDirectXVideoDecoder *decoder;
|
||||
IDirect3DSurface9 *surface;
|
||||
};
|
||||
|
||||
static void dxva2_release_img(void *arg)
|
||||
{
|
||||
struct dxva2_surface *surface = arg;
|
||||
if (surface->surface)
|
||||
IDirect3DSurface9_Release(surface->surface);
|
||||
|
||||
if (surface->decoder)
|
||||
IDirectXVideoDecoder_Release(surface->decoder);
|
||||
|
||||
talloc_free(surface);
|
||||
}
|
||||
|
||||
static struct mp_image *dxva2_new_ref(IDirectXVideoDecoder *decoder,
|
||||
IDirect3DSurface9 *d3d9_surface,
|
||||
int w, int h)
|
||||
{
|
||||
if (!decoder || !d3d9_surface)
|
||||
return NULL;
|
||||
struct dxva2_surface *surface = talloc_zero(NULL, struct dxva2_surface);
|
||||
|
||||
surface->surface = d3d9_surface;
|
||||
IDirect3DSurface9_AddRef(surface->surface);
|
||||
surface->decoder = decoder;
|
||||
IDirectXVideoDecoder_AddRef(surface->decoder);
|
||||
|
||||
struct mp_image *mpi =
|
||||
mp_image_new_custom_ref(NULL, surface, dxva2_release_img);
|
||||
if (!mpi)
|
||||
abort();
|
||||
|
||||
mp_image_setfmt(mpi, IMGFMT_DXVA2);
|
||||
mp_image_set_size(mpi, w, h);
|
||||
mpi->planes[3] = (void *)surface->surface;
|
||||
return mpi;
|
||||
}
|
||||
|
||||
static struct mp_image *dxva2_allocate_image(struct lavc_ctx *s, int w, int h)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
struct mp_image *img = mp_image_pool_get_no_alloc(p->decoder_pool,
|
||||
IMGFMT_DXVA2, w, h);
|
||||
if (!img)
|
||||
MP_ERR(p, "Failed to allocate additional DXVA2 surface.\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
static struct mp_image *dxva2_retrieve_image(struct lavc_ctx *s,
|
||||
struct mp_image *img)
|
||||
{
|
||||
HRESULT hr;
|
||||
struct priv *p = s->hwdec_priv;
|
||||
IDirect3DSurface9 *surface = img->imgfmt == IMGFMT_DXVA2 ?
|
||||
(IDirect3DSurface9 *)img->planes[3] : NULL;
|
||||
|
||||
if (!surface) {
|
||||
MP_ERR(p, "Failed to get Direct3D surface from mp_image\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
D3DSURFACE_DESC surface_desc;
|
||||
IDirect3DSurface9_GetDesc(surface, &surface_desc);
|
||||
if (surface_desc.Width < img->w || surface_desc.Height < img->h) {
|
||||
MP_ERR(p, "Direct3D11 texture smaller than mp_image dimensions\n");
|
||||
return img;
|
||||
}
|
||||
|
||||
struct mp_image *sw_img = mp_image_pool_get(p->sw_pool,
|
||||
p->mpfmt_decoded,
|
||||
surface_desc.Width,
|
||||
surface_desc.Height);
|
||||
if (!sw_img) {
|
||||
MP_ERR(p, "Failed to get %s surface from CPU pool\n",
|
||||
mp_imgfmt_to_name(p->mpfmt_decoded));
|
||||
return img;
|
||||
}
|
||||
|
||||
D3DLOCKED_RECT lock;
|
||||
hr = IDirect3DSurface9_LockRect(surface, &lock, NULL, D3DLOCK_READONLY);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Unable to lock DXVA2 surface: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
talloc_free(sw_img);
|
||||
return img;
|
||||
}
|
||||
copy_nv12(sw_img, lock.pBits, lock.Pitch, surface_desc.Height);
|
||||
IDirect3DSurface9_UnlockRect(surface);
|
||||
|
||||
mp_image_set_size(sw_img, img->w, img->h);
|
||||
mp_image_copy_attributes(sw_img, img);
|
||||
talloc_free(img);
|
||||
return sw_img;
|
||||
}
|
||||
|
||||
static const struct d3d_decoded_format d3d9_formats[] = {
|
||||
{MKTAG('N','V','1','2'), "NV12", 8, IMGFMT_NV12},
|
||||
{MKTAG('P','0','1','0'), "P010", 10, IMGFMT_P010},
|
||||
{MKTAG('P','0','1','6'), "P016", 16, IMGFMT_P010},
|
||||
};
|
||||
|
||||
static void dump_decoder_info(struct lavc_ctx *s,
|
||||
GUID *device_guids, UINT n_guids)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
MP_VERBOSE(p, "%u decoder devices:\n", (unsigned)n_guids);
|
||||
for (UINT i = 0; i < n_guids; i++) {
|
||||
GUID *guid = &device_guids[i];
|
||||
char *description = d3d_decoder_guid_to_desc(guid);
|
||||
|
||||
D3DFORMAT *formats = NULL;
|
||||
UINT n_formats = 0;
|
||||
HRESULT hr = IDirectXVideoDecoderService_GetDecoderRenderTargets(
|
||||
p->decoder_service, guid, &n_formats, &formats);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to get render targets for decoder %s:%s\n",
|
||||
description, mp_HRESULT_to_str(hr));
|
||||
}
|
||||
|
||||
char fmts[256] = {0};
|
||||
for (UINT j = 0; j < n_formats; j++) {
|
||||
mp_snprintf_cat(fmts, sizeof(fmts),
|
||||
" %s", mp_tag_str(formats[j]));
|
||||
}
|
||||
CoTaskMemFree(formats);
|
||||
|
||||
MP_VERBOSE(p, "%s %s\n", description, fmts);
|
||||
}
|
||||
}
|
||||
|
||||
static bool dxva2_format_supported(struct lavc_ctx *s, const GUID *guid,
|
||||
const struct d3d_decoded_format *format)
|
||||
{
|
||||
bool ret = false;
|
||||
struct priv *p = s->hwdec_priv;
|
||||
D3DFORMAT *formats = NULL;
|
||||
UINT n_formats = 0;
|
||||
HRESULT hr = IDirectXVideoDecoderService_GetDecoderRenderTargets(
|
||||
p->decoder_service, guid, &n_formats, &formats);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Callback failed to get render targets for decoder %s: %s",
|
||||
d3d_decoder_guid_to_desc(guid), mp_HRESULT_to_str(hr));
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_formats; i++) {
|
||||
ret = formats[i] == format->dxfmt;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
CoTaskMemFree(formats);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dxva2_init_decoder(struct lavc_ctx *s, int w, int h)
|
||||
{
|
||||
HRESULT hr;
|
||||
int ret = -1;
|
||||
struct priv *p = s->hwdec_priv;
|
||||
TA_FREEP(&p->decoder_pool);
|
||||
|
||||
int n_surfaces = hwdec_get_max_refs(s) + ADDITIONAL_SURFACES;
|
||||
IDirect3DSurface9 **surfaces = NULL;
|
||||
IDirectXVideoDecoder *decoder = NULL;
|
||||
void *tmp = talloc_new(NULL);
|
||||
|
||||
UINT n_guids;
|
||||
GUID *device_guids;
|
||||
hr = IDirectXVideoDecoderService_GetDecoderDeviceGuids(
|
||||
p->decoder_service, &n_guids, &device_guids);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to retrieve decoder device GUIDs: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
dump_decoder_info(s, device_guids, n_guids);
|
||||
|
||||
struct d3d_decoder_fmt fmt =
|
||||
d3d_select_decoder_mode(s, device_guids, n_guids,
|
||||
d3d9_formats, MP_ARRAY_SIZE(d3d9_formats),
|
||||
dxva2_format_supported);
|
||||
CoTaskMemFree(device_guids);
|
||||
if (!fmt.format) {
|
||||
MP_ERR(p, "Failed to find a suitable decoder\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
p->mpfmt_decoded = fmt.format->mpfmt;
|
||||
struct mp_image_pool *decoder_pool =
|
||||
talloc_steal(tmp, mp_image_pool_new(n_surfaces));
|
||||
DXVA2_ConfigPictureDecode *decoder_config =
|
||||
talloc_zero(decoder_pool, DXVA2_ConfigPictureDecode);
|
||||
|
||||
int w_align = w, h_align = h;
|
||||
d3d_surface_align(s, &w_align, &h_align);
|
||||
DXVA2_VideoDesc video_desc ={
|
||||
.SampleWidth = w,
|
||||
.SampleHeight = h,
|
||||
.Format = fmt.format->dxfmt,
|
||||
};
|
||||
UINT n_configs = 0;
|
||||
DXVA2_ConfigPictureDecode *configs = NULL;
|
||||
hr = IDirectXVideoDecoderService_GetDecoderConfigurations(
|
||||
p->decoder_service, fmt.guid, &video_desc, NULL,
|
||||
&n_configs, &configs);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Unable to retrieve decoder configurations: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
unsigned max_score = 0;
|
||||
for (UINT i = 0; i < n_configs; i++) {
|
||||
unsigned score = d3d_decoder_config_score(
|
||||
s, &configs[i].guidConfigBitstreamEncryption,
|
||||
configs[i].ConfigBitstreamRaw);
|
||||
if (score > max_score) {
|
||||
max_score = score;
|
||||
*decoder_config = configs[i];
|
||||
}
|
||||
}
|
||||
CoTaskMemFree(configs);
|
||||
if (!max_score) {
|
||||
MP_ERR(p, "Failed to find a suitable decoder configuration\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
surfaces = talloc_zero_array(decoder_pool, IDirect3DSurface9*, n_surfaces);
|
||||
hr = IDirectXVideoDecoderService_CreateSurface(
|
||||
p->decoder_service,
|
||||
w_align, h_align,
|
||||
n_surfaces - 1, fmt.format->dxfmt, D3DPOOL_DEFAULT, 0,
|
||||
DXVA2_VideoDecoderRenderTarget, surfaces, NULL);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create %d video surfaces: %s\n",
|
||||
n_surfaces, mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
hr = IDirectXVideoDecoderService_CreateVideoDecoder(
|
||||
p->decoder_service, fmt.guid, &video_desc, decoder_config,
|
||||
surfaces, n_surfaces, &decoder);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create DXVA2 video decoder: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto done;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_surfaces; i++) {
|
||||
struct mp_image *img = dxva2_new_ref(decoder, surfaces[i], w, h);
|
||||
if (!img) {
|
||||
MP_ERR(p, "Failed to create DXVA2 image\n");
|
||||
goto done;
|
||||
}
|
||||
mp_image_pool_add(decoder_pool, img); // transferred to pool
|
||||
}
|
||||
|
||||
// Pass required information on to ffmpeg.
|
||||
struct dxva_context *dxva_ctx = s->avctx->hwaccel_context;
|
||||
dxva_ctx->cfg = decoder_config;
|
||||
dxva_ctx->decoder = decoder;
|
||||
dxva_ctx->surface_count = n_surfaces;
|
||||
dxva_ctx->surface = surfaces;
|
||||
dxva_ctx->workaround = is_clearvideo(fmt.guid) ?
|
||||
FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO : 0;
|
||||
|
||||
p->decoder_pool = talloc_steal(NULL, decoder_pool);
|
||||
ret = 0;
|
||||
done:
|
||||
// On success, `p->decoder_pool` mp_images still hold refs to `surfaces` and
|
||||
// `decoder`, so the pointers in the ffmpeg `dxva_context` strcture remain
|
||||
// valid for the lifetime of the pool.
|
||||
if (surfaces) {
|
||||
for (int i = 0; i < n_surfaces; i++)
|
||||
IDirect3DSurface9_Release(surfaces[i]);
|
||||
}
|
||||
if (decoder)
|
||||
IDirectXVideoDecoder_Release(decoder);
|
||||
|
||||
talloc_free(tmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void destroy_device(struct lavc_ctx *s)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
|
||||
if (p->device)
|
||||
IDirect3DDevice9_Release(p->device);
|
||||
|
||||
if (p->d3d9)
|
||||
IDirect3D9_Release(p->d3d9);
|
||||
}
|
||||
|
||||
static bool create_device(struct lavc_ctx *s)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
|
||||
d3d_load_dlls();
|
||||
if (!d3d9_dll) {
|
||||
MP_ERR(p, "Failed to load D3D9 library\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
HRESULT (WINAPI *Direct3DCreate9Ex)(UINT, IDirect3D9Ex **) =
|
||||
(void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex");
|
||||
if (!Direct3DCreate9Ex) {
|
||||
MP_ERR(p, "Failed to locate Direct3DCreate9Ex\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
IDirect3D9Ex *d3d9ex = NULL;
|
||||
HRESULT hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create IDirect3D9Ex object\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT adapter = D3DADAPTER_DEFAULT;
|
||||
D3DDISPLAYMODEEX modeex = {0};
|
||||
IDirect3D9Ex_GetAdapterDisplayModeEx(d3d9ex, adapter, &modeex, NULL);
|
||||
|
||||
D3DPRESENT_PARAMETERS present_params = {
|
||||
.Windowed = TRUE,
|
||||
.BackBufferWidth = 640,
|
||||
.BackBufferHeight = 480,
|
||||
.BackBufferCount = 0,
|
||||
.BackBufferFormat = modeex.Format,
|
||||
.SwapEffect = D3DSWAPEFFECT_DISCARD,
|
||||
.Flags = D3DPRESENTFLAG_VIDEO,
|
||||
};
|
||||
|
||||
IDirect3DDevice9Ex *exdev = NULL;
|
||||
hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, adapter,
|
||||
D3DDEVTYPE_HAL,
|
||||
GetShellWindow(),
|
||||
D3DCREATE_SOFTWARE_VERTEXPROCESSING |
|
||||
D3DCREATE_MULTITHREADED |
|
||||
D3DCREATE_FPU_PRESERVE,
|
||||
&present_params,
|
||||
NULL,
|
||||
&exdev);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create Direct3D device: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
IDirect3D9_Release(d3d9ex);
|
||||
return false;
|
||||
}
|
||||
|
||||
p->d3d9 = (IDirect3D9 *)d3d9ex;
|
||||
p->device = (IDirect3DDevice9 *)exdev;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void dxva2_uninit(struct lavc_ctx *s)
|
||||
{
|
||||
struct priv *p = s->hwdec_priv;
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
av_freep(&s->avctx->hwaccel_context);
|
||||
talloc_free(p->decoder_pool);
|
||||
|
||||
if (p->decoder_service)
|
||||
IDirectXVideoDecoderService_Release(p->decoder_service);
|
||||
|
||||
if (p->device_manager && p->device_handle != INVALID_HANDLE_VALUE)
|
||||
IDirect3DDeviceManager9_CloseDeviceHandle(p->device_manager, p->device_handle);
|
||||
|
||||
if (p->device_manager)
|
||||
IDirect3DDeviceManager9_Release(p->device_manager);
|
||||
|
||||
destroy_device(s);
|
||||
|
||||
TA_FREEP(&s->hwdec_priv);
|
||||
}
|
||||
|
||||
static int dxva2_init(struct lavc_ctx *s)
|
||||
{
|
||||
HRESULT hr;
|
||||
struct priv *p = talloc_zero(NULL, struct priv);
|
||||
if (!p)
|
||||
return -1;
|
||||
|
||||
s->hwdec_priv = p;
|
||||
p->device_handle = INVALID_HANDLE_VALUE;
|
||||
p->log = mp_log_new(s, s->log, "dxva2");
|
||||
|
||||
if (s->hwdec->type == HWDEC_DXVA2_COPY) {
|
||||
mp_check_gpu_memcpy(p->log, NULL);
|
||||
p->sw_pool = talloc_steal(p, mp_image_pool_new(17));
|
||||
}
|
||||
|
||||
p->device = hwdec_devices_load(s->hwdec_devs, s->hwdec->type);
|
||||
if (p->device) {
|
||||
IDirect3D9_AddRef(p->device);
|
||||
MP_VERBOSE(p, "Using VO-supplied device %p.\n", p->device);
|
||||
} else if (s->hwdec->type == HWDEC_DXVA2) {
|
||||
MP_ERR(p, "No Direct3D device provided for native dxva2 decoding\n");
|
||||
goto fail;
|
||||
} else {
|
||||
if (!create_device(s))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
d3d_load_dlls();
|
||||
if (!dxva2_dll) {
|
||||
MP_ERR(p, "Failed to load DXVA2 library\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
HRESULT (WINAPI *CreateDeviceManager9)(UINT *, IDirect3DDeviceManager9 **) =
|
||||
(void *)GetProcAddress(dxva2_dll, "DXVA2CreateDirect3DDeviceManager9");
|
||||
if (!CreateDeviceManager9) {
|
||||
MP_ERR(p, "Failed to locate DXVA2CreateDirect3DDeviceManager9\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
unsigned reset_token = 0;
|
||||
hr = CreateDeviceManager9(&reset_token, &p->device_manager);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create Direct3D device manager: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hr = IDirect3DDeviceManager9_ResetDevice(p->device_manager,
|
||||
p->device, reset_token);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to bind Direct3D device to device manager: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hr = IDirect3DDeviceManager9_OpenDeviceHandle(p->device_manager,
|
||||
&p->device_handle);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to open device handle: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hr = IDirect3DDeviceManager9_GetVideoService(
|
||||
p->device_manager, p->device_handle, &IID_IDirectXVideoDecoderService,
|
||||
(void **)&p->decoder_service);
|
||||
if (FAILED(hr)) {
|
||||
MP_ERR(p, "Failed to create IDirectXVideoDecoderService: %s\n",
|
||||
mp_HRESULT_to_str(hr));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
s->avctx->hwaccel_context = av_mallocz(sizeof(struct dxva_context));
|
||||
if (!s->avctx->hwaccel_context)
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
dxva2_uninit(s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int dxva2_probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
|
||||
const char *codec)
|
||||
{
|
||||
// dxva2-copy can do without external context; dxva2 requires it.
|
||||
if (hwdec->type == HWDEC_DXVA2) {
|
||||
if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_DXVA2))
|
||||
return HWDEC_ERR_NO_CTX;
|
||||
} else {
|
||||
hwdec_devices_load(ctx->hwdec_devs, HWDEC_DXVA2_COPY);
|
||||
}
|
||||
return d3d_probe_codec(codec);
|
||||
}
|
||||
|
||||
const struct vd_lavc_hwdec mp_vd_lavc_dxva2 = {
|
||||
.type = HWDEC_DXVA2,
|
||||
.image_format = IMGFMT_DXVA2,
|
||||
.probe = dxva2_probe,
|
||||
.init = dxva2_init,
|
||||
.uninit = dxva2_uninit,
|
||||
.init_decoder = dxva2_init_decoder,
|
||||
.allocate_image = dxva2_allocate_image,
|
||||
};
|
||||
|
||||
const struct vd_lavc_hwdec mp_vd_lavc_dxva2_copy = {
|
||||
.type = HWDEC_DXVA2_COPY,
|
||||
.copying = true,
|
||||
.image_format = IMGFMT_DXVA2,
|
||||
.probe = dxva2_probe,
|
||||
.init = dxva2_init,
|
||||
.uninit = dxva2_uninit,
|
||||
.init_decoder = dxva2_init_decoder,
|
||||
.allocate_image = dxva2_allocate_image,
|
||||
.process_image = dxva2_retrieve_image,
|
||||
.delay_queue = HWDEC_DELAY_QUEUE_COUNT,
|
||||
};
|
||||
|
||||
#else /* !HAVE_D3D_HWACCEL_NEW */
|
||||
|
||||
#include <libavutil/hwcontext.h>
|
||||
#include <libavutil/hwcontext_dxva2.h>
|
||||
|
||||
|
@ -720,5 +198,3 @@ const struct vd_lavc_hwdec mp_vd_lavc_dxva2_copy = {
|
|||
},
|
||||
.delay_queue = HWDEC_DELAY_QUEUE_COUNT,
|
||||
};
|
||||
|
||||
#endif /* else #if !HAVE_D3D_HWACCEL_NEW */
|
||||
|
|
|
@ -64,12 +64,8 @@ static const struct {
|
|||
{IMGFMT_VAAPI, AV_PIX_FMT_VAAPI_VLD},
|
||||
{IMGFMT_DXVA2, AV_PIX_FMT_DXVA2_VLD},
|
||||
#if HAVE_D3D_HWACCEL
|
||||
#if HAVE_D3D_HWACCEL_NEW
|
||||
{IMGFMT_D3D11VA, AV_PIX_FMT_D3D11},
|
||||
{IMGFMT_D3D11NV12, AV_PIX_FMT_D3D11},
|
||||
#else
|
||||
{IMGFMT_D3D11VA, AV_PIX_FMT_D3D11VA_VLD},
|
||||
#endif
|
||||
#endif
|
||||
{IMGFMT_MMAL, AV_PIX_FMT_MMAL},
|
||||
#if HAVE_CUDA_HWACCEL
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2011-2014 Hendrik Leppkes
|
||||
* http://www.1f0.de
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Taken from the QuickSync decoder by Eric Gur
|
||||
*/
|
||||
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.1")
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gpu_memcpy.h"
|
||||
|
||||
// gpu_memcpy is a memcpy style function that copied data very fast from a
|
||||
// GPU tiled memory (write back)
|
||||
// Performance tip: page offset (12 lsb) of both addresses should be different
|
||||
// optimally use a 2K offset between them.
|
||||
// memcpy-style copy that reads the source with SSE4.1 streaming loads
// (MOVNTDQA) and writes the destination with aligned stores.
//
// d, s:  destination and source; must not overlap (restrict).
// size:  number of bytes to copy.
// Returns d, or NULL if either pointer is NULL. If either pointer is not
// 16-byte aligned, the copy is done with plain memcpy() instead.
void *gpu_memcpy(void *restrict d, const void *restrict s, size_t size)
{
    // Number of 16-byte vectors moved per iteration of the unrolled loop.
    // This is tied to the same condition that selects the unrolled body below,
    // so the stride and the number of loads/stores can never disagree.
#ifdef __x86_64__
    static const size_t regsInLoop = 16;
#else
    static const size_t regsInLoop = 8;
#endif

    if (d == NULL || s == NULL)
        return NULL;

    // If memory is not aligned, use memcpy
    bool isAligned = (((size_t)(s) | (size_t)(d)) & 0xF) == 0;
    if (!isAligned)
        return memcpy(d, s, size);

    __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
#ifdef __x86_64__
    __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
#endif

    // Bytes left over after the unrolled loop (128 or 256 bytes per loop).
    size_t remainder = size & (regsInLoop * sizeof(xmm0) - 1);
    size_t end = 0;

    __m128i *pTrg = (__m128i *)d;
    __m128i *pTrgEnd = pTrg + ((size - remainder) >> 4);
    __m128i *pSrc = (__m128i *)s;

    // Make sure source is synced - doesn't hurt if not needed.
    _mm_sfence();

    while (pTrg < pTrgEnd) {
        // _mm_stream_load_si128 emits the SSE4.1 instruction MOVNTDQA.
        xmm0 = _mm_stream_load_si128(pSrc);
        xmm1 = _mm_stream_load_si128(pSrc + 1);
        xmm2 = _mm_stream_load_si128(pSrc + 2);
        xmm3 = _mm_stream_load_si128(pSrc + 3);
        xmm4 = _mm_stream_load_si128(pSrc + 4);
        xmm5 = _mm_stream_load_si128(pSrc + 5);
        xmm6 = _mm_stream_load_si128(pSrc + 6);
        xmm7 = _mm_stream_load_si128(pSrc + 7);
#ifdef __x86_64__ // Use all 16 xmm registers
        xmm8 = _mm_stream_load_si128(pSrc + 8);
        xmm9 = _mm_stream_load_si128(pSrc + 9);
        xmm10 = _mm_stream_load_si128(pSrc + 10);
        xmm11 = _mm_stream_load_si128(pSrc + 11);
        xmm12 = _mm_stream_load_si128(pSrc + 12);
        xmm13 = _mm_stream_load_si128(pSrc + 13);
        xmm14 = _mm_stream_load_si128(pSrc + 14);
        xmm15 = _mm_stream_load_si128(pSrc + 15);
#endif
        pSrc += regsInLoop;
        // _mm_store_si128 emits the SSE2 instruction MOVDQA (aligned store)
        _mm_store_si128(pTrg     , xmm0);
        _mm_store_si128(pTrg +  1, xmm1);
        _mm_store_si128(pTrg +  2, xmm2);
        _mm_store_si128(pTrg +  3, xmm3);
        _mm_store_si128(pTrg +  4, xmm4);
        _mm_store_si128(pTrg +  5, xmm5);
        _mm_store_si128(pTrg +  6, xmm6);
        _mm_store_si128(pTrg +  7, xmm7);
#ifdef __x86_64__ // Use all 16 xmm registers
        _mm_store_si128(pTrg +  8, xmm8);
        _mm_store_si128(pTrg +  9, xmm9);
        _mm_store_si128(pTrg + 10, xmm10);
        _mm_store_si128(pTrg + 11, xmm11);
        _mm_store_si128(pTrg + 12, xmm12);
        _mm_store_si128(pTrg + 13, xmm13);
        _mm_store_si128(pTrg + 14, xmm14);
        _mm_store_si128(pTrg + 15, xmm15);
#endif
        pTrg += regsInLoop;
    }

    // Copy in 16 byte steps
    if (remainder >= 16) {
        end = remainder >> 4;
        remainder &= 15;
        for (size_t i = 0; i < end; ++i)
            pTrg[i] = _mm_stream_load_si128(pSrc + i);
    }

    // Copy last bytes - shouldn't happen as strides are modulo 16.
    // Only `remainder` bytes of the source are valid here, so they are copied
    // bytewise instead of loading a whole 16-byte vector past the end.
    if (remainder)
        memcpy(pTrg + end, pSrc + end, remainder);

    return d;
}
|
|
@ -1,8 +0,0 @@
|
|||
#ifndef GPU_MEMCPY_SSE4_H_
|
||||
#define GPU_MEMCPY_SSE4_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
void *gpu_memcpy(void *restrict d, const void *restrict s, size_t size);
|
||||
|
||||
#endif
|
|
@ -33,7 +33,6 @@
|
|||
#include "mp_image.h"
|
||||
#include "sws_utils.h"
|
||||
#include "fmt-conversion.h"
|
||||
#include "gpu_memcpy.h"
|
||||
|
||||
#include "video/filter/vf.h"
|
||||
|
||||
|
@ -478,37 +477,6 @@ void mp_image_copy(struct mp_image *dst, struct mp_image *src)
|
|||
mp_image_copy_cb(dst, src, memcpy);
|
||||
}
|
||||
|
||||
// Copy src into dst like mp_image_copy(), but use gpu_memcpy() as the copy
// routine when SSE4 intrinsics are compiled in and the CPU reports SSE4.
void mp_image_copy_gpu(struct mp_image *dst, struct mp_image *src)
{
#if HAVE_SSE4_INTRINSICS
    const bool cpu_has_sse4 = (av_get_cpu_flags() & AV_CPU_FLAG_SSE4) != 0;
    if (cpu_has_sse4) {
        mp_image_copy_cb(dst, src, gpu_memcpy);
        return;
    }
#endif
    // Fallback: regular copy.
    mp_image_copy(dst, src);
}
|
||||
|
||||
// Helper, only for outputting some log info.
|
||||
// Log which memcpy variant mp_image_copy_gpu() will use.
// If `once` is non-NULL it acts as a "message already printed" flag: nothing
// is logged when it is already set, and it is set before logging otherwise.
void mp_check_gpu_memcpy(struct mp_log *log, bool *once)
{
    if (once != NULL) {
        if (*once)
            return;
        *once = true;
    }

#if HAVE_SSE4_INTRINSICS
    bool sse4_available = av_get_cpu_flags() & AV_CPU_FLAG_SSE4;
#else
    bool sse4_available = false;
#endif

    if (!sse4_available) {
        mp_warn(log, "Using fallback memcpy (slow)\n");
        return;
    }
    mp_verbose(log, "Using SSE4 memcpy\n");
}
|
||||
|
||||
static enum mp_csp mp_image_params_get_forced_csp(struct mp_image_params *params)
|
||||
{
|
||||
int imgfmt = params->hw_subfmt ? params->hw_subfmt : params->imgfmt;
|
||||
|
|
|
@ -127,7 +127,6 @@ struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align
|
|||
|
||||
struct mp_image *mp_image_alloc(int fmt, int w, int h);
|
||||
void mp_image_copy(struct mp_image *dmpi, struct mp_image *mpi);
|
||||
void mp_image_copy_gpu(struct mp_image *dst, struct mp_image *src);
|
||||
void mp_image_copy_attributes(struct mp_image *dmpi, struct mp_image *mpi);
|
||||
struct mp_image *mp_image_new_copy(struct mp_image *img);
|
||||
struct mp_image *mp_image_new_ref(struct mp_image *img);
|
||||
|
@ -183,6 +182,4 @@ void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height,
|
|||
void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride);
|
||||
void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride);
|
||||
|
||||
void mp_check_gpu_memcpy(struct mp_log *log, bool *once);
|
||||
|
||||
#endif /* MPLAYER_MP_IMAGE_H */
|
||||
|
|
|
@ -183,7 +183,6 @@ static int init(struct ra_hwdec *hw)
|
|||
.type = HWDEC_D3D11VA,
|
||||
.driver_name = hw->driver->name,
|
||||
.ctx = p->d3d11_device,
|
||||
.download_image = d3d11_download_image,
|
||||
.av_device_ref = d3d11_wrap_device_ref(p->d3d11_device),
|
||||
};
|
||||
hwdec_devices_add(hw->devs, &p->hwctx);
|
||||
|
|
|
@ -508,9 +508,7 @@ static struct mp_image *try_download(struct va_surface *p, struct mp_image *src,
|
|||
mp_image_set_size(&tmp, src->w, src->h); // copy only visible part
|
||||
dst = mp_image_pool_get(pool, tmp.imgfmt, tmp.w, tmp.h);
|
||||
if (dst) {
|
||||
mp_check_gpu_memcpy(p->ctx->log, &p->ctx->gpu_memcpy_message);
|
||||
|
||||
mp_image_copy_gpu(dst, &tmp);
|
||||
mp_image_copy(dst, &tmp);
|
||||
mp_image_copy_attributes(dst, src);
|
||||
}
|
||||
va_image_unmap(p->ctx, image);
|
||||
|
|
12
wscript
12
wscript
|
@ -873,12 +873,8 @@ hwaccel_features = [
|
|||
}, {
|
||||
# (conflated with ANGLE for easier deps)
|
||||
'name': '--d3d-hwaccel',
|
||||
'desc': 'D3D11VA hwaccel (plus ANGLE)',
|
||||
'desc': 'D3D11VA hwaccel (new API, plus ANGLE)',
|
||||
'deps': 'os-win32 && egl-angle',
|
||||
'func': check_true,
|
||||
}, {
|
||||
'name': '--d3d-hwaccel-new',
|
||||
'desc': 'D3D11VA hwaccel (new API)',
|
||||
'func': check_statement('libavcodec/version.h',
|
||||
'int x[(LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(58, 4, 0) && '
|
||||
' LIBAVCODEC_VERSION_MICRO < 100) ||'
|
||||
|
@ -886,7 +882,6 @@ hwaccel_features = [
|
|||
' LIBAVCODEC_VERSION_MICRO >= 100)'
|
||||
' ? 1 : -1]',
|
||||
use='libav'),
|
||||
'deps': 'd3d-hwaccel',
|
||||
}, {
|
||||
'name': '--d3d9-hwaccel',
|
||||
'desc': 'DXVA2 hwaccel (plus ANGLE)',
|
||||
|
@ -904,11 +899,6 @@ hwaccel_features = [
|
|||
'deps': 'gl',
|
||||
'func': check_cc(fragment=load_fragment('cuda.c'),
|
||||
use='libav'),
|
||||
}, {
|
||||
'name': 'sse4-intrinsics',
|
||||
'desc': 'GCC SSE4 intrinsics for GPU memcpy',
|
||||
'deps': 'd3d-hwaccel && !d3d-hwaccel-new && gpl',
|
||||
'func': check_cc(fragment=load_fragment('sse.c')),
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
@ -335,7 +335,6 @@ def build(ctx):
|
|||
## Video
|
||||
( "video/csputils.c" ),
|
||||
( "video/fmt-conversion.c" ),
|
||||
( "video/gpu_memcpy.c", "sse4-intrinsics" ),
|
||||
( "video/image_loader.c" ),
|
||||
( "video/image_writer.c" ),
|
||||
( "video/img_format.c" ),
|
||||
|
|
Loading…
Reference in New Issue