mpv/video/decode/vaapi.c

518 lines
14 KiB
C

/*
* This file is part of mpv.
*
* With some chunks from original MPlayer VAAPI patch:
* Copyright (C) 2008-2009 Splitted-Desktop Systems
*
* mpv is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <assert.h>
#include <libavcodec/avcodec.h>
#include <libavcodec/vaapi.h>
#include <libavutil/common.h>
#include "config.h"
#include "lavc.h"
#include "common/common.h"
#include "common/av_common.h"
#include "video/fmt-conversion.h"
#include "video/vaapi.h"
#include "video/mp_image_pool.h"
#include "video/hwdec.h"
#include "video/filter/vf.h"
/*
 * The VAAPI decoder can work only with surfaces passed to the decoder at
 * creation time. This means all surfaces have to be created in advance.
 * So, in addition to the maximum number of reference frames, we need
 * surfaces for all kinds of buffering between decoder and VO.
 * Note that redundant additional surfaces might also allow for some
 * buffering (i.e. not trying to reuse a surface while it's busy).
 *
 * The macro name is misspelled ("ADDTIONAL"); it is kept as-is because
 * init_decoder() refers to it by this exact spelling.
 */
#define ADDTIONAL_SURFACES MPMAX(6, HWDEC_DELAY_QUEUE_COUNT)
// Upper bound on the number of surfaces: it sizes the surface pool and the
// fixed-size surface arrays, and init_decoder() fails if it is exceeded.
#define MAX_SURFACES 25
// Private decoder state; init() stores it in lavc_ctx.hwdec_priv.
// NOTE: `log` has to stay the first member, because probe_copy() initializes
// a temporary instance positionally with {mp_null_log}.
struct priv {
// Log handle; init() creates it as a "vaapi" child of the decoder's log.
struct mp_log *log;
// VAAPI context: taken from the loaded hwdec device in direct mode, or
// created by create_va_dummy_ctx() otherwise.
struct mp_vaapi_ctx *ctx;
// VA display passed to the va*() calls in this file.
VADisplay display;
// Backend that supplied the private display; left NULL when the context was
// not created by create_va_dummy_ctx().
const struct va_native_display *native_display_fns;
// Native handle owned by the backend (the X connection for the X11 backend).
void *native_display;
// libavcodec shared struct
struct vaapi_context *va_context;
// Storage that va_context points at (wired up in init()).
struct vaapi_context va_context_storage;
// Pool of VAAPI surfaces used for decoding.
struct mp_image_pool *pool;
// VA render target format; init() sets it to VA_RT_FORMAT_YUV420.
int rt_format;
// Pool handed to va_surface_download() by copy_image().
struct mp_image_pool *sw_pool;
};
// Hooks of one native display backend that can supply a VADisplay for the
// private (non-VO) VAAPI context.
struct va_native_display {
    // Releases whatever create() opened.
    void (*destroy)(struct priv *p);
    // Attempts to open a native display; create_va_dummy_ctx() treats a
    // non-NULL p->display afterwards as success.
    void (*create)(struct priv *p);
};
#if HAVE_VAAPI_X11
#include <X11/Xlib.h>
#include <va/va_x11.h>
// Closes the private X11 connection, if one is open, and clears the handle.
static void x11_destroy(struct priv *p)
{
    if (!p->native_display)
        return; // nothing was opened

    XCloseDisplay(p->native_display);
    p->native_display = NULL;
}
// Opens a new connection to the default X display and asks libva for a
// VADisplay on top of it. If libva returns no display, the X connection is
// closed again; p->display is only set when vaGetDisplay() was called.
static void x11_create(struct priv *p)
{
    p->native_display = XOpenDisplay(NULL);
    if (p->native_display) {
        p->display = vaGetDisplay(p->native_display);
        if (!p->display)
            x11_destroy(p);
    }
}
// X11 implementation of the native display hooks.
static const struct va_native_display disp_x11 = {
    .destroy = x11_destroy,
    .create  = x11_create,
};
#endif
// NULL-terminated list of the compiled-in native display backends.
// create_va_dummy_ctx() tries them in this order until one yields a display.
static const struct va_native_display *const native_displays[] = {
#if HAVE_VAAPI_X11
&disp_x11,
#endif
NULL
};
// HEVC table entries are compiled in when VA_CHECK_VERSION(0, 38, 0) holds.
#define HAS_HEVC VA_CHECK_VERSION(0, 38, 0)
// VP9 entries need VA_CHECK_VERSION(0, 38, 1) and a libavcodec that defines
// FF_PROFILE_VP9_0. The result is computed once, here, as a plain 0/1
// constant: having `defined` show up through macro expansion inside a later
// #if is undefined behavior in C, so HAS_VP9 must not expand to it.
#if defined(VA_CHECK_VERSION) && defined(FF_PROFILE_VP9_0)
#if VA_CHECK_VERSION(0, 38, 1)
#define HAS_VP9 1
#endif
#endif
#ifndef HAS_VP9
#define HAS_VP9 0
#endif
// Builds one table row: libavcodec codec ID, libavcodec profile, and the
// VAProfile used to decode that combination.
#define PE(codec, lavc_profile, va_profile)  \
    {AV_CODEC_ID_ ## codec,                  \
     FF_PROFILE_ ## lavc_profile,            \
     VAProfile ## va_profile}

// Codec/profile combinations this decoder can map to a VAProfile.
// Terminated by an all-zero entry.
static const struct hwdec_profile_entry profiles[] = {
    // MPEG-2
    PE(MPEG2VIDEO,  MPEG2_MAIN,             MPEG2Main),
    PE(MPEG2VIDEO,  MPEG2_SIMPLE,           MPEG2Simple),
    // MPEG-4 part 2
    PE(MPEG4,       MPEG4_ADVANCED_SIMPLE,  MPEG4AdvancedSimple),
    PE(MPEG4,       MPEG4_MAIN,             MPEG4Main),
    PE(MPEG4,       MPEG4_SIMPLE,           MPEG4Simple),
    // H.264
    PE(H264,        H264_HIGH,              H264High),
    PE(H264,        H264_MAIN,              H264Main),
    PE(H264,        H264_BASELINE,          H264Baseline),
    // VC-1
    PE(VC1,         VC1_ADVANCED,           VC1Advanced),
    PE(VC1,         VC1_MAIN,               VC1Main),
    PE(VC1,         VC1_SIMPLE,             VC1Simple),
    // WMV3 (uses the VC-1 profiles)
    PE(WMV3,        VC1_ADVANCED,           VC1Advanced),
    PE(WMV3,        VC1_MAIN,               VC1Main),
    PE(WMV3,        VC1_SIMPLE,             VC1Simple),
#if HAS_HEVC
    PE(HEVC,        HEVC_MAIN,              HEVCMain),
    PE(HEVC,        HEVC_MAIN_10,           HEVCMain10),
#endif
#if HAS_VP9
    PE(VP9,         VP9_0,                  VP9Profile0),
#endif
    {0}
};
// Returns the symbolic name ("VAProfile...") of a VA profile for log output,
// or "<unknown>" for values this file has no name for.
static const char *str_va_profile(VAProfile profile)
{
#define NAMED(id) {VAProfile##id, "VAProfile" #id}
    static const struct {
        VAProfile value;
        const char *name;
    } known[] = {
        NAMED(MPEG2Simple),
        NAMED(MPEG2Main),
        NAMED(MPEG4Simple),
        NAMED(MPEG4AdvancedSimple),
        NAMED(MPEG4Main),
        NAMED(H264Baseline),
        NAMED(H264Main),
        NAMED(H264High),
        NAMED(VC1Simple),
        NAMED(VC1Main),
        NAMED(VC1Advanced),
#if HAS_HEVC
        NAMED(HEVCMain),
        NAMED(HEVCMain10),
#endif
#if HAS_VP9
        NAMED(VP9Profile0),
#endif
    };
#undef NAMED

    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
        if (known[i].value == profile)
            return known[i].name;
    }
    return "<unknown>";
}
// Looks up the VA entrypoint belonging to the given image format in the list
// of entrypoints reported by the driver.
// Returns the entrypoint value if it is present in ep[0..num_ep), else -1.
static int find_entrypoint(int format, VAEntrypoint *ep, int num_ep)
{
    // Only IMGFMT_VAAPI has an entrypoint associated with it.
    if (format != IMGFMT_VAAPI)
        return -1;

    const int wanted = VAEntrypointVLD;
    int idx = 0;
    while (idx < num_ep) {
        if (ep[idx] == wanted)
            return wanted;
        idx++;
    }
    return -1;
}
// The decoder may only use surfaces that were handed to it when it was
// created. To get their IDs, `num` surfaces are pulled out of the pool and
// held simultaneously while out_surfaces[] is filled in. Afterwards all of
// them are released again, so the pool can hand the same surfaces back out
// for actual decoding.
// Returns false if num exceeds MAX_SURFACES or a surface could not be
// obtained; out_surfaces[] is then only partially filled.
static bool preallocate_surfaces(struct lavc_ctx *ctx, int num, int w, int h,
                                 VASurfaceID out_surfaces[MAX_SURFACES])
{
    struct priv *p = ctx->hwdec_priv;
    struct mp_image *held[MAX_SURFACES] = {0};

    if (num > MAX_SURFACES)
        return false;

    bool ok = true;
    int n = 0;
    while (ok && n < num) {
        held[n] = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h);
        out_surfaces[n] = va_surface_id(held[n]);
        if (out_surfaces[n] == VA_INVALID_ID) {
            MP_ERR(p, "Could not allocate surfaces.\n");
            ok = false;
        }
        n++;
    }

    // Entries that were never filled are still NULL from the initializer.
    for (int k = 0; k < num; k++)
        talloc_free(held[k]);

    return ok;
}
// Tears down the VA decoding context and config (if they exist) under the VA
// lock, marks both IDs as invalid, and then clears the surface pool.
static void destroy_decoder(struct lavc_ctx *ctx)
{
    struct priv *priv = ctx->hwdec_priv;

    va_lock(priv->ctx);

    // The context goes first, then the config it was created from.
    if (priv->va_context->context_id != VA_INVALID_ID)
        vaDestroyContext(priv->display, priv->va_context->context_id);
    priv->va_context->context_id = VA_INVALID_ID;

    if (priv->va_context->config_id != VA_INVALID_ID)
        vaDestroyConfig(priv->display, priv->va_context->config_id);
    priv->va_context->config_id = VA_INVALID_ID;

    va_unlock(priv->ctx);

    mp_image_pool_clear(priv->pool);
}
// Returns whether profile `p` occurs in va_profiles[0..num_profiles).
static bool has_profile(VAProfile *va_profiles, int num_profiles, VAProfile p)
{
    bool found = false;
    for (int idx = 0; !found && idx < num_profiles; idx++)
        found = va_profiles[idx] == p;
    return found;
}
// (Re)creates the VA config and decoding context for frames of size w x h.
// Existing decoder state is destroyed first. The surfaces passed to
// vaCreateContext() are obtained up front via preallocate_surfaces().
// Returns 0 on success and -1 on any failure. Every path after va_lock()
// leaves through the `error` label, which drops the lock and frees the
// temporary allocations.
static int init_decoder(struct lavc_ctx *ctx, int w, int h)
{
// Parent for the temporary query arrays; freed at the common exit.
void *tmp = talloc_new(NULL);
struct priv *p = ctx->hwdec_priv;
// NOTE(review): CHECK_VA_STATUS() is not passed `status` explicitly, so it
// presumably reads this local by name -- confirm in video/vaapi.h.
VAStatus status;
int res = -1;
// destroy_decoder() takes the VA lock itself, so it runs before va_lock().
destroy_decoder(ctx);
va_lock(p->ctx);
// Map the stream's codec/profile to a VAProfile using the static table.
const struct hwdec_profile_entry *pe = hwdec_find_profile(ctx, profiles);
if (!pe) {
MP_ERR(p, "Unsupported codec or profile.\n");
goto error;
}
// Ask the driver which profiles it offers and log them for debugging.
int num_profiles = vaMaxNumProfiles(p->display);
VAProfile *va_profiles = talloc_zero_array(tmp, VAProfile, num_profiles);
status = vaQueryConfigProfiles(p->display, va_profiles, &num_profiles);
if (!CHECK_VA_STATUS(p, "vaQueryConfigProfiles()"))
goto error;
MP_DBG(p, "%d profiles available:\n", num_profiles);
for (int i = 0; i < num_profiles; i++)
MP_DBG(p, " %s\n", str_va_profile(va_profiles[i]));
// The profile we need must be among those the driver reported.
VAProfile va_profile = pe->hw_profile;
if (!has_profile(va_profiles, num_profiles, va_profile)) {
MP_ERR(p, "Decoder profile '%s' not available.\n",
str_va_profile(va_profile));
goto error;
}
MP_VERBOSE(p, "Using profile '%s'.\n", str_va_profile(va_profile));
// Reference frames plus extra surfaces for buffering; must fit the
// fixed-size surfaces[] array below.
int num_surfaces = hwdec_get_max_refs(ctx) + ADDTIONAL_SURFACES;
if (num_surfaces > MAX_SURFACES) {
MP_ERR(p, "Internal error: too many surfaces.\n");
goto error;
}
VASurfaceID surfaces[MAX_SURFACES];
if (!preallocate_surfaces(ctx, num_surfaces, w, h, surfaces)) {
MP_ERR(p, "Could not allocate surfaces.\n");
goto error;
}
// Check that the driver offers the required entrypoint for this profile.
int num_ep = vaMaxNumEntrypoints(p->display);
VAEntrypoint *ep = talloc_zero_array(tmp, VAEntrypoint, num_ep);
status = vaQueryConfigEntrypoints(p->display, va_profile, ep, &num_ep);
if (!CHECK_VA_STATUS(p, "vaQueryConfigEntrypoints()"))
goto error;
int entrypoint = find_entrypoint(IMGFMT_VAAPI, ep, num_ep);
if (entrypoint < 0) {
MP_ERR(p, "Could not find VA entrypoint.\n");
goto error;
}
// Query the render target formats available for profile + entrypoint and
// require that ours (p->rt_format) is among them.
VAConfigAttrib attrib = {
.type = VAConfigAttribRTFormat,
};
status = vaGetConfigAttributes(p->display, va_profile, entrypoint,
&attrib, 1);
if (!CHECK_VA_STATUS(p, "vaGetConfigAttributes()"))
goto error;
if ((attrib.value & p->rt_format) == 0) {
MP_ERR(p, "Chroma format not supported.\n");
goto error;
}
// The IDs are written straight into the struct shared with libavcodec.
// If vaCreateContext() fails, config_id stays set; destroy_decoder()
// destroys it on the next init_decoder() or uninit() call.
status = vaCreateConfig(p->display, va_profile, entrypoint, &attrib, 1,
&p->va_context->config_id);
if (!CHECK_VA_STATUS(p, "vaCreateConfig()"))
goto error;
status = vaCreateContext(p->display, p->va_context->config_id,
w, h, VA_PROGRESSIVE,
surfaces, num_surfaces,
&p->va_context->context_id);
if (!CHECK_VA_STATUS(p, "vaCreateContext()"))
goto error;
res = 0;
// Common exit for success and failure.
error:
va_unlock(p->ctx);
talloc_free(tmp);
return res;
}
// Fetches a VAAPI surface of size w x h from the decoder's surface pool.
// Logs an error and returns NULL if the pool cannot provide one.
static struct mp_image *allocate_image(struct lavc_ctx *ctx, int w, int h)
{
    struct priv *priv = ctx->hwdec_priv;

    struct mp_image *surface =
        mp_image_pool_get(priv->pool, IMGFMT_VAAPI, w, h);
    if (surface)
        return surface;

    MP_ERR(priv, "Failed to allocate additional VAAPI surface.\n");
    return NULL;
}
// process_image hook of the direct decoder: runs va_surface_init_subformat()
// on the decoded image and passes the same image on.
static struct mp_image *update_format(struct lavc_ctx *ctx, struct mp_image *img)
{
    (void)ctx; // not needed here

    va_surface_init_subformat(img);

    return img;
}
// Destroys the private VAAPI context and then lets the native backend (if
// one was selected) release its display. p->ctx and p->display are reset.
static void destroy_va_dummy_ctx(struct priv *p)
{
    va_destroy(p->ctx);
    p->ctx = NULL;
    p->display = NULL;

    if (!p->native_display_fns)
        return; // no backend ever produced a display

    p->native_display_fns->destroy(p);
}
// Sets up a "private" VADisplay that is independent of the VO. For
// simplicity this opens a fresh native connection (the X connection could
// instead be passed along with struct mp_hwdec_devices, if desired).
// Returns true with p->ctx set on success; on failure everything created so
// far is torn down again and false is returned.
static bool create_va_dummy_ctx(struct priv *p)
{
    // Use the first backend that manages to produce a display.
    for (int n = 0; native_displays[n]; n++) {
        native_displays[n]->create(p);
        if (p->display) {
            p->native_display_fns = native_displays[n];
            break;
        }
    }

    if (p->display) {
        p->ctx = va_initialize(p->display, p->log, true);
        if (p->ctx)
            return true;
        vaTerminate(p->display);
    }

    destroy_va_dummy_ctx(p);
    return false;
}
// Releases everything init() and init_decoder() set up. Safe to call when no
// private state exists (hwdec_priv is NULL).
static void uninit(struct lavc_ctx *ctx)
{
    struct priv *priv = ctx->hwdec_priv;

    if (priv) {
        destroy_decoder(ctx);

        talloc_free(priv->pool);
        priv->pool = NULL;

        // Only a context created by create_va_dummy_ctx() is ours to destroy.
        if (priv->native_display_fns)
            destroy_va_dummy_ctx(priv);

        talloc_free(priv);
        ctx->hwdec_priv = NULL;
    }
}
// Allocates the private decoder state and hooks it up to libavcodec.
// direct=true:  use the VAAPI context of the already loaded hwdec device.
// direct=false: create a private context via create_va_dummy_ctx().
// Returns 0 on success, -1 if no private context could be created.
static int init(struct lavc_ctx *ctx, bool direct)
{
    struct priv *priv = talloc_ptrtype(NULL, priv);
    *priv = (struct priv) {
        .log = mp_log_new(priv, ctx->log, "vaapi"),
        .va_context = &priv->va_context_storage,
        .rt_format = VA_RT_FORMAT_YUV420
    };

    if (!direct) {
        create_va_dummy_ctx(priv);
        if (!priv->ctx) {
            talloc_free(priv);
            return -1;
        }
    } else {
        // NOTE(review): the result of hwdec_devices_get() is dereferenced
        // without a check; this relies on probe() having loaded the device.
        priv->ctx = hwdec_devices_get(ctx->hwdec_devs, HWDEC_VAAPI)->ctx;
    }
    priv->display = priv->ctx->display;

    // Both pools are owned by the private state.
    priv->pool = talloc_steal(priv, mp_image_pool_new(MAX_SURFACES));
    va_pool_set_allocator(priv->pool, priv->ctx, priv->rt_format);
    priv->sw_pool = talloc_steal(priv, mp_image_pool_new(17));

    // No config/context exists until init_decoder() runs.
    priv->va_context->display = priv->display;
    priv->va_context->config_id = VA_INVALID_ID;
    priv->va_context->context_id = VA_INVALID_ID;

    ctx->avctx->hwaccel_context = priv->va_context;
    ctx->hwdec_priv = priv;
    return 0;
}
// init hook of the direct decoder: initializes with the context of the
// loaded hwdec device.
static int init_direct(struct lavc_ctx *ctx)
{
    const bool direct_mode = true;
    return init(ctx, direct_mode);
}
// Probe for the direct decoder.
// Returns HWDEC_ERR_NO_CTX if no VAAPI hwdec device can be loaded,
// HWDEC_ERR_NO_CODEC if the codec is not in the profile table, else 0.
static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
                 const char *codec)
{
    int result = 0;

    if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_VAAPI))
        result = HWDEC_ERR_NO_CTX;
    else if (!hwdec_check_codec_support(codec, profiles))
        result = HWDEC_ERR_NO_CODEC;

    return result;
}
static int probe_copy(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec,
const char *codec)
{
struct priv dummy = {mp_null_log};
if (!create_va_dummy_ctx(&dummy))
return HWDEC_ERR_NO_CTX;
bool emulated = va_guess_if_emulated(dummy.ctx);
destroy_va_dummy_ctx(&dummy);
if (!hwdec_check_codec_support(codec, profiles))
return HWDEC_ERR_NO_CODEC;
if (emulated)
return HWDEC_ERR_EMULATED;
return 0;
}
// init hook of the copy-back decoder: initializes with a private VAAPI
// context instead of the hwdec device's one.
static int init_copy(struct lavc_ctx *ctx)
{
    const bool direct_mode = false;
    return init(ctx, direct_mode);
}
// process_image hook of the copy-back decoder: downloads the surface into an
// image from sw_pool. On success the input image is freed and the downloaded
// one returned; if the download fails, the input image is returned unchanged.
static struct mp_image *copy_image(struct lavc_ctx *ctx, struct mp_image *img)
{
    struct priv *priv = ctx->hwdec_priv;

    struct mp_image *downloaded = va_surface_download(img, priv->sw_pool);
    if (!downloaded)
        return img;

    talloc_free(img);
    return downloaded;
}
// lock hook of the direct decoder: takes the lock of the VAAPI context.
static void intel_shit_lock(struct lavc_ctx *ctx)
{
    va_lock(((struct priv *)ctx->hwdec_priv)->ctx);
}
// unlock hook of the direct decoder: releases the lock of the VAAPI context.
static void intel_crap_unlock(struct lavc_ctx *ctx)
{
    va_unlock(((struct priv *)ctx->hwdec_priv)->ctx);
}
// Direct VAAPI decoder: uses the context of the loaded hwdec device and
// passes decoded surfaces on as IMGFMT_VAAPI images.
const struct vd_lavc_hwdec mp_vd_lavc_vaapi = {
    .type           = HWDEC_VAAPI,
    .image_format   = IMGFMT_VAAPI,

    // lifecycle
    .probe          = probe,
    .init           = init_direct,
    .init_decoder   = init_decoder,
    .uninit         = uninit,

    // per-frame hooks
    .allocate_image = allocate_image,
    .process_image  = update_format,

    // locking around the VAAPI context
    .lock           = intel_shit_lock,
    .unlock         = intel_crap_unlock,
};
// Copy-back VAAPI decoder: decodes with a private VAAPI context and runs
// each decoded surface through copy_image().
const struct vd_lavc_hwdec mp_vd_lavc_vaapi_copy = {
    .type           = HWDEC_VAAPI_COPY,
    .image_format   = IMGFMT_VAAPI,
    .copying        = true,
    .delay_queue    = HWDEC_DELAY_QUEUE_COUNT,

    // lifecycle
    .probe          = probe_copy,
    .init           = init_copy,
    .init_decoder   = init_decoder,
    .uninit         = uninit,

    // per-frame hooks
    .allocate_image = allocate_image,
    .process_image  = copy_image,
};