1
0
mirror of https://github.com/mpv-player/mpv synced 2024-12-31 19:52:16 +00:00

vo_gpu_next: add support for hardware decoding

There are two major ways of going about this:

1. Expose the native ra_gl/ra_pl/ra_d3d11 objects to the pre-existing
   hwdec mappers, and then add code in vo_gpu_next to rewrap those
   ra_tex objects into pl_tex.

2. Wrap the underlying pl_opengl/pl_d3d11 into a ra_pl object and expose
   it to the hwdec mappers, then directly use the resulting pl_tex.

I ultimately opted for approach 1 because it enables compatibility with
more hardware decoders, specifically including ones that use native
OpenGL calls currently. The second approach only really works with
cuda_vk and vaapi_pl.
This commit is contained in:
Niklas Haas 2022-02-25 22:07:56 +01:00 committed by Niklas Haas
parent c13f5f24cd
commit dfe846f9de
3 changed files with 167 additions and 41 deletions

View File

@ -262,10 +262,6 @@ Available video output drivers are:
the same set of features as ``--vo=gpu``. See `GPU renderer options`_ for a
list.
Currently, this only supports Vulkan, OpenGL, D3D11 and no hardware
decoding. Unlike ``--vo=gpu``, the FBO formats are not tunable, but you can
still set ``--gpu-dumb-mode=yes`` to forcibly disable their use.
Should generally be faster and higher quality, but some features may still
be missing or misbehave. Expect (and report!) bugs. See here for a list of
known differences and bugs:

View File

@ -26,7 +26,6 @@ struct gl_video_opts;
struct gpu_ctx {
struct mp_log *log;
struct ra_ctx *ra_ctx;
pl_log pllog;

View File

@ -32,15 +32,22 @@
#include "options/path.h"
#include "osdep/io.h"
#include "stream/stream.h"
#include "video/mp_image.h"
#include "video/fmt-conversion.h"
#include "video/mp_image.h"
#include "video/out/placebo/ra_pl.h"
#include "placebo/utils.h"
#include "gpu/context.h"
#include "gpu/hwdec.h"
#include "gpu/video.h"
#include "gpu/video_shaders.h"
#include "sub/osd.h"
#include "gpu_next/context.h"
#if HAVE_GL && defined(PL_HAVE_OPENGL)
#include <libplacebo/opengl.h>
#include "video/out/opengl/ra_gl.h"
#endif
struct osd_entry {
pl_tex tex;
struct pl_overlay_part *parts;
@ -75,6 +82,11 @@ struct priv {
struct mpv_global *global;
struct ra_ctx *ra_ctx;
struct gpu_ctx *context;
struct ra_hwdec_ctx hwdec_ctx;
// Pooled/cached mappers, for performance
struct ra_hwdec_mapper **hwdec_mappers;
int num_hwdec_mappers;
pl_log pllog;
pl_gpu gpu;
@ -306,6 +318,7 @@ struct frame_priv {
struct vo *vo;
struct osd_state subs;
uint64_t osd_sync;
struct ra_hwdec_mapper *hwdec_mapper;
};
static int plane_data_from_imgfmt(struct pl_plane_data out_data[4],
@ -441,19 +454,70 @@ static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi)
return csp;
}
// For RAs not based on ra_pl, this creates a new pl_tex wrapper
static pl_tex hwdec_get_tex(struct frame_priv *fp, int n)
{
struct priv *p = fp->vo->priv;
struct ra_tex *ratex = fp->hwdec_mapper->tex[n];
struct ra *ra = fp->hwdec_mapper->ra;
if (ra_pl_get(ra))
return (pl_tex) ratex->priv;
#if HAVE_GL && defined(PL_HAVE_OPENGL)
if (ra_is_gl(ra) && pl_opengl_get(p->gpu)) {
struct pl_opengl_wrap_params par = {
.width = ratex->params.w,
.height = ratex->params.h,
};
ra_gl_get_format(ratex->params.format, &par.iformat,
&(GLenum){0}, &(GLenum){0});
ra_gl_get_raw_tex(ra, ratex, &par.texture, &par.target);
return pl_opengl_wrap(p->gpu, &par);
}
#endif
// TODO: d3d11 wrapping/unwrapping
MP_ERR(p, "Failed mapping hwdec frame? Open a bug!\n");
return false;
}
static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src,
struct pl_frame *frame)
{
struct mp_image *mpi = src->frame_data;
const struct mp_image_params *par = &mpi->params;
struct frame_priv *fp = mpi->priv;
struct pl_plane_data data[4] = {0};
struct vo *vo = fp->vo;
struct priv *p = vo->priv;
// TODO: implement support for hwdec wrappers
struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, mpi->imgfmt);
if (hwdec) {
if (MP_TARRAY_POP(p->hwdec_mappers, p->num_hwdec_mappers, &fp->hwdec_mapper)) {
if (!mp_image_params_equal(&mpi->params, &fp->hwdec_mapper->src_params))
ra_hwdec_mapper_free(&fp->hwdec_mapper);
}
if (!fp->hwdec_mapper) {
fp->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &mpi->params);
if (!fp->hwdec_mapper) {
MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
return false;
}
}
if (ra_hwdec_mapper_map(fp->hwdec_mapper, mpi) < 0) {
MP_ERR(p, "Mapping hardware decoded surface failed.\n");
MP_TARRAY_APPEND(p, p->hwdec_mappers, p->num_hwdec_mappers, fp->hwdec_mapper);
fp->hwdec_mapper = NULL;
return false;
}
par = &fp->hwdec_mapper->dst_params;
}
*frame = (struct pl_frame) {
.num_planes = mpi->num_planes,
.color = get_mpi_csp(vo, mpi),
.repr = {
.sys = mp_csp_to_pl(par->color.space),
@ -485,43 +549,71 @@ static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src
default: break;
}
enum pl_chroma_location chroma = mp_chroma_to_pl(par->chroma_location);
int planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt);
for (int n = 0; n < planes; n++) {
struct pl_plane *plane = &frame->planes[n];
data[n].width = mp_image_plane_w(mpi, n);
data[n].height = mp_image_plane_h(mpi, n);
if (mpi->stride[n] < 0) {
data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n];
data[n].row_stride = -mpi->stride[n];
plane->flipped = true;
} else {
data[n].pixels = mpi->planes[n];
data[n].row_stride = mpi->stride[n];
if (hwdec) {
struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(par->imgfmt);
frame->num_planes = desc.num_planes;
for (int n = 0; n < frame->num_planes; n++) {
struct pl_plane *plane = &frame->planes[n];
plane->texture = hwdec_get_tex(fp, n);
if (!plane->texture)
return false;
int *map = plane->component_mapping;
for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) {
if (desc.comps[c].plane != n)
continue;
// Sort by component offset
uint8_t offset = desc.comps[c].offset;
int index = plane->components++;
while (index > 0 && desc.comps[map[index - 1]].offset > offset) {
map[index] = map[index - 1];
index--;
}
map[index] = c;
}
}
pl_buf buf = get_dr_buf(mpi);
if (buf) {
data[n].buf = buf;
data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data;
data[n].pixels = NULL;
} else if (gpu->limits.callbacks) {
data[n].callback = talloc_free;
data[n].priv = mp_image_new_ref(mpi);
} else { // swdec
struct pl_plane_data data[4] = {0};
frame->num_planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt);
for (int n = 0; n < frame->num_planes; n++) {
struct pl_plane *plane = &frame->planes[n];
data[n].width = mp_image_plane_w(mpi, n);
data[n].height = mp_image_plane_h(mpi, n);
if (mpi->stride[n] < 0) {
data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n];
data[n].row_stride = -mpi->stride[n];
plane->flipped = true;
} else {
data[n].pixels = mpi->planes[n];
data[n].row_stride = mpi->stride[n];
}
pl_buf buf = get_dr_buf(mpi);
if (buf) {
data[n].buf = buf;
data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data;
data[n].pixels = NULL;
} else if (gpu->limits.callbacks) {
data[n].callback = talloc_free;
data[n].priv = mp_image_new_ref(mpi);
}
if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
MP_ERR(vo, "Failed uploading frame!\n");
talloc_free(data[n].priv);
return false;
}
}
if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
MP_ERR(vo, "Failed uploading frame!\n");
talloc_free(data[n].priv);
return false;
}
if (mpi->fmt.xs[n] || mpi->fmt.ys[n]) {
pl_chroma_location_offset(chroma, &plane->shift_x, &plane->shift_y);
plane->shift_y = -plane->shift_y;
}
}
// Update chroma location, must be done after initializing planes
pl_frame_set_chroma_location(frame, mp_chroma_to_pl(par->chroma_location));
#ifdef PL_HAVE_LAV_DOLBY_VISION
if (mpi->dovi) {
const AVDOVIMetadata *metadata = (AVDOVIMetadata *) mpi->dovi->data;
@ -558,6 +650,17 @@ static void unmap_frame(pl_gpu gpu, struct pl_frame *frame,
struct mp_image *mpi = src->frame_data;
struct frame_priv *fp = mpi->priv;
struct priv *p = fp->vo->priv;
if (fp->hwdec_mapper) {
// Clean up after wrapped plane textures
if (!ra_pl_get(fp->hwdec_mapper->ra)) {
for (int n = 0; n < frame->num_planes; n++)
pl_tex_destroy(p->gpu, &frame->planes[n].texture);
}
ra_hwdec_mapper_unmap(fp->hwdec_mapper);
MP_TARRAY_APPEND(p, p->hwdec_mappers, p->num_hwdec_mappers, fp->hwdec_mapper);
fp->hwdec_mapper = NULL;
}
for (int i = 0; i < MP_ARRAY_SIZE(fp->subs.entries); i++) {
pl_tex tex = fp->subs.entries[i].tex;
if (tex)
@ -844,6 +947,9 @@ static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
static int query_format(struct vo *vo, int format)
{
struct priv *p = vo->priv;
if (ra_hwdec_get(&p->hwdec_ctx, format))
return true;
struct pl_bit_encoding bits;
struct pl_plane_data data[4] = {0};
int planes = plane_data_from_imgfmt(data, &bits, format);
@ -1086,6 +1192,10 @@ static int control(struct vo *vo, uint32_t request, void *data)
case VOCTRL_EXTERNAL_RESIZE:
reconfig(vo, NULL);
return true;
case VOCTRL_LOAD_HWDEC_API:
ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, (intptr_t) data);
return true;
}
int events = 0;
@ -1144,6 +1254,14 @@ static void uninit(struct vo *vo)
for (int i = 0; i < p->num_user_hooks; i++)
pl_mpv_user_shader_destroy(&p->user_hooks[i].hook);
if (vo->hwdec_devs) {
for (int n = 0; n < p->num_hwdec_mappers; n++)
ra_hwdec_mapper_free(&p->hwdec_mappers[n]);
ra_hwdec_ctx_uninit(&p->hwdec_ctx);
hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
hwdec_devices_destroy(vo->hwdec_devs);
}
char *cache_file = get_cache_file(p);
if (cache_file) {
FILE *cache = fopen(cache_file, "wb");
@ -1167,6 +1285,11 @@ static void uninit(struct vo *vo)
gpu_ctx_destroy(&p->context);
}
static void load_hwdec_api(void *ctx, int imgfmt)
{
vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t) imgfmt);
}
static int preinit(struct vo *vo)
{
struct priv *p = vo->priv;
@ -1176,7 +1299,6 @@ static int preinit(struct vo *vo)
p->log = vo->log;
struct gl_video_opts *gl_opts = p->opts_cache->opts;
p->context = gpu_ctx_create(vo, gl_opts);
if (!p->context)
goto err_out;
@ -1185,6 +1307,15 @@ static int preinit(struct vo *vo)
p->pllog = p->context->pllog;
p->gpu = p->context->gpu;
p->sw = p->context->swapchain;
p->hwdec_ctx = (struct ra_hwdec_ctx) {
.log = p->log,
.global = p->global,
.ra = p->ra_ctx->ra,
};
vo->hwdec_devs = hwdec_devices_create();
hwdec_devices_set_loader(vo->hwdec_devs, load_hwdec_api, vo);
ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, gl_opts->hwdec_interop, false);
p->rr = pl_renderer_create(p->pllog, p->gpu);
p->queue = pl_queue_create(p->gpu);