vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
/*
|
|
|
|
* This file is part of mpv.
|
|
|
|
*
|
|
|
|
* mpv is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* mpv is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with mpv. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2013-11-05 21:06:32 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
#include "vdpau.h"
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
#include "osdep/threads.h"
|
2013-11-05 21:06:32 +00:00
|
|
|
#include "osdep/timer.h"
|
|
|
|
|
|
|
|
#include "video/out/x11_common.h"
|
2015-01-22 16:47:14 +00:00
|
|
|
#include "img_format.h"
|
|
|
|
#include "mp_image.h"
|
|
|
|
#include "mp_image_pool.h"
|
|
|
|
#include "vdpau_mixer.h"
|
|
|
|
|
|
|
|
static struct mp_image *download_image(struct mp_hwdec_ctx *hwctx,
|
|
|
|
struct mp_image *mpi,
|
|
|
|
struct mp_image_pool *swpool)
|
|
|
|
{
|
|
|
|
struct mp_vdpau_ctx *ctx = hwctx->vdpau_ctx;
|
|
|
|
struct vdp_functions *vdp = &ctx->vdp;
|
|
|
|
VdpStatus vdp_st;
|
|
|
|
|
|
|
|
struct mp_image *res = NULL;
|
2015-12-19 19:04:31 +00:00
|
|
|
int w, h;
|
|
|
|
mp_image_params_get_dsize(&mpi->params, &w, &h);
|
2015-01-22 16:47:14 +00:00
|
|
|
|
|
|
|
// Abuse this lock for our own purposes. It could use its own lock instead.
|
|
|
|
pthread_mutex_lock(&ctx->pool_lock);
|
|
|
|
|
|
|
|
if (ctx->getimg_surface == VDP_INVALID_HANDLE ||
|
|
|
|
ctx->getimg_w < w || ctx->getimg_h < h)
|
|
|
|
{
|
|
|
|
if (ctx->getimg_surface != VDP_INVALID_HANDLE) {
|
|
|
|
vdp_st = vdp->output_surface_destroy(ctx->getimg_surface);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
|
|
|
|
}
|
|
|
|
ctx->getimg_surface = VDP_INVALID_HANDLE;
|
|
|
|
vdp_st = vdp->output_surface_create(ctx->vdp_device,
|
|
|
|
VDP_RGBA_FORMAT_B8G8R8A8, w, h,
|
|
|
|
&ctx->getimg_surface);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_create");
|
|
|
|
if (vdp_st != VDP_STATUS_OK)
|
|
|
|
goto error;
|
|
|
|
ctx->getimg_w = w;
|
|
|
|
ctx->getimg_h = h;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ctx->getimg_mixer)
|
|
|
|
ctx->getimg_mixer = mp_vdpau_mixer_create(ctx, ctx->log);
|
|
|
|
|
|
|
|
VdpRect in = { .x1 = mpi->w, .y1 = mpi->h };
|
|
|
|
VdpRect out = { .x1 = w, .y1 = h };
|
|
|
|
if (mp_vdpau_mixer_render(ctx->getimg_mixer, NULL, ctx->getimg_surface, &out,
|
|
|
|
mpi, &in) < 0)
|
|
|
|
goto error;
|
|
|
|
|
2015-04-07 08:18:48 +00:00
|
|
|
res = mp_image_pool_get(swpool, IMGFMT_BGR0, ctx->getimg_w, ctx->getimg_h);
|
2015-01-22 16:47:14 +00:00
|
|
|
if (!res)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
void *dst_planes[] = { res->planes[0] };
|
|
|
|
uint32_t dst_pitches[] = { res->stride[0] };
|
|
|
|
vdp_st = vdp->output_surface_get_bits_native(ctx->getimg_surface, NULL,
|
|
|
|
dst_planes, dst_pitches);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_get_bits_native");
|
|
|
|
if (vdp_st != VDP_STATUS_OK)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
mp_image_set_size(res, w, h);
|
|
|
|
mp_image_copy_attributes(res, mpi);
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&ctx->pool_lock);
|
|
|
|
return res;
|
|
|
|
error:
|
|
|
|
talloc_free(res);
|
|
|
|
MP_WARN(ctx, "Error copying image from GPU.\n");
|
|
|
|
pthread_mutex_unlock(&ctx->pool_lock);
|
|
|
|
return NULL;
|
|
|
|
}
|
2013-11-05 21:06:32 +00:00
|
|
|
|
|
|
|
static void mark_vdpau_objects_uninitialized(struct mp_vdpau_ctx *ctx)
|
|
|
|
{
|
2014-05-22 18:55:17 +00:00
|
|
|
for (int i = 0; i < MAX_VIDEO_SURFACES; i++) {
|
2013-11-05 21:06:32 +00:00
|
|
|
ctx->video_surfaces[i].surface = VDP_INVALID_HANDLE;
|
2014-05-22 18:55:17 +00:00
|
|
|
ctx->video_surfaces[i].osurface = VDP_INVALID_HANDLE;
|
|
|
|
ctx->video_surfaces[i].allocated = false;
|
|
|
|
}
|
2013-11-05 21:06:32 +00:00
|
|
|
ctx->vdp_device = VDP_INVALID_HANDLE;
|
vdpau: force driver to report preemption early
Another fix for the crazy and insane nvidia preemption behavior.
This time, the situation is that we are using vo_opengl with vdpau
interop, and that vdpau got preempted in the background while mpv was
sitting idly. This can be e.g. reproduced by using:
--force-window=immediate --idle --hwdec=vdpau
and switching VTs. Then after switching back, load a video file.
This will not let mp_vdpau_handle_preemption() perform preemption
recovery, simply because it will do so only once vdp_decoder_create()
has been called. There are some other API calls which trigger
preemption, but many don't.
Due to the way the libavcodec API works, vdp_decoder_create() is way too
late. It does so when get_format returns. It notices creating the
decoder fails, and continues calling get_format without the vdpau
format. We could perhaps force it to reinit again (by adding a call to
vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but
this seems too much of a mess.
Solve it by calling API in mp_vdpau_handle_preemption() that empirically
does trigger preemption: output_surface_put_bits_native(). This call is
useless, and in fact should be doing nothing (empty update VdpRect).
There's the slight chance that in theory it will slow down operation,
but in practice it's bound to be harmless. It's the likely cheapest and
simplest API call I've found that can trigger the fallback this way.
(The driver is closed source, so it was up to trial & error.)
Also, when initializing decoding, allow initial preemption recovery,
which is needed to pass the test mention above.
2016-01-25 15:42:54 +00:00
|
|
|
ctx->preemption_obj = VDP_INVALID_HANDLE;
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Preemption callback handed to preemption_callback_register(). It only
// records the event by setting is_preempted under preempt_lock; recovery is
// done elsewhere.
//
//  device:  unused
//  context: the struct mp_vdpau_ctx the callback was registered with
static void preemption_callback(VdpDevice device, void *context)
{
    struct mp_vdpau_ctx *vdpau = context;
    (void)device;

    pthread_mutex_lock(&vdpau->preempt_lock);
    vdpau->is_preempted = true;
    pthread_mutex_unlock(&vdpau->preempt_lock);
}
|
|
|
|
|
2015-06-20 20:26:57 +00:00
|
|
|
static int win_x11_init_vdpau_procs(struct mp_vdpau_ctx *ctx, bool probing)
|
2013-11-05 21:06:32 +00:00
|
|
|
{
|
2014-12-03 20:13:59 +00:00
|
|
|
Display *x11 = ctx->x11;
|
2013-11-05 21:06:32 +00:00
|
|
|
VdpStatus vdp_st;
|
|
|
|
|
|
|
|
// Don't operate on ctx->vdp directly, so that even if init fails, ctx->vdp
|
|
|
|
// will have the function pointers from the previous successful init, and
|
|
|
|
// won't randomly make other code crash on calling NULL pointers.
|
|
|
|
struct vdp_functions vdp = {0};
|
|
|
|
|
|
|
|
if (!x11)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
struct vdp_function {
|
|
|
|
const int id;
|
|
|
|
int offset;
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct vdp_function vdp_func[] = {
|
|
|
|
#define VDP_FUNCTION(_, macro_name, mp_name) {macro_name, offsetof(struct vdp_functions, mp_name)},
|
|
|
|
#include "video/vdpau_functions.inc"
|
|
|
|
#undef VDP_FUNCTION
|
|
|
|
{0, -1}
|
|
|
|
};
|
|
|
|
|
|
|
|
VdpGetProcAddress *get_proc_address;
|
2014-12-03 20:13:59 +00:00
|
|
|
vdp_st = vdp_device_create_x11(x11, DefaultScreen(x11), &ctx->vdp_device,
|
2013-11-05 21:06:32 +00:00
|
|
|
&get_proc_address);
|
|
|
|
if (vdp_st != VDP_STATUS_OK) {
|
2015-06-20 20:26:57 +00:00
|
|
|
if (ctx->is_preempted) {
|
2013-11-05 21:06:32 +00:00
|
|
|
MP_DBG(ctx, "Error calling vdp_device_create_x11 while preempted: %d\n",
|
|
|
|
vdp_st);
|
2015-06-20 20:26:57 +00:00
|
|
|
} else {
|
|
|
|
int lev = probing ? MSGL_V : MSGL_ERR;
|
|
|
|
mp_msg(ctx->log, lev, "Error when calling vdp_device_create_x11: %d\n",
|
|
|
|
vdp_st);
|
|
|
|
}
|
2013-11-05 21:06:32 +00:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const struct vdp_function *dsc = vdp_func; dsc->offset >= 0; dsc++) {
|
|
|
|
vdp_st = get_proc_address(ctx->vdp_device, dsc->id,
|
|
|
|
(void **)((char *)&vdp + dsc->offset));
|
|
|
|
if (vdp_st != VDP_STATUS_OK) {
|
|
|
|
MP_ERR(ctx, "Error when calling vdp_get_proc_address(function "
|
|
|
|
"id %d): %s\n", dsc->id,
|
|
|
|
vdp.get_error_string ? vdp.get_error_string(vdp_st) : "?");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-19 18:57:08 +00:00
|
|
|
ctx->vdp = vdp;
|
2013-11-05 21:06:32 +00:00
|
|
|
ctx->get_proc_address = get_proc_address;
|
|
|
|
|
vdpau: force driver to report preemption early
Another fix for the crazy and insane nvidia preemption behavior.
This time, the situation is that we are using vo_opengl with vdpau
interop, and that vdpau got preempted in the background while mpv was
sitting idly. This can be e.g. reproduced by using:
--force-window=immediate --idle --hwdec=vdpau
and switching VTs. Then after switching back, load a video file.
This will not let mp_vdpau_handle_preemption() perform preemption
recovery, simply because it will do so only once vdp_decoder_create()
has been called. There are some other API calls which trigger
preemption, but many don't.
Due to the way the libavcodec API works, vdp_decoder_create() is way too
late. It does so when get_format returns. It notices creating the
decoder fails, and continues calling get_format without the vdpau
format. We could perhaps force it to reinit again (by adding a call to
vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but
this seems too much of a mess.
Solve it by calling API in mp_vdpau_handle_preemption() that empirically
does trigger preemption: output_surface_put_bits_native(). This call is
useless, and in fact should be doing nothing (empty update VdpRect).
There's the slight chance that in theory it will slow down operation,
but in practice it's bound to be harmless. It's the likely cheapest and
simplest API call I've found that can trigger the fallback this way.
(The driver is closed source, so it was up to trial & error.)
Also, when initializing decoding, allow initial preemption recovery,
which is needed to pass the test mention above.
2016-01-25 15:42:54 +00:00
|
|
|
vdp_st = vdp.output_surface_create(ctx->vdp_device, VDP_RGBA_FORMAT_B8G8R8A8,
|
|
|
|
1, 1, &ctx->preemption_obj);
|
|
|
|
if (vdp_st != VDP_STATUS_OK) {
|
|
|
|
MP_ERR(ctx, "Could not create dummy object: %s",
|
|
|
|
vdp.get_error_string(vdp_st));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-03-23 17:09:40 +00:00
|
|
|
vdp.preemption_callback_register(ctx->vdp_device, preemption_callback, ctx);
|
2013-11-05 21:06:32 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int handle_preemption(struct mp_vdpau_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (!ctx->is_preempted)
|
|
|
|
return 0;
|
|
|
|
mark_vdpau_objects_uninitialized(ctx);
|
|
|
|
if (!ctx->preemption_user_notified) {
|
|
|
|
MP_ERR(ctx, "Got display preemption notice! Will attempt to recover.\n");
|
|
|
|
ctx->preemption_user_notified = true;
|
|
|
|
}
|
|
|
|
/* Trying to initialize seems to be quite slow, so only try once a
|
|
|
|
* second to avoid using 100% CPU. */
|
|
|
|
if (ctx->last_preemption_retry_fail &&
|
|
|
|
mp_time_sec() - ctx->last_preemption_retry_fail < 1.0)
|
|
|
|
return -1;
|
2015-06-20 20:26:57 +00:00
|
|
|
if (win_x11_init_vdpau_procs(ctx, false) < 0) {
|
2013-11-05 21:06:32 +00:00
|
|
|
ctx->last_preemption_retry_fail = mp_time_sec();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ctx->preemption_user_notified = false;
|
|
|
|
ctx->last_preemption_retry_fail = 0;
|
|
|
|
ctx->is_preempted = false;
|
|
|
|
ctx->preemption_counter++;
|
|
|
|
MP_INFO(ctx, "Recovered from display preemption.\n");
|
|
|
|
return 1;
|
|
|
|
}
|
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
|
2014-05-09 19:49:29 +00:00
|
|
|
// Check whether vdpau display preemption happened. The caller provides a
|
|
|
|
// preemption counter, which contains the logical timestamp of the last
|
|
|
|
// preemption handled by the caller. The counter can be 0 for init.
|
2015-05-29 11:48:18 +00:00
|
|
|
// If counter is NULL, only ever return -1 or 1.
|
2014-05-09 19:49:29 +00:00
|
|
|
// Return values:
|
|
|
|
// -1: the display is currently preempted, and vdpau can't be used
|
|
|
|
// 0: a preemption event happened, and the caller must recover
|
|
|
|
// (*counter is updated, and a second call will report status ok)
|
|
|
|
// 1: everything is fine, no preemption happened
|
|
|
|
int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter)
|
|
|
|
{
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
int r = 1;
|
|
|
|
pthread_mutex_lock(&ctx->preempt_lock);
|
|
|
|
|
vdpau: force driver to report preemption early
Another fix for the crazy and insane nvidia preemption behavior.
This time, the situation is that we are using vo_opengl with vdpau
interop, and that vdpau got preempted in the background while mpv was
sitting idly. This can be e.g. reproduced by using:
--force-window=immediate --idle --hwdec=vdpau
and switching VTs. Then after switching back, load a video file.
This will not let mp_vdpau_handle_preemption() perform preemption
recovery, simply because it will do so only once vdp_decoder_create()
has been called. There are some other API calls which trigger
preemption, but many don't.
Due to the way the libavcodec API works, vdp_decoder_create() is way too
late. It does so when get_format returns. It notices creating the
decoder fails, and continues calling get_format without the vdpau
format. We could perhaps force it to reinit again (by adding a call to
vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but
this seems too much of a mess.
Solve it by calling API in mp_vdpau_handle_preemption() that empirically
does trigger preemption: output_surface_put_bits_native(). This call is
useless, and in fact should be doing nothing (empty update VdpRect).
There's the slight chance that in theory it will slow down operation,
but in practice it's bound to be harmless. It's the likely cheapest and
simplest API call I've found that can trigger the fallback this way.
(The driver is closed source, so it was up to trial & error.)
Also, when initializing decoding, allow initial preemption recovery,
which is needed to pass the test mentioned above.
2016-01-25 15:42:54 +00:00
|
|
|
const void *p[4] = {&(uint32_t){0}};
|
|
|
|
uint32_t stride[4] = {4};
|
|
|
|
VdpRect rc = {0};
|
|
|
|
ctx->vdp.output_surface_put_bits_native(ctx->preemption_obj, p, stride, &rc);
|
|
|
|
|
2014-05-09 19:49:29 +00:00
|
|
|
// First time init
|
2015-05-29 11:48:18 +00:00
|
|
|
if (counter && !*counter)
|
2014-05-09 19:49:29 +00:00
|
|
|
*counter = ctx->preemption_counter;
|
|
|
|
|
|
|
|
if (handle_preemption(ctx) < 0)
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
r = -1;
|
2014-05-09 19:49:29 +00:00
|
|
|
|
2015-05-29 11:48:18 +00:00
|
|
|
if (counter && r > 0 && *counter < ctx->preemption_counter) {
|
2014-05-09 19:49:29 +00:00
|
|
|
*counter = ctx->preemption_counter;
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
r = 0; // signal recovery after preemption
|
2014-05-09 19:49:29 +00:00
|
|
|
}
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
|
|
|
|
pthread_mutex_unlock(&ctx->preempt_lock);
|
|
|
|
return r;
|
2014-05-09 19:49:29 +00:00
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
// Bookkeeping record handed to release_decoder_surface() when an image that
// wraps a pooled surface is released. It identifies which pool slot the image
// was created from.
struct surface_ref {
    struct mp_vdpau_ctx *ctx;   // context whose video_surfaces[] holds the slot
    int index;                  // index of the slot in ctx->video_surfaces[]
};
|
|
|
|
|
2013-11-05 21:06:32 +00:00
|
|
|
static void release_decoder_surface(void *ptr)
|
|
|
|
{
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
struct surface_ref *r = ptr;
|
|
|
|
struct mp_vdpau_ctx *ctx = r->ctx;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&ctx->pool_lock);
|
|
|
|
assert(ctx->video_surfaces[r->index].in_use);
|
|
|
|
ctx->video_surfaces[r->index].in_use = false;
|
|
|
|
pthread_mutex_unlock(&ctx->pool_lock);
|
|
|
|
|
|
|
|
talloc_free(r);
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
static struct mp_image *create_ref(struct mp_vdpau_ctx *ctx, int index)
|
2013-11-05 21:06:32 +00:00
|
|
|
{
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
struct surface_entry *e = &ctx->video_surfaces[index];
|
2013-11-05 21:06:32 +00:00
|
|
|
assert(!e->in_use);
|
|
|
|
e->in_use = true;
|
2014-08-11 12:03:53 +00:00
|
|
|
e->age = ctx->age_counter++;
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
struct surface_ref *ref = talloc_ptrtype(NULL, ref);
|
|
|
|
*ref = (struct surface_ref){ctx, index};
|
2013-11-05 21:06:32 +00:00
|
|
|
struct mp_image *res =
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
mp_image_new_custom_ref(&(struct mp_image){0}, ref,
|
2013-11-05 21:06:32 +00:00
|
|
|
release_decoder_surface);
|
video: introduce failure path for image allocations
Until now, failure to allocate image data resulted in a crash (i.e.
abort() was called). This was intentional, because it's pretty silly to
degrade playback, and in almost all situations, the OOM will probably
kill you anyway. (And then there's the standard Linux overcommit
behavior, which also will kill you at some point.)
But I changed my opinion, so here we go. This change does not affect
_all_ memory allocations, just image data. Now in most failure cases,
the output will just be skipped. For video filters, this coincidentally
means that failure is treated as EOF (because the playback core assumes
EOF if nothing comes out of the video filter chain). In other
situations, output might be in some way degraded, like skipping frames,
not scaling OSD, and such.
Functions whose return values changed semantics:
mp_image_alloc
mp_image_new_copy
mp_image_new_ref
mp_image_make_writeable
mp_image_setrefp
mp_image_to_av_frame_and_unref
mp_image_from_av_frame
mp_image_new_external_ref
mp_image_new_custom_ref
mp_image_pool_make_writeable
mp_image_pool_get
mp_image_pool_new_copy
mp_vdpau_mixed_frame_create
vf_alloc_out_image
vf_make_out_image_writeable
glGetWindowScreenshot
2014-06-17 20:43:43 +00:00
|
|
|
if (res) {
|
|
|
|
mp_image_setfmt(res, e->rgb ? IMGFMT_VDPAU_OUTPUT : IMGFMT_VDPAU);
|
|
|
|
mp_image_set_size(res, e->w, e->h);
|
|
|
|
res->planes[0] = (void *)"dummy"; // must be non-NULL, otherwise arbitrary
|
|
|
|
res->planes[3] = (void *)(intptr_t)(e->rgb ? e->osurface : e->surface);
|
|
|
|
}
|
2013-11-05 21:06:32 +00:00
|
|
|
return res;
|
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
}
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
static struct mp_image *mp_vdpau_get_surface(struct mp_vdpau_ctx *ctx,
|
|
|
|
VdpChromaType chroma,
|
|
|
|
VdpRGBAFormat rgb_format,
|
|
|
|
bool rgb, int w, int h)
|
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
{
|
2014-03-19 18:57:08 +00:00
|
|
|
struct vdp_functions *vdp = &ctx->vdp;
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
int surface_index = -1;
|
2013-11-05 21:06:32 +00:00
|
|
|
VdpStatus vdp_st;
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
if (rgb) {
|
|
|
|
chroma = (VdpChromaType)-1;
|
|
|
|
} else {
|
|
|
|
rgb_format = (VdpChromaType)-1;
|
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
pthread_mutex_lock(&ctx->pool_lock);
|
|
|
|
|
2013-11-05 21:06:32 +00:00
|
|
|
// Destroy all unused surfaces that don't have matching parameters
|
|
|
|
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
|
|
|
|
struct surface_entry *e = &ctx->video_surfaces[n];
|
2014-05-22 18:55:17 +00:00
|
|
|
if (!e->in_use && e->allocated) {
|
|
|
|
if (e->w != w || e->h != h || e->rgb != rgb ||
|
|
|
|
e->chroma != chroma || e->rgb_format != rgb_format)
|
|
|
|
{
|
|
|
|
if (e->rgb) {
|
|
|
|
vdp_st = vdp->output_surface_destroy(e->osurface);
|
|
|
|
} else {
|
|
|
|
vdp_st = vdp->video_surface_destroy(e->surface);
|
|
|
|
}
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when destroying surface");
|
|
|
|
e->surface = e->osurface = VDP_INVALID_HANDLE;
|
|
|
|
e->allocated = false;
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to find an existing unused surface
|
|
|
|
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
|
|
|
|
struct surface_entry *e = &ctx->video_surfaces[n];
|
2014-05-22 18:55:17 +00:00
|
|
|
if (!e->in_use && e->allocated) {
|
2013-11-05 21:06:32 +00:00
|
|
|
assert(e->w == w && e->h == h);
|
2014-03-17 17:21:11 +00:00
|
|
|
assert(e->chroma == chroma);
|
2014-05-22 18:55:17 +00:00
|
|
|
assert(e->rgb_format == rgb_format);
|
|
|
|
assert(e->rgb == rgb);
|
2014-08-11 12:03:53 +00:00
|
|
|
if (surface_index >= 0) {
|
|
|
|
struct surface_entry *other = &ctx->video_surfaces[surface_index];
|
|
|
|
if (other->age < e->age)
|
|
|
|
continue;
|
|
|
|
}
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
surface_index = n;
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-11 12:03:53 +00:00
|
|
|
if (surface_index >= 0)
|
|
|
|
goto done;
|
|
|
|
|
2013-11-05 21:06:32 +00:00
|
|
|
// Allocate new surface
|
|
|
|
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
|
|
|
|
struct surface_entry *e = &ctx->video_surfaces[n];
|
|
|
|
if (!e->in_use) {
|
|
|
|
assert(e->surface == VDP_INVALID_HANDLE);
|
2014-05-22 18:55:17 +00:00
|
|
|
assert(e->osurface == VDP_INVALID_HANDLE);
|
|
|
|
assert(!e->allocated);
|
2013-11-05 21:06:32 +00:00
|
|
|
e->chroma = chroma;
|
2014-05-22 18:55:17 +00:00
|
|
|
e->rgb_format = rgb_format;
|
|
|
|
e->rgb = rgb;
|
2013-11-05 21:06:32 +00:00
|
|
|
e->w = w;
|
|
|
|
e->h = h;
|
2015-05-29 11:48:18 +00:00
|
|
|
if (mp_vdpau_handle_preemption(ctx, NULL) >= 0) {
|
|
|
|
if (rgb) {
|
|
|
|
vdp_st = vdp->output_surface_create(ctx->vdp_device, rgb_format,
|
|
|
|
w, h, &e->osurface);
|
|
|
|
e->allocated = e->osurface != VDP_INVALID_HANDLE;
|
|
|
|
} else {
|
|
|
|
vdp_st = vdp->video_surface_create(ctx->vdp_device, chroma,
|
|
|
|
w, h, &e->surface);
|
|
|
|
e->allocated = e->surface != VDP_INVALID_HANDLE;
|
|
|
|
}
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when allocating surface");
|
2014-05-22 18:55:17 +00:00
|
|
|
} else {
|
2015-05-29 11:48:18 +00:00
|
|
|
e->allocated = false;
|
|
|
|
e->osurface = VDP_INVALID_HANDLE;
|
|
|
|
e->surface = VDP_INVALID_HANDLE;
|
2014-05-22 18:55:17 +00:00
|
|
|
}
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
surface_index = n;
|
|
|
|
goto done;
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
done: ;
|
|
|
|
struct mp_image *mpi = NULL;
|
|
|
|
if (surface_index >= 0)
|
|
|
|
mpi = create_ref(ctx, surface_index);
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&ctx->pool_lock);
|
|
|
|
|
|
|
|
if (!mpi)
|
|
|
|
MP_ERR(ctx, "no surfaces available in mp_vdpau_get_video_surface\n");
|
|
|
|
return mpi;
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx,
|
|
|
|
VdpChromaType chroma, int w, int h)
|
|
|
|
{
|
|
|
|
return mp_vdpau_get_surface(ctx, chroma, 0, false, w, h);
|
|
|
|
}
|
|
|
|
|
2015-06-20 20:26:57 +00:00
|
|
|
// Allocate a new mp_vdpau_ctx for the given X11 display and load the vdpau
// entry points via win_x11_init_vdpau_procs().
//  log:     log context stored in the new context
//  x11:     X11 display stored in the new context
//  probing: forwarded unchanged to win_x11_init_vdpau_procs()
// Returns the new context, or NULL if win_x11_init_vdpau_procs() fails (the
// partially set up context is destroyed in that case).
struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11,
                                                bool probing)
{
    struct mp_vdpau_ctx *dev = talloc_ptrtype(NULL, dev);

    *dev = (struct mp_vdpau_ctx) {
        .log = log,
        .x11 = x11,
        .preemption_counter = 1,
        .getimg_surface = VDP_INVALID_HANDLE,
        .hwctx = {
            .type = HWDEC_VDPAU,
            .priv = dev,
            .vdpau_ctx = dev,
            .download_image = download_image,
        },
    };

    // The preemption lock is recursive; the surface pool lock is a plain mutex.
    mpthread_mutex_init_recursive(&dev->preempt_lock);
    pthread_mutex_init(&dev->pool_lock, NULL);

    // Must happen before any failure path reaches mp_vdpau_destroy(), which
    // decides what to release by comparing handles to VDP_INVALID_HANDLE.
    mark_vdpau_objects_uninitialized(dev);

    if (win_x11_init_vdpau_procs(dev, probing) >= 0)
        return dev;

    mp_vdpau_destroy(dev);
    return NULL;
}
|
|
|
|
|
|
|
|
void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx)
|
|
|
|
{
|
2014-03-19 18:57:08 +00:00
|
|
|
struct vdp_functions *vdp = &ctx->vdp;
|
2013-11-05 21:06:32 +00:00
|
|
|
VdpStatus vdp_st;
|
|
|
|
|
|
|
|
for (int i = 0; i < MAX_VIDEO_SURFACES; i++) {
|
|
|
|
// can't hold references past context lifetime
|
|
|
|
assert(!ctx->video_surfaces[i].in_use);
|
|
|
|
if (ctx->video_surfaces[i].surface != VDP_INVALID_HANDLE) {
|
|
|
|
vdp_st = vdp->video_surface_destroy(ctx->video_surfaces[i].surface);
|
2013-12-21 17:05:23 +00:00
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_video_surface_destroy");
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
2014-05-22 18:55:17 +00:00
|
|
|
if (ctx->video_surfaces[i].osurface != VDP_INVALID_HANDLE) {
|
|
|
|
vdp_st = vdp->output_surface_destroy(ctx->video_surfaces[i].osurface);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
|
|
|
|
}
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
|
2015-01-22 16:47:14 +00:00
|
|
|
if (ctx->getimg_mixer)
|
|
|
|
mp_vdpau_mixer_destroy(ctx->getimg_mixer);
|
|
|
|
if (ctx->getimg_surface != VDP_INVALID_HANDLE) {
|
|
|
|
vdp_st = vdp->output_surface_destroy(ctx->getimg_surface);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
|
|
|
|
}
|
|
|
|
|
vdpau: force driver to report preemption early
Another fix for the crazy and insane nvidia preemption behavior.
This time, the situation is that we are using vo_opengl with vdpau
interop, and that vdpau got preempted in the background while mpv was
sitting idly. This can be e.g. reproduced by using:
--force-window=immediate --idle --hwdec=vdpau
and switching VTs. Then after switching back, load a video file.
This will not let mp_vdpau_handle_preemption() perform preemption
recovery, simply because it will do so only once vdp_decoder_create()
has been called. There are some other API calls which trigger
preemption, but many don't.
Due to the way the libavcodec API works, vdp_decoder_create() is way too
late. It does so when get_format returns. It notices creating the
decoder fails, and continues calling get_format without the vdpau
format. We could perhaps force it to reinit again (by adding a call to
vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but
this seems too much of a mess.
Solve it by calling API in mp_vdpau_handle_preemption() that empirically
does trigger preemption: output_surface_put_bits_native(). This call is
useless, and in fact should be doing nothing (empty update VdpRect).
There's the slight chance that in theory it will slow down operation,
but in practice it's bound to be harmless. It's the likely cheapest and
simplest API call I've found that can trigger the fallback this way.
(The driver is closed source, so it was up to trial & error.)
Also, when initializing decoding, allow initial preemption recovery,
which is needed to pass the test mention above.
2016-01-25 15:42:54 +00:00
|
|
|
if (ctx->preemption_obj != VDP_INVALID_HANDLE) {
|
|
|
|
vdp_st = vdp->output_surface_destroy(ctx->preemption_obj);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
|
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
if (vdp->device_destroy && ctx->vdp_device != VDP_INVALID_HANDLE) {
|
2013-11-05 21:06:32 +00:00
|
|
|
vdp_st = vdp->device_destroy(ctx->vdp_device);
|
2013-12-21 17:05:23 +00:00
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_device_destroy");
|
2013-11-05 21:06:32 +00:00
|
|
|
}
|
|
|
|
|
vdpau: make mp_vdpau_ctx thread-safe
Preparation so that various things related to video can run in different
threads. One part to this is making the video surface pool safe.
Another issue is the preemption mechanism, which continues to give us
endless pain. In theory, it's probably impossible to handle preemption
100% correctly and race-condition free, unless _every_ API user in the
same process uses a central, shared mutex to protect every vdpau API
call. Otherwise, it could happen that one thread recovering from
preemption allocates a vdpau object, and then another thread (which
hasn't recovered yet) happens to free the object for some reason. This
is because objects are referenced by integer IDs, and vdpau will reuse
IDs invalidated by preemption after preemption.
Since this is unreasonable, we're as lazy as possible when it comes to
handling preemption. We don't do any locking around the mp_vdpau_ctx
fields that are normally immutable, and only can change when recovering
from preemption. In practice, this will work, because it doesn't matter
whether not-yet-recovered components use the old or new vdpau function
pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to
check for the preemption event and possibly to recover, and that
function acquires the lock protecting the preemption state.
Another possible source of potential grandiose fuckup is the fact that
the vdpau library is in fact only a tiny wrapper, and the real driver
lives in a shared object dlopen()ed by the wrapper. The wrapper also
calls dlclose() on the loaded shared object in some situations. One
possible danger is that failing to recreate a vdpau device could trigger
a dlclose() call, and that glibc might unload it. Currently, glibc
implements full unloading of shared objects on the last dlclose() call,
and if that happens, calls to function pointers pointing into the shared
object would obviously crash. Fortunately, it seems the existing vdpau
wrapper won't trigger this case and never unloads the driver once it's
successfully loaded.
To make it short, vdpau preemption opens up endless depths of WTFs.
Another issue is that any participating thread might do the preemption
recovery (whichever comes first). This is easier to implement. The
implication is that we need threadsafe xlib. We just hope and pray that
this will actually work. This also means that once vdpau code is
actually involved in a multithreaded scenario, we have to add
XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
|
|
|
pthread_mutex_destroy(&ctx->pool_lock);
|
|
|
|
pthread_mutex_destroy(&ctx->preempt_lock);
|
2013-11-05 21:06:32 +00:00
|
|
|
talloc_free(ctx);
|
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API
Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This
file is named so because because it's written against the "old"
libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau").
Add support for the "new" libavcodec vdpau support. This was recently
added and replaces the "old" vdpau parts. (In fact, Libav is about to
deprecate and remove the "old" API without deprecation grace period,
so we have to support it now. Moreover, there will probably be no Libav
release which supports both, so the transition is even less smooth than
we could hope, and we have to support both the old and new API.)
Whether the old or new API is used is checked by a configure test: if
the new API is found, it is used, otherwise the old API is assumed.
Some details might be handled differently. Especially display preemption
is a bit problematic with the "new" libavcodec vdpau support: it wants
to keep a pointer to a specific vdpau API function (which can be driver
specific, because preemption might switch drivers). Also, surface IDs
are now directly stored in AVFrames (and mp_images), so they can't be
forced to VDP_INVALID_HANDLE on preemption. (This changes even with
older libavcodec versions, because mp_image always uses the newer
representation to make vo_vdpau.c simpler.)
Decoder initialization in the new code tries to deal with codec
profiles, while the old code always uses the highest profile per codec.
Surface allocation changes. Since the decoder won't call config() in
vo_vdpau.c on video size change anymore, we allow allocating surfaces
of arbitrary size instead of locking it to what the VO was configured.
The non-hwdec code also has slightly different allocation behavior now.
Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau
doesn't work anymore (a warning suggesting the --hwdec option is
printed instead).
2013-07-27 23:49:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type,
|
|
|
|
VdpYCbCrFormat *out_pixel_format)
|
|
|
|
{
|
|
|
|
VdpChromaType chroma = VDP_CHROMA_TYPE_420;
|
|
|
|
VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1;
|
|
|
|
|
|
|
|
switch (imgfmt) {
|
|
|
|
case IMGFMT_420P:
|
|
|
|
ycbcr = VDP_YCBCR_FORMAT_YV12;
|
|
|
|
break;
|
|
|
|
case IMGFMT_NV12:
|
|
|
|
ycbcr = VDP_YCBCR_FORMAT_NV12;
|
|
|
|
break;
|
|
|
|
case IMGFMT_YUYV:
|
|
|
|
ycbcr = VDP_YCBCR_FORMAT_YUYV;
|
|
|
|
chroma = VDP_CHROMA_TYPE_422;
|
|
|
|
break;
|
|
|
|
case IMGFMT_UYVY:
|
|
|
|
ycbcr = VDP_YCBCR_FORMAT_UYVY;
|
|
|
|
chroma = VDP_CHROMA_TYPE_422;
|
|
|
|
break;
|
|
|
|
case IMGFMT_VDPAU:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (out_chroma_type)
|
|
|
|
*out_chroma_type = chroma;
|
|
|
|
if (out_pixel_format)
|
|
|
|
*out_pixel_format = ycbcr;
|
|
|
|
return true;
|
|
|
|
}
|
2014-05-04 08:50:32 +00:00
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
// Map an mpv image format to the matching vdpau RGBA format.
// Only IMGFMT_BGR32 is accepted (mapped to VDP_RGBA_FORMAT_B8G8R8A8).
//  out_rgba_format: if not NULL, receives the format on success
// Returns false (leaving the output untouched) for any other format.
bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format)
{
    if (imgfmt != IMGFMT_BGR32)
        return false;

    if (out_rgba_format)
        *out_rgba_format = VDP_RGBA_FORMAT_B8G8R8A8;
    return true;
}
|
|
|
|
|
2014-05-04 08:50:32 +00:00
|
|
|
// Use mp_vdpau_get_video_surface, and upload mpi to it. Return NULL on failure.
|
|
|
|
// If the image is already a vdpau video surface, just return a reference.
|
|
|
|
struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx,
|
|
|
|
struct mp_image *mpi)
|
|
|
|
{
|
|
|
|
struct vdp_functions *vdp = &ctx->vdp;
|
|
|
|
VdpStatus vdp_st;
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
if (mpi->imgfmt == IMGFMT_VDPAU || mpi->imgfmt == IMGFMT_VDPAU_OUTPUT)
|
2014-05-04 08:50:32 +00:00
|
|
|
return mp_image_new_ref(mpi);
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
VdpChromaType chroma = (VdpChromaType)-1;
|
|
|
|
VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1;
|
|
|
|
VdpRGBAFormat rgbafmt = (VdpRGBAFormat)-1;
|
|
|
|
bool rgb = !mp_vdpau_get_format(mpi->imgfmt, &chroma, &ycbcr);
|
|
|
|
if (rgb && !mp_vdpau_get_rgb_format(mpi->imgfmt, &rgbafmt))
|
2014-05-04 08:50:32 +00:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
struct mp_image *hwmpi =
|
2014-05-22 18:55:17 +00:00
|
|
|
mp_vdpau_get_surface(ctx, chroma, rgbafmt, rgb, mpi->w, mpi->h);
|
2014-05-04 08:50:32 +00:00
|
|
|
if (!hwmpi)
|
|
|
|
return NULL;
|
|
|
|
|
2014-08-04 22:18:57 +00:00
|
|
|
struct mp_image *src = mpi;
|
|
|
|
if (mpi->stride[0] < 0)
|
|
|
|
src = mp_image_new_copy(mpi); // unflips it when copying
|
|
|
|
|
2014-05-22 18:55:17 +00:00
|
|
|
if (hwmpi->imgfmt == IMGFMT_VDPAU) {
|
|
|
|
VdpVideoSurface surface = (intptr_t)hwmpi->planes[3];
|
2014-08-04 22:18:57 +00:00
|
|
|
const void *destdata[3] = {src->planes[0], src->planes[2], src->planes[1]};
|
|
|
|
if (src->imgfmt == IMGFMT_NV12)
|
2014-05-22 18:55:17 +00:00
|
|
|
destdata[1] = destdata[2];
|
|
|
|
vdp_st = vdp->video_surface_put_bits_y_cb_cr(surface,
|
2014-08-04 22:18:57 +00:00
|
|
|
ycbcr, destdata, src->stride);
|
2014-05-22 18:55:17 +00:00
|
|
|
} else {
|
|
|
|
VdpOutputSurface rgb_surface = (intptr_t)hwmpi->planes[3];
|
|
|
|
vdp_st = vdp->output_surface_put_bits_native(rgb_surface,
|
2014-08-04 22:18:57 +00:00
|
|
|
&(const void *){src->planes[0]},
|
|
|
|
&(uint32_t){src->stride[0]},
|
2014-05-22 18:55:17 +00:00
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when uploading surface");
|
2014-05-04 08:50:32 +00:00
|
|
|
|
2014-08-04 22:18:57 +00:00
|
|
|
if (src != mpi)
|
|
|
|
talloc_free(src);
|
|
|
|
|
2014-05-04 08:50:32 +00:00
|
|
|
mp_image_copy_attributes(hwmpi, mpi);
|
|
|
|
return hwmpi;
|
|
|
|
}
|
2014-05-27 23:37:53 +00:00
|
|
|
|
|
|
|
bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx)
|
|
|
|
{
|
|
|
|
struct vdp_functions *vdp = &ctx->vdp;
|
|
|
|
VdpStatus vdp_st;
|
|
|
|
char const* info = NULL;
|
|
|
|
vdp_st = vdp->get_information_string(&info);
|
|
|
|
CHECK_VDP_WARNING(ctx, "Error when calling vdp_get_information_string");
|
|
|
|
return vdp_st == VDP_STATUS_OK && info && strstr(info, "VAAPI");
|
|
|
|
}
|