1
0
mirror of https://github.com/mpv-player/mpv synced 2024-12-22 06:42:03 +00:00
mpv/video/vdpau.c

654 lines
21 KiB
C
Raw Normal View History

vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_vdpau.h>
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
#include "vdpau.h"
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
#include "osdep/threads.h"
#include "osdep/timer.h"
#include "video/out/x11_common.h"
#include "img_format.h"
#include "mp_image.h"
#include "mp_image_pool.h"
#include "vdpau_mixer.h"
// Download path for IMGFMT_VDPAU frames via mp_image_hw_download().
// Returns NULL without attempting a download if mpi is not IMGFMT_VDPAU, or
// if mp_vdpau_mixed_frame_get() reports something for it. Otherwise returns
// whatever mp_image_hw_download() returns. hwctx is not used here.
static struct mp_image *download_image_yuv(struct mp_hwdec_ctx *hwctx,
                                           struct mp_image *mpi,
                                           struct mp_image_pool *swpool)
{
    // The format test comes first so that mp_vdpau_mixed_frame_get() is only
    // ever called on IMGFMT_VDPAU images.
    bool direct_download = mpi->imgfmt == IMGFMT_VDPAU &&
                           !mp_vdpau_mixed_frame_get(mpi);

    return direct_download ? mp_image_hw_download(mpi, swpool) : NULL;
}
// Ensure ctx->getimg_surface is a valid output surface of at least w x h,
// destroying and recreating it when it is missing or too small.
// The caller is expected to hold ctx->pool_lock.
// Returns false if the surface could not be created.
static bool getimg_surface_ensure(struct mp_vdpau_ctx *ctx, int w, int h)
{
    // CHECK_VDP_WARNING() is used without a status argument, so these two
    // locals keep their conventional names.
    struct vdp_functions *vdp = &ctx->vdp;
    VdpStatus vdp_st;

    bool reusable = ctx->getimg_surface != VDP_INVALID_HANDLE &&
                    ctx->getimg_w >= w && ctx->getimg_h >= h;
    if (reusable)
        return true;

    if (ctx->getimg_surface != VDP_INVALID_HANDLE) {
        vdp_st = vdp->output_surface_destroy(ctx->getimg_surface);
        CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
    }
    ctx->getimg_surface = VDP_INVALID_HANDLE;

    vdp_st = vdp->output_surface_create(ctx->vdp_device,
                                        VDP_RGBA_FORMAT_B8G8R8A8, w, h,
                                        &ctx->getimg_surface);
    CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_create");
    if (vdp_st != VDP_STATUS_OK)
        return false;

    ctx->getimg_w = w;
    ctx->getimg_h = h;
    return true;
}

// Copy a IMGFMT_VDPAU or IMGFMT_VDPAU_OUTPUT image into a software image.
// download_image_yuv() is tried first; if it yields nothing, the image is
// rendered with the context's mixer into a cached output surface (sized by
// the display size from mp_image_params_get_dsize()) and read back into an
// IMGFMT_BGR0 image taken from swpool.
// Returns the new image, or NULL for other formats or on any failure.
static struct mp_image *download_image(struct mp_hwdec_ctx *hwctx,
                                       struct mp_image *mpi,
                                       struct mp_image_pool *swpool)
{
    bool vdpau_format = mpi->imgfmt == IMGFMT_VDPAU ||
                        mpi->imgfmt == IMGFMT_VDPAU_OUTPUT;
    if (!vdpau_format)
        return NULL;

    struct mp_vdpau_ctx *ctx = hwctx->ctx;
    struct vdp_functions *vdp = &ctx->vdp;
    VdpStatus vdp_st;

    int w, h;
    mp_image_params_get_dsize(&mpi->params, &w, &h);

    struct mp_image *sw_img = download_image_yuv(hwctx, mpi, swpool);
    if (sw_img)
        return sw_img;

    // Abuse this lock for our own purposes. It could use its own lock instead.
    pthread_mutex_lock(&ctx->pool_lock);

    if (!getimg_surface_ensure(ctx, w, h))
        goto fail;

    // The mixer is created on first use and kept in the context.
    if (!ctx->getimg_mixer)
        ctx->getimg_mixer = mp_vdpau_mixer_create(ctx, ctx->log);

    VdpRect src_rc = { .x1 = mpi->w, .y1 = mpi->h };
    VdpRect dst_rc = { .x1 = w, .y1 = h };
    if (mp_vdpau_mixer_render(ctx->getimg_mixer, NULL, ctx->getimg_surface,
                              &dst_rc, mpi, &src_rc) < 0)
        goto fail;

    // Allocate with the surface's dimensions (it can be larger than w x h when
    // reused); the image is cropped to w x h below.
    sw_img = mp_image_pool_get(swpool, IMGFMT_BGR0,
                               ctx->getimg_w, ctx->getimg_h);
    if (!sw_img)
        goto fail;

    void *planes[] = { sw_img->planes[0] };
    uint32_t pitches[] = { sw_img->stride[0] };
    vdp_st = vdp->output_surface_get_bits_native(ctx->getimg_surface, NULL,
                                                 planes, pitches);
    CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_get_bits_native");
    if (vdp_st != VDP_STATUS_OK)
        goto fail;

    mp_image_set_size(sw_img, w, h);
    mp_image_copy_attributes(sw_img, mpi);
    pthread_mutex_unlock(&ctx->pool_lock);
    return sw_img;

fail:
    talloc_free(sw_img);
    MP_WARN(ctx, "Error copying image from GPU.\n");
    pthread_mutex_unlock(&ctx->pool_lock);
    return NULL;
}
// Reset every VDPAU handle cached in the context to "not created": all
// entries of the video surface pool, the device, and the dummy object used
// for preemption detection. Nothing is destroyed through the VDPAU API here;
// the handles are simply overwritten. handle_preemption() calls this once a
// preemption has been flagged.
static void mark_vdpau_objects_uninitialized(struct mp_vdpau_ctx *ctx)
{
    for (int i = 0; i < MAX_VIDEO_SURFACES; i++) {
        ctx->video_surfaces[i].surface = VDP_INVALID_HANDLE;
        ctx->video_surfaces[i].osurface = VDP_INVALID_HANDLE;
        ctx->video_surfaces[i].allocated = false;
    }
    ctx->vdp_device = VDP_INVALID_HANDLE;
    ctx->preemption_obj = VDP_INVALID_HANDLE;
}
// Preemption callback registered on the VDPAU device by
// win_x11_init_vdpau_procs(). It only records the event: is_preempted is set
// while holding preempt_lock, and the actual recovery is left to
// handle_preemption().
//  device:  unused
//  context: the struct mp_vdpau_ctx passed at registration
static void preemption_callback(VdpDevice device, void *context)
{
    struct mp_vdpau_ctx *ctx = context;

    pthread_mutex_lock(&ctx->preempt_lock);
    ctx->is_preempted = true;
    pthread_mutex_unlock(&ctx->preempt_lock);
}
// Create a VDPAU device on ctx->x11 and load all VDPAU entry points listed in
// video/vdpau_functions.inc.
//
// On success, ctx->vdp and ctx->get_proc_address are replaced, an existing
// ctx->av_device_ref is patched to point at the new device, a dummy output
// surface is stored in ctx->preemption_obj, and preemption_callback() is
// registered on the device.
//  probing: if true, a device creation failure is logged at verbose level
//           instead of as an error. While ctx->is_preempted is set, such a
//           failure is logged at debug level regardless.
// Returns 0 on success, -1 on failure.
static int win_x11_init_vdpau_procs(struct mp_vdpau_ctx *ctx, bool probing)
{
    Display *x11 = ctx->x11;
    VdpStatus vdp_st;

    // Don't operate on ctx->vdp directly, so that even if init fails, ctx->vdp
    // will have the function pointers from the previous successful init, and
    // won't randomly make other code crash on calling NULL pointers.
    struct vdp_functions vdp = {0};

    if (!x11)
        return -1;

    struct vdp_function {
        const int id;
        int offset;
    };

    // Table of (VDPAU function id, offset of the matching pointer in
    // struct vdp_functions), terminated by an entry with offset -1.
    static const struct vdp_function vdp_func[] = {
#define VDP_FUNCTION(_, macro_name, mp_name) {macro_name, offsetof(struct vdp_functions, mp_name)},
#include "video/vdpau_functions.inc"
#undef VDP_FUNCTION
        {0, -1}
    };

    VdpGetProcAddress *get_proc_address;
    vdp_st = vdp_device_create_x11(x11, DefaultScreen(x11), &ctx->vdp_device,
                                   &get_proc_address);
    if (vdp_st != VDP_STATUS_OK) {
        if (ctx->is_preempted) {
            MP_DBG(ctx, "Error calling vdp_device_create_x11 while preempted: %d\n",
                   vdp_st);
        } else {
            int lev = probing ? MSGL_V : MSGL_ERR;
            mp_msg(ctx->log, lev, "Error when calling vdp_device_create_x11: %d\n",
                   vdp_st);
        }
        return -1;
    }

    for (const struct vdp_function *dsc = vdp_func; dsc->offset >= 0; dsc++) {
        vdp_st = get_proc_address(ctx->vdp_device, dsc->id,
                                  (void **)((char *)&vdp + dsc->offset));
        if (vdp_st != VDP_STATUS_OK) {
            // get_error_string may not have been loaded yet at this point.
            MP_ERR(ctx, "Error when calling vdp_get_proc_address(function "
                   "id %d): %s\n", dsc->id,
                   vdp.get_error_string ? vdp.get_error_string(vdp_st) : "?");
            // NOTE(review): the device created above is not destroyed on this
            // path - confirm whether that is intended.
            return -1;
        }
    }

    ctx->vdp = vdp;
    ctx->get_proc_address = get_proc_address;

    // libav* does not deal with preemption, and there is no concept of
    // switching the device context on the fly. So an already exported
    // AVHWDeviceContext is updated in place with the new device. This probably
    // violates the libav* API, but it's the best we can do.
    if (ctx->av_device_ref) {
        AVHWDeviceContext *hwctx = (void *)ctx->av_device_ref->data;
        AVVDPAUDeviceContext *vdctx = hwctx->hwctx;

        vdctx->device = ctx->vdp_device;
        vdctx->get_proc_address = ctx->get_proc_address;
    }

    // Dummy 1x1 output surface. mp_vdpau_handle_preemption() issues a no-op
    // output_surface_put_bits_native() call on it, which empirically makes
    // the driver report a pending preemption early.
    vdp_st = vdp.output_surface_create(ctx->vdp_device, VDP_RGBA_FORMAT_B8G8R8A8,
                                       1, 1, &ctx->preemption_obj);
    if (vdp_st != VDP_STATUS_OK) {
        MP_ERR(ctx, "Could not create dummy object: %s\n",
               vdp.get_error_string(vdp_st));
        return -1;
    }

    vdp.preemption_callback_register(ctx->vdp_device, preemption_callback, ctx);
    return 0;
}
// Try to recover from a display preemption flagged in ctx->is_preempted.
// Return values:
//   0: no preemption is pending, nothing was done
//   1: the device was successfully re-initialized (preemption_counter is
//      incremented)
//  -1: still preempted; re-initialization failed or was skipped because the
//      last failed attempt was less than a second ago
static int handle_preemption(struct mp_vdpau_ctx *ctx)
{
    if (!ctx->is_preempted)
        return 0;

    mark_vdpau_objects_uninitialized(ctx);

    // Report the preemption only once per preemption event.
    if (!ctx->preemption_user_notified) {
        MP_ERR(ctx, "Got display preemption notice! Will attempt to recover.\n");
        ctx->preemption_user_notified = true;
    }

    /* Trying to initialize seems to be quite slow, so only try once a
     * second to avoid using 100% CPU. */
    bool throttled = ctx->last_preemption_retry_fail &&
                     mp_time_sec() - ctx->last_preemption_retry_fail < 1.0;
    if (throttled)
        return -1;

    if (win_x11_init_vdpau_procs(ctx, false) >= 0) {
        ctx->preemption_user_notified = false;
        ctx->last_preemption_retry_fail = 0;
        ctx->is_preempted = false;
        ctx->preemption_counter++;
        MP_INFO(ctx, "Recovered from display preemption.\n");
        return 1;
    }

    // Remember when this attempt failed, for the throttle above.
    ctx->last_preemption_retry_fail = mp_time_sec();
    return -1;
}
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
// Check whether vdpau display preemption happened. The caller provides a
// preemption counter, which contains the logical timestamp of the last
// preemption handled by the caller. The counter can be 0 for init.
// If counter is NULL, only ever return -1 or 1.
// Return values:
// -1: the display is currently preempted, and vdpau can't be used
// 0: a preemption event happened, and the caller must recover
// (*counter is updated, and a second call will report status ok)
// 1: everything is fine, no preemption happened
int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter)
{
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
int r = 1;
pthread_mutex_lock(&ctx->preempt_lock);
vdpau: force driver to report preemption early Another fix for the crazy and insane nvidia preemption behavior. This time, the situation is that we are using vo_opengl with vdpau interop, and that vdpau got preempted in the background while mpv was sitting idly. This can be e.g. reproduced by using: --force-window=immediate --idle --hwdec=vdpau and switching VTs. Then after switching back, load a video file. This will not let mp_vdpau_handle_preemption() perform preemption recovery, simply because it will do so only once vdp_decoder_create() has been called. There are some other API calls which trigger preemption, but many don't. Due to the way the libavcodec API works, vdp_decoder_create() is way too late. It does so when get_format returns. It notices creating the decoder fails, and continues calling get_format without the vdpau format. We could perhaps force it to reinit again (by adding a call to vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but this seems too much of a mess. Solve it by calling API in mp_vdpau_handle_preemption() that empirically does trigger preemption: output_surface_put_bits_native(). This call is useless, and in fact should be doing nothing (empty update VdpRect). There's the slight chance that in theory it will slow down operation, but in practice it's bound to be harmless. It's the likely cheapest and simplest API call I've found that can trigger the fallback this way. (The driver is closed source, so it was up to trial & error.) Also, when initializing decoding, allow initial preemption recovery, which is needed to pass the test mention above.
2016-01-25 15:42:54 +00:00
const void *p[4] = {&(uint32_t){0}};
uint32_t stride[4] = {4};
VdpRect rc = {0};
ctx->vdp.output_surface_put_bits_native(ctx->preemption_obj, p, stride, &rc);
// First time init
if (counter && !*counter)
*counter = ctx->preemption_counter;
if (handle_preemption(ctx) < 0)
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
r = -1;
if (counter && r > 0 && *counter < ctx->preemption_counter) {
*counter = ctx->preemption_counter;
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
r = 0; // signal recovery after preemption
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
pthread_mutex_unlock(&ctx->preempt_lock);
return r;
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
// Per-image bookkeeping record: create_ref() allocates one for every
// mp_image it returns, and release_decoder_surface() receives it back as
// its argument to locate the pool slot that must be marked unused again.
struct surface_ref {
// Context that owns the video_surfaces[] pool the slot lives in.
struct mp_vdpau_ctx *ctx;
// Index of the referenced slot within ctx->video_surfaces[].
int index;
};
static void release_decoder_surface(void *ptr)
{
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
struct surface_ref *r = ptr;
struct mp_vdpau_ctx *ctx = r->ctx;
pthread_mutex_lock(&ctx->pool_lock);
assert(ctx->video_surfaces[r->index].in_use);
ctx->video_surfaces[r->index].in_use = false;
pthread_mutex_unlock(&ctx->pool_lock);
talloc_free(r);
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
static struct mp_image *create_ref(struct mp_vdpau_ctx *ctx, int index)
{
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
struct surface_entry *e = &ctx->video_surfaces[index];
assert(!e->in_use);
e->in_use = true;
e->age = ctx->age_counter++;
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
struct surface_ref *ref = talloc_ptrtype(NULL, ref);
*ref = (struct surface_ref){ctx, index};
struct mp_image *res =
mp_image_new_custom_ref(NULL, ref, release_decoder_surface);
if (res) {
mp_image_setfmt(res, e->rgb ? IMGFMT_VDPAU_OUTPUT : IMGFMT_VDPAU);
mp_image_set_size(res, e->w, e->h);
res->planes[0] = (void *)"dummy"; // must be non-NULL, otherwise arbitrary
res->planes[3] = (void *)(intptr_t)(e->rgb ? e->osurface : e->surface);
}
return res;
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
}
static struct mp_image *mp_vdpau_get_surface(struct mp_vdpau_ctx *ctx,
VdpChromaType chroma,
VdpRGBAFormat rgb_format,
bool rgb, int w, int h)
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
{
struct vdp_functions *vdp = &ctx->vdp;
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
int surface_index = -1;
VdpStatus vdp_st;
if (rgb) {
chroma = (VdpChromaType)-1;
} else {
rgb_format = (VdpChromaType)-1;
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
pthread_mutex_lock(&ctx->pool_lock);
// Destroy all unused surfaces that don't have matching parameters
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
struct surface_entry *e = &ctx->video_surfaces[n];
if (!e->in_use && e->allocated) {
if (e->w != w || e->h != h || e->rgb != rgb ||
e->chroma != chroma || e->rgb_format != rgb_format)
{
if (e->rgb) {
vdp_st = vdp->output_surface_destroy(e->osurface);
} else {
vdp_st = vdp->video_surface_destroy(e->surface);
}
CHECK_VDP_WARNING(ctx, "Error when destroying surface");
e->surface = e->osurface = VDP_INVALID_HANDLE;
e->allocated = false;
}
}
}
// Try to find an existing unused surface
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
struct surface_entry *e = &ctx->video_surfaces[n];
if (!e->in_use && e->allocated) {
assert(e->w == w && e->h == h);
assert(e->chroma == chroma);
assert(e->rgb_format == rgb_format);
assert(e->rgb == rgb);
if (surface_index >= 0) {
struct surface_entry *other = &ctx->video_surfaces[surface_index];
if (other->age < e->age)
continue;
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
surface_index = n;
}
}
if (surface_index >= 0)
goto done;
// Allocate new surface
for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
struct surface_entry *e = &ctx->video_surfaces[n];
if (!e->in_use) {
assert(e->surface == VDP_INVALID_HANDLE);
assert(e->osurface == VDP_INVALID_HANDLE);
assert(!e->allocated);
e->chroma = chroma;
e->rgb_format = rgb_format;
e->rgb = rgb;
e->w = w;
e->h = h;
if (mp_vdpau_handle_preemption(ctx, NULL) >= 0) {
if (rgb) {
vdp_st = vdp->output_surface_create(ctx->vdp_device, rgb_format,
w, h, &e->osurface);
e->allocated = e->osurface != VDP_INVALID_HANDLE;
} else {
vdp_st = vdp->video_surface_create(ctx->vdp_device, chroma,
w, h, &e->surface);
e->allocated = e->surface != VDP_INVALID_HANDLE;
}
CHECK_VDP_WARNING(ctx, "Error when allocating surface");
} else {
e->allocated = false;
e->osurface = VDP_INVALID_HANDLE;
e->surface = VDP_INVALID_HANDLE;
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
surface_index = n;
goto done;
}
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
done: ;
struct mp_image *mpi = NULL;
if (surface_index >= 0)
mpi = create_ref(ctx, surface_index);
pthread_mutex_unlock(&ctx->pool_lock);
if (!mpi)
MP_ERR(ctx, "no surfaces available in mp_vdpau_get_video_surface\n");
return mpi;
}
// Get a YCbCr video surface of size w x h with the given chroma type.
// Thin wrapper around mp_vdpau_get_surface() that selects the non-RGB path;
// the result of that call is returned unchanged.
struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx,
                                            VdpChromaType chroma, int w, int h)
{
    // Video surfaces have no RGBA format, so 0 is passed as a placeholder.
    const bool rgb = false;
    struct mp_image *surface = mp_vdpau_get_surface(ctx, chroma, 0, rgb, w, h);
    return surface;
}
vdpau: crappy hack to allow initializing hw decoding after preemption If vo_opengl is used, and vo_opengl already created the vdpau interop (for whatever reason), and then preemption happens, and then you try to enable hw decoding, enabling it failed. The reason was that preemption recovery is not run at any point before libavcodec accesses the vdpau device. The actual impact was that with libmpv + opengl-cb use, hardware decoding was permanently broken after display mode switching (something that caused the display to get preempted at least with older drivers). With mpv CLI, you can for example enable hw decoding during playback, then disable it, VT switch to console, switch back to X, and try to enable hw decoding again. This is mostly because libav* does not deal with preemption, and because NVIDIA driver preemption behavior is horrible garbage. In addition to the API being misdesigned, the preemption callback is not called before you try to access the vdpau API, and then only with _some_ accesses. In summary, the preemption callback was never called, neither before nor after libavcodec tried to init the decoder. So we have to get mp_vdpau_handle_preemption() called before libavcodec accesses it. This in turn will do a dummy API access which usually triggers the preemption callback immediately (with NVIDIA's drivers). In addition, we have to update the AVHWDeviceContext's device. In theory it could change (in practice it usually seems to use handle "0"). Creating a new device would cause chaos, as we don't have a concept of switching the device context on the fly. So we simply update it directly. I'm fairly sure this violates the libav* API, but it's the best we can do.
2017-05-19 13:24:38 +00:00
// Installed as the hwctx restore_device callback. Runs the preemption check
// on the vdpau context stored in hwctx->ctx; the result is ignored.
static void recheck_preemption(struct mp_hwdec_ctx *hwctx)
{
    mp_vdpau_handle_preemption((struct mp_vdpau_ctx *)hwctx->ctx, NULL);
}
// Wrap the already opened vdpau device in a libavutil hardware device
// context and store it in ctx->av_device_ref (mirrored to
// ctx->hwctx.av_device_ref once allocation succeeded).
// Returns true on success. On failure ctx->av_device_ref is NULL.
static bool open_lavu_vdpau_device(struct mp_vdpau_ctx *ctx)
{
    ctx->av_device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VDPAU);
    if (ctx->av_device_ref == NULL)
        return false;

    // Hand our device handle and proc-address getter to libavutil.
    AVHWDeviceContext *device = (void *)ctx->av_device_ref->data;
    AVVDPAUDeviceContext *vdpau = device->hwctx;
    vdpau->get_proc_address = ctx->get_proc_address;
    vdpau->device = ctx->vdp_device;

    int err = av_hwdevice_ctx_init(ctx->av_device_ref);
    if (err < 0) {
        // The return value below relies on this resetting the field to NULL.
        av_buffer_unref(&ctx->av_device_ref);
    }

    ctx->hwctx.av_device_ref = ctx->av_device_ref;
    return ctx->av_device_ref != NULL;
}
// Create a vdpau context on the given X11 display connection.
//  log:     log context stored in the new context
//  x11:     display connection; it is stored, and mp_vdpau_destroy() does not
//           close it
//  probing: passed through to win_x11_init_vdpau_procs()
// Returns the new context, or NULL if loading the vdpau functions or creating
// the libavutil device wrapper failed (the partially set up context is
// destroyed in that case).
struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11,
                                                bool probing)
{
    struct mp_vdpau_ctx *ctx = talloc_ptrtype(NULL, ctx);
    *ctx = (struct mp_vdpau_ctx) {
        .x11 = x11,
        .log = log,
        .getimg_surface = VDP_INVALID_HANDLE,
        .preemption_counter = 1,
        .hwctx = {
            .ctx = ctx,
            .type = HWDEC_VDPAU,
            .restore_device = recheck_preemption,
            .download_image = download_image,
        },
    };

    mpthread_mutex_init_recursive(&ctx->preempt_lock);
    pthread_mutex_init(&ctx->pool_lock, NULL);

    mark_vdpau_objects_uninitialized(ctx);

    // Both setup steps share the same failure handling; the second one is
    // only attempted if the first succeeded.
    if (win_x11_init_vdpau_procs(ctx, probing) < 0 ||
        !open_lavu_vdpau_device(ctx))
    {
        mp_vdpau_destroy(ctx);
        return NULL;
    }

    return ctx;
}
void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx)
{
struct vdp_functions *vdp = &ctx->vdp;
VdpStatus vdp_st;
for (int i = 0; i < MAX_VIDEO_SURFACES; i++) {
// can't hold references past context lifetime
assert(!ctx->video_surfaces[i].in_use);
if (ctx->video_surfaces[i].surface != VDP_INVALID_HANDLE) {
vdp_st = vdp->video_surface_destroy(ctx->video_surfaces[i].surface);
2013-12-21 17:05:23 +00:00
CHECK_VDP_WARNING(ctx, "Error when calling vdp_video_surface_destroy");
}
if (ctx->video_surfaces[i].osurface != VDP_INVALID_HANDLE) {
vdp_st = vdp->output_surface_destroy(ctx->video_surfaces[i].osurface);
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
}
}
if (ctx->getimg_mixer)
mp_vdpau_mixer_destroy(ctx->getimg_mixer);
if (ctx->getimg_surface != VDP_INVALID_HANDLE) {
vdp_st = vdp->output_surface_destroy(ctx->getimg_surface);
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
}
av_buffer_unref(&ctx->av_device_ref);
vdpau: force driver to report preemption early Another fix for the crazy and insane nvidia preemption behavior. This time, the situation is that we are using vo_opengl with vdpau interop, and that vdpau got preempted in the background while mpv was sitting idly. This can be e.g. reproduced by using: --force-window=immediate --idle --hwdec=vdpau and switching VTs. Then after switching back, load a video file. This will not let mp_vdpau_handle_preemption() perform preemption recovery, simply because it will do so only once vdp_decoder_create() has been called. There are some other API calls which trigger preemption, but many don't. Due to the way the libavcodec API works, vdp_decoder_create() is way too late. It does so when get_format returns. It notices creating the decoder fails, and continues calling get_format without the vdpau format. We could perhaps force it to reinit again (by adding a call to vdpau.c, that checks for preemption, and sets hwdec_request_reinit), but this seems too much of a mess. Solve it by calling API in mp_vdpau_handle_preemption() that empirically does trigger preemption: output_surface_put_bits_native(). This call is useless, and in fact should be doing nothing (empty update VdpRect). There's the slight chance that in theory it will slow down operation, but in practice it's bound to be harmless. It's the likely cheapest and simplest API call I've found that can trigger the fallback this way. (The driver is closed source, so it was up to trial & error.) Also, when initializing decoding, allow initial preemption recovery, which is needed to pass the test mention above.
2016-01-25 15:42:54 +00:00
if (ctx->preemption_obj != VDP_INVALID_HANDLE) {
vdp_st = vdp->output_surface_destroy(ctx->preemption_obj);
CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
if (vdp->device_destroy && ctx->vdp_device != VDP_INVALID_HANDLE) {
vdp_st = vdp->device_destroy(ctx->vdp_device);
2013-12-21 17:05:23 +00:00
CHECK_VDP_WARNING(ctx, "Error when calling vdp_device_destroy");
}
vdpau: make mp_vdpau_ctx thread-safe Preparation so that various things related to video can run in different threads. One part to this is making the video surface pool safe. Another issue is the preemption mechanism, which continues to give us endless pain. In theory, it's probably impossible to handle preemption 100% correctly and race-condition free, unless _every_ API user in the same process uses a central, shared mutex to protect every vdpau API call. Otherwise, it could happen that one thread recovering from preemption allocates a vdpau object, and then another thread (which hasn't recovered yet) happens to free the object for some reason. This is because objects are referenced by integer IDs, and vdpau will reuse IDs invalidated by preemption after preemption. Since this is unreasonable, we're as lazy as possible when it comes to handling preemption. We don't do any locking around the mp_vdpau_ctx fields that are normally immutable, and only can change when recovering from preemption. In practice, this will work, because it doesn't matter whether not-yet-recovered components use the old or new vdpau function pointers or device ID. Code calls mp_vdpau_handle_preemption() anyway to check for the preemption event and possibly to recover, and that function acquires the lock protecting the preemption state. Another possible source of potential grandiose fuckup is the fact that the vdpau library is in fact only a tiny wrapper, and the real driver lives in a shared object dlopen()ed by the wrapper. The wrapper also calls dlclose() on the loaded shared object in some situations. One possible danger is that failing to recreate a vdpau device could trigger a dlclose() call, and that glibc might unload it. Currently, glibc implements full unloading of shared objects on the last dlclose() call, and if that happens, calls to function pointers pointing into the shared object would obviously crash. 
Fortunately, it seems the existing vdpau wrapper won't trigger this case and never unloads the driver once it's successfully loaded. To make it short, vdpau preemption opens up endless depths of WTFs. Another issue is that any participating thread might do the preemption recovery (whichever comes first). This is easier to implement. The implication is that we need threadsafe xlib. We just hope and pray that this will actually work. This also means that once vdpau code is actually involved in a multithreaded scenario, we have to add XInitThreads() to the X11 code.
2014-05-09 19:49:42 +00:00
pthread_mutex_destroy(&ctx->pool_lock);
pthread_mutex_destroy(&ctx->preempt_lock);
talloc_free(ctx);
vdpau: split off decoder parts, use "new" libavcodec vdpau hwaccel API Move the decoder parts from vo_vdpau.c to a new file vdpau_old.c. This file is named so because because it's written against the "old" libavcodec vdpau pseudo-decoder (e.g. "h264_vdpau"). Add support for the "new" libavcodec vdpau support. This was recently added and replaces the "old" vdpau parts. (In fact, Libav is about to deprecate and remove the "old" API without deprecation grace period, so we have to support it now. Moreover, there will probably be no Libav release which supports both, so the transition is even less smooth than we could hope, and we have to support both the old and new API.) Whether the old or new API is used is checked by a configure test: if the new API is found, it is used, otherwise the old API is assumed. Some details might be handled differently. Especially display preemption is a bit problematic with the "new" libavcodec vdpau support: it wants to keep a pointer to a specific vdpau API function (which can be driver specific, because preemption might switch drivers). Also, surface IDs are now directly stored in AVFrames (and mp_images), so they can't be forced to VDP_INVALID_HANDLE on preemption. (This changes even with older libavcodec versions, because mp_image always uses the newer representation to make vo_vdpau.c simpler.) Decoder initialization in the new code tries to deal with codec profiles, while the old code always uses the highest profile per codec. Surface allocation changes. Since the decoder won't call config() in vo_vdpau.c on video size change anymore, we allow allocating surfaces of arbitrary size instead of locking it to what the VO was configured. The non-hwdec code also has slightly different allocation behavior now. Enabling the old vdpau special decoders via e.g. --vd=lavc:h264_vdpau doesn't work anymore (a warning suggesting the --hwdec option is printed instead).
2013-07-27 23:49:45 +00:00
}
// Map an mpv image format to the matching vdpau chroma type and YCbCr format.
// Returns false for formats not listed here, leaving the outputs untouched.
// Otherwise returns true and writes the results to the output pointers that
// are non-NULL. IMGFMT_VDPAU is accepted, but has no YCbCr format and yields
// (VdpYCbCrFormat)-1 together with 4:2:0 chroma.
bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type,
                         VdpYCbCrFormat *out_pixel_format)
{
    VdpChromaType chroma_type;
    VdpYCbCrFormat pixel_format;

    if (imgfmt == IMGFMT_420P) {
        chroma_type = VDP_CHROMA_TYPE_420;
        pixel_format = VDP_YCBCR_FORMAT_YV12;
    } else if (imgfmt == IMGFMT_NV12) {
        chroma_type = VDP_CHROMA_TYPE_420;
        pixel_format = VDP_YCBCR_FORMAT_NV12;
    } else if (imgfmt == IMGFMT_YUYV) {
        chroma_type = VDP_CHROMA_TYPE_422;
        pixel_format = VDP_YCBCR_FORMAT_YUYV;
    } else if (imgfmt == IMGFMT_UYVY) {
        chroma_type = VDP_CHROMA_TYPE_422;
        pixel_format = VDP_YCBCR_FORMAT_UYVY;
    } else if (imgfmt == IMGFMT_VDPAU) {
        chroma_type = VDP_CHROMA_TYPE_420;
        pixel_format = (VdpYCbCrFormat)-1;
    } else {
        return false;
    }

    if (out_chroma_type != NULL)
        *out_chroma_type = chroma_type;
    if (out_pixel_format != NULL)
        *out_pixel_format = pixel_format;
    return true;
}
// Map an mpv image format to the matching vdpau RGBA format.
// Only IMGFMT_BGR32 is known (-> VDP_RGBA_FORMAT_B8G8R8A8); every other
// format returns false and leaves *out_rgba_format untouched.
// out_rgba_format may be NULL if only the yes/no answer is needed.
bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format)
{
    if (imgfmt != IMGFMT_BGR32)
        return false;

    if (out_rgba_format != NULL)
        *out_rgba_format = VDP_RGBA_FORMAT_B8G8R8A8;
    return true;
}
// Upload the software image mpi to a surface obtained from
// mp_vdpau_get_surface() and return that surface.
// If the image is already a vdpau surface (IMGFMT_VDPAU or
// IMGFMT_VDPAU_OUTPUT), just return a new reference to it.
// Formats known to mp_vdpau_get_format() are uploaded to a video surface;
// anything else must be known to mp_vdpau_get_rgb_format() and is uploaded
// to an output surface.
// Returns NULL if the format is unsupported, if no surface is available, or
// if the temporary copy needed for a flipped image could not be created.
// A failing upload call is only reported as a warning; the surface is still
// returned in that case.
struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx,
                                               struct mp_image *mpi)
{
    struct vdp_functions *vdp = &ctx->vdp;
    VdpStatus vdp_st;

    if (mpi->imgfmt == IMGFMT_VDPAU || mpi->imgfmt == IMGFMT_VDPAU_OUTPUT)
        return mp_image_new_ref(mpi);

    VdpChromaType chroma = (VdpChromaType)-1;
    VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1;
    VdpRGBAFormat rgbafmt = (VdpRGBAFormat)-1;
    bool rgb = !mp_vdpau_get_format(mpi->imgfmt, &chroma, &ycbcr);
    if (rgb && !mp_vdpau_get_rgb_format(mpi->imgfmt, &rgbafmt))
        return NULL;

    // A negative stride means the image is stored flipped. Upload from a
    // temporary copy instead, made before a surface is taken so that a
    // failed copy needs no further cleanup.
    struct mp_image *src = mpi;
    if (mpi->stride[0] < 0) {
        src = mp_image_new_copy(mpi); // unflips it when copying
        if (!src)
            return NULL; // copy failed; must not dereference it below
    }

    struct mp_image *hwmpi =
        mp_vdpau_get_surface(ctx, chroma, rgbafmt, rgb, mpi->w, mpi->h);
    if (!hwmpi) {
        if (src != mpi)
            talloc_free(src);
        return NULL;
    }

    if (hwmpi->imgfmt == IMGFMT_VDPAU) {
        VdpVideoSurface surface = (intptr_t)hwmpi->planes[3];
        // Planes 1 and 2 are passed in swapped order.
        const void *destdata[3] = {src->planes[0], src->planes[2], src->planes[1]};
        // For NV12 the second pointer must be src->planes[1].
        if (src->imgfmt == IMGFMT_NV12)
            destdata[1] = destdata[2];
        vdp_st = vdp->video_surface_put_bits_y_cb_cr(surface,
                                                     ycbcr, destdata, src->stride);
    } else {
        VdpOutputSurface rgb_surface = (intptr_t)hwmpi->planes[3];
        vdp_st = vdp->output_surface_put_bits_native(rgb_surface,
                                    &(const void *){src->planes[0]},
                                    &(uint32_t){src->stride[0]},
                                    NULL);
    }
    CHECK_VDP_WARNING(ctx, "Error when uploading surface");

    if (src != mpi)
        talloc_free(src);

    mp_image_copy_attributes(hwmpi, mpi);
    return hwmpi;
}
// Guess whether the vdpau implementation is an emulation layer: returns true
// if the driver's information string could be queried and contains "VAAPI".
// A failing query is reported as a warning and yields false.
bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx)
{
    struct vdp_functions *vdp = &ctx->vdp;
    VdpStatus vdp_st;
    char const *description = NULL;

    vdp_st = vdp->get_information_string(&description);
    CHECK_VDP_WARNING(ctx, "Error when calling vdp_get_information_string");

    if (vdp_st != VDP_STATUS_OK || description == NULL)
        return false;
    return strstr(description, "VAAPI") != NULL;
}
// Destroy callback for contexts made by vdpau_create_standalone(): destroys
// the vdpau context, then closes the X11 connection it was using.
static void vdpau_destroy_standalone(struct mp_hwdec_ctx *ctx)
{
    struct mp_vdpau_ctx *vdpau_ctx = ctx->ctx;
    // Fetch the display first: mp_vdpau_destroy() frees the context.
    Display *x11 = vdpau_ctx->x11;

    mp_vdpau_destroy(vdpau_ctx);
    XCloseDisplay(x11);
}
struct mp_hwdec_ctx *vdpau_create_standalone(struct mpv_global *global,
struct mp_log *plog, bool probing)
{
XInitThreads();
Display *display = XOpenDisplay(NULL);
if (!display)
return NULL;
struct mp_vdpau_ctx *vdp = mp_vdpau_create_device_x11(plog, display, probing);
if (!vdp) {
XCloseDisplay(display);
return NULL;
}
vdp->hwctx.emulated = mp_vdpau_guess_if_emulated(vdp);
vdp->hwctx.destroy = vdpau_destroy_standalone;
return &vdp->hwctx;
}