mpv/video/out/dr_helper.c

163 lines
4.6 KiB
C
Raw Normal View History

#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
#include <libavutil/buffer.h>
#include "mpv_talloc.h"
#include "misc/dispatch.h"
#include "osdep/atomic.h"
#include "video/mp_image.h"
#include "dr_helper.h"
struct dr_helper {
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
pthread_mutex_t thread_lock;
pthread_t thread;
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
bool thread_valid; // (POSIX defines no "unset" pthread_t value yet)
struct mp_dispatch_queue *dispatch;
atomic_ullong dr_in_flight;
struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h,
int stride_align, int flags);
void *get_image_ctx;
};
static void dr_helper_destroy(void *ptr)
{
struct dr_helper *dr = ptr;
// All references must have been freed on destruction, or we'll have
// dangling pointers.
assert(atomic_load(&dr->dr_in_flight) == 0);
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
pthread_mutex_destroy(&dr->thread_lock);
}
struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch,
struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h,
int stride_align, int flags),
void *get_image_ctx)
{
struct dr_helper *dr = talloc_ptrtype(NULL, dr);
talloc_set_destructor(dr, dr_helper_destroy);
*dr = (struct dr_helper){
.dispatch = dispatch,
.dr_in_flight = ATOMIC_VAR_INIT(0),
.get_image = get_image,
.get_image_ctx = get_image_ctx,
};
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
pthread_mutex_init(&dr->thread_lock, NULL);
return dr;
}
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
void dr_helper_acquire_thread(struct dr_helper *dr)
{
pthread_mutex_lock(&dr->thread_lock);
assert(!dr->thread_valid); // fails on API user errors
dr->thread_valid = true;
dr->thread = pthread_self();
pthread_mutex_unlock(&dr->thread_lock);
}
void dr_helper_release_thread(struct dr_helper *dr)
{
pthread_mutex_lock(&dr->thread_lock);
// Fails on API user errors.
assert(dr->thread_valid);
assert(pthread_equal(dr->thread, pthread_self()));
dr->thread_valid = false;
pthread_mutex_unlock(&dr->thread_lock);
}
struct free_dr_context {
struct dr_helper *dr;
AVBufferRef *ref;
};
static void dr_thread_free(void *ptr)
{
struct free_dr_context *ctx = ptr;
unsigned long long v = atomic_fetch_add(&ctx->dr->dr_in_flight, -1);
assert(v); // value before sub is 0 - unexpected underflow.
av_buffer_unref(&ctx->ref);
talloc_free(ctx);
}
static void free_dr_buffer_on_dr_thread(void *opaque, uint8_t *data)
{
struct free_dr_context *ctx = opaque;
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
struct dr_helper *dr = ctx->dr;
pthread_mutex_lock(&dr->thread_lock);
bool on_this_thread =
dr->thread_valid && pthread_equal(ctx->dr->thread, pthread_self());
pthread_mutex_unlock(&dr->thread_lock);
// The image could be unreffed even on the DR thread. In practice, this
// matters most on DR destruction.
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
if (on_this_thread) {
dr_thread_free(ctx);
} else {
client API: fix potential deadlock problems by throwing more shit at it The render API (vo_libmpv) had potential deadlock problems with MPV_RENDER_PARAM_ADVANCED_CONTROL. This required vd-lavc-dr to be enabled (the default). I never observed these deadlocks in the wild (doesn't mean they didn't happen), although I could specifically provoke them with some code changes. The problem was mostly about DR (direct rendering, letting the video decoder write to OpenGL buffer memory). Allocating/freeing a DR image needs to be done on the OpenGL thread, even though _lots_ of threads are involved with handling images. Freeing a DR image is a special case that can happen any time. dr_helper.c does most of the evil magic of achieving this. Unfortunately, there was a (sort of) circular lock dependency: freeing an image while certain internal locks are held would trigger the user's context update callback, which in turn would call mpv_render_context_update(), which processed all pending free requests, and then acquire an internal lock - which the caller might not release until a further DR image could be freed. "Solve" this by making freeing DR images asynchronous. This is slightly risky, but actually not much. The DR images will be free'd eventually. The biggest disadvantage is probably that debugging might get trickier. Any solution to this problem will probably add images to free to some sort of queue, and then process it later. I considered making this more explicit (so there'd be a point where the caller forcibly waits for all queued items to be free'd), but discarded these ideas as this probably would only increase complexity. Another consequence is that freeing DR images on the GL thread is not synchronous anymore. Instead, it mpv_render_context_update() will do it with a delay. This seems roundabout, but doesn't actually change anything, and avoids additional code. This also fixes that the render API required the render API user to remain on the same thread, even though this wasn't documented. As such, it was a bug. OpenGL essentially forces you to do all GL usage on a single thread, but in theory the API user could for example move the GL context to another thread. The API bump is because I think you can't make enough noise about this. Since we don't backport fixes to old versions, I'm specifically stating that old versions are broken, and I'm supplying workarounds. Internally, dr_helper_create() does not use pthread_self() anymore, thus the vo.c change. I think it's better to make binding to the current thread as explicit as possible. Of course it's not sure that this fixes all deadlocks (probably not).
2019-09-26 12:07:33 +00:00
mp_dispatch_enqueue(dr->dispatch, dr_thread_free, ctx);
}
}
struct get_image_cmd {
struct dr_helper *dr;
int imgfmt, w, h, stride_align, flags;
struct mp_image *res;
};
static void sync_get_image(void *ptr)
{
struct get_image_cmd *cmd = ptr;
struct dr_helper *dr = cmd->dr;
cmd->res = dr->get_image(dr->get_image_ctx, cmd->imgfmt, cmd->w, cmd->h,
cmd->stride_align, cmd->flags);
if (!cmd->res)
return;
// We require exactly 1 AVBufferRef.
assert(cmd->res->bufs[0]);
assert(!cmd->res->bufs[1]);
// Apply some magic to get it free'd on the DR thread as well. For this to
// work, we create a dummy-ref that aliases the original ref, which is why
// the original ref must be writable in the first place. (A newly allocated
// image should be always writable of course.)
assert(mp_image_is_writeable(cmd->res));
struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context);
*ctx = (struct free_dr_context){
.dr = dr,
.ref = cmd->res->bufs[0],
};
AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size,
free_dr_buffer_on_dr_thread, ctx, 0);
MP_HANDLE_OOM(new_ref);
cmd->res->bufs[0] = new_ref;
atomic_fetch_add(&dr->dr_in_flight, 1);
}
struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt,
int w, int h, int stride_align, int flags)
{
struct get_image_cmd cmd = {
.dr = dr,
.imgfmt = imgfmt,
.w = w, .h = h,
.stride_align = stride_align,
.flags = flags,
};
mp_dispatch_run(dr->dispatch, sync_get_image, &cmd);
return cmd.res;
}