vo_opengl: move timers to struct ra

In order to prevent code duplication and keep the ra abstraction as small as possible, `ra` only implements the actual timer queries; it does not do pooling/averaging of the results. This is instead moved to a ra-neutral struct timer_pool in utils.c.
2017-08-05 18:20:45 +02:00 · 2017-08-05 18:20:45 +02:00 · f2298f394e
parent a680c643eb
commit f2298f394e
9 changed files with 247 additions and 168 deletions
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@ -61,9 +61,10 @@

 #define GL_TEXTURE_RECTANGLE              0x84F5

-// --- GL 3.3
+// --- GL 3.3 or GL_ARB_timer_query

 #define GL_TIME_ELAPSED                   0x88BF
+#define GL_TIMESTAMP                      0x8E28

 // --- GL 4.3 or GL_ARB_debug_output

--- a/video/out/opengl/gl_utils.c
+++ b/video/out/opengl/gl_utils.c
@ -282,125 +282,6 @@ void gl_set_debug_logger(GL *gl, struct mp_log *log)
        gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log);
 }

-// Maximum number of simultaneous query objects to keep around. Reducing this
-// number might cause rendering to block until the result of a previous query is
-// available
-#define QUERY_OBJECT_NUM 8
-
-struct gl_timer {
-    GL *gl;
-    GLuint query[QUERY_OBJECT_NUM];
-    int query_idx;
-
-    // these numbers are all in nanoseconds
-    uint64_t samples[PERF_SAMPLE_COUNT];
-    int sample_idx;
-    int sample_count;
-
-    uint64_t avg_sum;
-    uint64_t peak;
-};
-
-struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
-{
-    assert(timer);
-    struct mp_pass_perf res = {
-        .count = timer->sample_count,
-        .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT,
-        .peak = timer->peak,
-        .samples = timer->samples,
-    };
-
-    res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT];
-
-    if (timer->sample_count > 0) {
-        res.avg  = timer->avg_sum / timer->sample_count;
-    }
-
-    return res;
-}
-
-struct gl_timer *gl_timer_create(GL *gl)
-{
-    struct gl_timer *timer = talloc_ptrtype(NULL, timer);
-    *timer = (struct gl_timer){ .gl = gl };
-
-    if (gl->GenQueries)
-        gl->GenQueries(QUERY_OBJECT_NUM, timer->query);
-
-    return timer;
-}
-
-void gl_timer_free(struct gl_timer *timer)
-{
-    if (!timer)
-        return;
-
-    GL *gl = timer->gl;
-    if (gl && gl->DeleteQueries) {
-        // this is a no-op on already uninitialized queries
-        gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query);
-    }
-
-    talloc_free(timer);
-}
-
-static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
-{
-    // Input res into the buffer and grab the previous value
-    uint64_t old = timer->samples[timer->sample_idx];
-    timer->samples[timer->sample_idx++] = new;
-    timer->sample_idx %= PERF_SAMPLE_COUNT;
-
-    // Update average and sum
-    timer->avg_sum = timer->avg_sum + new - old;
-    timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
-
-    // Update peak if necessary
-    if (new >= timer->peak) {
-        timer->peak = new;
-    } else if (timer->peak == old) {
-        // It's possible that the last peak was the value we just removed,
-        // if so we need to scan for the new peak
-        uint64_t peak = new;
-        for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
-            peak = MPMAX(peak, timer->samples[i]);
-        timer->peak = peak;
-    }
-}
-
-// If no free query is available, this can block. Shouldn't ever happen in
-// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM)
-// IMPORTANT: only one gl_timer object may ever be active at a single time.
-// The caling code *MUST* ensure this
-void gl_timer_start(struct gl_timer *timer)
-{
-    assert(timer);
-    GL *gl = timer->gl;
-    if (!gl->BeginQuery)
-        return;
-
-    // Get the next query object
-    GLuint id = timer->query[timer->query_idx++];
-    timer->query_idx %= QUERY_OBJECT_NUM;
-
-    // If this query object already holds a result, we need to get and
-    // record it first
-    if (gl->IsQuery(id)) {
-        GLuint64 elapsed;
-        gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed);
-        gl_timer_record(timer, elapsed);
-    }
-
-    gl->BeginQuery(GL_TIME_ELAPSED, id);
-}
-
-void gl_timer_stop(GL *gl)
-{
-    if (gl->EndQuery)
-        gl->EndQuery(GL_TIME_ELAPSED);
-}
-
 // Upload a texture, going through a PBO. PBO supposedly can facilitate
 // asynchronous copy from CPU to GPU, so this is an optimization. Note that
 // changing format/type/tex_w/tex_h or reusing the PBO in the same frame can
--- a/video/out/opengl/gl_utils.h
+++ b/video/out/opengl/gl_utils.h
@ -55,14 +55,6 @@ void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);

 void gl_set_debug_logger(GL *gl, struct mp_log *log);

-struct gl_timer;
-
-struct gl_timer *gl_timer_create(GL *gl);
-void gl_timer_free(struct gl_timer *timer);
-void gl_timer_start(struct gl_timer *timer);
-void gl_timer_stop(GL *gl);
-struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
-
 #define NUM_PBO_BUFFERS 3

 struct gl_pbo_upload {
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@ -264,6 +264,10 @@ enum {
    RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region
 };

+// This is an opaque type provided by the implementation, but we want to at
+// least give it a saner name than void* for code readability purposes.
+typedef void ra_timer;
+
 // Rendering API entrypoints. (Note: there are some additional hidden features
 // you need to take care of. For example, hwdec mapping will be provided
 // separately from ra, but might need to call into ra private code.)
@ -347,6 +351,24 @@ struct ra_fns {
    // This is an extremely common operation.
    void (*renderpass_run)(struct ra *ra,
                           const struct ra_renderpass_run_params *params);
+
+    // Create a timer object. Returns NULL on failure, or if timers are
+    // unavailable.
+    ra_timer *(*timer_create)(struct ra *ra);
+
+    void (*timer_destroy)(struct ra *ra, ra_timer *timer);
+
+    // Start recording a timer. Note that valid usage requires you to pair
+    // every start with a stop. Trying to start a timer twice, or trying to
+    // stop a timer before having started it, constitutes invalid usage.
+    void (*timer_start)(struct ra *ra, ra_timer *timer);
+
+    // Stop recording a timer. This also returns any results that have been
+    // measured since the last usage of this ra_timer. It's important to note
+    // that GPU timer measurements are asynchronous, so this function does not
+    // always produce a value - and the values it does produce are typically
+    // delayed by a few frames. When no value is available, this returns 0.
+    uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer);
 };

 struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params);
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@ -840,6 +840,75 @@ static void gl_renderpass_run(struct ra *ra,
    pass_gl->first_run = false;
 }

+// Timers in GL use query objects, and are asynchronous. So pool a few of
+// these together. GL_QUERY_OBJECT_NUM should be large enough to avoid this
+// ever blocking. We can afford to throw query objects around, there's no
+// practical limit on them and their overhead is small.
+
+#define GL_QUERY_OBJECT_NUM 8
+
+struct gl_timer {
+    GLuint start[GL_QUERY_OBJECT_NUM]; // queries stamped by gl_timer_start
+    GLuint stop[GL_QUERY_OBJECT_NUM];  // queries stamped by gl_timer_stop
+    int idx;          // slot used by the current start/stop pair
+    uint64_t result;  // stop - start read back in gl_timer_start, else 0
+};
+
+static ra_timer *gl_timer_create(struct ra *ra) // NULL if GenQueries is absent
+{
+    GL *gl = ra_gl_get(ra);
+
+    if (!gl->GenQueries) // NOTE(review): QueryCounter/IsQuery are used later unchecked - confirm they load together
+        return NULL;
+
+    struct gl_timer *timer = talloc_zero(NULL, struct gl_timer);
+    gl->GenQueries(GL_QUERY_OBJECT_NUM, timer->start); // one query per slot
+    gl->GenQueries(GL_QUERY_OBJECT_NUM, timer->stop);
+
+    return (ra_timer *)timer;
+}
+
+static void gl_timer_destroy(struct ra *ra, ra_timer *ratimer) // NULL: no-op
+{
+    if (!ratimer)
+        return;
+
+    GL *gl = ra_gl_get(ra);
+    struct gl_timer *timer = ratimer;
+
+    gl->DeleteQueries(GL_QUERY_OBJECT_NUM, timer->start); // both query sets
+    gl->DeleteQueries(GL_QUERY_OBJECT_NUM, timer->stop);
+    talloc_free(timer); // then the gl_timer struct itself
+}
+
+static void gl_timer_start(struct ra *ra, ra_timer *ratimer)
+{
+    GL *gl = ra_gl_get(ra);
+    struct gl_timer *timer = ratimer;
+
+    // If this query object already contains a result, we need to retrieve it
+    timer->result = 0; // what gl_timer_stop returns when nothing is read back
+    if (gl->IsQuery(timer->start[timer->idx])) {
+        uint64_t start = 0, stop = 0;
+        gl->GetQueryObjectui64v(timer->start[timer->idx], GL_QUERY_RESULT, &start);
+        gl->GetQueryObjectui64v(timer->stop[timer->idx], GL_QUERY_RESULT, &stop);
+        timer->result = stop - start; // difference of the slot's two stamps
+    }
+
+    gl->QueryCounter(timer->start[timer->idx], GL_TIMESTAMP); // new start stamp
+}
+
+static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+    GL *gl = ra_gl_get(ra);
+    struct gl_timer *timer = ratimer;
+
+    gl->QueryCounter(timer->stop[timer->idx++], GL_TIMESTAMP); // end stamp
+    timer->idx %= GL_QUERY_OBJECT_NUM; // advance to the next slot, wrapping
+
+    return timer->result; // set by the preceding gl_timer_start; may be 0
+}
+
 static struct ra_fns ra_fns_gl = {
    .destroy                = gl_destroy,
    .tex_create             = gl_tex_create,
@ -853,4 +922,8 @@ static struct ra_fns ra_fns_gl = {
    .renderpass_create      = gl_renderpass_create,
    .renderpass_destroy     = gl_renderpass_destroy,
    .renderpass_run         = gl_renderpass_run,
+    .timer_create           = gl_timer_create,
+    .timer_destroy          = gl_timer_destroy,
+    .timer_start            = gl_timer_start,
+    .timer_stop             = gl_timer_stop,
 };
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/opengl/shader_cache.c
@ -16,6 +16,7 @@
 #include "shader_cache.h"
 #include "formats.h"
 #include "ra_gl.h"
+#include "utils.h"

 // Force cache flush if more than this number of shaders is created.
 #define SC_MAX_ENTRIES 48
@ -42,7 +43,7 @@ struct sc_entry {
    struct sc_cached_uniform *cached_uniforms;
    int num_cached_uniforms;
    bstr total;
-    struct gl_timer *timer;
+    struct timer_pool *timer;
 };

 struct gl_shader_cache {
@ -108,11 +109,6 @@ struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
 // Unbind all GL state managed by sc - the current program and texture units.
 static void gl_sc_reset(struct gl_shader_cache *sc)
 {
-    GL *gl = sc->gl;
-
-    if (sc->needs_reset)
-        gl_timer_stop(gl);
-
    sc->prelude_text.len = 0;
    sc->header_text.len = 0;
    sc->text.len = 0;
@ -135,7 +131,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
        struct sc_entry *e = sc->entries[n];
        if (e->pass)
            sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
-        gl_timer_free(e->timer);
+        timer_pool_destroy(e->timer);
        talloc_free(e);
    }
    sc->num_entries = 0;
@ -541,12 +537,7 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
 // 1. Unbind the program and all textures.
 // 2. Reset the sc state and prepare for a new shader program. (All uniforms
 //    and fragment operations needed for the next program have to be re-added.)
-// The return value is a mp_pass_perf containing performance metrics for the
-// execution of the generated shader. (Note: execution is measured up until
-// the corresponding gl_sc_reset call)
-// 'type' must be valid
-static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc,
-                                          enum ra_renderpass_type type)
+static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type type)
 {
    int glsl_version = sc->ra->glsl_version;
    int glsl_es = sc->ra->glsl_es ? glsl_version : 0;
@ -703,7 +694,7 @@ static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc,
        entry = talloc_ptrtype(NULL, entry);
        *entry = (struct sc_entry){
            .total = bstrdup(entry, *hash_total),
-            .timer = gl_timer_create(sc->gl),
+            .timer = timer_pool_create(sc->ra),
        };
        for (int n = 0; n < sc->num_uniforms; n++) {
            struct sc_cached_uniform u = {0};
@ -716,7 +707,7 @@ static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc,
        MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
    }
    if (!entry->pass)
-        return (struct mp_pass_perf){0}; // not sure what to return?
+        return;

    assert(sc->num_uniforms == entry->num_cached_uniforms);
    assert(sc->num_uniforms == entry->pass->params.num_inputs);
@ -725,20 +716,21 @@ static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc,
    for (int n = 0; n < sc->num_uniforms; n++)
        update_uniform(sc, entry, &sc->uniforms[n], n);

-    gl_timer_start(entry->timer);
    sc->current_shader = entry;
-
-    return gl_timer_measure(entry->timer);
 }

 struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
                                        struct ra_tex *target,
                                        void *ptr, size_t num)
 {
-    struct mp_pass_perf perf = gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER);
+    struct timer_pool *timer = NULL;
+
+    gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER);
    if (!sc->current_shader)
        goto error;

+    timer = sc->current_shader->timer;
+
    struct mp_rect full_rc = {0, 0, target->params.w, target->params.h};

    struct ra_renderpass_run_params run = {
@ -752,20 +744,26 @@ struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
        .scissors = full_rc,
    };

+    timer_pool_start(timer);
    sc->ra->fns->renderpass_run(sc->ra, &run);
+    timer_pool_stop(timer);

 error:
    gl_sc_reset(sc);
-    return perf;
+    return timer_pool_measure(timer);
 }

 struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
                                           int w, int h, int d)
 {
-    struct mp_pass_perf perf = gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE);
+    struct timer_pool *timer = NULL;
+
+    gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE);
    if (!sc->current_shader)
        goto error;

+    timer = sc->current_shader->timer;
+
    struct ra_renderpass_run_params run = {
        .pass = sc->current_shader->pass,
        .values = sc->values,
@ -773,9 +771,11 @@ struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
        .compute_groups = {w, h, d},
    };

+    timer_pool_start(timer);
    sc->ra->fns->renderpass_run(sc->ra, &run);
+    timer_pool_stop(timer);

 error:
    gl_sc_reset(sc);
-    return perf;
+    return timer_pool_measure(timer);
 }
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@ -1,4 +1,5 @@
 #include "common/msg.h"
+#include "video/out/vo.h"
 #include "utils.h"

 // Standard parallel 2D projection, except y1 < y0 means that the coordinate
@ -118,3 +119,105 @@ void fbotex_uninit(struct fbotex *fbo)
        *fbo = (struct fbotex) {0};
    }
 }
+
+struct timer_pool {
+    struct ra *ra;    // backend whose timer_* functions are called
+    ra_timer *timer;  // handle returned by ra->fns->timer_create
+    bool running; // detect invalid usage
+
+    uint64_t samples[PERF_SAMPLE_COUNT]; // ring buffer of nonzero results
+    int sample_idx;   // next slot of samples[] to overwrite
+    int sample_count; // recorded samples, capped at PERF_SAMPLE_COUNT
+
+    uint64_t avg_sum; // running sum of the values held in samples[]
+    uint64_t peak;    // largest value tracked for samples[]
+};
+
+struct timer_pool *timer_pool_create(struct ra *ra) // NULL if no ra timer
+{
+    ra_timer *timer = ra->fns->timer_create(ra);
+    if (!timer)
+        return NULL;
+
+    struct timer_pool *pool = talloc(NULL, struct timer_pool);
+    if (!pool) {
+        ra->fns->timer_destroy(ra, timer); // don't leak the backend timer
+        return NULL;
+    }
+
+    *pool = (struct timer_pool){ .ra = ra, .timer = timer }; // rest zeroed
+    return pool;
+}
+
+void timer_pool_destroy(struct timer_pool *pool) // NULL is a no-op
+{
+    if (!pool)
+        return;
+
+    pool->ra->fns->timer_destroy(pool->ra, pool->timer); // backend handle
+    talloc_free(pool); // then the pool itself
+}
+
+void timer_pool_start(struct timer_pool *pool) // NULL is a no-op
+{
+    if (!pool)
+        return;
+
+    assert(!pool->running); // starting twice without a stop is invalid
+    pool->ra->fns->timer_start(pool->ra, pool->timer);
+    pool->running = true;
+}
+
+void timer_pool_stop(struct timer_pool *pool) // NULL is a no-op
+{
+    if (!pool)
+        return;
+
+    assert(pool->running); // stopping without a prior start is invalid
+    uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer);
+    pool->running = false;
+
+    if (res) { // a zero result is not recorded as a sample
+        // Input res into the buffer and grab the previous value
+        uint64_t old = pool->samples[pool->sample_idx];
+        pool->samples[pool->sample_idx++] = res;
+        pool->sample_idx %= PERF_SAMPLE_COUNT;
+
+        // Update average and sum
+        pool->avg_sum = pool->avg_sum + res - old; // old is 0 for an unused slot
+        pool->sample_count = MPMIN(pool->sample_count + 1, PERF_SAMPLE_COUNT);
+
+        // Update peak if necessary
+        if (res >= pool->peak) {
+            pool->peak = res;
+        } else if (pool->peak == old) {
+            // It's possible that the last peak was the value we just removed,
+            // if so we need to scan for the new peak
+            uint64_t peak = res;
+            for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
+                peak = MPMAX(peak, pool->samples[i]);
+            pool->peak = peak;
+        }
+    }
+}
+
+// Report the pool's current statistics (all zeros when pool is NULL).
+struct mp_pass_perf timer_pool_measure(struct timer_pool *pool)
+{
+    if (!pool)
+        return (struct mp_pass_perf){0};
+
+    // Bias ring offsets by n before reducing: C's % keeps the dividend's
+    // sign, so the unbiased forms could produce negative array indices.
+    const int n = PERF_SAMPLE_COUNT;
+    struct mp_pass_perf res = {
+        .count = pool->sample_count,
+        .index = (pool->sample_idx - pool->sample_count + n) % n,
+        .last = pool->samples[(pool->sample_idx - 1 + n) % n],
+        .peak = pool->peak,
+        .samples = pool->samples,
+    };
+    if (pool->sample_count > 0)
+        res.avg = pool->avg_sum / pool->sample_count;
+    return res;
+}
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@ -77,3 +77,12 @@ bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log,
 #define FBOTEX_FUZZY_W 1
 #define FBOTEX_FUZZY_H 2
 #define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H)
+
+// A wrapper around ra_timer that does result pooling, averaging etc.
+struct timer_pool;
+
+struct timer_pool *timer_pool_create(struct ra *ra);
+void timer_pool_destroy(struct timer_pool *pool);
+void timer_pool_start(struct timer_pool *pool);
+void timer_pool_stop(struct timer_pool *pool);
+struct mp_pass_perf timer_pool_measure(struct timer_pool *pool);
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@ -269,8 +269,8 @@ struct gl_video {
    struct pass_info pass_redraw[PASS_INFO_MAX];
    struct pass_info *pass;
    int pass_idx;
-    struct gl_timer *upload_timer;
-    struct gl_timer *blit_timer;
+    struct timer_pool *upload_timer;
+    struct timer_pool *blit_timer;

    // intermediate textures
    struct saved_tex saved_tex[SHADER_MAX_SAVED];
@ -3097,11 +3097,11 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
                    rc.y1 = -p->vp_h - p->dst_rect.y0;
                    rc.y0 = -p->vp_h - p->dst_rect.y1;
                }
-                gl_timer_start(p->blit_timer);
+                timer_pool_start(p->blit_timer);
                p->ra->fns->blit(p->ra, target, p->output_fbo.tex,
                                 rc.x0, rc.y0, &rc);
-                gl_timer_stop(gl);
-                pass_record(p, gl_timer_measure(p->blit_timer));
+                timer_pool_stop(p->blit_timer);
+                pass_record(p, timer_pool_measure(p->blit_timer));
            }
        }
    }
@ -3233,7 +3233,6 @@ static void reinterleave_vdpau(struct gl_video *p, struct gl_hwdec_frame *frame,
 // Returns false on failure.
 static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id)
 {
-    GL *gl = p->gl;
    struct video_image *vimg = &p->image;

    if (vimg->id == id)
@ -3255,10 +3254,10 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t
        struct gl_hwdec_frame gl_frame = {0};

        pass_describe(p, "map frame (hwdec)");
-        gl_timer_start(p->upload_timer);
+        timer_pool_start(p->upload_timer);
        bool ok = p->hwdec->driver->map_frame(p->hwdec, vimg->mpi, &gl_frame) >= 0;
-        gl_timer_stop(gl);
-        pass_record(p, gl_timer_measure(p->upload_timer));
+        timer_pool_stop(p->upload_timer);
+        pass_record(p, timer_pool_measure(p->upload_timer));

        vimg->hwdec_mapped = true;
        if (ok) {
@ -3290,7 +3289,7 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t
    // Software decoding
    assert(mpi->num_planes == p->plane_count);

-    gl_timer_start(p->upload_timer);
+    timer_pool_start(p->upload_timer);
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *plane = &vimg->planes[n];

@ -3310,10 +3309,10 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t
            MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
        }
    }
-    gl_timer_stop(gl);
+    timer_pool_stop(p->upload_timer);
    const char *mode = p->using_dr_path ? "DR" : p->opts.pbo ? "PBO" : "naive";
    pass_describe(p, "upload frame (%s)", mode);
-    pass_record(p, gl_timer_measure(p->upload_timer));
+    pass_record(p, timer_pool_measure(p->upload_timer));

    return true;

@ -3488,12 +3487,10 @@ static void check_gl_features(struct gl_video *p)

 static void init_gl(struct gl_video *p)
 {
-    GL *gl = p->gl;
-
    debug_check_gl(p, "before init_gl");

-    p->upload_timer = gl_timer_create(gl);
-    p->blit_timer = gl_timer_create(gl);
+    p->upload_timer = timer_pool_create(p->ra);
+    p->blit_timer = timer_pool_create(p->ra);

    debug_check_gl(p, "after init_gl");

@ -3515,8 +3512,9 @@ void gl_video_uninit(struct gl_video *p)
    ra_tex_free(p->ra, &p->lut_3d_texture);
    gl->DeleteBuffers(1, &p->hdr_peak_ssbo);

-    gl_timer_free(p->upload_timer);
-    gl_timer_free(p->blit_timer);
+    timer_pool_destroy(p->upload_timer);
+    timer_pool_destroy(p->blit_timer);
+
    for (int i = 0; i < PASS_INFO_MAX; i++) {
        talloc_free(p->pass_fresh[i].desc.start);
        talloc_free(p->pass_redraw[i].desc.start);