/*
 * Copyright (c) 2016 Philip Langdale <philipl@overt.org>
 *
 * This file is part of mpv.
 *
 * mpv is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * mpv is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This hwdec implements an optimized output path using CUDA->OpenGL
 * or CUDA->Vulkan interop for frame data that is stored in CUDA
 * device memory. Although it is not explicit in the code here, the
 * only practical way to get data in this form is from the
 * nvdec/cuvid decoder.
 */

#include "config.h"
#include "hwdec_cuda.h"

#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_cuda.h>
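
// Wrapper for CUDA driver API calls: trace the call, return 0 on
// CUDA_SUCCESS, otherwise log the error name/description and return -1.
// Failures are logged at verbose level while probing, error level otherwise.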
int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
{
    const char *err_name;
    const char *err_string;

    struct cuda_hw_priv *p = hw->priv;
    int level = hw->probing ? MSGL_V : MSGL_ERR;

    MP_TRACE(hw, "Calling %s\n", func);

    if (err == CUDA_SUCCESS)
        return 0;

    p->cu->cuGetErrorName(err, &err_name);
    p->cu->cuGetErrorString(err, &err_string);

    MP_MSG(hw, level, "%s failed", func);
    if (err_name && err_string)
        MP_MSG(hw, level, " -> %s: %s", err_name, err_string);
    MP_MSG(hw, level, "\n");

    return -1;
}

#define CHECK_CU(x) check_cu(hw, (x), #x)
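
// Backend interop initialisers, tried in order until one succeeds. The
// successful one fills in the ext_* hooks and CUDA contexts in cuda_hw_priv.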
const static cuda_interop_init interop_inits[] = {
#if HAVE_GL
    cuda_gl_init,
#endif
#if HAVE_VULKAN
    cuda_vk_init,
#endif
    NULL
};
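
// Load the CUDA driver API, let one of the backend interops set up the CUDA
// contexts, then wrap the decode context in an AVHWDeviceContext and register
// it so the decoder can produce IMGFMT_CUDA frames for this hwdec.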
static int cuda_init(struct ra_hwdec *hw)
{
    AVBufferRef *hw_device_ctx = NULL;
    CUcontext dummy;
    int ret = 0;
    struct cuda_hw_priv *p = hw->priv;
    CudaFunctions *cu;
    int level = hw->probing ? MSGL_V : MSGL_ERR;

    ret = cuda_load_functions(&p->cu, NULL);
    if (ret != 0) {
        MP_MSG(hw, level, "Failed to load CUDA symbols\n");
        return -1;
    }
    cu = p->cu;

    ret = CHECK_CU(cu->cuInit(0));
    if (ret < 0)
        return -1;

    // Initialise CUDA context from backend.
    for (int i = 0; interop_inits[i]; i++) {
        if (interop_inits[i](hw)) {
            break;
        }
    }

    if (!p->ext_init || !p->ext_uninit) {
        MP_MSG(hw, level,
               "CUDA hwdec only works with OpenGL or Vulkan backends.\n");
        return -1;
    }

    hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
    if (!hw_device_ctx)
        goto error;

    AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;

    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
    device_hwctx->cuda_ctx = p->decode_ctx;

    ret = av_hwdevice_ctx_init(hw_device_ctx);
    if (ret < 0) {
        MP_MSG(hw, level, "av_hwdevice_ctx_init failed\n");
        goto error;
    }

    ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (ret < 0)
        goto error;

    p->hwctx = (struct mp_hwdec_ctx) {
        .driver_name = hw->driver->name,
        .av_device_ref = hw_device_ctx,
        .hw_imgfmt = IMGFMT_CUDA,
    };
    hwdec_devices_add(hw->devs, &p->hwctx);
    return 0;

error:
    av_buffer_unref(&hw_device_ctx);
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return -1;
}

static void cuda_uninit(struct ra_hwdec *hw)
{
    struct cuda_hw_priv *p = hw->priv;
    CudaFunctions *cu = p->cu;

    hwdec_devices_remove(hw->devs, &p->hwctx);
    av_buffer_unref(&p->hwctx.av_device_ref);

    if (p->decode_ctx && p->decode_ctx != p->display_ctx)
        CHECK_CU(cu->cuCtxDestroy(p->decode_ctx));

    if (p->display_ctx)
        CHECK_CU(cu->cuCtxDestroy(p->display_ctx));

    cuda_free_functions(&p->cu);
}
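
// The mapper functions below only have the mapper at hand, so resolve the
// hwdec through mapper->owner for CHECK_CU.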
#undef CHECK_CU
#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
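
// Describe the frame as its underlying software format (hw_subfmt) and let
// the backend interop (ext_init) create the per-plane textures and matching
// CUDA arrays under the display context.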
static int mapper_init(struct ra_hwdec_mapper *mapper)
{
    struct cuda_hw_priv *p_owner = mapper->owner->priv;
    struct cuda_mapper_priv *p = mapper->priv;
    CUcontext dummy;
    CudaFunctions *cu = p_owner->cu;
    int ret = 0, eret = 0;

    p->display_ctx = p_owner->display_ctx;

    int imgfmt = mapper->src_params.hw_subfmt;
    mapper->dst_params = mapper->src_params;
    mapper->dst_params.imgfmt = imgfmt;
    mapper->dst_params.hw_subfmt = 0;

    mp_image_set_params(&p->layout, &mapper->dst_params);

    struct ra_imgfmt_desc desc;
    if (!ra_get_imgfmt_desc(mapper->ra, imgfmt, &desc)) {
        MP_ERR(mapper, "Unsupported format: %s\n", mp_imgfmt_to_name(imgfmt));
        return -1;
    }

    ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
    if (ret < 0)
        return ret;

    for (int n = 0; n < desc.num_planes; n++) {
        if (!p_owner->ext_init(mapper, desc.planes[n], n))
            goto error;
    }

error:
    eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (eret < 0)
        return eret;

    return ret;
}

static void mapper_uninit(struct ra_hwdec_mapper *mapper)
{
    struct cuda_mapper_priv *p = mapper->priv;
    struct cuda_hw_priv *p_owner = mapper->owner->priv;
    CudaFunctions *cu = p_owner->cu;
    CUcontext dummy;

    // Don't bail if any CUDA calls fail. This is all best effort.
    CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
    for (int n = 0; n < 4; n++) {
        p_owner->ext_uninit(mapper, n);
        ra_tex_free(mapper->ra, &mapper->tex[n]);
    }
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
}
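
// Unmapping is a no-op: mapper_map copies each plane into the persistent
// interop textures, so there is nothing to release per frame.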
static void mapper_unmap(struct ra_hwdec_mapper *mapper)
{
}
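
// Copy each plane of the CUDA frame into the CUDA array backing the
// corresponding interop texture, bracketed by the backend's optional
// wait/signal hooks, or followed by a full stream sync where the backend
// requires it.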
static int mapper_map(struct ra_hwdec_mapper *mapper)
{
    struct cuda_mapper_priv *p = mapper->priv;
    struct cuda_hw_priv *p_owner = mapper->owner->priv;
    CudaFunctions *cu = p_owner->cu;
    CUcontext dummy;
    int ret = 0, eret = 0;

    ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
    if (ret < 0)
        return ret;

    for (int n = 0; n < p->layout.num_planes; n++) {
        if (p_owner->ext_wait) {
            if (!p_owner->ext_wait(mapper, n))
                goto error;
        }

        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice = (CUdeviceptr)mapper->src->planes[n],
            .srcPitch = mapper->src->stride[n],
            .srcY = 0,
            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray = p->cu_array[n],
            .WidthInBytes = mp_image_plane_w(&p->layout, n) *
                            mapper->tex[n]->params.format->pixel_size,
            .Height = mp_image_plane_h(&p->layout, n),
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, 0));
        if (ret < 0)
            goto error;

        if (p_owner->ext_signal) {
            if (!p_owner->ext_signal(mapper, n))
                goto error;
        }
    }
    if (p_owner->do_full_sync)
        CHECK_CU(cu->cuStreamSynchronize(0));

    // fall through
error:
    // Regardless of success or failure, we no longer need the source image,
    // because this hwdec makes an explicit memcpy into the mapper textures
    mp_image_unrefp(&mapper->src);

    eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    if (eret < 0)
        return eret;

    return ret;
}
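
// Driver entry for mpv's ra_hwdec framework: accepts IMGFMT_CUDA frames and
// maps them through the functions above.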
const struct ra_hwdec_driver ra_hwdec_cuda = {
    .name = "cuda",
    .imgfmts = {IMGFMT_CUDA, 0},
    .priv_size = sizeof(struct cuda_hw_priv),
    .init = cuda_init,
    .uninit = cuda_uninit,
    .mapper = &(const struct ra_hwdec_mapper_driver){
        .priv_size = sizeof(struct cuda_mapper_priv),
        .init = mapper_init,
        .uninit = mapper_uninit,
        .map = mapper_map,
        .unmap = mapper_unmap,
    },
};