From 7faae6e745a3b22f81d522ea19164d3b2b3aa2cc Mon Sep 17 00:00:00 2001 From: Mark Thompson Date: Sun, 10 Sep 2017 13:37:20 +0100 Subject: [PATCH] hwcontext_opencl: DRM to OpenCL mapping for ARM Using cl_arm_import_memory. Unfortunately, despite this not being a standard extension, the function clImportMemoryARM() is not accessible via clGetExtensionFunctionAddressForPlatform(). This means that it has to be linked directly to the ARM OpenCL binary, so making a portable binary is not possible as it is with all other mapping extensions. --- configure | 6 + libavutil/hwcontext_opencl.c | 263 +++++++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+) diff --git a/configure b/configure index c4d6ad9d9d..e03d68aa92 100755 --- a/configure +++ b/configure @@ -2121,6 +2121,7 @@ HAVE_LIST=" makeinfo makeinfo_html opencl_d3d11 + opencl_drm_arm opencl_dxva2 opencl_vaapi_beignet opencl_vaapi_intel_media @@ -6178,6 +6179,11 @@ if enabled_all opencl d3d11va ; then enable opencl_d3d11 fi +if enabled_all opencl libdrm ; then + check_func_headers "CL/cl_ext.h" clImportMemoryARM && + enable opencl_drm_arm +fi + enabled vdpau && check_cpp_condition vdpau/vdpau.h "defined VDP_DECODER_PROFILE_MPEG4_PART2_ASP" || disable vdpau diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c index 08b3ea90f7..8754d73939 100644 --- a/libavutil/hwcontext_opencl.c +++ b/libavutil/hwcontext_opencl.c @@ -56,6 +56,12 @@ #include "hwcontext_d3d11va.h" #endif +#if HAVE_OPENCL_DRM_ARM +#include <CL/cl_ext.h> +#include <drm_fourcc.h> +#include "hwcontext_drm.h" +#endif + typedef struct OpenCLDeviceContext { // Default command queue to use for transfer/mapping operations on @@ -104,6 +110,10 @@ typedef struct OpenCLDeviceContext { clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR; #endif + +#if HAVE_OPENCL_DRM_ARM + int drm_arm_mapping_usable; +#endif } OpenCLDeviceContext; typedef struct OpenCLFramesContext { @@ -826,6 +836,37 @@ static int opencl_device_init(AVHWDeviceContext *hwdev) } 
#endif +#if HAVE_OPENCL_DRM_ARM + { + const char *drm_arm_ext = "cl_arm_import_memory"; + const char *image_ext = "cl_khr_image2d_from_buffer"; + int fail = 0; + + if (!opencl_check_extension(hwdev, drm_arm_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for DRM to OpenCL mapping on ARM.\n", + drm_arm_ext); + fail = 1; + } + if (!opencl_check_extension(hwdev, image_ext)) { + av_log(hwdev, AV_LOG_VERBOSE, "The %s extension is " + "required for DRM to OpenCL mapping on ARM.\n", + image_ext); + fail = 1; + } + + // clImportMemoryARM() is linked statically. + + if (fail) { + av_log(hwdev, AV_LOG_WARNING, "DRM to OpenCL mapping on ARM " + "not usable.\n"); + priv->drm_arm_mapping_usable = 0; + } else { + priv->drm_arm_mapping_usable = 1; + } + } +#endif + #undef CL_FUNC return 0; @@ -1104,6 +1145,40 @@ static int opencl_filter_gpu_device(AVHWDeviceContext *hwdev, } #endif +#if HAVE_OPENCL_DRM_ARM +static int opencl_filter_drm_arm_platform(AVHWDeviceContext *hwdev, + cl_platform_id platform_id, + const char *platform_name, + void *context) +{ + const char *drm_arm_ext = "cl_arm_import_memory"; + + if (opencl_check_platform_extension(platform_id, drm_arm_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Platform %s does not support the " + "%s extension.\n", platform_name, drm_arm_ext); + return 1; + } +} + +static int opencl_filter_drm_arm_device(AVHWDeviceContext *hwdev, + cl_device_id device_id, + const char *device_name, + void *context) +{ + const char *drm_arm_ext = "cl_arm_import_memory"; + + if (opencl_check_device_extension(device_id, drm_arm_ext)) { + return 0; + } else { + av_log(hwdev, AV_LOG_DEBUG, "Device %s does not support the " + "%s extension.\n", device_name, drm_arm_ext); + return 1; + } +} +#endif + static int opencl_device_derive(AVHWDeviceContext *hwdev, AVHWDeviceContext *src_ctx, int flags) @@ -1250,6 +1325,24 @@ static int opencl_device_derive(AVHWDeviceContext *hwdev, break; #endif +#if HAVE_OPENCL_DRM_ARM 
+ case AV_HWDEVICE_TYPE_DRM: + { + OpenCLDeviceSelector selector = { + .platform_index = -1, + .device_index = -1, + .context = NULL, + .enumerate_platforms = &opencl_enumerate_platforms, + .filter_platform = &opencl_filter_drm_arm_platform, + .enumerate_devices = &opencl_enumerate_devices, + .filter_device = &opencl_filter_drm_arm_device, + }; + + err = opencl_device_create_internal(hwdev, &selector, NULL); + } + break; +#endif + default: err = AVERROR(ENOSYS); break; @@ -2558,6 +2651,165 @@ fail: #endif +#if HAVE_OPENCL_DRM_ARM + +typedef struct DRMARMtoOpenCLMapping { + int nb_objects; + cl_mem object_buffers[AV_DRM_MAX_PLANES]; + int nb_planes; + cl_mem plane_images[AV_DRM_MAX_PLANES]; +} DRMARMtoOpenCLMapping; + +static void opencl_unmap_from_drm_arm(AVHWFramesContext *dst_fc, + HWMapDescriptor *hwmap) +{ + DRMARMtoOpenCLMapping *mapping = hwmap->priv; + int i; + + for (i = 0; i < mapping->nb_planes; i++) + clReleaseMemObject(mapping->plane_images[i]); + + for (i = 0; i < mapping->nb_objects; i++) + clReleaseMemObject(mapping->object_buffers[i]); + + av_free(mapping); +} + +static int opencl_map_from_drm_arm(AVHWFramesContext *dst_fc, AVFrame *dst, + const AVFrame *src, int flags) +{ + AVHWFramesContext *src_fc = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; + const AVDRMFrameDescriptor *desc; + DRMARMtoOpenCLMapping *mapping = NULL; + cl_mem_flags cl_flags; + const cl_import_properties_arm props[3] = { + CL_IMPORT_TYPE_ARM, CL_IMPORT_TYPE_DMA_BUF_ARM, 0, + }; + cl_int cle; + int err, i, j; + + desc = (const AVDRMFrameDescriptor*)src->data[0]; + + cl_flags = opencl_mem_flags_for_mapping(flags); + if (!cl_flags) + return AVERROR(EINVAL); + + mapping = av_mallocz(sizeof(*mapping)); + if (!mapping) + return AVERROR(ENOMEM); + + mapping->nb_objects = desc->nb_objects; + for (i = 0; i < desc->nb_objects; i++) { + int fd = desc->objects[i].fd; + + av_log(dst_fc, AV_LOG_DEBUG, "Map DRM PRIME fd %d to 
OpenCL.\n", fd); + + if (desc->objects[i].format_modifier) { + av_log(dst_fc, AV_LOG_DEBUG, "Warning: object %d fd %d has " + "nonzero format modifier %"PRId64", result may not " + "be as expected.\n", i, fd, + desc->objects[i].format_modifier); + } + + mapping->object_buffers[i] = + clImportMemoryARM(dst_dev->context, cl_flags, props, + &fd, desc->objects[i].size, &cle); + if (!mapping->object_buffers[i]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL buffer " + "from object %d (fd %d, size %zu) of DRM frame: %d.\n", + i, fd, desc->objects[i].size, cle); + err = AVERROR(EIO); + goto fail; + } + } + + mapping->nb_planes = 0; + for (i = 0; i < desc->nb_layers; i++) { + const AVDRMLayerDescriptor *layer = &desc->layers[i]; + + for (j = 0; j < layer->nb_planes; j++) { + const AVDRMPlaneDescriptor *plane = &layer->planes[j]; + cl_mem plane_buffer; + cl_image_format image_format; + cl_image_desc image_desc; + cl_buffer_region region; + int p = mapping->nb_planes; + + err = opencl_get_plane_format(src_fc->sw_format, p, + src_fc->width, src_fc->height, + &image_format, &image_desc); + if (err < 0) { + av_log(dst_fc, AV_LOG_ERROR, "Invalid plane %d (DRM " + "layer %d plane %d): %d.\n", p, i, j, err); + goto fail; + } + + region.origin = plane->offset; + region.size = image_desc.image_row_pitch * + image_desc.image_height; + + plane_buffer = + clCreateSubBuffer(mapping->object_buffers[plane->object_index], + cl_flags, + CL_BUFFER_CREATE_TYPE_REGION, + &region, &cle); + if (!plane_buffer) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create sub-buffer " + "for plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + goto fail; + } + + image_desc.buffer = plane_buffer; + + mapping->plane_images[p] = + clCreateImage(dst_dev->context, cl_flags, + &image_format, &image_desc, NULL, &cle); + + // Unreference the sub-buffer immediately - we don't need it + // directly and a reference is held by the image. 
+ clReleaseMemObject(plane_buffer); + + if (!mapping->plane_images[p]) { + av_log(dst_fc, AV_LOG_ERROR, "Failed to create image " + "for plane %d: %d.\n", p, cle); + err = AVERROR(EIO); + goto fail; + } + + ++mapping->nb_planes; + } + } + + for (i = 0; i < mapping->nb_planes; i++) + dst->data[i] = (uint8_t*)mapping->plane_images[i]; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &opencl_unmap_from_drm_arm, mapping); + if (err < 0) + goto fail; + + dst->width = src->width; + dst->height = src->height; + + return 0; + +fail: + for (i = 0; i < mapping->nb_planes; i++) { + clReleaseMemObject(mapping->plane_images[i]); + } + for (i = 0; i < mapping->nb_objects; i++) { + if (mapping->object_buffers[i]) + clReleaseMemObject(mapping->object_buffers[i]); + } + av_free(mapping); + return err; +} + +#endif + static int opencl_map_from(AVHWFramesContext *hwfc, AVFrame *dst, const AVFrame *src, int flags) { @@ -2593,6 +2845,11 @@ static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst, case AV_PIX_FMT_D3D11: if (priv->d3d11_mapping_usable) return opencl_map_from_d3d11(hwfc, dst, src, flags); +#endif +#if HAVE_OPENCL_DRM_ARM + case AV_PIX_FMT_DRM_PRIME: + if (priv->drm_arm_mapping_usable) + return opencl_map_from_drm_arm(hwfc, dst, src, flags); #endif } return AVERROR(ENOSYS); @@ -2639,6 +2896,12 @@ static int opencl_frames_derive_to(AVHWFramesContext *dst_fc, return err; } break; +#endif +#if HAVE_OPENCL_DRM_ARM + case AV_HWDEVICE_TYPE_DRM: + if (!priv->drm_arm_mapping_usable) + return AVERROR(ENOSYS); + break; #endif default: return AVERROR(ENOSYS);