From 2da3a5c10f5249b21b8e37d1f354085178b0ffc3 Mon Sep 17 00:00:00 2001
From: "Guo, Yejun"
Date: Sun, 7 Feb 2021 15:03:43 +0800
Subject: [PATCH] dnn: add color conversion for analytic case

ff_proc_from_frame_to_dnn() now takes the model function type. For
DFT_PROCESS_FRAME it keeps the existing frame-processing path; for
DFT_ANALYTICS_DETECT it uses swscale to scale/convert the frame into the
pixel format implied by the DNNData (currently only uint8 BGR, i.e.
AV_PIX_FMT_BGR24).

For the OpenVINO backend, mark the model input as BGR, accept U8
precision, and request U8 input precision for every function type other
than DFT_PROCESS_FRAME.

Signed-off-by: Guo, Yejun
---
 libavfilter/dnn/dnn_backend_native.c   |  2 +-
 libavfilter/dnn/dnn_backend_openvino.c | 23 ++++++++++-
 libavfilter/dnn/dnn_backend_tf.c       |  2 +-
 libavfilter/dnn/dnn_io_proc.c          | 56 +++++++++++++++++++++++++-
 libavfilter/dnn/dnn_io_proc.h          |  2 +-
 5 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index be6451367a..3bc253c1ad 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -321,7 +321,7 @@ static DNNReturnType execute_model_native(const DNNModel *model, const char *inp
         if (native_model->model->pre_proc != NULL) {
             native_model->model->pre_proc(in_frame, &input, native_model->model->filter_ctx);
         } else {
-            ff_proc_from_frame_to_dnn(in_frame, &input, ctx);
+            ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx);
         }
     }
 
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 7c1abb3eeb..cca155a52c 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -99,6 +99,8 @@ static DNNDataType precision_to_datatype(precision_e precision)
     {
     case FP32:
         return DNN_FLOAT;
+    case U8:
+        return DNN_UINT8;
     default:
         av_assert0(!"not supported yet.");
         return DNN_FLOAT;
@@ -111,6 +113,8 @@ static int get_datatype_size(DNNDataType dt)
     {
     case DNN_FLOAT:
         return sizeof(float);
+    case DNN_UINT8:
+        return sizeof(uint8_t);
     default:
         av_assert0(!"not supported yet.");
         return 1;
@@ -152,6 +156,9 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request
     input.channels = dims.dims[1];
     input.data = blob_buffer.buffer;
     input.dt = precision_to_datatype(precision);
+    // all models in openvino open model zoo use BGR as input,
+    // change to be an option when necessary.
+    input.order = DCO_BGR;
 
     av_assert0(request->task_count <= dims.dims[0]);
     for (int i = 0; i < request->task_count; ++i) {
@@ -160,7 +167,7 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request
             if (ov_model->model->pre_proc != NULL) {
                 ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx);
             } else {
-                ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
+                ff_proc_from_frame_to_dnn(task->in_frame, &input, ov_model->model->func_type, ctx);
             }
         }
         input.data = (uint8_t *)input.data
@@ -290,6 +297,20 @@ static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, co
         goto err;
     }
 
+    // all models in openvino open model zoo use BGR with range [0.0f, 255.0f] as input,
+    // we don't have an AVPixelFormat to describe it, so we'll use AV_PIX_FMT_BGR24 and
+    // ask openvino to do the conversion internally.
+    // the current supported SR model (frame processing) is generated from tensorflow model,
+    // and its input is Y channel as float with range [0.0f, 1.0f], so do not set for this case.
+    // TODO: we need to get a final clear&general solution with all backends/formats considered.
+    if (ov_model->model->func_type != DFT_PROCESS_FRAME) {
+        status = ie_network_set_input_precision(ov_model->network, input_name, U8);
+        if (status != OK) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to set input precision as U8 for %s\n", input_name);
+            goto err;
+        }
+    }
+
     status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index e7e5f221f3..750a476726 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -744,7 +744,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
         if (tf_model->model->pre_proc != NULL) {
             tf_model->model->pre_proc(in_frame, &input, tf_model->model->filter_ctx);
         } else {
-            ff_proc_from_frame_to_dnn(in_frame, &input, ctx);
+            ff_proc_from_frame_to_dnn(in_frame, &input, tf_model->model->func_type, ctx);
         }
     }
 
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index bee1423342..e104cc5064 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -21,6 +21,7 @@
 #include "dnn_io_proc.h"
 #include "libavutil/imgutils.h"
 #include "libswscale/swscale.h"
+#include "libavutil/avassert.h"
 
 DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
 {
@@ -92,7 +93,7 @@ DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *l
     return DNN_SUCCESS;
 }
 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
+static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNNData *input, void *log_ctx)
 {
     struct SwsContext *sws_ctx;
     int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
@@ -163,3 +164,56 @@ DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *lo
 
     return DNN_SUCCESS;
 }
+
+static enum AVPixelFormat get_pixel_format(DNNData *data)
+{
+    if (data->dt == DNN_UINT8 && data->order == DCO_BGR) {
+        return AV_PIX_FMT_BGR24;
+    }
+
+    av_assert0(!"not supported yet.\n");
+    return AV_PIX_FMT_BGR24;
+}
+
+static DNNReturnType proc_from_frame_to_dnn_analytics(AVFrame *frame, DNNData *input, void *log_ctx)
+{
+    struct SwsContext *sws_ctx;
+    int linesizes[4];
+    enum AVPixelFormat fmt = get_pixel_format(input);
+    sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
+                             input->width, input->height, fmt,
+                             SWS_FAST_BILINEAR, NULL, NULL, NULL);
+    if (!sws_ctx) {
+        av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
+               "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
+               av_get_pix_fmt_name(frame->format), frame->width, frame->height,
+               av_get_pix_fmt_name(fmt), input->width, input->height);
+        return DNN_ERROR;
+    }
+
+    if (av_image_fill_linesizes(linesizes, fmt, input->width) < 0) {
+        av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes\n");
+        sws_freeContext(sws_ctx);
+        return DNN_ERROR;
+    }
+
+    sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
+              (uint8_t *const *)(&input->data), linesizes);
+
+    sws_freeContext(sws_ctx);
+    return DNN_SUCCESS;
+}
+
+DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
+{
+    switch (func_type)
+    {
+    case DFT_PROCESS_FRAME:
+        return proc_from_frame_to_dnn_frameprocessing(frame, input, log_ctx);
+    case DFT_ANALYTICS_DETECT:
+        return proc_from_frame_to_dnn_analytics(frame, input, log_ctx);
+    default:
+        avpriv_report_missing_feature(log_ctx, "model function type %d", func_type);
+        return DNN_ERROR;
+    }
+}
diff --git a/libavfilter/dnn/dnn_io_proc.h b/libavfilter/dnn/dnn_io_proc.h
index 6a410ccc7b..91ad3cb261 100644
--- a/libavfilter/dnn/dnn_io_proc.h
+++ b/libavfilter/dnn/dnn_io_proc.h
@@ -30,7 +30,7 @@
 #include "../dnn_interface.h"
 #include "libavutil/frame.h"
 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx);
+DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx);
 DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx);
 
 #endif