From 8020a62953926cd6d672e9151290bd4d65e8ee08 Mon Sep 17 00:00:00 2001
From: James Ross-Gowan <rossy@jrg.systems>
Date: Sun, 22 Oct 2017 01:18:31 +1100
Subject: [PATCH] vo_gpu: export the GLSL format qualifier for ra_format

Backported from @haasn's change to libplacebo, except in the current RA,
there's nothing to indicate an ra_format can be bound as a storage
image, so there's no way to force all of these formats to have a
glsl_format. Instead, the format layout qualifier is omitted from the
shader when glsl_format is NULL.

This is needed for the upcoming ra_d3d11 backend. In Direct3D 11, while
loading float values from unorm images often works as expected, it's
technically undefined behaviour, and in Windows 10, it will cause the
debug layer to spam the log with error messages. Also, apparently in
GLSL, the format name must match the image's format exactly (but in
Direct3D, it just has to have the same component type).
---
 video/out/gpu/ra.c           | 59 ++++++++++++++++++++++++++++++++++++
 video/out/gpu/ra.h           |  6 ++++
 video/out/gpu/shader_cache.c | 22 +++++---------
 video/out/opengl/ra_gl.c     |  2 ++
 video/out/vulkan/ra_vk.c     |  2 ++
 5 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c
index ef1de54d1a..fdb20fe1d5 100644
--- a/video/out/gpu/ra.c
+++ b/video/out/gpu/ra.c
@@ -86,6 +86,65 @@ struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent,
     return res;
 };
 
+struct glsl_fmt {               // one row of the ra_glsl_fmts lookup table
+    enum ra_ctype ctype;        // component type, compared to ra_format.ctype
+    int num_components;         // component count, compared to ra_format
+    int component_depth[4];     // depth per component; unlisted entries are 0
+    const char *glsl_format;    // GLSL format qualifier name, e.g. "rgba16f"
+};
+
+// List taken from the GLSL specification, sans snorm and sint formats
+static const struct glsl_fmt ra_glsl_fmts[] = { // ctype, count, depths, name
+    {RA_CTYPE_FLOAT, 1, {16},             "r16f"},
+    {RA_CTYPE_FLOAT, 1, {32},             "r32f"},
+    {RA_CTYPE_FLOAT, 2, {16, 16},         "rg16f"},
+    {RA_CTYPE_FLOAT, 2, {32, 32},         "rg32f"},
+    {RA_CTYPE_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
+    {RA_CTYPE_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
+    {RA_CTYPE_FLOAT, 3, {11, 11, 10},     "r11f_g11f_b10f"},
+
+    {RA_CTYPE_UNORM, 1, {8},              "r8"},
+    {RA_CTYPE_UNORM, 1, {16},             "r16"},
+    {RA_CTYPE_UNORM, 2, {8,  8},          "rg8"},
+    {RA_CTYPE_UNORM, 2, {16, 16},         "rg16"},
+    {RA_CTYPE_UNORM, 4, {8,  8,  8,  8},  "rgba8"},
+    {RA_CTYPE_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
+    {RA_CTYPE_UNORM, 4, {10, 10, 10,  2}, "rgb10_a2"},
+
+    {RA_CTYPE_UINT,  1, {8},              "r8ui"},
+    {RA_CTYPE_UINT,  1, {16},             "r16ui"},
+    {RA_CTYPE_UINT,  1, {32},             "r32ui"},
+    {RA_CTYPE_UINT,  2, {8,  8},          "rg8ui"},
+    {RA_CTYPE_UINT,  2, {16, 16},         "rg16ui"},
+    {RA_CTYPE_UINT,  2, {32, 32},         "rg32ui"},
+    {RA_CTYPE_UINT,  4, {8,  8,  8,  8},  "rgba8ui"},
+    {RA_CTYPE_UINT,  4, {16, 16, 16, 16}, "rgba16ui"},
+    {RA_CTYPE_UINT,  4, {32, 32, 32, 32}, "rgba32ui"},
+    {RA_CTYPE_UINT,  4, {10, 10, 10,  2}, "rgb10_a2ui"},
+};
+
+// Return the GLSL image format qualifier whose component type, count and
+// per-component bit depths all equal those of fmt, or NULL if none does.
+const char *ra_fmt_glsl_format(const struct ra_format *fmt)
+{
+    const int count = fmt->num_components;
+
+    for (int idx = 0; idx < MP_ARRAY_SIZE(ra_glsl_fmts); idx++) {
+        const struct glsl_fmt *cand = &ra_glsl_fmts[idx];
+        if (cand->ctype != fmt->ctype || cand->num_components != count)
+            continue;
+
+        // Count how many leading component depths agree with the candidate.
+        int same = 0;
+        while (same < count &&
+               cand->component_depth[same] == fmt->component_depth[same])
+            same++;
+        if (same == count)
+            return cand->glsl_format;
+    }
+
+    return NULL;
+}
 
 // Return whether this is a tightly packed format with no external padding and
 // with the same bit size/depth in all components, and the shader returns
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index 5c5c851e64..b10ab76124 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -87,6 +87,10 @@ struct ra_format {
     // shader representation is given by the special_imgfmt_desc pointer.
     int special_imgfmt;
     const struct ra_imgfmt_desc *special_imgfmt_desc;
+
+    // This gives the GLSL image format corresponding to the format, if any.
+    // (e.g. rgba16ui)
+    const char *glsl_format;
 };
 
 struct ra_tex_params {
@@ -494,6 +498,8 @@ struct ra_imgfmt_desc {
     uint8_t components[4][4];
 };
 
+const char *ra_fmt_glsl_format(const struct ra_format *fmt);
+
 bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out);
 
 void ra_dump_tex_formats(struct ra *ra, int msgl);
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c
index f191a7198c..83ca11d7ad 100644
--- a/video/out/gpu/shader_cache.c
+++ b/video/out/gpu/shader_cache.c
@@ -710,22 +710,16 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
             break;
         case RA_VARTYPE_IMG_W: {
             // For better compatibility, we have to explicitly label the
-            // type of data we will be reading/writing to this image. For
-            // simplicity, just pick 32-bit float with however many components.
-            static const char *fmt_mapping[] = {
-                [1] = "r32f",
-                [2] = "rg32f",
-                [3] = "rgba32f", // rgb32f doesn't exist
-                [4] = "rgba32f",
-            };
-
-            const struct ra_format *format = u->v.tex->params.format;
-            assert(format->num_components < MP_ARRAY_SIZE(fmt_mapping));
-            const char *fmt = fmt_mapping[format->num_components];
+            // type of data we will be reading/writing to this image.
+            const char *fmt = u->v.tex->params.format->glsl_format;
 
             if (sc->ra->glsl_vulkan) {
-                ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt);
-            } else {
+                if (fmt) {
+                    ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt);
+                } else {
+                    ADD(dst, "layout(binding=%d) ", u->input.binding);
+                }
+            } else if (fmt) {
                 ADD(dst, "layout(%s) ", fmt);
             }
             ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index 86488b11ab..61ac2c2bbb 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -179,6 +179,8 @@ static int ra_init_gl(struct ra *ra, GL *gl)
             desc->chroma_w = desc->chroma_h = 1;
         }
 
+        fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
         MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
     }
 
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index a39261f049..58213bd0e9 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -114,6 +114,8 @@ static bool vk_setup_formats(struct ra *ra)
         for (int i = 0; i < 4; i++)
             fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
 
+        fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
         MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
     }