sub: better alpha blending when rendering to alpha surfaces

This actually treats destination alpha correctly, and gives much better results than before. I don't know if this is perfectly correct yet, though. Slight difference with vo_opengl behavior suggests it might not be. Note that this does not affect VOs with true alpha support. vo_opengl does not use this code at all, and does the alpha calculations in OpenGL instead.
2025-04-11 04:01:31 +00:00 · 2015-12-24 14:43:23 +01:00 · 2015-12-24 14:43:23 +01:00 · 6bec6ac558
commit 6bec6ac558
parent 946bd52a1d
3 changed files with 37 additions and 1 deletions
--- a/sub/draw_bmp.c
+++ b/sub/draw_bmp.c
@ -114,6 +114,7 @@ static void blend_const8_alpha(void *dst, int dst_stride, uint16_t srcp,
    }
 }

+// dst = srcp * (srca * srcamul) + dst * (1 - (srca * srcamul))
 static void blend_const_alpha(void *dst, int dst_stride, int srcp,
                              uint8_t *srca, int srca_stride, uint8_t srcamul,
                              int w, int h, int bytes)
@ -169,6 +170,7 @@ static void blend_src8_alpha(void *dst, int dst_stride, void *src,
    }
 }

+// dst = src * srca + dst * (1 - srca)
 static void blend_src_alpha(void *dst, int dst_stride, void *src,
                            int src_stride, uint8_t *srca, int srca_stride,
                            int w, int h, int bytes)
@ -182,6 +184,30 @@ static void blend_src_alpha(void *dst, int dst_stride, void *src,
    }
 }

+// dst = src * srcmul + dst * (1 - src * srcmul)
+static void blend_src_dst_mul(void *dst, int dst_stride,
+                              uint8_t *src, int src_stride, uint8_t srcmul,
+                              int w, int h, int dst_bytes)
+{
+    for (int y = 0; y < h; y++) {
+        void *dst_rp = (uint8_t *)dst + dst_stride * y;
+        uint8_t *src_r = (uint8_t *)src + src_stride * y;
+        if (dst_bytes == 2) {
+            uint16_t *dst_r = dst_rp;
+            for (int x = 0; x < w; x++) {
+                uint16_t srcp = src_r[x] * srcmul; // now 0..65025
+                dst_r[x] = (srcp * 65025 + dst_r[x] * (65025 - srcp) + 32512) / 65025;
+            }
+        } else if (dst_bytes == 1) {
+            uint8_t *dst_r = dst_rp;
+            for (int x = 0; x < w; x++) {
+                uint16_t srcp = src_r[x] * srcmul; // now 0..65025
+                dst_r[x] = (srcp * 255 + dst_r[x] * (65025 - srcp) + 32512) / 65025;
+            }
+        }
+    }
+}
+
 static void unpremultiply_and_split_BGR32(struct mp_image *img,
                                          struct mp_image *alpha)
 {
@ -278,6 +304,10 @@ static void draw_rgba(struct mp_draw_sub_cache *cache, struct mp_rect bb,
            blend_src_alpha(dst.planes[p], dst.stride[p], src, sbi->stride[p],
                            alpha_p, sba->stride[0], dst.w, dst.h, bytes);
        }
+        if (temp->num_planes >= 4) {
+            blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
+                              sba->stride[0], 255, dst.w, dst.h, bytes);
+        }

        part->imgs[i].i = talloc_steal(part, sbi);
        part->imgs[i].a = talloc_steal(part, sba);
@ -328,6 +358,10 @@ static void draw_ass(struct mp_draw_sub_cache *cache, struct mp_rect bb,
            blend_const_alpha(dst.planes[p], dst.stride[p], color_yuv[p],
                              alpha_p, sb->stride, a, dst.w, dst.h, bytes);
        }
+        if (temp->num_planes >= 4) {
+            blend_src_dst_mul(dst.planes[3], dst.stride[3], alpha_p,
+                              sb->stride, a, dst.w, dst.h, bytes);
+        }
    }
 }

@ -374,7 +408,7 @@ static void get_closest_y444_format(int imgfmt, int *out_format, int *out_bits)
 {
    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
    if (desc.flags & MP_IMGFLAG_RGB) {
-        *out_format = IMGFMT_GBRP;
+        *out_format = desc.flags & MP_IMGFLAG_ALPHA ? IMGFMT_GBRAP : IMGFMT_GBRP;
        *out_bits = 8;
        return;
    } else if (desc.flags & MP_IMGFLAG_YUV_P) {
--- a/video/fmt-conversion.c
+++ b/video/fmt-conversion.c
@ -49,6 +49,7 @@ static const struct {
    {IMGFMT_BGR4,  AV_PIX_FMT_BGR4},
    {IMGFMT_PAL8,  AV_PIX_FMT_PAL8},
    {IMGFMT_GBRP,  AV_PIX_FMT_GBRP},
+    {IMGFMT_GBRAP, AV_PIX_FMT_GBRAP},
    {IMGFMT_YUYV,  AV_PIX_FMT_YUYV422},
    {IMGFMT_UYVY,  AV_PIX_FMT_UYVY422},
    {IMGFMT_NV12,  AV_PIX_FMT_NV12},
--- a/video/img_format.h
+++ b/video/img_format.h
@ -197,6 +197,7 @@ enum mp_imgfmt {

    // Planar RGB (planes are shuffled: plane 0 is G, etc.)
    IMGFMT_GBRP,
+    IMGFMT_GBRAP,

    // XYZ colorspace, similar organization to RGB48. Even though it says "12",
    // the components are stored as 16 bit, with lower 4 bits set to 0.