From 2f9936e446d56f6e8da9f179143d3ac5a4546afc Mon Sep 17 00:00:00 2001 From: Connor Worley Date: Sat, 10 Feb 2024 14:58:46 -0800 Subject: [PATCH] lavc/dvx: use texdsp funcs for texture block decompression Signed-off-by: Connor Worley --- libavcodec/dxv.c | 289 ++++++++++++----------------------------------- 1 file changed, 75 insertions(+), 214 deletions(-) diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c index 16c34fff3b..cd78de3e0d 100644 --- a/libavcodec/dxv.c +++ b/libavcodec/dxv.c @@ -38,15 +38,12 @@ typedef struct DXVContext { GetByteContext gbc; uint8_t *tex_data; // Compressed texture - uint8_t *ctex_data; // Compressed texture + uint8_t *ctex_data; // Compressed chroma texture int tex_rat; // Compression ratio int tex_step; // Distance between blocks int ctex_step; // Distance between blocks int64_t tex_size; // Texture size - int64_t ctex_size; // Texture size - - /* Optimal number of slices for parallel decoding */ - int slice_count; + int64_t ctex_size; // Chroma texture size uint8_t *op_data[4]; // Opcodes int64_t op_size[4]; // Opcodes size @@ -56,198 +53,8 @@ typedef struct DXVContext { int ctexture_block_w; int ctexture_block_h; - - /* Pointer to the selected decompression function */ - int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block); - int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane1, ptrdiff_t stride1, - const uint8_t *block); } DXVContext; -static void decompress_indices(uint8_t *dst, const uint8_t *src) -{ - int block, i; - - for (block = 0; block < 2; block++) { - int tmp = AV_RL24(src); - - /* Unpack 8x3 bit from last 3 byte block */ - for (i = 0; i < 8; i++) - dst[i] = (tmp >> (i * 3)) & 0x7; - - src += 3; - dst += 8; - } -} - -static int extract_component(int yo0, int yo1, int code) -{ - int yo; - - if (yo0 == yo1) { - yo = yo0; - } else if (code == 0) { - yo = yo0; - } else if (code == 1) { - yo = yo1; - } else { - if (yo0 > yo1) { - yo = (uint8_t) (((8 - code) * yo0 + - (code - 1) * yo1) / 7); - } else { - if (code == 6) { - yo = 0; - } else if (code == 7) { - yo = 255; - } else { - yo = (uint8_t) (((6 - code) * yo0 + - (code - 1) * yo1) / 5); - } - } - } - - return yo; -} - -static int cocg_block(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane1, ptrdiff_t stride1, - const uint8_t *block) -{ - uint8_t co_indices[16]; - uint8_t cg_indices[16]; - uint8_t co0 = *(block); - uint8_t co1 = *(block + 1); - uint8_t cg0 = *(block + 8); - uint8_t cg1 = *(block + 9); - int x, y; - - decompress_indices(co_indices, block + 2); - decompress_indices(cg_indices, block + 10); - - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - int co_code = co_indices[x + y * 4]; - int cg_code = cg_indices[x + y * 4]; - - plane0[x] = extract_component(cg0, cg1, cg_code); - plane1[x] = extract_component(co0, co1, co_code); - } - plane0 += stride0; - plane1 += stride1; - } - - return 16; -} - -static void yao_subblock(uint8_t *dst, uint8_t *yo_indices, - ptrdiff_t stride, const uint8_t *block) -{ - uint8_t yo0 = *(block); - uint8_t yo1 = *(block + 1); - int x, y; - - decompress_indices(yo_indices, block + 2); - - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - int yo_code = yo_indices[x + y * 4]; - - dst[x] = extract_component(yo0, yo1, yo_code); - } - dst += stride; - } -} - -static int yo_block(uint8_t *dst, ptrdiff_t stride, - uint8_t *unused0, ptrdiff_t unused1, - const uint8_t *block) -{ - uint8_t yo_indices[16]; - - yao_subblock(dst, yo_indices, stride, block); - yao_subblock(dst + 4, yo_indices, stride, block + 8); - yao_subblock(dst + 8, yo_indices, stride, block + 16); - yao_subblock(dst + 12, yo_indices, stride, block + 24); - - return 32; -} - -static int yao_block(uint8_t *plane0, ptrdiff_t stride0, - uint8_t *plane3, ptrdiff_t stride1, - const uint8_t *block) -{ - uint8_t yo_indices[16]; - uint8_t a_indices[16]; - - yao_subblock(plane0, yo_indices, stride0, block); - yao_subblock(plane3, a_indices, stride1, block + 8); - yao_subblock(plane0 + 4, yo_indices, stride0, block + 16); - yao_subblock(plane3 + 4, a_indices, stride1, block + 24); - yao_subblock(plane0 + 8, yo_indices, stride0, block + 32); - yao_subblock(plane3 + 8, a_indices, stride1, block + 40); - yao_subblock(plane0 + 12, yo_indices, stride0, block + 48); - yao_subblock(plane3 + 12, a_indices, stride1, block + 56); - - return 64; -} - -static int decompress_texture_thread(AVCodecContext *avctx, void *arg, - int slice, int thread_nb) -{ - const DXVContext *ctx = avctx->priv_data; - AVFrame *frame = arg; - const uint8_t *d = ctx->tex_data; - int w_block = avctx->coded_width / ctx->texture_block_w; - int h_block = avctx->coded_height / ctx->texture_block_h; - int x, y; - int start_slice, end_slice; - - start_slice = h_block * slice / ctx->slice_count; - end_slice = h_block * (slice + 1) / ctx->slice_count; - - if (ctx->tex_funct) { - for (y = start_slice; y < end_slice; y++) { - uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0], - d + (off + x) * ctx->tex_step); - } - } - } else { - const uint8_t *c = ctx->ctex_data; - - for (y = start_slice; y < end_slice; y++) { - uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h; - uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0], - p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3], - d + (off + x) * ctx->tex_step); - } - } - - w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w; - h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h; - start_slice = h_block * slice / ctx->slice_count; - end_slice = h_block * (slice + 1) / ctx->slice_count; - - for (y = start_slice; y < end_slice; y++) { - uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h; - uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h; - int off = y * w_block; - for (x = 0; x < w_block; x++) { - ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1], - p1 + x * ctx->ctexture_block_w, frame->linesize[2], - c + (off + x) * ctx->ctex_step); - } - } - } - - return 0; -} - /* This scheme addresses already decoded elements depending on 2-bit status: * 0 -> copy new element * 1 -> copy one element from position -x @@ -1044,6 +851,8 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, { DXVContext *ctx = avctx->priv_data; GetByteContext *gbc = &ctx->gbc; + AVCodecContext cavctx = *avctx; + TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx; int (*decompress_tex)(AVCodecContext *avctx); const char *msgcomp, *msgtext; uint32_t tag; @@ -1053,21 +862,22 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, bytestream2_init(gbc, avpkt->data, avpkt->size); + cavctx.coded_height = avctx->coded_height / 2; + cavctx.coded_width = avctx->coded_width / 2; + ctx->texture_block_h = 4; ctx->texture_block_w = 4; avctx->pix_fmt = AV_PIX_FMT_RGBA; avctx->colorspace = AVCOL_SPC_RGB; - ctx->tex_funct = NULL; - ctx->tex_funct_planar[0] = NULL; - ctx->tex_funct_planar[1] = NULL; - tag = bytestream2_get_le32(gbc); switch (tag) { case DXV_FMT_DXT1: decompress_tex = dxv_decompress_dxt1; - ctx->tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 16; ctx->tex_rat = 8; ctx->tex_step = 8; msgcomp = "DXTR1"; @@ -1076,7 +886,9 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, case DXV_FMT_DXT5: decompress_tex = dxv_decompress_dxt5; /* DXV misnomers DXT5, alpha is premultiplied so use DXT4 instead */ - ctx->tex_funct = ctx->texdsp.dxt4_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt4_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 16; ctx->tex_rat = 4; ctx->tex_step = 16; msgcomp = "DXTR5"; @@ -1084,8 +896,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; case DXV_FMT_YCG6: decompress_tex = dxv_decompress_ycg6; - ctx->tex_funct_planar[0] = yo_block; - ctx->tex_funct_planar[1] = cocg_block; + texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 4; + ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + ctexdsp_ctx.tex_ratio = 16; + ctexdsp_ctx.raw_ratio = 4; ctx->tex_rat = 8; ctx->tex_step = 32; ctx->ctex_step = 16; @@ -1101,8 +917,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; case DXV_FMT_YG10: decompress_tex = dxv_decompress_yg10; - ctx->tex_funct_planar[0] = yao_block; - ctx->tex_funct_planar[1] = cocg_block; + texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 4; + ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block; + ctexdsp_ctx.tex_ratio = 16; + ctexdsp_ctx.raw_ratio = 4; ctx->tex_rat = 4; ctx->tex_step = 64; ctx->ctex_step = 16; @@ -1131,14 +951,20 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, } if (old_type & 0x40) { + tag = DXV_FMT_DXT5; msgtext = "DXT5"; - ctx->tex_funct = ctx->texdsp.dxt5_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block; + texdsp_ctx.tex_ratio = 16; + texdsp_ctx.raw_ratio = 16; ctx->tex_step = 16; } else if (old_type & 0x20 || version_major == 1) { + tag = DXV_FMT_DXT1; msgtext = "DXT1"; - ctx->tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block; + texdsp_ctx.tex_ratio = 8; + texdsp_ctx.raw_ratio = 16; ctx->tex_step = 8; } else { av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag); @@ -1148,10 +974,10 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, break; } - ctx->slice_count = av_clip(avctx->thread_count, 1, - avctx->coded_height / FFMAX(ctx->texture_block_h, - ctx->ctexture_block_h)); - + texdsp_ctx.slice_count = av_clip(avctx->thread_count, 1, + avctx->coded_height / TEXTURE_BLOCK_H); + ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1, + cavctx.coded_height / TEXTURE_BLOCK_H); /* New header is 12 bytes long. */ if (!old_type) { version_major = bytestream2_get_byte(gbc) - 1; @@ -1216,9 +1042,44 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame, if (ret < 0) return ret; - /* Now decompress the texture with the standard functions. */ - avctx->execute2(avctx, decompress_texture_thread, - frame, NULL, ctx->slice_count); + switch (tag) { + case DXV_FMT_YG10: + /* BC5 texture with alpha in the second half of each block */ + texdsp_ctx.tex_data.in = ctx->tex_data + texdsp_ctx.tex_ratio / 2; + texdsp_ctx.frame_data.out = frame->data[3]; + texdsp_ctx.stride = frame->linesize[3]; + ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx); + if (ret < 0) + return ret; + /* fallthrough */ + case DXV_FMT_YCG6: + /* BC5 texture with Co in the first half of each block and Cg in the second */ + ctexdsp_ctx.tex_data.in = ctx->ctex_data; + ctexdsp_ctx.frame_data.out = frame->data[2]; + ctexdsp_ctx.stride = frame->linesize[2]; + ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx); + if (ret < 0) + return ret; + ctexdsp_ctx.tex_data.in = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2; + ctexdsp_ctx.frame_data.out = frame->data[1]; + ctexdsp_ctx.stride = frame->linesize[1]; + ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx); + if (ret < 0) + return ret; + /* fallthrough */ + case DXV_FMT_DXT1: + case DXV_FMT_DXT5: + /* For DXT1 and DXT5, self explanatory + * For YCG6, BC4 texture for Y + * For YG10, BC5 texture with Y in the first half of each block */ + texdsp_ctx.tex_data.in = ctx->tex_data; + texdsp_ctx.frame_data.out = frame->data[0]; + texdsp_ctx.stride = frame->linesize[0]; + ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx); + if (ret < 0) + return ret; + break; + } /* Frame is ready to be output. */ frame->pict_type = AV_PICTURE_TYPE_I;