lavc/dvx: use texdsp funcs for texture block decompression

Signed-off-by: Connor Worley <connorbworley@gmail.com>
This commit is contained in:
Connor Worley 2024-02-10 14:58:46 -08:00 committed by Lynne
parent 939bf30d82
commit 2f9936e446
1 changed files with 75 additions and 214 deletions

View File

@ -38,15 +38,12 @@ typedef struct DXVContext {
GetByteContext gbc;
uint8_t *tex_data; // Compressed texture
uint8_t *ctex_data; // Compressed texture
uint8_t *ctex_data; // Compressed chroma texture
int tex_rat; // Compression ratio
int tex_step; // Distance between blocks
int ctex_step; // Distance between blocks
int64_t tex_size; // Texture size
int64_t ctex_size; // Texture size
/* Optimal number of slices for parallel decoding */
int slice_count;
int64_t ctex_size; // Chroma texture size
uint8_t *op_data[4]; // Opcodes
int64_t op_size[4]; // Opcodes size
@ -56,198 +53,8 @@ typedef struct DXVContext {
int ctexture_block_w;
int ctexture_block_h;
/* Pointer to the selected decompression function */
int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
uint8_t *plane1, ptrdiff_t stride1,
const uint8_t *block);
} DXVContext;
static void decompress_indices(uint8_t *dst, const uint8_t *src)
{
int block, i;
for (block = 0; block < 2; block++) {
int tmp = AV_RL24(src);
/* Unpack 8x3 bit from last 3 byte block */
for (i = 0; i < 8; i++)
dst[i] = (tmp >> (i * 3)) & 0x7;
src += 3;
dst += 8;
}
}
static int extract_component(int yo0, int yo1, int code)
{
int yo;
if (yo0 == yo1) {
yo = yo0;
} else if (code == 0) {
yo = yo0;
} else if (code == 1) {
yo = yo1;
} else {
if (yo0 > yo1) {
yo = (uint8_t) (((8 - code) * yo0 +
(code - 1) * yo1) / 7);
} else {
if (code == 6) {
yo = 0;
} else if (code == 7) {
yo = 255;
} else {
yo = (uint8_t) (((6 - code) * yo0 +
(code - 1) * yo1) / 5);
}
}
}
return yo;
}
static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
uint8_t *plane1, ptrdiff_t stride1,
const uint8_t *block)
{
uint8_t co_indices[16];
uint8_t cg_indices[16];
uint8_t co0 = *(block);
uint8_t co1 = *(block + 1);
uint8_t cg0 = *(block + 8);
uint8_t cg1 = *(block + 9);
int x, y;
decompress_indices(co_indices, block + 2);
decompress_indices(cg_indices, block + 10);
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
int co_code = co_indices[x + y * 4];
int cg_code = cg_indices[x + y * 4];
plane0[x] = extract_component(cg0, cg1, cg_code);
plane1[x] = extract_component(co0, co1, co_code);
}
plane0 += stride0;
plane1 += stride1;
}
return 16;
}
static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
ptrdiff_t stride, const uint8_t *block)
{
uint8_t yo0 = *(block);
uint8_t yo1 = *(block + 1);
int x, y;
decompress_indices(yo_indices, block + 2);
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
int yo_code = yo_indices[x + y * 4];
dst[x] = extract_component(yo0, yo1, yo_code);
}
dst += stride;
}
}
static int yo_block(uint8_t *dst, ptrdiff_t stride,
uint8_t *unused0, ptrdiff_t unused1,
const uint8_t *block)
{
uint8_t yo_indices[16];
yao_subblock(dst, yo_indices, stride, block);
yao_subblock(dst + 4, yo_indices, stride, block + 8);
yao_subblock(dst + 8, yo_indices, stride, block + 16);
yao_subblock(dst + 12, yo_indices, stride, block + 24);
return 32;
}
static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
uint8_t *plane3, ptrdiff_t stride1,
const uint8_t *block)
{
uint8_t yo_indices[16];
uint8_t a_indices[16];
yao_subblock(plane0, yo_indices, stride0, block);
yao_subblock(plane3, a_indices, stride1, block + 8);
yao_subblock(plane0 + 4, yo_indices, stride0, block + 16);
yao_subblock(plane3 + 4, a_indices, stride1, block + 24);
yao_subblock(plane0 + 8, yo_indices, stride0, block + 32);
yao_subblock(plane3 + 8, a_indices, stride1, block + 40);
yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
yao_subblock(plane3 + 12, a_indices, stride1, block + 56);
return 64;
}
static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
int slice, int thread_nb)
{
const DXVContext *ctx = avctx->priv_data;
AVFrame *frame = arg;
const uint8_t *d = ctx->tex_data;
int w_block = avctx->coded_width / ctx->texture_block_w;
int h_block = avctx->coded_height / ctx->texture_block_h;
int x, y;
int start_slice, end_slice;
start_slice = h_block * slice / ctx->slice_count;
end_slice = h_block * (slice + 1) / ctx->slice_count;
if (ctx->tex_funct) {
for (y = start_slice; y < end_slice; y++) {
uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
int off = y * w_block;
for (x = 0; x < w_block; x++) {
ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
d + (off + x) * ctx->tex_step);
}
}
} else {
const uint8_t *c = ctx->ctex_data;
for (y = start_slice; y < end_slice; y++) {
uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
int off = y * w_block;
for (x = 0; x < w_block; x++) {
ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
d + (off + x) * ctx->tex_step);
}
}
w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
start_slice = h_block * slice / ctx->slice_count;
end_slice = h_block * (slice + 1) / ctx->slice_count;
for (y = start_slice; y < end_slice; y++) {
uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
int off = y * w_block;
for (x = 0; x < w_block; x++) {
ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
p1 + x * ctx->ctexture_block_w, frame->linesize[2],
c + (off + x) * ctx->ctex_step);
}
}
}
return 0;
}
/* This scheme addresses already decoded elements depending on 2-bit status:
* 0 -> copy new element
* 1 -> copy one element from position -x
@ -1044,6 +851,8 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
{
DXVContext *ctx = avctx->priv_data;
GetByteContext *gbc = &ctx->gbc;
AVCodecContext cavctx = *avctx;
TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx;
int (*decompress_tex)(AVCodecContext *avctx);
const char *msgcomp, *msgtext;
uint32_t tag;
@ -1053,21 +862,22 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
bytestream2_init(gbc, avpkt->data, avpkt->size);
cavctx.coded_height = avctx->coded_height / 2;
cavctx.coded_width = avctx->coded_width / 2;
ctx->texture_block_h = 4;
ctx->texture_block_w = 4;
avctx->pix_fmt = AV_PIX_FMT_RGBA;
avctx->colorspace = AVCOL_SPC_RGB;
ctx->tex_funct = NULL;
ctx->tex_funct_planar[0] = NULL;
ctx->tex_funct_planar[1] = NULL;
tag = bytestream2_get_le32(gbc);
switch (tag) {
case DXV_FMT_DXT1:
decompress_tex = dxv_decompress_dxt1;
ctx->tex_funct = ctx->texdsp.dxt1_block;
texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
texdsp_ctx.tex_ratio = 8;
texdsp_ctx.raw_ratio = 16;
ctx->tex_rat = 8;
ctx->tex_step = 8;
msgcomp = "DXTR1";
@ -1076,7 +886,9 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
case DXV_FMT_DXT5:
decompress_tex = dxv_decompress_dxt5;
/* DXV misnomers DXT5, alpha is premultiplied so use DXT4 instead */
ctx->tex_funct = ctx->texdsp.dxt4_block;
texdsp_ctx.tex_funct = ctx->texdsp.dxt4_block;
texdsp_ctx.tex_ratio = 16;
texdsp_ctx.raw_ratio = 16;
ctx->tex_rat = 4;
ctx->tex_step = 16;
msgcomp = "DXTR5";
@ -1084,8 +896,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
case DXV_FMT_YCG6:
decompress_tex = dxv_decompress_ycg6;
ctx->tex_funct_planar[0] = yo_block;
ctx->tex_funct_planar[1] = cocg_block;
texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
texdsp_ctx.tex_ratio = 8;
texdsp_ctx.raw_ratio = 4;
ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
ctexdsp_ctx.tex_ratio = 16;
ctexdsp_ctx.raw_ratio = 4;
ctx->tex_rat = 8;
ctx->tex_step = 32;
ctx->ctex_step = 16;
@ -1101,8 +917,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
case DXV_FMT_YG10:
decompress_tex = dxv_decompress_yg10;
ctx->tex_funct_planar[0] = yao_block;
ctx->tex_funct_planar[1] = cocg_block;
texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
texdsp_ctx.tex_ratio = 16;
texdsp_ctx.raw_ratio = 4;
ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
ctexdsp_ctx.tex_ratio = 16;
ctexdsp_ctx.raw_ratio = 4;
ctx->tex_rat = 4;
ctx->tex_step = 64;
ctx->ctex_step = 16;
@ -1131,14 +951,20 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
}
if (old_type & 0x40) {
tag = DXV_FMT_DXT5;
msgtext = "DXT5";
ctx->tex_funct = ctx->texdsp.dxt5_block;
texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block;
texdsp_ctx.tex_ratio = 16;
texdsp_ctx.raw_ratio = 16;
ctx->tex_step = 16;
} else if (old_type & 0x20 || version_major == 1) {
tag = DXV_FMT_DXT1;
msgtext = "DXT1";
ctx->tex_funct = ctx->texdsp.dxt1_block;
texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
texdsp_ctx.tex_ratio = 8;
texdsp_ctx.raw_ratio = 16;
ctx->tex_step = 8;
} else {
av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag);
@ -1148,10 +974,10 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
}
ctx->slice_count = av_clip(avctx->thread_count, 1,
avctx->coded_height / FFMAX(ctx->texture_block_h,
ctx->ctexture_block_h));
texdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
avctx->coded_height / TEXTURE_BLOCK_H);
ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
cavctx.coded_height / TEXTURE_BLOCK_H);
/* New header is 12 bytes long. */
if (!old_type) {
version_major = bytestream2_get_byte(gbc) - 1;
@ -1216,9 +1042,44 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
if (ret < 0)
return ret;
/* Now decompress the texture with the standard functions. */
avctx->execute2(avctx, decompress_texture_thread,
frame, NULL, ctx->slice_count);
switch (tag) {
case DXV_FMT_YG10:
/* BC5 texture with alpha in the second half of each block */
texdsp_ctx.tex_data.in = ctx->tex_data + texdsp_ctx.tex_ratio / 2;
texdsp_ctx.frame_data.out = frame->data[3];
texdsp_ctx.stride = frame->linesize[3];
ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
if (ret < 0)
return ret;
/* fallthrough */
case DXV_FMT_YCG6:
/* BC5 texture with Co in the first half of each block and Cg in the second */
ctexdsp_ctx.tex_data.in = ctx->ctex_data;
ctexdsp_ctx.frame_data.out = frame->data[2];
ctexdsp_ctx.stride = frame->linesize[2];
ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
if (ret < 0)
return ret;
ctexdsp_ctx.tex_data.in = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2;
ctexdsp_ctx.frame_data.out = frame->data[1];
ctexdsp_ctx.stride = frame->linesize[1];
ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
if (ret < 0)
return ret;
/* fallthrough */
case DXV_FMT_DXT1:
case DXV_FMT_DXT5:
/* For DXT1 and DXT5, self explanatory
* For YCG6, BC4 texture for Y
* For YG10, BC5 texture with Y in the first half of each block */
texdsp_ctx.tex_data.in = ctx->tex_data;
texdsp_ctx.frame_data.out = frame->data[0];
texdsp_ctx.stride = frame->linesize[0];
ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
if (ret < 0)
return ret;
break;
}
/* Frame is ready to be output. */
frame->pict_type = AV_PICTURE_TYPE_I;