mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-28 18:32:22 +00:00
lavc/dvx: use texdsp funcs for texture block decompression
Signed-off-by: Connor Worley <connorbworley@gmail.com>
This commit is contained in:
parent
939bf30d82
commit
2f9936e446
289
libavcodec/dxv.c
289
libavcodec/dxv.c
@ -38,15 +38,12 @@ typedef struct DXVContext {
|
||||
GetByteContext gbc;
|
||||
|
||||
uint8_t *tex_data; // Compressed texture
|
||||
uint8_t *ctex_data; // Compressed texture
|
||||
uint8_t *ctex_data; // Compressed chroma texture
|
||||
int tex_rat; // Compression ratio
|
||||
int tex_step; // Distance between blocks
|
||||
int ctex_step; // Distance between blocks
|
||||
int64_t tex_size; // Texture size
|
||||
int64_t ctex_size; // Texture size
|
||||
|
||||
/* Optimal number of slices for parallel decoding */
|
||||
int slice_count;
|
||||
int64_t ctex_size; // Chroma texture size
|
||||
|
||||
uint8_t *op_data[4]; // Opcodes
|
||||
int64_t op_size[4]; // Opcodes size
|
||||
@ -56,198 +53,8 @@ typedef struct DXVContext {
|
||||
|
||||
int ctexture_block_w;
|
||||
int ctexture_block_h;
|
||||
|
||||
/* Pointer to the selected decompression function */
|
||||
int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
|
||||
int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
|
||||
uint8_t *plane1, ptrdiff_t stride1,
|
||||
const uint8_t *block);
|
||||
} DXVContext;
|
||||
|
||||
static void decompress_indices(uint8_t *dst, const uint8_t *src)
|
||||
{
|
||||
int block, i;
|
||||
|
||||
for (block = 0; block < 2; block++) {
|
||||
int tmp = AV_RL24(src);
|
||||
|
||||
/* Unpack 8x3 bit from last 3 byte block */
|
||||
for (i = 0; i < 8; i++)
|
||||
dst[i] = (tmp >> (i * 3)) & 0x7;
|
||||
|
||||
src += 3;
|
||||
dst += 8;
|
||||
}
|
||||
}
|
||||
|
||||
static int extract_component(int yo0, int yo1, int code)
|
||||
{
|
||||
int yo;
|
||||
|
||||
if (yo0 == yo1) {
|
||||
yo = yo0;
|
||||
} else if (code == 0) {
|
||||
yo = yo0;
|
||||
} else if (code == 1) {
|
||||
yo = yo1;
|
||||
} else {
|
||||
if (yo0 > yo1) {
|
||||
yo = (uint8_t) (((8 - code) * yo0 +
|
||||
(code - 1) * yo1) / 7);
|
||||
} else {
|
||||
if (code == 6) {
|
||||
yo = 0;
|
||||
} else if (code == 7) {
|
||||
yo = 255;
|
||||
} else {
|
||||
yo = (uint8_t) (((6 - code) * yo0 +
|
||||
(code - 1) * yo1) / 5);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return yo;
|
||||
}
|
||||
|
||||
static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
|
||||
uint8_t *plane1, ptrdiff_t stride1,
|
||||
const uint8_t *block)
|
||||
{
|
||||
uint8_t co_indices[16];
|
||||
uint8_t cg_indices[16];
|
||||
uint8_t co0 = *(block);
|
||||
uint8_t co1 = *(block + 1);
|
||||
uint8_t cg0 = *(block + 8);
|
||||
uint8_t cg1 = *(block + 9);
|
||||
int x, y;
|
||||
|
||||
decompress_indices(co_indices, block + 2);
|
||||
decompress_indices(cg_indices, block + 10);
|
||||
|
||||
for (y = 0; y < 4; y++) {
|
||||
for (x = 0; x < 4; x++) {
|
||||
int co_code = co_indices[x + y * 4];
|
||||
int cg_code = cg_indices[x + y * 4];
|
||||
|
||||
plane0[x] = extract_component(cg0, cg1, cg_code);
|
||||
plane1[x] = extract_component(co0, co1, co_code);
|
||||
}
|
||||
plane0 += stride0;
|
||||
plane1 += stride1;
|
||||
}
|
||||
|
||||
return 16;
|
||||
}
|
||||
|
||||
static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
|
||||
ptrdiff_t stride, const uint8_t *block)
|
||||
{
|
||||
uint8_t yo0 = *(block);
|
||||
uint8_t yo1 = *(block + 1);
|
||||
int x, y;
|
||||
|
||||
decompress_indices(yo_indices, block + 2);
|
||||
|
||||
for (y = 0; y < 4; y++) {
|
||||
for (x = 0; x < 4; x++) {
|
||||
int yo_code = yo_indices[x + y * 4];
|
||||
|
||||
dst[x] = extract_component(yo0, yo1, yo_code);
|
||||
}
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static int yo_block(uint8_t *dst, ptrdiff_t stride,
|
||||
uint8_t *unused0, ptrdiff_t unused1,
|
||||
const uint8_t *block)
|
||||
{
|
||||
uint8_t yo_indices[16];
|
||||
|
||||
yao_subblock(dst, yo_indices, stride, block);
|
||||
yao_subblock(dst + 4, yo_indices, stride, block + 8);
|
||||
yao_subblock(dst + 8, yo_indices, stride, block + 16);
|
||||
yao_subblock(dst + 12, yo_indices, stride, block + 24);
|
||||
|
||||
return 32;
|
||||
}
|
||||
|
||||
static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
|
||||
uint8_t *plane3, ptrdiff_t stride1,
|
||||
const uint8_t *block)
|
||||
{
|
||||
uint8_t yo_indices[16];
|
||||
uint8_t a_indices[16];
|
||||
|
||||
yao_subblock(plane0, yo_indices, stride0, block);
|
||||
yao_subblock(plane3, a_indices, stride1, block + 8);
|
||||
yao_subblock(plane0 + 4, yo_indices, stride0, block + 16);
|
||||
yao_subblock(plane3 + 4, a_indices, stride1, block + 24);
|
||||
yao_subblock(plane0 + 8, yo_indices, stride0, block + 32);
|
||||
yao_subblock(plane3 + 8, a_indices, stride1, block + 40);
|
||||
yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
|
||||
yao_subblock(plane3 + 12, a_indices, stride1, block + 56);
|
||||
|
||||
return 64;
|
||||
}
|
||||
|
||||
static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
|
||||
int slice, int thread_nb)
|
||||
{
|
||||
const DXVContext *ctx = avctx->priv_data;
|
||||
AVFrame *frame = arg;
|
||||
const uint8_t *d = ctx->tex_data;
|
||||
int w_block = avctx->coded_width / ctx->texture_block_w;
|
||||
int h_block = avctx->coded_height / ctx->texture_block_h;
|
||||
int x, y;
|
||||
int start_slice, end_slice;
|
||||
|
||||
start_slice = h_block * slice / ctx->slice_count;
|
||||
end_slice = h_block * (slice + 1) / ctx->slice_count;
|
||||
|
||||
if (ctx->tex_funct) {
|
||||
for (y = start_slice; y < end_slice; y++) {
|
||||
uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
|
||||
int off = y * w_block;
|
||||
for (x = 0; x < w_block; x++) {
|
||||
ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
|
||||
d + (off + x) * ctx->tex_step);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const uint8_t *c = ctx->ctex_data;
|
||||
|
||||
for (y = start_slice; y < end_slice; y++) {
|
||||
uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
|
||||
uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
|
||||
int off = y * w_block;
|
||||
for (x = 0; x < w_block; x++) {
|
||||
ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
|
||||
p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
|
||||
d + (off + x) * ctx->tex_step);
|
||||
}
|
||||
}
|
||||
|
||||
w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
|
||||
h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
|
||||
start_slice = h_block * slice / ctx->slice_count;
|
||||
end_slice = h_block * (slice + 1) / ctx->slice_count;
|
||||
|
||||
for (y = start_slice; y < end_slice; y++) {
|
||||
uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
|
||||
uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
|
||||
int off = y * w_block;
|
||||
for (x = 0; x < w_block; x++) {
|
||||
ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
|
||||
p1 + x * ctx->ctexture_block_w, frame->linesize[2],
|
||||
c + (off + x) * ctx->ctex_step);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This scheme addresses already decoded elements depending on 2-bit status:
|
||||
* 0 -> copy new element
|
||||
* 1 -> copy one element from position -x
|
||||
@ -1044,6 +851,8 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
{
|
||||
DXVContext *ctx = avctx->priv_data;
|
||||
GetByteContext *gbc = &ctx->gbc;
|
||||
AVCodecContext cavctx = *avctx;
|
||||
TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx;
|
||||
int (*decompress_tex)(AVCodecContext *avctx);
|
||||
const char *msgcomp, *msgtext;
|
||||
uint32_t tag;
|
||||
@ -1053,21 +862,22 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
|
||||
bytestream2_init(gbc, avpkt->data, avpkt->size);
|
||||
|
||||
cavctx.coded_height = avctx->coded_height / 2;
|
||||
cavctx.coded_width = avctx->coded_width / 2;
|
||||
|
||||
ctx->texture_block_h = 4;
|
||||
ctx->texture_block_w = 4;
|
||||
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGBA;
|
||||
avctx->colorspace = AVCOL_SPC_RGB;
|
||||
|
||||
ctx->tex_funct = NULL;
|
||||
ctx->tex_funct_planar[0] = NULL;
|
||||
ctx->tex_funct_planar[1] = NULL;
|
||||
|
||||
tag = bytestream2_get_le32(gbc);
|
||||
switch (tag) {
|
||||
case DXV_FMT_DXT1:
|
||||
decompress_tex = dxv_decompress_dxt1;
|
||||
ctx->tex_funct = ctx->texdsp.dxt1_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
|
||||
texdsp_ctx.tex_ratio = 8;
|
||||
texdsp_ctx.raw_ratio = 16;
|
||||
ctx->tex_rat = 8;
|
||||
ctx->tex_step = 8;
|
||||
msgcomp = "DXTR1";
|
||||
@ -1076,7 +886,9 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
case DXV_FMT_DXT5:
|
||||
decompress_tex = dxv_decompress_dxt5;
|
||||
/* DXV misnomers DXT5, alpha is premultiplied so use DXT4 instead */
|
||||
ctx->tex_funct = ctx->texdsp.dxt4_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.dxt4_block;
|
||||
texdsp_ctx.tex_ratio = 16;
|
||||
texdsp_ctx.raw_ratio = 16;
|
||||
ctx->tex_rat = 4;
|
||||
ctx->tex_step = 16;
|
||||
msgcomp = "DXTR5";
|
||||
@ -1084,8 +896,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
break;
|
||||
case DXV_FMT_YCG6:
|
||||
decompress_tex = dxv_decompress_ycg6;
|
||||
ctx->tex_funct_planar[0] = yo_block;
|
||||
ctx->tex_funct_planar[1] = cocg_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
|
||||
texdsp_ctx.tex_ratio = 8;
|
||||
texdsp_ctx.raw_ratio = 4;
|
||||
ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
|
||||
ctexdsp_ctx.tex_ratio = 16;
|
||||
ctexdsp_ctx.raw_ratio = 4;
|
||||
ctx->tex_rat = 8;
|
||||
ctx->tex_step = 32;
|
||||
ctx->ctex_step = 16;
|
||||
@ -1101,8 +917,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
break;
|
||||
case DXV_FMT_YG10:
|
||||
decompress_tex = dxv_decompress_yg10;
|
||||
ctx->tex_funct_planar[0] = yao_block;
|
||||
ctx->tex_funct_planar[1] = cocg_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
|
||||
texdsp_ctx.tex_ratio = 16;
|
||||
texdsp_ctx.raw_ratio = 4;
|
||||
ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
|
||||
ctexdsp_ctx.tex_ratio = 16;
|
||||
ctexdsp_ctx.raw_ratio = 4;
|
||||
ctx->tex_rat = 4;
|
||||
ctx->tex_step = 64;
|
||||
ctx->ctex_step = 16;
|
||||
@ -1131,14 +951,20 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
}
|
||||
|
||||
if (old_type & 0x40) {
|
||||
tag = DXV_FMT_DXT5;
|
||||
msgtext = "DXT5";
|
||||
|
||||
ctx->tex_funct = ctx->texdsp.dxt5_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block;
|
||||
texdsp_ctx.tex_ratio = 16;
|
||||
texdsp_ctx.raw_ratio = 16;
|
||||
ctx->tex_step = 16;
|
||||
} else if (old_type & 0x20 || version_major == 1) {
|
||||
tag = DXV_FMT_DXT1;
|
||||
msgtext = "DXT1";
|
||||
|
||||
ctx->tex_funct = ctx->texdsp.dxt1_block;
|
||||
texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
|
||||
texdsp_ctx.tex_ratio = 8;
|
||||
texdsp_ctx.raw_ratio = 16;
|
||||
ctx->tex_step = 8;
|
||||
} else {
|
||||
av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag);
|
||||
@ -1148,10 +974,10 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
break;
|
||||
}
|
||||
|
||||
ctx->slice_count = av_clip(avctx->thread_count, 1,
|
||||
avctx->coded_height / FFMAX(ctx->texture_block_h,
|
||||
ctx->ctexture_block_h));
|
||||
|
||||
texdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
|
||||
avctx->coded_height / TEXTURE_BLOCK_H);
|
||||
ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
|
||||
cavctx.coded_height / TEXTURE_BLOCK_H);
|
||||
/* New header is 12 bytes long. */
|
||||
if (!old_type) {
|
||||
version_major = bytestream2_get_byte(gbc) - 1;
|
||||
@ -1216,9 +1042,44 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Now decompress the texture with the standard functions. */
|
||||
avctx->execute2(avctx, decompress_texture_thread,
|
||||
frame, NULL, ctx->slice_count);
|
||||
switch (tag) {
|
||||
case DXV_FMT_YG10:
|
||||
/* BC5 texture with alpha in the second half of each block */
|
||||
texdsp_ctx.tex_data.in = ctx->tex_data + texdsp_ctx.tex_ratio / 2;
|
||||
texdsp_ctx.frame_data.out = frame->data[3];
|
||||
texdsp_ctx.stride = frame->linesize[3];
|
||||
ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/* fallthrough */
|
||||
case DXV_FMT_YCG6:
|
||||
/* BC5 texture with Co in the first half of each block and Cg in the second */
|
||||
ctexdsp_ctx.tex_data.in = ctx->ctex_data;
|
||||
ctexdsp_ctx.frame_data.out = frame->data[2];
|
||||
ctexdsp_ctx.stride = frame->linesize[2];
|
||||
ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ctexdsp_ctx.tex_data.in = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2;
|
||||
ctexdsp_ctx.frame_data.out = frame->data[1];
|
||||
ctexdsp_ctx.stride = frame->linesize[1];
|
||||
ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/* fallthrough */
|
||||
case DXV_FMT_DXT1:
|
||||
case DXV_FMT_DXT5:
|
||||
/* For DXT1 and DXT5, self explanatory
|
||||
* For YCG6, BC4 texture for Y
|
||||
* For YG10, BC5 texture with Y in the first half of each block */
|
||||
texdsp_ctx.tex_data.in = ctx->tex_data;
|
||||
texdsp_ctx.frame_data.out = frame->data[0];
|
||||
texdsp_ctx.stride = frame->linesize[0];
|
||||
ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Frame is ready to be output. */
|
||||
frame->pict_type = AV_PICTURE_TYPE_I;
|
||||
|
Loading…
Reference in New Issue
Block a user