avcodec/flashsv: Avoid deflating data

Currently priming the zlib decompressor involves compressing
data directly after having decompressed it and decompressing
it again in order to set the "dictionary" and to initialize
the adler32-checksum. Yet this is wasteful and can be simplified
by synthetizing the compressed data via non-compressed blocks.

This reduced the amount of allocations for the decoding part
of fate-vsynth1-flashsv2, namely from
total heap usage: 9,135 allocs, 9,135 frees, 376,503,427 bytes allocated
to
total heap usage: 2,373 allocs, 2,373 frees, 14,144,083 bytes allocated

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2022-03-11 14:19:27 +01:00
parent 88cccd1c73
commit dcc3f62270
1 changed files with 38 additions and 51 deletions

View File

@ -63,11 +63,10 @@ typedef struct FlashSVContext {
AVBufferRef *keyframedata_buf; AVBufferRef *keyframedata_buf;
uint8_t *keyframe; uint8_t *keyframe;
BlockInfo *blocks; BlockInfo *blocks;
uint8_t *deflate_block;
int deflate_block_size;
int color_depth; int color_depth;
int zlibprime_curr, zlibprime_prev; int zlibprime_curr, zlibprime_prev;
int diff_start, diff_height; int diff_start, diff_height;
uint8_t tmp[UINT16_MAX];
} FlashSVContext; } FlashSVContext;
static int decode_hybrid(const uint8_t *sptr, const uint8_t *sptr_end, uint8_t *dptr, int dx, int dy, static int decode_hybrid(const uint8_t *sptr, const uint8_t *sptr_end, uint8_t *dptr, int dx, int dy,
@ -141,41 +140,59 @@ static av_cold int flashsv_decode_init(AVCodecContext *avctx)
static int flashsv2_prime(FlashSVContext *s, const uint8_t *src, int size) static int flashsv2_prime(FlashSVContext *s, const uint8_t *src, int size)
{ {
z_stream zs;
int zret; // Zlib return code int zret; // Zlib return code
static const uint8_t zlib_header[] = { 0x78, 0x01 };
uint8_t *data = s->tmpblock;
unsigned remaining;
if (!src) if (!src)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
zs.zalloc = NULL;
zs.zfree = NULL;
zs.opaque = NULL;
s->zstream.next_in = src; s->zstream.next_in = src;
s->zstream.avail_in = size; s->zstream.avail_in = size;
s->zstream.next_out = s->tmpblock; s->zstream.next_out = data;
s->zstream.avail_out = s->block_size * 3; s->zstream.avail_out = s->block_size * 3;
inflate(&s->zstream, Z_SYNC_FLUSH); inflate(&s->zstream, Z_SYNC_FLUSH);
remaining = s->block_size * 3 - s->zstream.avail_out;
if (deflateInit(&zs, 0) != Z_OK)
return -1;
zs.next_in = s->tmpblock;
zs.avail_in = s->block_size * 3 - s->zstream.avail_out;
zs.next_out = s->deflate_block;
zs.avail_out = s->deflate_block_size;
deflate(&zs, Z_SYNC_FLUSH);
deflateEnd(&zs);
if ((zret = inflateReset(&s->zstream)) != Z_OK) { if ((zret = inflateReset(&s->zstream)) != Z_OK) {
av_log(s->avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret); av_log(s->avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
} }
s->zstream.next_in = s->deflate_block; /* Create input for zlib that is equivalent to encoding the output
s->zstream.avail_in = s->deflate_block_size - zs.avail_out; * from above and decoding it again (the net result of this is that
s->zstream.next_out = s->tmpblock; * the dictionary of past decoded data is correctly primed and
s->zstream.avail_out = s->block_size * 3; * the adler32 checksum is correctly initialized).
* This is accomplished by synthetizing blocks of uncompressed data
* out of the output from above. See section 3.2.4 of RFC 1951. */
s->zstream.next_in = zlib_header;
s->zstream.avail_in = sizeof(zlib_header);
inflate(&s->zstream, Z_SYNC_FLUSH); inflate(&s->zstream, Z_SYNC_FLUSH);
while (remaining > 0) {
unsigned block_size = FFMIN(UINT16_MAX, remaining);
uint8_t header[5];
/* Bit 0: Non-last-block, bits 1-2: BTYPE for uncompressed block */
header[0] = 0;
/* Block size */
AV_WL16(header + 1, block_size);
/* Block size (one's complement) */
AV_WL16(header + 3, block_size ^ 0xFFFF);
s->zstream.next_in = header;
s->zstream.avail_in = sizeof(header);
s->zstream.next_out = s->tmp;
s->zstream.avail_out = sizeof(s->tmp);
zret = inflate(&s->zstream, Z_SYNC_FLUSH);
if (zret != Z_OK)
return AVERROR_UNKNOWN;
s->zstream.next_in = data;
s->zstream.avail_in = block_size;
zret = inflate(&s->zstream, Z_SYNC_FLUSH);
if (zret != Z_OK)
return AVERROR_UNKNOWN;
data += block_size;
remaining -= block_size;
}
return 0; return 0;
} }
@ -248,22 +265,6 @@ static int flashsv_decode_block(AVCodecContext *avctx, const AVPacket *avpkt,
return 0; return 0;
} }
static int calc_deflate_block_size(int tmpblock_size)
{
z_stream zstream;
int size;
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
if (deflateInit(&zstream, 0) != Z_OK)
return -1;
size = deflateBound(&zstream, tmpblock_size);
deflateEnd(&zstream);
return size;
}
static int flashsv_decode_frame(AVCodecContext *avctx, void *data, static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
int *got_frame, AVPacket *avpkt) int *got_frame, AVPacket *avpkt)
{ {
@ -322,19 +323,6 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
"Cannot allocate decompression buffer.\n"); "Cannot allocate decompression buffer.\n");
return err; return err;
} }
if (s->ver == 2) {
s->deflate_block_size = calc_deflate_block_size(tmpblock_size);
if (s->deflate_block_size <= 0) {
av_log(avctx, AV_LOG_ERROR,
"Cannot determine deflate buffer size.\n");
return -1;
}
if ((err = av_reallocp(&s->deflate_block, s->deflate_block_size)) < 0) {
s->block_size = 0;
av_log(avctx, AV_LOG_ERROR, "Cannot allocate deflate buffer.\n");
return err;
}
}
} }
s->block_size = s->block_width * s->block_height; s->block_size = s->block_width * s->block_height;
@ -570,7 +558,6 @@ static av_cold int flashsv2_decode_end(AVCodecContext *avctx)
av_buffer_unref(&s->keyframedata_buf); av_buffer_unref(&s->keyframedata_buf);
av_freep(&s->blocks); av_freep(&s->blocks);
av_freep(&s->keyframe); av_freep(&s->keyframe);
av_freep(&s->deflate_block);
flashsv_decode_end(avctx); flashsv_decode_end(avctx);
return 0; return 0;