avcodec/magicyuv: add vlc multi support

Gives a nice speed boost: depending on the encoded content, decoding is
30% to 60% faster.
This commit is contained in:
Paul B Mahol 2023-08-28 12:20:15 +02:00
parent 29b9fca4b3
commit 8b7391cb5f
1 changed files with 34 additions and 31 deletions

View File

@ -34,6 +34,8 @@
#include "lossless_videodsp.h" #include "lossless_videodsp.h"
#include "thread.h" #include "thread.h"
/* Upper limit applied (via FFMIN) to the first code length when huff_build()
 * passes the bit-count argument to ff_init_vlc_multi_from_lengths(). */
#define VLC_BITS 12
typedef struct Slice { typedef struct Slice {
uint32_t start; uint32_t start;
uint32_t size; uint32_t size;
@ -67,13 +69,14 @@ typedef struct MagicYUVContext {
Slice *slices[4]; // slice bitstream positions for each plane Slice *slices[4]; // slice bitstream positions for each plane
unsigned int slices_size[4]; // slice sizes for each plane unsigned int slices_size[4]; // slice sizes for each plane
VLC vlc[4]; // VLC for each plane VLC vlc[4]; // VLC for each plane
VLC_MULTI multi[4]; // Buffer for joint VLC data
int (*magy_decode_slice)(AVCodecContext *avctx, void *tdata, int (*magy_decode_slice)(AVCodecContext *avctx, void *tdata,
int j, int threadnr); int j, int threadnr);
LLVidDSPContext llviddsp; LLVidDSPContext llviddsp;
} MagicYUVContext; } MagicYUVContext;
static int huff_build(const uint8_t len[], uint16_t codes_pos[33], static int huff_build(const uint8_t len[], uint16_t codes_pos[33],
VLC *vlc, int nb_elems, void *logctx) VLC *vlc, VLC_MULTI *multi, int nb_elems, void *logctx)
{ {
HuffEntry he[4096]; HuffEntry he[4096];
@ -84,7 +87,8 @@ static int huff_build(const uint8_t len[], uint16_t codes_pos[33],
he[--codes_pos[len[i]]] = (HuffEntry){ len[i], i }; he[--codes_pos[len[i]]] = (HuffEntry){ len[i], i };
ff_free_vlc(vlc); ff_free_vlc(vlc);
return ff_init_vlc_from_lengths(vlc, FFMIN(he[0].len, 12), nb_elems, ff_free_vlc_multi(multi);
return ff_init_vlc_multi_from_lengths(vlc, multi, FFMIN(he[0].len, VLC_BITS), nb_elems, nb_elems,
&he[0].len, sizeof(he[0]), &he[0].len, sizeof(he[0]),
&he[0].sym, sizeof(he[0]), sizeof(he[0].sym), &he[0].sym, sizeof(he[0]), sizeof(he[0].sym),
0, 0, logctx); 0, 0, logctx);
@ -111,6 +115,22 @@ static void magicyuv_median_pred16(uint16_t *dst, const uint16_t *src1,
*left_top = lt; *left_top = lt;
} }
/*
 * Decode one row of VLC-coded samples into dst and advance dst by stride.
 *
 * dst   - row pointer; written through and then moved to the next row
 * plane - not referenced by the expansion
 * b     - multiplier turning the sample index x into a byte offset for
 *         get_vlc_multi() (call sites pass 2 for the 16-bit path, 1 for 8-bit)
 * c     - number of samples at the end of the row that are always left to
 *         the one-symbol-at-a-time loop
 *
 * The expansion relies on these names from the enclosing scope: x, width,
 * ret, gb, multi, vlc, vlc_bits and stride.  It makes the enclosing function
 * return AVERROR_INVALIDDATA when get_vlc_multi() reports no progress
 * (ret <= 0).  Both loops simply stop once get_bits_left() is no longer
 * positive.
 *
 * The expansion is a bare { } block so it can be used directly as a loop
 * body without a trailing semicolon.
 */
#define READ_PLANE(dst, plane, b, c) \
{ \
    x = 0; \
    /* Multi-symbol path, only taken with the cached bitstream reader. */ \
    while (CACHED_BITSTREAM_READER && x < width-c && get_bits_left(&gb) > 0) { \
        ret = get_vlc_multi(&gb, (uint8_t *)dst + x * b, multi, \
                            vlc, vlc_bits, 3); \
        if (ret <= 0) \
            return AVERROR_INVALIDDATA; \
        x += ret; \
    } \
    /* Remaining samples of the row, one symbol per lookup. */ \
    while (x < width && get_bits_left(&gb) > 0) { \
        dst[x] = get_vlc2(&gb, vlc, vlc_bits, 3); \
        x++; \
    } \
    dst += stride; \
}
static int magy_decode_slice10(AVCodecContext *avctx, void *tdata, static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
int j, int threadnr) int j, int threadnr)
{ {
@ -130,6 +150,9 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
int sheight = AV_CEIL_RSHIFT(s->slice_height, s->vshift[i]); int sheight = AV_CEIL_RSHIFT(s->slice_height, s->vshift[i]);
ptrdiff_t fake_stride = (p->linesize[i] / 2) * (1 + interlaced); ptrdiff_t fake_stride = (p->linesize[i] / 2) * (1 + interlaced);
ptrdiff_t stride = p->linesize[i] / 2; ptrdiff_t stride = p->linesize[i] / 2;
const VLC_MULTI_ELEM *const multi = s->multi[i].table;
const VLCElem *const vlc = s->vlc[i].table;
const int vlc_bits = s->vlc[i].bits;
int flags, pred; int flags, pred;
int ret = init_get_bits8(&gb, s->buf + s->slices[i][j].start, int ret = init_get_bits8(&gb, s->buf + s->slices[i][j].start,
s->slices[i][j].size); s->slices[i][j].size);
@ -151,20 +174,8 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
dst += stride; dst += stride;
} }
} else { } else {
for (k = 0; k < height; k++) { for (k = 0; k < height; k++)
for (x = 0; x < width; x++) { READ_PLANE(dst, i, 2, 3)
int pix;
if (get_bits_left(&gb) <= 0)
return AVERROR_INVALIDDATA;
pix = get_vlc2(&gb, s->vlc[i].table, s->vlc[i].bits, 3);
if (pix < 0)
return AVERROR_INVALIDDATA;
dst[x] = pix;
}
dst += stride;
}
} }
switch (pred) { switch (pred) {
@ -261,6 +272,9 @@ static int magy_decode_slice(AVCodecContext *avctx, void *tdata,
ptrdiff_t fake_stride = p->linesize[i] * (1 + interlaced); ptrdiff_t fake_stride = p->linesize[i] * (1 + interlaced);
ptrdiff_t stride = p->linesize[i]; ptrdiff_t stride = p->linesize[i];
const uint8_t *slice = s->buf + s->slices[i][j].start; const uint8_t *slice = s->buf + s->slices[i][j].start;
const VLC_MULTI_ELEM *const multi = s->multi[i].table;
const VLCElem *const vlc = s->vlc[i].table;
const int vlc_bits = s->vlc[i].bits;
int flags, pred; int flags, pred;
flags = bytestream_get_byte(&slice); flags = bytestream_get_byte(&slice);
@ -280,20 +294,8 @@ static int magy_decode_slice(AVCodecContext *avctx, void *tdata,
if (ret < 0) if (ret < 0)
return ret; return ret;
for (k = 0; k < height; k++) { for (k = 0; k < height; k++)
for (x = 0; x < width; x++) { READ_PLANE(dst, i, 1, 5)
int pix;
if (get_bits_left(&gb) <= 0)
return AVERROR_INVALIDDATA;
pix = get_vlc2(&gb, s->vlc[i].table, s->vlc[i].bits, 3);
if (pix < 0)
return AVERROR_INVALIDDATA;
dst[x] = pix;
}
dst += stride;
}
} }
switch (pred) { switch (pred) {
@ -407,7 +409,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
if (j == max) { if (j == max) {
j = 0; j = 0;
if (huff_build(len, length_count, &s->vlc[i], max, avctx)) { if (huff_build(len, length_count, &s->vlc[i], &s->multi[i], max, avctx)) {
av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n"); av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
@ -687,6 +689,7 @@ static av_cold int magy_decode_end(AVCodecContext *avctx)
av_freep(&s->slices[i]); av_freep(&s->slices[i]);
s->slices_size[i] = 0; s->slices_size[i] = 0;
ff_free_vlc(&s->vlc[i]); ff_free_vlc(&s->vlc[i]);
ff_free_vlc_multi(&s->multi[i]);
} }
return 0; return 0;