From 29b9fca4b3ca24ee8d8f951a3afe6daa77089096 Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Sun, 27 Aug 2023 21:18:46 +0200
Subject: [PATCH] avcodec: add multi vlc reader

Heavily based and inspired by Christophe's cache branches.

Co-Authored-by: Christophe Gisquet
---
 libavcodec/bitstream.h          |   2 +
 libavcodec/bitstream_template.h |  45 +++++++++
 libavcodec/get_bits.h           |  16 +++
 libavcodec/vlc.c                | 169 ++++++++++++++++++++++++++++++++
 libavcodec/vlc.h                |  60 +++++++++++
 5 files changed, 292 insertions(+)

diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index b60f0c296d..35b7873b9c 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -102,6 +102,7 @@
 # define bits_decode210 bits_decode210_le
 # define bits_apply_sign bits_apply_sign_le
 # define bits_read_vlc bits_read_vlc_le
+# define bits_read_vlc_multi bits_read_vlc_multi_le

 #elif defined(BITS_DEFAULT_BE)

@@ -130,6 +131,7 @@
 # define bits_decode210 bits_decode210_be
 # define bits_apply_sign bits_apply_sign_be
 # define bits_read_vlc bits_read_vlc_be
+# define bits_read_vlc_multi bits_read_vlc_multi_be

 #endif

diff --git a/libavcodec/bitstream_template.h b/libavcodec/bitstream_template.h
index 30bea84add..0308e3a924 100644
--- a/libavcodec/bitstream_template.h
+++ b/libavcodec/bitstream_template.h
@@ -520,6 +520,51 @@ static inline int BS_FUNC(read_vlc)(BSCTX *bc, const VLCElem *table,
     return code;
 }

+/**
+ * Read one or more VLC symbols with a single lookup.
+ *
+ * The next bits bits of the stream index Jtable. If that entry holds
+ * symbols (num != 0), 8 bytes starting at its val field are copied to dst
+ * and num is returned. Otherwise one code is read through table, descending
+ * at most max_depth levels, and stored at dst as a 16-bit value.
+ *
+ * @param bc        bitstream reader
+ * @param dst       destination; must have room for an 8-byte write
+ * @param Jtable    multi-symbol table
+ * @param table     single-symbol table used when Jtable has no entry
+ * @param bits      number of bits used to index both tables
+ * @param max_depth maximum number of table levels to read
+ * @return number of symbols written to dst; 0 if the code is invalid
+ */
+static inline int BS_FUNC(read_vlc_multi)(BSCTX *bc, uint8_t *dst,
+                                          const VLC_MULTI_ELEM *const Jtable,
+                                          const VLCElem *const table,
+                                          const int bits, const int max_depth)
+{
+    unsigned idx = BS_FUNC(peek)(bc, bits);
+    int ret, nb_bits, code, n = Jtable[idx].len;
+    if (Jtable[idx].num) {
+        AV_COPY64U(dst, Jtable[idx].val);
+        ret = Jtable[idx].num;
+    } else {
+        code = table[idx].sym;
+        n = table[idx].len;
+        if (max_depth > 1 && n < 0) {
+            BS_FUNC(priv_skip_remaining)(bc, bits);
+            code = BS_FUNC(priv_set_idx)(bc, code, &n, &nb_bits, table);
+            if (max_depth > 2 && n < 0) {
+                BS_FUNC(priv_skip_remaining)(bc, nb_bits);
+                code = BS_FUNC(priv_set_idx)(bc, code, &n, &nb_bits, table);
+            }
+        }
+        AV_WN16(dst, code);
+        ret = n > 0;
+    }
+    BS_FUNC(priv_skip_remaining)(bc, n);
+
+    return ret;
+}
+
 #undef BSCTX
 #undef BS_FUNC
 #undef BS_JOIN3
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 65dc080ddb..0594e104bb 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -93,6 +93,7 @@ typedef BitstreamContext GetBitContext;
 #define init_get_bits8 bits_init8
 #define align_get_bits bits_align
 #define get_vlc2 bits_read_vlc
+#define get_vlc_multi bits_read_vlc_multi

 #define init_get_bits8_le(s, buffer, byte_size) bits_init8_le((BitstreamContextLE*)s, buffer, byte_size)
 #define get_bits_le(s, n) bits_read_le((BitstreamContextLE*)s, n)
@@ -643,6 +644,21 @@ static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table,
     return code;
 }

+/**
+ * Read a single symbol with get_vlc2(); Jtable is not used.
+ * Only dst[0] is written.
+ *
+ * @return 1
+ */
+static inline int get_vlc_multi(GetBitContext *s, uint8_t *dst,
+                                const VLC_MULTI_ELEM *const Jtable,
+                                const VLCElem *const table,
+                                const int bits, const int max_depth)
+{
+    dst[0] = get_vlc2(s, table, bits, max_depth);
+    return 1;
+}
+
 static inline int decode012(GetBitContext *gb)
 {
     int n;
diff --git a/libavcodec/vlc.c b/libavcodec/vlc.c
index 96f2b28c7e..9656a9472c 100644
--- a/libavcodec/vlc.c
+++ b/libavcodec/vlc.c
@@ -30,6 +30,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/error.h"
 #include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/macros.h"
 #include "libavutil/mem.h"
@@ -372,6 +373,174 @@ fail:
     return AVERROR_INVALIDDATA;
 }

+/**
+ * Recursively add entries for sequences of codes to the multi-symbol table.
+ *
+ * Each candidate code (buf[num-1] down to buf[max+1], plus the entry just
+ * below each of them) is appended to the prefix described by curcode/curlen.
+ * From the second symbol on (curlevel > 0), every table slot whose leading
+ * bits equal the combined code is overwritten with the symbols gathered in
+ * info. The recursion goes on while fewer than VLC_MULTI_MAX_SYMBOLS symbols
+ * are stored and at least minlen bits are left.
+ *
+ * levelcnt[k] counts the code sequences of k+2 symbols that were added.
+ */
+static void add_level(VLC_MULTI_ELEM *table, const int nb_elems,
+                      const int num, const int numbits,
+                      const VLCcode *buf,
+                      uint32_t curcode, int curlen,
+                      int curlimit, int curlevel,
+                      const int minlen, const int max,
+                      unsigned* levelcnt, VLC_MULTI_ELEM *info)
+{
+    if (nb_elems > 256 && curlevel > 2)
+        return; // No room
+    for (int i = num-1; i > max; i--) {
+        for (int j = 0; j < 2; j++) {
+            int newlimit, sym;
+            int t = j ? i-1 : i;
+            int l = buf[t].bits;
+            uint32_t code;
+
+            sym = buf[t].symbol;
+            if (l > curlimit)
+                return;
+            code = curcode + (buf[t].code >> curlen);
+            newlimit = curlimit - l;
+            l += curlen;
+            if (nb_elems>256) AV_WN16(info->val+2*curlevel, sym);
+            else info->val[curlevel] = sym&0xFF;
+
+            if (curlevel) { // let's not add single entries
+                uint32_t val = code >> (32 - numbits);
+                uint32_t nb = val + (1U << (numbits - l));
+                info->len = l;
+                info->num = curlevel+1;
+                for (; val < nb; val++)
+                    AV_COPY64(table+val, info);
+                levelcnt[curlevel-1]++;
+            }
+
+            if (curlevel+1 < VLC_MULTI_MAX_SYMBOLS && newlimit >= minlen) {
+                add_level(table, nb_elems, num, numbits, buf,
+                          code, l, newlimit, curlevel+1,
+                          minlen, max, levelcnt, info);
+            }
+        }
+    }
+}
+
+/**
+ * Fill the multi-symbol table from an already built single-symbol VLC.
+ *
+ * All 1 << numbits slots are first seeded from single->table (one symbol, or
+ * num == 0 when the entry's length is not positive); add_level() then
+ * overwrites the slots that can hold several symbols.
+ *
+ * @return 0
+ */
+static int vlc_multi_gen(VLC_MULTI_ELEM *table, const VLC *single,
+                         const int nb_elems, const int nb_codes, const int numbits,
+                         VLCcode *buf, void *logctx)
+{
+    int minbits, maxbits, max = nb_codes-1;
+    unsigned count[VLC_MULTI_MAX_SYMBOLS-1] = { 0, };
+    VLC_MULTI_ELEM info = { { 0, }, 0, };
+
+    minbits = buf[nb_codes-1].bits;
+    maxbits = FFMIN(buf[0].bits, numbits);
+
+    while (max >= nb_codes/2) {
+        if (buf[max].bits+minbits > maxbits)
+            break;
+        max--;
+    }
+
+    for (int j = 0; j < 1<<numbits; j++) {
+        table[j].len = single->table[j].len;
+        table[j].num = single->table[j].len > 0 ? 1 : 0;
+        AV_WN16(table[j].val, single->table[j].sym);
+    }
+
+    add_level(table, nb_elems, nb_codes, numbits, buf,
+              0, 0, numbits, 0, minbits, max, count, &info);
+
+    av_log(logctx, AV_LOG_DEBUG, "Joint: %d/%d/%d/%d/%d codes min=%ubits max=%u\n",
+           count[0], count[1], count[2], count[3], count[4], minbits, max);
+
+    return 0;
+}
+
+int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems,
+                                   int nb_codes, const int8_t *lens, int lens_wrap,
+                                   const void *symbols, int symbols_wrap, int symbols_size,
+                                   int offset, int flags, void *logctx)
+{
+    VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
+    uint64_t code;
+    int ret, j, len_max = FFMIN(32, 3 * nb_bits);
+
+    ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
+    if (ret < 0)
+        return ret;
+
+    multi->table = av_malloc(sizeof(*multi->table) << nb_bits);
+    if (!multi->table) {
+        // buf may have been heap-allocated by vlc_common_init()
+        if (buf != localbuf)
+            av_free(buf);
+        return AVERROR(ENOMEM);
+    }
+
+    j = code = 0;
+    for (int i = 0; i < nb_codes; i++, lens += lens_wrap) {
+        int len = *lens;
+        if (len > 0) {
+            unsigned sym;
+
+            buf[j].bits = len;
+            if (symbols)
+                GET_DATA(sym, symbols, i, symbols_wrap, symbols_size)
+            else
+                sym = i;
+            buf[j].symbol = sym + offset;
+            buf[j++].code = code;
+        } else if (len < 0) {
+            len = -len;
+        } else
+            continue;
+        if (len > len_max || code & ((1U << (32 - len)) - 1)) {
+            av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", len);
+            goto fail;
+        }
+        code += 1U << (32 - len);
+        if (code > UINT32_MAX + 1ULL) {
+            av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");
+            goto fail;
+        }
+    }
+    // Hand buf itself to vlc_common_end() as its local buffer so that it is
+    // not released there (NOTE(review): confirm against vlc_common_end());
+    // vlc_multi_gen() below still reads the codes.
+    ret = vlc_common_end(vlc, nb_bits, j, buf, flags, buf);
+    if (ret < 0)
+        goto fail;
+    ret = vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf, logctx);
+    if (buf != localbuf)
+        av_free(buf);
+    return ret;
+fail:
+    if (buf != localbuf)
+        av_free(buf);
+    av_freep(&multi->table);
+    return AVERROR_INVALIDDATA;
+}
+
+void ff_free_vlc_multi(VLC_MULTI *vlc)
+{
+    av_freep(&vlc->table);
+}
+
 void ff_free_vlc(VLC *vlc)
 {
     av_freep(&vlc->table);
diff --git a/libavcodec/vlc.h b/libavcodec/vlc.h
index e63c484755..46063862f6 100644
--- a/libavcodec/vlc.h
+++ b/libavcodec/vlc.h
@@ -21,6 +21,8 @@

 #include <stdint.h>

+#define VLC_MULTI_MAX_SYMBOLS 6
+
 // When changing this, be sure to also update tableprint_vlc.h accordingly.
 typedef int16_t VLCBaseType;

@@ -34,6 +36,21 @@ typedef struct VLC {
     int table_size, table_allocated;
 } VLC;

+/**
+ * Entry of a multi-symbol VLC table. Entries are copied as single 64-bit
+ * units, so the structure must stay 8 bytes large.
+ */
+typedef struct VLC_MULTI_ELEM {
+    uint8_t val[VLC_MULTI_MAX_SYMBOLS]; ///< symbols, 1 or 2 bytes each
+    int8_t len; // -31,32
+    uint8_t num;                        ///< number of symbols stored in val
+} VLC_MULTI_ELEM;
+
+typedef struct VLC_MULTI {
+    VLC_MULTI_ELEM *table;
+    int table_size, table_allocated;
+} VLC_MULTI;
+
 typedef struct RL_VLC_ELEM {
     int16_t level;
     int8_t len;
@@ -89,6 +106,49 @@ int ff_init_vlc_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
                              const void *symbols, int symbols_wrap, int symbols_size,
                              int offset, int flags, void *logctx);

+/**
+ * Build VLC decoding tables suitable for use with get_vlc_multi()
+ *
+ * This function takes lengths and symbols and calculates the codes from them.
+ * For this the input lengths and symbols have to be sorted according to "left
+ * nodes in the corresponding tree first".
+ *
+ * @param[in,out] vlc    The VLC to be initialized; table and table_allocated
+ *                       must have been set when initializing a static VLC,
+ *                       otherwise this will be treated as uninitialized.
+ * @param[out] multi     The VLC_MULTI to be initialized; its table is always
+ *                       allocated by this function and must be released
+ *                       with ff_free_vlc_multi().
+ * @param[in] nb_bits    The number of bits to use for the VLC table;
+ *                       higher values take up more memory and cache, but
+ *                       allow to read codes with fewer reads.
+ * @param[in] nb_elems   The max possible number of elements; above 256,
+ *                       multi-symbol entries store 16-bit symbols.
+ * @param[in] nb_codes   The number of provided length and (if supplied) symbol
+ *                       entries.
+ * @param[in] lens       The lengths of the codes. Entries > 0 correspond to
+ *                       valid codes; entries == 0 will be skipped and entries
+ *                       with len < 0 indicate that the tree is incomplete and
+ *                       has an open end of length -len at this position.
+ * @param[in] lens_wrap  Stride (in bytes) of the lengths.
+ * @param[in] symbols    The symbols, i.e. what is returned from get_vlc2()
+ *                       when the corresponding code is encountered.
+ *                       May be NULL, then 0, 1, 2, 3, 4,... will be used.
+ * @param[in] symbols_wrap Stride (in bytes) of the symbols.
+ * @param[in] symbols_size Size of the symbols. 1 and 2 are supported.
+ * @param[in] offset     An offset to apply to all the valid symbols.
+ * @param[in] flags      A combination of the INIT_VLC_* flags; notice that
+ *                       INIT_VLC_INPUT_LE is pointless and ignored.
+ * @param[in] logctx     The context used for logging.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems,
+                                   int nb_codes, const int8_t *lens, int lens_wrap,
+                                   const void *symbols, int symbols_wrap, int symbols_size,
+                                   int offset, int flags, void *logctx);
+
+
+void ff_free_vlc_multi(VLC_MULTI *vlc);
 void ff_free_vlc(VLC *vlc);

 /* If INIT_VLC_INPUT_LE is set, the LSB bit of the codes used to