From 0c0dd23fe1102313742092c4760596971755814e Mon Sep 17 00:00:00 2001 From: Leo Izen Date: Mon, 10 Jul 2023 18:05:53 -0400 Subject: [PATCH] avcodec/jpegxl_parser: add JPEG XL parser Add a full parser to libavcodec for AV_CODEC_ID_JPEGXL. It finds the end of the stream in order to packetize the codec, and it looks at the headers to set preliminary information like dimensions and pixel format. Note that much of this code is duplicated from avformat/jpegxl_probe.c, but that code will be removed and call this instead in the next commit. Signed-off-by: Leo Izen --- libavcodec/Makefile | 3 + libavcodec/jpegxl.h | 94 +++ libavcodec/jpegxl_parse.c | 520 +++++++++++++ libavcodec/jpegxl_parse.h | 72 ++ libavcodec/jpegxl_parser.c | 1477 ++++++++++++++++++++++++++++++++++++ libavcodec/parsers.c | 1 + 6 files changed, 2167 insertions(+) create mode 100644 libavcodec/jpegxl.h create mode 100644 libavcodec/jpegxl_parse.c create mode 100644 libavcodec/jpegxl_parse.h create mode 100644 libavcodec/jpegxl_parser.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3c16b51462..f961d0abd6 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1056,6 +1056,8 @@ STLIBOBJS-$(CONFIG_AVFORMAT) += to_upper4.o STLIBOBJS-$(CONFIG_ISO_MEDIA) += mpegaudiotabs.o STLIBOBJS-$(CONFIG_FLV_MUXER) += mpeg4audio_sample_rates.o STLIBOBJS-$(CONFIG_HLS_DEMUXER) += ac3_channel_layout_tab.o +STLIBOBJS-$(CONFIG_IMAGE_JPEGXL_PIPE_DEMUXER) += jpegxl_parse.o +STLIBOBJS-$(CONFIG_JPEGXL_ANIM_DEMUXER) += jpegxl_parse.o STLIBOBJS-$(CONFIG_MATROSKA_DEMUXER) += mpeg4audio_sample_rates.o STLIBOBJS-$(CONFIG_MOV_DEMUXER) += ac3_channel_layout_tab.o STLIBOBJS-$(CONFIG_MXF_MUXER) += golomb.o @@ -1185,6 +1187,7 @@ OBJS-$(CONFIG_HEVC_PARSER) += hevc_parser.o hevc_data.o OBJS-$(CONFIG_HDR_PARSER) += hdr_parser.o OBJS-$(CONFIG_IPU_PARSER) += ipu_parser.o OBJS-$(CONFIG_JPEG2000_PARSER) += jpeg2000_parser.o +OBJS-$(CONFIG_JPEGXL_PARSER) += jpegxl_parser.o jpegxl_parse.o OBJS-$(CONFIG_MISC4_PARSER) += misc4_parser.o OBJS-$(CONFIG_MJPEG_PARSER) += mjpeg_parser.o OBJS-$(CONFIG_MLP_PARSER) += mlp_parse.o mlp_parser.o mlp.o diff --git a/libavcodec/jpegxl.h b/libavcodec/jpegxl.h new file mode 100644 index 0000000000..66a6be3555 --- /dev/null +++ b/libavcodec/jpegxl.h @@ -0,0 +1,94 @@ +/* + * JPEG XL Common Header Definitions + * Copyright (c) 2023 Leo Izen + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_JPEGXL_H +#define AVCODEC_JPEGXL_H + +#define FF_JPEGXL_CODESTREAM_SIGNATURE_LE 0x0aff +#define FF_JPEGXL_CONTAINER_SIGNATURE_LE 0x204c584a0c000000 +#define FF_JPEGXL_CODESTREAM_SIGNATURE_BE 0xff0a +#define FF_JPEGXL_CONTAINER_SIGNATURE_BE 0x0000000c4a584c20 + +typedef enum FFJXLFrameEncoding { + JPEGXL_ENC_VARDCT, + JPEGXL_ENC_MODULAR +} FFJXLFrameEncoding; + +typedef enum FFJXLFrameType { + JPEGXL_FRAME_REGULAR, + JPEGXL_FRAME_LF, + JPEGXL_FRAME_REFERENCE_ONLY, + JPEGXL_FRAME_SKIP_PROGRESSIVE +} FFJXLFrameType; + +typedef enum FFJXLBlendMode { + JPEGXL_BM_REPLACE, + JPEGXL_BM_ADD, + JPEGXL_BM_BLEND, + JPEGXL_BM_MULADD, + JPEGXL_BM_MUL +} FFJXLBlendMode; + +typedef enum FFJXLExtraChannelType { + JPEGXL_CT_ALPHA = 0, + JPEGXL_CT_DEPTH, + JPEGXL_CT_SPOT_COLOR, + JPEGXL_CT_SELECTION_MASK, + JPEGXL_CT_BLACK, + JPEGXL_CT_CFA, + JPEGXL_CT_THERMAL, + JPEGXL_CT_NON_OPTIONAL = 15, + JPEGXL_CT_OPTIONAL +} FFJXLExtraChannelType; + +typedef enum FFJXLColorSpace { + JPEGXL_CS_RGB = 0, + JPEGXL_CS_GRAY, + JPEGXL_CS_XYB, + JPEGXL_CS_UNKNOWN +} FFJXLColorSpace; + +typedef enum FFJXLWhitePoint { + JPEGXL_WP_D65 = 1, + JPEGXL_WP_CUSTOM, + JPEGXL_WP_E = 10, + JPEGXL_WP_DCI = 11 +} FFJXLWhitePoint; + +typedef enum FFJXLPrimaries { + JPEGXL_PR_SRGB = 1, + JPEGXL_PR_CUSTOM, + JPEGXL_PR_2100 = 9, + JPEGXL_PR_P3 = 11, +} FFJXLPrimaries; + +typedef enum FFJXLTransferCharacteristic { + JPEGXL_TR_BT709 = 1, + JPEGXL_TR_UNKNOWN, + JPEGXL_TR_LINEAR = 8, + JPEGXL_TR_SRGB = 13, + JPEGXL_TR_PQ = 16, + JPEGXL_TR_DCI, + JPEGXL_TR_HLG, + JPEGXL_TR_GAMMA = 1 << 24, +} FFJXLTransferCharacteristic; + +#endif /* AVCODEC_JPEGXL_H */ diff --git a/libavcodec/jpegxl_parse.c b/libavcodec/jpegxl_parse.c new file mode 100644 index 0000000000..eb28e80867 --- /dev/null +++ b/libavcodec/jpegxl_parse.c @@ -0,0 +1,520 @@ +/* + * JPEG XL Header Parser + * Copyright (c) 2023 Leo Izen + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "bytestream.h" +#define UNCHECKED_BITSTREAM_READER 0 +#define BITSTREAM_READER_LE +#include "get_bits.h" +#include "jpegxl.h" +#include "jpegxl_parse.h" + +/* read a U32(c_i + u(u_i)) */ +static av_always_inline uint32_t jxl_u32(GetBitContext *gb, + uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3, + uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) +{ + const uint32_t constants[4] = {c0, c1, c2, c3}; + const uint32_t ubits [4] = {u0, u1, u2, u3}; + uint32_t ret, choice = get_bits(gb, 2); + + ret = constants[choice]; + if (ubits[choice]) + ret += get_bits_long(gb, ubits[choice]); + + return ret; +} + +static av_always_inline uint32_t jxl_enum(GetBitContext *gb) +{ + return jxl_u32(gb, 0, 1, 2, 18, 0, 0, 4, 6); +} + +/* read a U64() */ +static uint64_t jxl_u64(GetBitContext *gb) +{ + uint64_t shift = 12, ret; + + switch (get_bits(gb, 2)) { + case 1: + ret = 1 + get_bits(gb, 4); + break; + case 2: + ret = 17 + get_bits(gb, 8); + break; + case 3: + ret = get_bits(gb, 12); + while (get_bits1(gb)) { + if (shift < 60) { + ret |= (uint64_t)get_bits(gb, 8) << shift; + shift += 8; + } else { + ret |= (uint64_t)get_bits(gb, 4) << shift; + break; + } + } + break; + default: + ret = 0; + } + + return ret; +} + +static uint32_t jpegxl_width_from_ratio(uint32_t height, int ratio) +{ + uint64_t height64 = height; /* avoid integer overflow */ + switch (ratio) { + case 1: + return height; + case 2: + return (uint32_t)((height64 * 12) / 10); + case 3: + return (uint32_t)((height64 * 4) / 3); + case 4: + return (uint32_t)((height64 * 3) / 2); + case 5: + return (uint32_t)((height64 * 16) / 9); + case 6: + return (uint32_t)((height64 * 5) / 4); + case 7: + return (uint32_t)(height64 * 2); + default: + break; + } + + return 0; /* manual width */ +} + +/** + * validate a Jpeg XL Size Header + * @return >= 0 upon valid size, < 0 upon invalid size found + */ +static int jpegxl_read_size_header(GetBitContext *gb, FFJXLMetadata *meta, int validate) +{ + uint32_t width, height; + + if (get_bits1(gb)) { + /* small size header */ + height = (get_bits(gb, 5) + 1) << 3; + width = jpegxl_width_from_ratio(height, get_bits(gb, 3)); + if (!width) + width = (get_bits(gb, 5) + 1) << 3; + } else { + /* large size header */ + height = 1 + jxl_u32(gb, 0, 0, 0, 0, 9, 13, 18, 30); + width = jpegxl_width_from_ratio(height, get_bits(gb, 3)); + if (!width) + width = 1 + jxl_u32(gb, 0, 0, 0, 0, 9, 13, 18, 30); + } + if (validate && (width > (1 << 18) || height > (1 << 18) + || (width >> 4) * (height >> 4) > (1 << 20))) + return AVERROR_INVALIDDATA; + + if (meta) { + meta->width = meta->coded_width = width; + meta->height = meta->coded_height = height; + } + + return 0; +} + +/** + * validate a Jpeg XL Preview Header + * @return >= 0 upon valid size, < 0 upon invalid size found + */ +static int jpegxl_read_preview_header(GetBitContext *gb, int validate) +{ + uint32_t width, height; + + if (get_bits1(gb)) { + /* coded height and width divided by eight */ + height = jxl_u32(gb, 16, 32, 1, 33, 0, 0, 5, 9) << 3; + width = jpegxl_width_from_ratio(height, get_bits(gb, 3)); + if (!width) + width = jxl_u32(gb, 16, 32, 1, 33, 0, 0, 5, 9) << 3; + } else { + /* full height and width coded */ + height = jxl_u32(gb, 1, 65, 321, 1345, 6, 8, 10, 12); + width = jpegxl_width_from_ratio(height, get_bits(gb, 3)); + if (!width) + width = jxl_u32(gb, 1, 65, 321, 1345, 6, 8, 10, 12); + } + if (validate && (width > 4096 || height > 4096)) + return AVERROR_INVALIDDATA; + + return 0; +} + +/** + * get a Jpeg XL BitDepth Header. These cannot be invalid. + */ +static void jpegxl_get_bit_depth(GetBitContext *gb, FFJXLMetadata *meta) +{ + int bit_depth; + if (get_bits1(gb)) { + /* float samples */ + bit_depth = jxl_u32(gb, 32, 16, 24, 1, 0, 0, 0, 6); /* mantissa */ + skip_bits_long(gb, 4); /* exponent */ + } else { + /* integer samples */ + bit_depth = jxl_u32(gb, 8, 10, 12, 1, 0, 0, 0, 6); + } + if (meta) + meta->bit_depth = bit_depth; +} + +/** + * validate a Jpeg XL Extra Channel Info bundle + * @return >= 0 upon valid, < 0 upon invalid + */ +static int jpegxl_read_extra_channel_info(GetBitContext *gb, FFJXLMetadata *meta, int validate) +{ + int default_alpha = get_bits1(gb); + uint32_t type, name_len = 0; + + if (!default_alpha) { + type = jxl_enum(gb); + if (validate && type > 63) + return AVERROR_INVALIDDATA; /* enum types cannot be 64+ */ + if (validate && validate < 10 && type == JPEGXL_CT_BLACK) + return AVERROR_INVALIDDATA; + jpegxl_get_bit_depth(gb, NULL); + jxl_u32(gb, 0, 3, 4, 1, 0, 0, 0, 3); /* dim-shift */ + /* max of name_len is 1071 = 48 + 2^10 - 1 */ + name_len = 8 * jxl_u32(gb, 0, 0, 16, 48, 0, 4, 5, 10); + } else { + type = JPEGXL_CT_ALPHA; + } + + if (get_bits_left(gb) < name_len) + return AVERROR_BUFFER_TOO_SMALL; + + /* skip over the name */ + skip_bits_long(gb, name_len); + + if (!default_alpha && type == JPEGXL_CT_ALPHA) + skip_bits1(gb); + + if (type == JPEGXL_CT_SPOT_COLOR) + skip_bits_long(gb, 16 * 4); + + if (type == JPEGXL_CT_CFA) + jxl_u32(gb, 1, 0, 3, 19, 0, 2, 4, 8); + + if (meta && type == JPEGXL_CT_ALPHA) + meta->have_alpha = 1; + + return 0; +} + +static int jpegxl_skip_extensions(GetBitContext *gb) +{ + uint64_t extensions = jxl_u64(gb), extensions_len = 0; + + if (get_bits_left(gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + + if (!extensions) + return 0; + + for (int i = 0; i < 64; i++) { + if (extensions & (UINT64_C(1) << i)) + extensions_len += jxl_u64(gb); + if (get_bits_left(gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + } + + if (extensions_len > INT_MAX || get_bits_left(gb) <= extensions_len) + return AVERROR_BUFFER_TOO_SMALL; + + skip_bits_long(gb, extensions_len); + + return 0; +} + +int ff_jpegxl_parse_codestream_header(const uint8_t *buf, int buflen, FFJXLMetadata *meta, int validate) +{ + GetBitContext gbi, *gb = &gbi; + + int all_default, extra_fields = 0; + int xyb_encoded = 1, have_icc_profile = 0; + int animation_offset = 0, have_timecodes = 0; + + FFJXLPrimaries primaries = JPEGXL_PR_SRGB; + FFJXLTransferCharacteristic trc = JPEGXL_TR_SRGB + (1U << 24); + FFJXLWhitePoint white_point = JPEGXL_WP_D65; + FFJXLColorSpace color_space = JPEGXL_CS_RGB; + + AVRational tb; + uint32_t num_extra_channels = 0; + int ret; + + ret = init_get_bits8(gb, buf, buflen); + if (ret < 0) + return ret; + + if (get_bits(gb, 16) != FF_JPEGXL_CODESTREAM_SIGNATURE_LE && validate) + return AVERROR_INVALIDDATA; + + ret = jpegxl_read_size_header(gb, meta, validate); + if (ret < 0) + return ret; + + all_default = get_bits1(gb); + if (!all_default) + extra_fields = get_bits1(gb); + + if (extra_fields) { + int orientation = get_bits(gb, 3); + if (orientation > 3 && meta) + FFSWAP(uint32_t, meta->width, meta->height); + + /* + * intrinstic size + * any size header here is valid, but as it + * is variable length we have to read it + */ + if (get_bits1(gb)) + jpegxl_read_size_header(gb, NULL, 0); + + /* preview header */ + if (get_bits1(gb)) { + ret = jpegxl_read_preview_header(gb, 0); + if (ret < 0) + return ret; + } + + /* animation header */ + if (get_bits1(gb)) { + animation_offset = get_bits_count(gb); + tb.den = jxl_u32(gb, 100, 1000, 1, 1, 0, 0, 10, 30); + tb.num = jxl_u32(gb, 1, 1001, 1, 1, 0, 0, 8, 10); + jxl_u32(gb, 0, 0, 0, 0, 0, 3, 16, 32); + have_timecodes = get_bits1(gb); + } + } + + if (animation_offset && meta) { + meta->animation_offset = animation_offset; + meta->timebase = tb; + meta->have_timecodes = have_timecodes; + } + + if (get_bits_left(gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + + if (!all_default) { + jpegxl_get_bit_depth(gb, meta); + + /* modular_16bit_buffers must equal 1 */ + if (!get_bits1(gb) && validate && validate < 10) + return AVERROR_INVALIDDATA; + + num_extra_channels = jxl_u32(gb, 0, 1, 2, 1, 0, 0, 4, 12); + if (num_extra_channels > 4 && validate && validate < 10) + return AVERROR_INVALIDDATA; + for (uint32_t i = 0; i < num_extra_channels; i++) { + ret = jpegxl_read_extra_channel_info(gb, meta, validate); + if (ret < 0) + return ret; + if (get_bits_left(gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + } + + xyb_encoded = get_bits1(gb); + + /* color encoding bundle */ + if (!get_bits1(gb)) { + have_icc_profile = get_bits1(gb); + color_space = jxl_enum(gb); + if (color_space > 63 && validate) + return AVERROR_INVALIDDATA; + if (!have_icc_profile) { + if (color_space != JPEGXL_CS_XYB) { + white_point = jxl_enum(gb); + if (white_point > 63 && validate) + return AVERROR_INVALIDDATA; + if (white_point == JPEGXL_WP_CUSTOM) { + /* ux and uy values */ + jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21); + jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21); + } + if (color_space != JPEGXL_CS_GRAY) { + /* primaries */ + primaries = jxl_enum(gb); + if (primaries > 63 && validate) + return AVERROR_INVALIDDATA; + if (primaries == JPEGXL_PR_CUSTOM) { + /* ux/uy values for r,g,b */ + for (int i = 0; i < 6; i++) { + jxl_u32(gb, 0, 524288, 1048576, 2097152, 19, 19, 20, 21); + if (get_bits_left(gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + } + } + } + } + + /* transfer characteristics */ + if (get_bits1(gb)) { + /* gamma */ + trc = get_bits(gb, 24); + } else { + /* transfer function */ + trc = jxl_enum(gb); + if (trc > 63 && validate) + return AVERROR_INVALIDDATA; + trc += (1U << 24); + } + + /* rendering intent */ + if (jxl_enum(gb) > 63 && validate) + return AVERROR_INVALIDDATA; + } + } + + /* tone mapping bundle */ + if (extra_fields && !get_bits1(gb)) + skip_bits_long(gb, 16 + 16 + 1 + 16); + + ret = jpegxl_skip_extensions(gb); + if (ret < 0) + return ret; + } + + if (meta) { + meta->xyb_encoded = xyb_encoded; + meta->have_icc_profile = have_icc_profile; + meta->csp = color_space; + meta->primaries = primaries; + meta->wp = white_point; + meta->trc = trc; + if (!meta->bit_depth) + meta->bit_depth = 8; + meta->num_extra_channels = num_extra_channels; + } + + /* default transform */ + if (!get_bits1(gb)) { + /* opsin inverse matrix */ + if (xyb_encoded && !get_bits1(gb)) + skip_bits_long(gb, 16 * 16); + /* cw_mask and default weights */ + if (get_bits1(gb)) + skip_bits_long(gb, 16 * 15); + if (get_bits1(gb)) + skip_bits_long(gb, 16 * 55); + if (get_bits1(gb)) + skip_bits_long(gb, 16 * 210); + } + + if (!have_icc_profile) { + int bits_remaining = 7 - ((get_bits_count(gb) - 1) & 0x7); + if (bits_remaining && get_bits(gb, bits_remaining)) + return AVERROR_INVALIDDATA; + } + + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + + return get_bits_count(gb); +} + +/* + * copies as much of the codestream into the buffer as possible + * pass a shorter buflen to request less + * returns the number of bytes consumed from input, may be greater than input_len + * if the input doesn't end on an ISOBMFF-box boundary + */ +int ff_jpegxl_collect_codestream_header(const uint8_t *input_buffer, int input_len, + uint8_t *buffer, int buflen, int *copied) +{ + GetByteContext gb; + int pos = 0, last_box = 0; + bytestream2_init(&gb, input_buffer, input_len); + + while (1) { + uint64_t size; + uint32_t tag; + int head_size = 8; + + if (bytestream2_get_bytes_left(&gb) < 8) + return AVERROR_BUFFER_TOO_SMALL; + + size = bytestream2_get_be32(&gb); + if (size == 1) { + if (bytestream2_get_bytes_left(&gb) < 12) + return AVERROR_BUFFER_TOO_SMALL; + size = bytestream2_get_be64(&gb); + head_size = 16; + } + /* invalid ISOBMFF size */ + if (size && size <= head_size) + return AVERROR_INVALIDDATA; + if (size) + size -= head_size; + + tag = bytestream2_get_le32(&gb); + + if (tag == MKTAG('j','x','l','p')) { + uint32_t idx; + if (bytestream2_get_bytes_left(&gb) < 4) + return AVERROR_BUFFER_TOO_SMALL; + idx = bytestream2_get_be32(&gb); + if (idx >= UINT32_C(0x80000000)) + last_box = 1; + if (size) { + if (size <= 4) + return AVERROR_INVALIDDATA; + size -= 4; + } + } + if (tag == MKTAG('j','x','l','c')) + last_box = 1; + + /* + * size = 0 means "until EOF". this is legal but uncommon + * here we just set it to the remaining size of the probe buffer + */ + if (!size) + size = bytestream2_get_bytes_left(&gb); + else + pos += size + head_size; + + if (tag == MKTAG('j','x','l','c') || tag == MKTAG('j','x','l','p')) { + if (size > buflen - *copied) + size = buflen - *copied; + /* + * arbitrary chunking of the payload makes this memcpy hard to avoid + * in practice this will only be performed one or two times at most + */ + *copied += bytestream2_get_buffer(&gb, buffer + *copied, size); + } else { + bytestream2_skip(&gb, size); + } + if (last_box || bytestream2_get_bytes_left(&gb) <= 0 || *copied >= buflen) + break; + } + + return pos; +} diff --git a/libavcodec/jpegxl_parse.h b/libavcodec/jpegxl_parse.h new file mode 100644 index 0000000000..0602f4d409 --- /dev/null +++ b/libavcodec/jpegxl_parse.h @@ -0,0 +1,72 @@ +/* + * JPEG XL Header Parser + * Copyright (c) 2023 Leo Izen + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_JPEGXL_PARSE_H +#define AVCODEC_JPEGXL_PARSE_H + +#include + +#include "libavutil/rational.h" + +#include "jpegxl.h" + +typedef struct FFJXLMetadata { + uint32_t width; + uint32_t height; + uint32_t coded_width; + uint32_t coded_height; + int bit_depth; + int have_alpha; + /* + * offset, in bits, of the animation header + * zero if not animated + */ + int animation_offset; + AVRational timebase; + FFJXLColorSpace csp; + FFJXLWhitePoint wp; + FFJXLPrimaries primaries; + FFJXLTransferCharacteristic trc; + + /* used by the parser */ + int xyb_encoded; + int have_icc_profile; + int have_timecodes; + uint32_t num_extra_channels; +} FFJXLMetadata; + +/* + * copies as much of the codestream into the buffer as possible + * pass a shorter buflen to request less + * returns the number of bytes consumed from input, may be greater than input_len + * if the input doesn't end on an ISOBMFF-box boundary + */ +int ff_jpegxl_collect_codestream_header(const uint8_t *input_buffer, int input_len, + uint8_t *buffer, int buflen, int *copied); + +/* + * Parse the codestream header with the provided buffer. Returns negative upon failure, + * or the number of bits consumed upon success. + * The FFJXLMetadata parameter may be NULL, in which case it's ignored. + */ +int ff_jpegxl_parse_codestream_header(const uint8_t *buf, int buflen, FFJXLMetadata *meta, int validate); + +#endif /* AVCODEC_JPEGXL_PARSE_H */ diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c new file mode 100644 index 0000000000..66e64b1074 --- /dev/null +++ b/libavcodec/jpegxl_parser.c @@ -0,0 +1,1477 @@ +/** + * JPEG XL parser + * Copyright (c) 2023 Leo Izen + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include "libavutil/attributes.h" +#include "libavutil/error.h" +#include "libavutil/intmath.h" +#include "libavutil/macros.h" +#include "libavutil/mem.h" +#include "libavutil/pixfmt.h" + +#include "bytestream.h" +#include "codec_id.h" +#define UNCHECKED_BITSTREAM_READER 0 +#define BITSTREAM_READER_LE +#include "get_bits.h" +#include "jpegxl.h" +#include "jpegxl_parse.h" +#include "parser.h" +#include "vlc.h" + +#define JXL_FLAG_NOISE 1 +#define JXL_FLAG_PATCHES 2 +#define JXL_FLAG_SPLINES 16 +#define JXL_FLAG_USE_LF_FRAME 32 +#define JXL_FLAG_SKIP_ADAPTIVE_LF_SMOOTH 128 + +#define clog1p(x) (ff_log2(x) + !!(x)) +#define unpack_signed(x) (((x) & 1 ? -(x)-1 : (x))/2) +#define div_ceil(x, y) (((x) - 1) / (y) + 1) +#define vlm(a,b) (VLCElem){.sym = (a), .len = (b)} + +typedef struct JXLHybridUintConf { + int split_exponent; + uint32_t msb_in_token; + uint32_t lsb_in_token; +} JXLHybridUintConf; + +typedef struct JXLSymbolDistribution { + JXLHybridUintConf config; + int log_bucket_size; + /* this is the actual size of the alphabet */ + int alphabet_size; + /* ceil(log(alphabet_size)) */ + int log_alphabet_size; + + /* for prefix code distributions */ + VLC vlc; + /* in case bits == 0 */ + uint32_t default_symbol; + + /* + * each (1 << log_alphabet_size) length + * with log_alphabet_size <= 8 + */ + /* frequencies associated with this Distribution */ + uint32_t freq[258]; + /* cutoffs for using the symbol table */ + uint16_t cutoffs[258]; + /* the symbol table for this distribution */ + uint16_t symbols[258]; + /* the offset for symbols */ + uint16_t offsets[258]; + + /* if this distribution contains only one symbol this is its index */ + int uniq_pos; +} JXLSymbolDistribution; + +typedef struct JXLDistributionBundle { + /* lz77 flags */ + int lz77_enabled; + uint32_t lz77_min_symbol; + uint32_t lz77_min_length; + JXLHybridUintConf lz_len_conf; + + /* one entry for each distribution */ + uint8_t *cluster_map; + /* length of cluster_map */ + int num_dist; + + /* one for each cluster */ + JXLSymbolDistribution *dists; + int num_clusters; + + /* whether to use brotli prefixes or ans */ + int use_prefix_code; + /* bundle log alphabet size, dist ones may be smaller */ + int log_alphabet_size; +} JXLDistributionBundle; + +typedef struct JXLEntropyDecoder { + + /* state is a positive 32-bit integer, or -1 if unset */ + int64_t state; + + /* lz77 values */ + uint32_t num_to_copy; + uint32_t copy_pos; + uint32_t num_decoded; + + /* length is (1 << 20) */ + /* if lz77 is enabled for this bundle */ + /* if lz77 is disabled it's NULL */ + uint32_t *window; + + /* primary bundle associated with this distribution */ + JXLDistributionBundle bundle; + + /* for av_log */ + void *logctx; +} JXLEntropyDecoder; + +typedef struct JXLFrame { + FFJXLFrameType type; + FFJXLFrameEncoding encoding; + + int is_last; + int full_frame; + + uint32_t total_length; + uint32_t body_length; +} JXLFrame; + +typedef struct JXLCodestream { + FFJXLMetadata meta; + JXLFrame frame; +} JXLCodestream; + +typedef struct JXLParseContext { + ParseContext pc; + JXLCodestream codestream; + + /* using ISOBMFF-based container */ + int container; + int skip; + int copied; + int collected_size; + int codestream_length; + int skipped_icc; + int next; + + uint8_t cs_buffer[4096]; +} JXLParseContext; + +/* used for reading brotli prefixes */ +static const VLCElem level0_table[16] = { + vlm(0, 2), vlm(4, 2), vlm(3, 2), vlm(2, 3), vlm(0, 2), vlm(4, 2), vlm(3, 2), vlm(1, 4), + vlm(0, 2), vlm(4, 2), vlm(3, 2), vlm(2, 3), vlm(0, 2), vlm(4, 2), vlm(3, 2), vlm(5, 4), +}; + +/* prefix table for populating ANS distribution */ +static const VLCElem dist_prefix_table[128] = { + vlm(10, 3), vlm(12, 7), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(0, 5), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(11, 6), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(0, 5), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(13, 7), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(0, 5), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(11, 6), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), + vlm(10, 3), vlm(0, 5), vlm(7, 3), vlm(3, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(5, 4), + vlm(10, 3), vlm(4, 4), vlm(7, 3), vlm(1, 4), vlm(6, 3), vlm(8, 3), vlm(9, 3), vlm(2, 4), +}; + +static const uint8_t prefix_codelen_map[18] = { + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, +}; + +/** + * Read a variable-length 8-bit integer. + * Used when populating the ANS frequency tables. + */ +static av_always_inline uint8_t jxl_u8(GetBitContext *gb) +{ + int n; + if (!get_bits1(gb)) + return 0; + n = get_bits(gb, 3); + + return get_bitsz(gb, n) | (1 << n); +} + +/* read a U32(c_i + u(u_i)) */ +static av_always_inline uint32_t jxl_u32(GetBitContext *gb, + uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3, + uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) +{ + const uint32_t constants[4] = {c0, c1, c2, c3}; + const uint32_t ubits [4] = {u0, u1, u2, u3}; + uint32_t ret, choice = get_bits(gb, 2); + + ret = constants[choice]; + if (ubits[choice]) + ret += get_bits_long(gb, ubits[choice]); + + return ret; +} + +static av_always_inline uint32_t jxl_enum(GetBitContext *gb) +{ + return jxl_u32(gb, 0, 1, 2, 18, 0, 0, 4, 6); +} + +/* read a U64() */ +static uint64_t jxl_u64(GetBitContext *gb) +{ + uint64_t shift = 12, ret; + + switch (get_bits(gb, 2)) { + case 1: + ret = 1 + get_bits(gb, 4); + break; + case 2: + ret = 17 + get_bits(gb, 8); + break; + case 3: + ret = get_bits(gb, 12); + while (get_bits1(gb)) { + if (shift < 60) { + ret |= (uint64_t)get_bits(gb, 8) << shift; + shift += 8; + } else { + ret |= (uint64_t)get_bits(gb, 4) << shift; + break; + } + } + break; + default: + ret = 0; + } + + return ret; +} + +static int read_hybrid_uint_conf(GetBitContext *gb, JXLHybridUintConf *conf, int log_alphabet_size) +{ + conf->split_exponent = get_bitsz(gb, clog1p(log_alphabet_size)); + if (conf->split_exponent == log_alphabet_size) { + conf->msb_in_token = conf->lsb_in_token = 0; + return 0; + } + + conf->msb_in_token = get_bitsz(gb, clog1p(conf->split_exponent)); + if (conf->msb_in_token > conf->split_exponent) + return AVERROR_INVALIDDATA; + conf->lsb_in_token = get_bitsz(gb, clog1p(conf->split_exponent - conf->msb_in_token)); + if (conf->msb_in_token + conf->lsb_in_token > conf->split_exponent) + return AVERROR_INVALIDDATA; + + return 0; +} + +static int read_hybrid_uint(GetBitContext *gb, const JXLHybridUintConf *conf, uint32_t token, uint32_t *hybrid_uint) +{ + uint32_t n, low, split = 1 << conf->split_exponent; + + if (token < split) { + *hybrid_uint = token; + return 0; + } + + n = conf->split_exponent - conf->lsb_in_token - conf->msb_in_token + + ((token - split) >> (conf->msb_in_token + conf->lsb_in_token)); + if (n >= 32) + return AVERROR_INVALIDDATA; + low = token & ((1 << conf->lsb_in_token) - 1); + token >>= conf->lsb_in_token; + token &= (1 << conf->msb_in_token) - 1; + token |= 1 << conf->msb_in_token; + *hybrid_uint = (((token << n) | get_bits_long(gb, n)) << conf->lsb_in_token ) | low; + + return 0; +} + +static inline uint32_t read_prefix_symbol(GetBitContext *gb, const JXLSymbolDistribution *dist) +{ + if (!dist->vlc.bits) + return dist->default_symbol; + + return get_vlc2(gb, dist->vlc.table, dist->vlc.bits, 1); +} + +static uint32_t read_ans_symbol(GetBitContext *gb, JXLEntropyDecoder *dec, const JXLSymbolDistribution *dist) +{ + uint32_t index, i, pos, symbol, offset; + + if (dec->state < 0) + dec->state = get_bits_long(gb, 32); + + index = dec->state & 0xFFF; + i = index >> dist->log_bucket_size; + pos = index & ((1 << dist->log_bucket_size) - 1); + symbol = pos >= dist->cutoffs[i] ? dist->symbols[i] : i; + offset = pos >= dist->cutoffs[i] ? dist->offsets[i] + pos : pos; + dec->state = dist->freq[symbol] * (dec->state >> 12) + offset; + if (dec->state < (1 << 16)) + dec->state = (dec->state << 16) | get_bits(gb, 16); + dec->state &= 0xFFFFFFFF; + + return symbol; +} + +static int decode_hybrid_varlen_uint(GetBitContext *gb, JXLEntropyDecoder *dec, + const JXLDistributionBundle *bundle, + uint32_t context, uint32_t *hybrid_uint) +{ + int ret; + uint32_t token, distance; + const JXLSymbolDistribution *dist; + + if (dec->num_to_copy > 0) { + *hybrid_uint = dec->window[dec->copy_pos++ & 0xFFFFF]; + dec->num_to_copy--; + dec->window[dec->num_decoded++ & 0xFFFFF] = *hybrid_uint; + return 0; + } + + if (context >= bundle->num_dist) + return AVERROR(EINVAL); + if (bundle->cluster_map[context] >= bundle->num_clusters) + return AVERROR_INVALIDDATA; + + dist = &bundle->dists[bundle->cluster_map[context]]; + if (bundle->use_prefix_code) + token = read_prefix_symbol(gb, dist); + else + token = read_ans_symbol(gb, dec, dist); + + if (bundle->lz77_enabled && token >= bundle->lz77_min_symbol) { + const JXLSymbolDistribution *lz77dist = &bundle->dists[bundle->cluster_map[bundle->num_dist - 1]]; + ret = read_hybrid_uint(gb, &bundle->lz_len_conf, token - bundle->lz77_min_symbol, &dec->num_to_copy); + if (ret < 0) + return ret; + dec->num_to_copy += bundle->lz77_min_length; + if (bundle->use_prefix_code) + token = read_prefix_symbol(gb, lz77dist); + else + token = read_ans_symbol(gb, dec, lz77dist); + ret = read_hybrid_uint(gb, &lz77dist->config, token, &distance); + if (ret < 0) + return ret; + distance++; + distance = FFMIN3(distance, dec->num_decoded, 1 << 20); + dec->copy_pos = dec->num_decoded - distance; + return decode_hybrid_varlen_uint(gb, dec, bundle, context, hybrid_uint); + } + ret = read_hybrid_uint(gb, &dist->config, token, hybrid_uint); + if (ret < 0) + return ret; + if (bundle->lz77_enabled) + dec->window[dec->num_decoded++ & 0xFFFFF] = *hybrid_uint; + + return 0; +} + +static int populate_distribution(GetBitContext *gb, JXLSymbolDistribution *dist, int log_alphabet_size) +{ + int len = 0, shift, omit_log = -1, omit_pos = -1; + int prev = 0, num_same = 0; + uint32_t total_count = 0; + uint8_t logcounts[258] = { 0 }; + uint8_t same[258] = { 0 }; + dist->uniq_pos = -1; + + if (get_bits1(gb)) { + /* simple code */ + dist->alphabet_size = 256; + if (get_bits1(gb)) { + uint8_t v1 = jxl_u8(gb); + uint8_t v2 = jxl_u8(gb); + if (v1 == v2) + return AVERROR_INVALIDDATA; + dist->freq[v1] = get_bits(gb, 12); + dist->freq[v2] = (1 << 12) - dist->freq[v1]; + if (!dist->freq[v1]) + dist->uniq_pos = v2; + } else { + uint8_t x = jxl_u8(gb); + dist->freq[x] = 1 << 12; + dist->uniq_pos = x; + } + return 0; + } + + if (get_bits1(gb)) { + /* flat code */ + dist->alphabet_size = jxl_u8(gb) + 1; + for (int i = 0; i < dist->alphabet_size; i++) + dist->freq[i] = (1 << 12) / dist->alphabet_size; + for (int i = 0; i < (1 << 12) % dist->alphabet_size; i++) + dist->freq[i]++; + return 0; + } + + do { + if (!get_bits1(gb)) + break; + } while (++len < 3); + + shift = (get_bitsz(gb, len) | (1 << len)) - 1; + if (shift > 13) + return AVERROR_INVALIDDATA; + + dist->alphabet_size = jxl_u8(gb) + 3; + for (int i = 0; i < dist->alphabet_size; i++) { + logcounts[i] = get_vlc2(gb, dist_prefix_table, 7, 1); + if (logcounts[i] == 13) { + int rle = jxl_u8(gb); + same[i] = rle + 5; + i += rle + 3; + continue; + } + if (logcounts[i] > omit_log) { + omit_log = logcounts[i]; + omit_pos = i; + } + } + if (omit_pos < 0 || omit_pos + 1 < dist->alphabet_size && logcounts[omit_pos + 1] == 13) + return AVERROR_INVALIDDATA; + + for (int i = 0; i < dist->alphabet_size; i++) { + if (same[i]) { + num_same = same[i] - 1; + prev = i > 0 ? dist->freq[i - 1] : 0; + } + if (num_same) { + dist->freq[i] = prev; + num_same--; + } else { + if (i == omit_pos || !logcounts[i]) + continue; + if (logcounts[i] == 1) { + dist->freq[i] = 1; + } else { + int bitcount = FFMIN(FFMAX(0, shift - ((12 - logcounts[i] + 1) >> 1)), logcounts[i] - 1); + dist->freq[i] = (1 << (logcounts[i] - 1)) + (get_bitsz(gb, bitcount) << (logcounts[i] - 1 - bitcount)); + } + } + total_count += dist->freq[i]; + } + dist->freq[omit_pos] = (1 << 12) - total_count; + + return 0; +} + +static void dist_bundle_close(JXLDistributionBundle *bundle) +{ + if (bundle->use_prefix_code && bundle->dists) + for (int i = 0; i < bundle->num_clusters; i++) + ff_free_vlc(&bundle->dists[i].vlc); + av_freep(&bundle->dists); + av_freep(&bundle->cluster_map); +} + + +static int read_distribution_bundle(GetBitContext *gb, JXLEntropyDecoder *dec, + JXLDistributionBundle *bundle, int num_dist, int disallow_lz77); + +static int read_dist_clustering(GetBitContext *gb, JXLEntropyDecoder *dec, JXLDistributionBundle *bundle) +{ + int ret; + + bundle->cluster_map = av_malloc(bundle->num_dist); + if (!bundle->cluster_map) + return AVERROR(ENOMEM); + + if (bundle->num_dist == 1) { + bundle->cluster_map[0] = 0; + bundle->num_clusters = 1; + return 0; + } + + if (get_bits1(gb)) { + /* simple clustering */ + uint32_t nbits = get_bits(gb, 2); + for (int i = 0; i < bundle->num_dist; i++) + bundle->cluster_map[i] = get_bitsz(gb, nbits); + } else { + /* complex clustering */ + int use_mtf = get_bits1(gb); + JXLDistributionBundle nested = { 0 }; + /* num_dist == 1 prevents this from recursing again */ + ret = read_distribution_bundle(gb, dec, &nested, 1, bundle->num_dist <= 2); + if (ret < 0) { + dist_bundle_close(&nested); + return ret; + } + for (int i = 0; i < bundle->num_dist; i++) { + uint32_t clust; + ret = decode_hybrid_varlen_uint(gb, dec, &nested, 0, &clust); + if (ret < 0) { + dist_bundle_close(&nested); + return ret; + } + bundle->cluster_map[i] = clust; + } + dec->state = -1; + /* it's not going to necessarily be zero after reading */ + dec->num_to_copy = 0; + dist_bundle_close(&nested); + if (use_mtf) { + uint8_t mtf[256]; + for (int i = 0; i < 256; i++) + mtf[i] = i; + for (int i = 0; i < bundle->num_dist; i++) { + int index = bundle->cluster_map[i]; + bundle->cluster_map[i] = mtf[index]; + if (index) { + int value = mtf[index]; + for (int j = index; j > 0; j--) + mtf[j] = mtf[j - 1]; + mtf[0] = value; + } + } + } + } + for (int i = 0; i < bundle->num_dist; i++) { + if (bundle->cluster_map[i] >= bundle->num_clusters) + bundle->num_clusters = bundle->cluster_map[i] + 1; + } + + if (bundle->num_clusters > bundle->num_dist) + return AVERROR_INVALIDDATA; + + return 0; +} + +static int gen_alias_map(JXLEntropyDecoder *dec, JXLSymbolDistribution *dist, int log_alphabet_size) +{ + uint32_t bucket_size, table_size; + uint8_t overfull[256], underfull[256]; + int overfull_pos = 0, underfull_pos = 0; + dist->log_bucket_size = 12 - log_alphabet_size; + bucket_size = 1 << dist->log_bucket_size; + table_size = 1 << log_alphabet_size; + + if (dist->uniq_pos >= 0) { + for (int i = 0; i < table_size; i++) { + dist->symbols[i] = dist->uniq_pos; + dist->offsets[i] = bucket_size * i; + dist->cutoffs[i] = 0; + } + return 0; + } + + for (int i = 0; i < dist->alphabet_size; i++) { + dist->cutoffs[i] = dist->freq[i]; + dist->symbols[i] = i; + if (dist->cutoffs[i] > bucket_size) + overfull[overfull_pos++] = i; + else if (dist->cutoffs[i] < bucket_size) + underfull[underfull_pos++] = i; + } + + for (int i = dist->alphabet_size; i < table_size; i++) { + dist->cutoffs[i] = 0; + underfull[underfull_pos++] = i; + } + + while (overfull_pos) { + int o, u, by; + /* this should be impossible */ + if (!underfull_pos) + return AVERROR_INVALIDDATA; + u = underfull[--underfull_pos]; + o = overfull[--overfull_pos]; + by = bucket_size - dist->cutoffs[u]; + dist->cutoffs[o] -= by; + dist->symbols[u] = o; + dist->offsets[u] = dist->cutoffs[o]; + if (dist->cutoffs[o] < bucket_size) + underfull[underfull_pos++] = o; + else if (dist->cutoffs[o] > bucket_size) + overfull[overfull_pos++] = o; + } + + for (int i = 0; i < table_size; i++) { + if (dist->cutoffs[i] == bucket_size) { + dist->symbols[i] = i; + dist->offsets[i] = 0; + dist->cutoffs[i] = 0; + } else { + dist->offsets[i] -= dist->cutoffs[i]; + } + } + + return 0; +} + +static int read_simple_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolDistribution *dist) +{ + int nsym, tree_select, bits; + + int8_t lens[4]; + int16_t symbols[4]; + + nsym = 1 + get_bits(gb, 2); + for (int i = 0; i < nsym; i++) + symbols[i] = get_bitsz(gb, dist->log_alphabet_size); + if (nsym == 4) + tree_select = get_bits1(gb); + switch (nsym) { + case 1: + dist->vlc.bits = 0; + dist->default_symbol = symbols[0]; + return 0; + case 2: + bits = 1; + lens[0] = 1, lens[1] = 1, lens[2] = 0, lens[3] = 0; + if (symbols[1] < symbols[0]) + FFSWAP(int16_t, symbols[0], symbols[1]); + break; + case 3: + bits = 2; + lens[0] = 1, lens[1] = 2, lens[2] = 2, lens[3] = 0; + if (symbols[2] < symbols[1]) + FFSWAP(int16_t, symbols[1], symbols[2]); + break; + case 4: + if (tree_select) { + bits = 3; + lens[0] = 1, lens[1] = 2, lens[2] = 3, lens[3] = 3; + if (symbols[3] < symbols[2]) + FFSWAP(int16_t, symbols[2], symbols[3]); + } else { + bits = 2; + lens[0] = 2, lens[1] = 2, lens[2] = 2, lens[3] = 2; + while (1) { + if (symbols[1] < symbols[0]) + FFSWAP(int16_t, symbols[0], symbols[1]); + if (symbols[3] < symbols[2]) + FFSWAP(int16_t, symbols[2], symbols[3]); + if (symbols[1] <= symbols[2]) + break; + FFSWAP(int16_t, symbols[1], symbols[2]); + } + } + break; + default: + // Challenge Complete! How did we get here? + return AVERROR_BUG; + } + + return ff_init_vlc_from_lengths(&dist->vlc, bits, nsym, lens, 1, symbols, + 2, 2, 0, INIT_VLC_LE, dec->logctx); +} + +static int read_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolDistribution *dist) +{ + int8_t level1_lens[18] = { 0 }; + int8_t level1_lens_s[18] = { 0 }; + int16_t level1_syms[18] = { 0 }; + uint32_t level1_codecounts[19] = { 0 }; + uint8_t *buf = NULL; + int8_t *level2_lens, *level2_lens_s; + int16_t *level2_syms; + uint32_t *level2_codecounts; + + int repeat_count_prev = 0, repeat_count_zero = 0, prev = 8; + int total_code = 0, len, hskip, num_codes = 0, ret; + + VLC level1_vlc; + + if (dist->alphabet_size == 1) { + dist->vlc.bits = 0; + dist->default_symbol = 0; + return 0; + } + + hskip = get_bits(gb, 2); + if (hskip == 1) + return read_simple_vlc_prefix(gb, dec, dist); + + level1_codecounts[0] = hskip; + for (int i = hskip; i < 18; i++) { + len = level1_lens[prefix_codelen_map[i]] = get_vlc2(gb, level0_table, 4, 1); + level1_codecounts[len]++; + if (len) { + total_code += (32 >> len); + num_codes++; + } + if (total_code >= 32) { + level1_codecounts[0] += 18 - i - 1; + break; + } + } + + if (total_code != 32 && num_codes >= 2 || num_codes < 1) + return AVERROR_INVALIDDATA; + + for (int i = 1; i < 19; i++) + level1_codecounts[i] += level1_codecounts[i - 1]; + + for (int i = 17; i >= 0; i--) { + int idx = --level1_codecounts[level1_lens[i]]; + level1_lens_s[idx] = level1_lens[i]; + level1_syms[idx] = i; + } + + ret = ff_init_vlc_from_lengths(&level1_vlc, 5, 18, level1_lens_s, 1, level1_syms, 2, 2, + 0, INIT_VLC_LE, dec->logctx); + if (ret < 0) + goto end; + + buf = av_calloc(1, 262148); // 32768 * 8 + 4 + if (!buf) { + ret = AVERROR(ENOMEM); + goto end; + } + + level2_lens = (int8_t *)buf; + level2_lens_s = (int8_t *)(buf + 32768); + level2_syms = (int16_t *)(buf + 65536); + level2_codecounts = (uint32_t *)(buf + 131072); + + total_code = 0; + for (int i = 0; i < dist->alphabet_size; i++) { + len = get_vlc2(gb, level1_vlc.table, 5, 1); + if (len == 16) { + int extra = 3 + get_bits(gb, 2); + if (repeat_count_prev) + extra = 4 * (repeat_count_prev - 2) - repeat_count_prev + extra; + for (int j = 0; j < extra; j++) + level2_lens[i + j] = prev; + total_code += (32768 >> prev) * extra; + i += extra - 1; + repeat_count_prev += extra; + repeat_count_zero = 0; + level2_codecounts[prev] += extra; + } else if (len == 17) { + int extra = 3 + get_bits(gb, 3); + if (repeat_count_zero > 0) + extra = 8 * (repeat_count_zero - 2) - repeat_count_zero + extra; + i += extra - 1; + repeat_count_prev = 0; + repeat_count_zero += extra; + level2_codecounts[0] += extra; + } else { + level2_lens[i] = len; + repeat_count_prev = repeat_count_zero = 0; + if (len) { + total_code += (32768 >> len); + prev = len; + } + level2_codecounts[len]++; + } + if (total_code >= 32768) { + level2_codecounts[0] += dist->alphabet_size - i - 1; + break; + } + } + + if (total_code != 32768 && level2_codecounts[0] < dist->alphabet_size - 1) + return AVERROR_INVALIDDATA; + + for (int i = 1; i < dist->alphabet_size + 1; i++) + level2_codecounts[i] += level2_codecounts[i - 1]; + + for (int i = dist->alphabet_size - 1; i >= 0; i--) { + int idx = --level2_codecounts[level2_lens[i]]; + level2_lens_s[idx] = level2_lens[i]; + level2_syms[idx] = i; + } + + ret = ff_init_vlc_from_lengths(&dist->vlc, 15, dist->alphabet_size, level2_lens_s, + 1, level2_syms, 2, 2, 0, INIT_VLC_LE, dec->logctx); + +end: + av_freep(&buf); + ff_free_vlc(&level1_vlc); + + return ret; +} + +static int read_distribution_bundle(GetBitContext *gb, JXLEntropyDecoder *dec, + JXLDistributionBundle *bundle, int num_dist, int disallow_lz77) +{ + int ret; + + if (num_dist <= 0) + return AVERROR(EINVAL); + + bundle->num_dist = num_dist; + bundle->lz77_enabled = get_bits1(gb); + if (bundle->lz77_enabled) { + if (disallow_lz77) + return AVERROR_INVALIDDATA; + bundle->lz77_min_symbol = jxl_u32(gb, 224, 512, 4096, 8, 0, 0, 0, 15); + bundle->lz77_min_length = jxl_u32(gb, 3, 4, 5, 9, 0, 0, 2, 8); + bundle->num_dist++; + ret = read_hybrid_uint_conf(gb, &bundle->lz_len_conf, 8); + if (ret < 0) + return ret; + } + + if (bundle->lz77_enabled && !dec->window) { + dec->window = av_malloc_array(1 << 20, sizeof(uint32_t)); + if (!dec->window) + return AVERROR(ENOMEM); + } + + ret = read_dist_clustering(gb, dec, bundle); + if (ret < 0) + return ret; + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + + bundle->dists = av_calloc(bundle->num_clusters, sizeof(JXLSymbolDistribution)); + if (!bundle->dists) + return AVERROR(ENOMEM); + + bundle->use_prefix_code = get_bits1(gb); + bundle->log_alphabet_size = bundle->use_prefix_code ? 15 : 5 + get_bits(gb, 2); + + for (int i = 0; i < bundle->num_clusters; i++) { + ret = read_hybrid_uint_conf(gb, &bundle->dists[i].config, bundle->log_alphabet_size); + if (ret < 0) + return ret; + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + + if (bundle->use_prefix_code) { + for (int i = 0; i < bundle->num_clusters; i++) { + JXLSymbolDistribution *dist = &bundle->dists[i]; + if (get_bits1(gb)) { + int n = get_bits(gb, 4); + dist->alphabet_size = 1 + (1 << n) + get_bitsz(gb, n); + } else { + dist->alphabet_size = 1; + } + dist->log_alphabet_size = clog1p(dist->alphabet_size - 1); + } + for (int i = 0; i < bundle->num_clusters; i++) { + ret = read_vlc_prefix(gb, dec, &bundle->dists[i]); + if (ret < 0) + return ret; + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + } else { + for (int i = 0; i < bundle->num_clusters; i++) { + ret = populate_distribution(gb, &bundle->dists[i], bundle->log_alphabet_size); + if (ret < 0) + return ret; + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + for (int i = 0; i < bundle->num_clusters; i++) { + ret = gen_alias_map(dec, &bundle->dists[i], bundle->log_alphabet_size); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static void entropy_decoder_close(JXLEntropyDecoder *dec) +{ + if (!dec) + return; + av_freep(&dec->window); + dist_bundle_close(&dec->bundle); +} + +static int entropy_decoder_init(void *avctx, GetBitContext *gb, JXLEntropyDecoder *dec, int num_dist) +{ + int ret; + + memset(dec, 0, sizeof(*dec)); + dec->logctx = avctx; + dec->state = -1; + + ret = read_distribution_bundle(gb, dec, &dec->bundle, num_dist, 0); + if (ret < 0) { + entropy_decoder_close(dec); + return ret; + } + + return 0; +} + +static int64_t entropy_decoder_read_symbol(GetBitContext *gb, JXLEntropyDecoder *dec, uint32_t context) +{ + int ret; + uint32_t hybrid_uint; + + ret = decode_hybrid_varlen_uint(gb, dec, &dec->bundle, context, &hybrid_uint); + if (ret < 0) + return ret; + + return hybrid_uint; +} + +static inline uint32_t icc_context(uint64_t i, uint32_t b1, uint32_t b2) +{ + uint32_t p1, p2; + if (i <= 128) + return 0; + if (b1 >= 'a' && b1 <= 'z' || b1 >= 'A' && b1 <= 'Z') + p1 = 0; + else if (b1 >= '0' && b1 <= '9' || b1 == '.' || b1 == ',') + p1 = 1; + else if (b1 <= 1) + p1 = b1 + 2; + else if (b1 > 1 && b1 < 16) + p1 = 4; + else if (b1 > 240 && b1 < 255) + p1 = 5; + else if (b1 == 255) + p1 = 6; + else + p1 = 7; + + if (b2 >= 'a' && b2 <= 'z' || b2 >= 'A' && b2 <= 'Z') + p2 = 0; + else if (b2 >= '0' && b2 <= '9' || b2 == '.' || b2 == ',') + p2 = 1; + else if (b2 < 16) + p2 = 2; + else if (b2 > 240) + p2 = 3; + else + p2 = 4; + + return 1 + p1 + p2 * 8; +} + +static inline uint32_t toc_context(uint32_t x) +{ + return FFMIN(7, clog1p(x)); +} + +static void populate_fields(AVCodecParserContext *s, AVCodecContext *avctx, const FFJXLMetadata *meta) +{ + s->width = meta->width; + s->height = meta->height; + + switch (meta->csp) { + case JPEGXL_CS_RGB: + case JPEGXL_CS_XYB: + avctx->colorspace = AVCOL_SPC_RGB; + break; + default: + avctx->colorspace = AVCOL_SPC_UNSPECIFIED; + } + + if (meta->wp == JPEGXL_WP_D65) { + switch (meta->primaries) { + case JPEGXL_PR_SRGB: + avctx->color_primaries = AVCOL_PRI_BT709; + break; + case JPEGXL_PR_P3: + avctx->color_primaries = AVCOL_PRI_SMPTE432; + break; + case JPEGXL_PR_2100: + avctx->color_primaries = AVCOL_PRI_BT2020; + break; + default: + avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; + } + } else if (meta->wp == JPEGXL_WP_DCI && meta->primaries == JPEGXL_PR_P3) { + avctx->color_primaries = AVCOL_PRI_SMPTE431; + } else { + avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; + } + + if (meta->trc > JPEGXL_TR_GAMMA) { + FFJXLTransferCharacteristic trc = meta->trc - JPEGXL_TR_GAMMA; + switch (trc) { + case JPEGXL_TR_BT709: + avctx->color_trc = AVCOL_TRC_BT709; + break; + case JPEGXL_TR_LINEAR: + avctx->color_trc = AVCOL_TRC_LINEAR; + break; + case JPEGXL_TR_SRGB: + avctx->color_trc = AVCOL_TRC_IEC61966_2_1; + break; + case JPEGXL_TR_PQ: + avctx->color_trc = AVCOL_TRC_SMPTEST2084; + break; + case JPEGXL_TR_DCI: + avctx->color_trc = AVCOL_TRC_SMPTE428; + break; + case JPEGXL_TR_HLG: + avctx->color_trc = AVCOL_TRC_ARIB_STD_B67; + break; + default: + avctx->color_trc = AVCOL_TRC_UNSPECIFIED; + } + } else if (meta->trc > 0) { + if (meta->trc > 45355 && meta->trc < 45555) + avctx->color_trc = AVCOL_TRC_GAMMA22; + else if (meta->trc > 35614 && meta->trc < 35814) + avctx->color_trc = AVCOL_TRC_GAMMA28; + else + avctx->color_trc = AVCOL_TRC_UNSPECIFIED; + } else { + avctx->color_trc = AVCOL_TRC_UNSPECIFIED; + } + + if (meta->csp == JPEGXL_CS_GRAY) { + if (meta->bit_depth <= 8) + s->format = meta->have_alpha ? AV_PIX_FMT_YA8 : AV_PIX_FMT_GRAY8; + else if (meta->bit_depth <= 16) + s->format = meta->have_alpha ? AV_PIX_FMT_YA16 : AV_PIX_FMT_GRAY16; + else + s->format = meta->have_alpha ? AV_PIX_FMT_NONE : AV_PIX_FMT_GRAYF32; + } else { + if (meta->bit_depth <= 8) + s->format = meta->have_alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB24; + else if (meta->bit_depth <= 16) + s->format = meta->have_alpha ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48; + else + s->format = meta->have_alpha ? AV_PIX_FMT_RGBAF32 : AV_PIX_FMT_RGBF32; + } +} + +static int skip_icc_profile(void *avctx, JXLParseContext *ctx, GetBitContext *gb) +{ + int64_t ret; + uint32_t last = 0, last2 = 0; + JXLEntropyDecoder dec; + uint64_t enc_size = jxl_u64(gb); + + if (!enc_size) + return AVERROR_INVALIDDATA; + + ret = entropy_decoder_init(avctx, gb, &dec, 41); + if (ret < 0) + return ret; + + if (get_bits_left(gb) < 0) { + entropy_decoder_close(&dec); + return AVERROR_BUFFER_TOO_SMALL; + } + + for (uint64_t read = 0; read < enc_size; read++) { + ret = entropy_decoder_read_symbol(gb, &dec, icc_context(read, last, last2)); + if (ret < 0 || get_bits_left(gb) < 0) { + entropy_decoder_close(&dec); + return ret < 0 ? ret : AVERROR_BUFFER_TOO_SMALL; + } + last2 = last; + last = ret; + } + + entropy_decoder_close(&dec); + + return 0; +} + +static int skip_extensions(GetBitContext *gb) +{ + uint64_t extensions = jxl_u64(gb), extensions_len = 0; + + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + + if (!extensions) + return 0; + + for (int i = 0; i < 64; i++) { + if (extensions & (UINT64_C(1) << i)) + extensions_len += jxl_u64(gb); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + + if (extensions_len > INT_MAX || get_bits_left(gb) < extensions_len) + return AVERROR_BUFFER_TOO_SMALL; + + skip_bits_long(gb, extensions_len); + + return 0; +} + +static int parse_frame_header(void *avctx, JXLParseContext *ctx, GetBitContext *gb) +{ + int all_default, do_yCbCr = 0, num_passes = 1, ret; + int group_size_shift = 1, lf_level = 0, save_as_ref = 0; + int have_crop = 0, full_frame = 1, resets_canvas = 1, upsampling = 1; + JXLFrame *frame = &ctx->codestream.frame; + const FFJXLMetadata *meta = &ctx->codestream.meta; + int32_t x0 = 0, y0 = 0; + uint32_t duration = 0, width = meta->coded_width, height = meta->coded_height; + uint32_t name_len, num_groups, num_lf_groups, group_dim, lf_group_dim, toc_count; + uint64_t flags = 0; + int start_len = get_bits_count(gb); + + memset(frame, 0, sizeof(*frame)); + frame->is_last = 1; + + all_default = get_bits1(gb); + if (!all_default) { + frame->type = get_bits(gb, 2); + frame->encoding = get_bits1(gb); + flags = jxl_u64(gb); + if (!meta->xyb_encoded) + do_yCbCr = get_bits1(gb); + if (!(flags & JXL_FLAG_USE_LF_FRAME)) { + if (do_yCbCr) + skip_bits(gb, 6); // jpeg upsampling + upsampling = jxl_u32(gb, 1, 2, 4, 8, 0, 0, 0, 0); + skip_bits_long(gb, 2 * meta->num_extra_channels); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + if (frame->encoding == JPEGXL_ENC_MODULAR) + group_size_shift = get_bits(gb, 2); + else if (meta->xyb_encoded) + skip_bits(gb, 6); // xqm and bqm scales + if (frame->type != JPEGXL_FRAME_REFERENCE_ONLY) { + num_passes = jxl_u32(gb, 1, 2, 3, 4, 0, 0, 0, 3); + if (num_passes != 1) { + int num_ds = jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 1); + skip_bits(gb, 2 * (num_passes - 1)); // shift + skip_bits(gb, 2 * num_ds); // downsample + for (int i = 0; i < num_ds; i++) + jxl_u32(gb, 0, 1, 2, 0, 0, 0, 0, 3); + } + } + if (frame->type == JPEGXL_FRAME_LF) + lf_level = 1 + get_bits(gb, 2); + else + have_crop = get_bits1(gb); + if (have_crop) { + if (frame->type != JPEGXL_FRAME_REFERENCE_ONLY) { + uint32_t ux0 = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); + uint32_t uy0 = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); + x0 = unpack_signed(ux0); + y0 = unpack_signed(uy0); + } + width = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); + height = jxl_u32(gb, 0, 256, 2304, 18688, 8, 11, 14, 30); + full_frame = x0 <= 0 && y0 <= 0 && width + x0 >= meta->coded_width + && height + y0 >= meta->coded_height; + } + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + if (frame->type == JPEGXL_FRAME_REGULAR || frame->type == JPEGXL_FRAME_SKIP_PROGRESSIVE) { + for (int i = 0; i <= meta->num_extra_channels; i++) { + int mode = jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 2); + if (meta->num_extra_channels && (mode == JPEGXL_BM_BLEND || mode == JPEGXL_BM_MULADD)) + jxl_u32(gb, 0, 1, 2, 3, 0, 0, 0, 2); + if (meta->num_extra_channels && (mode == JPEGXL_BM_BLEND || mode == JPEGXL_BM_MULADD + || mode == JPEGXL_BM_MUL)) + skip_bits1(gb); + if (!i) + resets_canvas = mode == JPEGXL_BM_REPLACE && full_frame; + if (!resets_canvas) + skip_bits(gb, 2); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + if (meta->animation_offset) + duration = jxl_u32(gb, 0, 1, 0, 0, 0, 0, 8, 32); + if (meta->have_timecodes) + skip_bits_long(gb, 32); + frame->is_last = get_bits1(gb); + } else { + frame->is_last = 0; + } + if (frame->type != JPEGXL_FRAME_LF && !frame->is_last) + save_as_ref = get_bits(gb, 2); + if (frame->type == JPEGXL_FRAME_REFERENCE_ONLY || + (resets_canvas && !frame->is_last && (!duration || save_as_ref) + && frame->type != JPEGXL_FRAME_LF)) + skip_bits1(gb); // save before color transform + name_len = 8 * jxl_u32(gb, 0, 0, 16, 48, 0, 4, 5, 10); + if (get_bits_left(gb) < name_len) + return AVERROR_BUFFER_TOO_SMALL; + skip_bits_long(gb, name_len); + } + + if (!all_default) { + int restd = get_bits1(gb), gab = 1; + if (!restd) + gab = get_bits1(gb); + if (gab && !restd && get_bits1(gb)) + // gab custom + skip_bits_long(gb, 16 * 6); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + if (!restd) { + int epf = get_bits(gb, 2); + if (epf) { + if (frame->encoding == JPEGXL_ENC_VARDCT && get_bits1(gb)) { + skip_bits_long(gb, 16 * 8); // custom epf sharpness + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + if (get_bits1(gb)) { + skip_bits_long(gb, 3 * 16 + 32); // custom epf weight + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + if (get_bits1(gb)) { // custom epf sigma + if (frame->encoding == JPEGXL_ENC_VARDCT) + skip_bits(gb, 16); + skip_bits_long(gb, 16 * 3); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + if (frame->encoding == JPEGXL_ENC_MODULAR) + skip_bits(gb, 16); + } + ret = skip_extensions(gb); + if (ret < 0) + return ret; + } + ret = skip_extensions(gb); + if (ret < 0) + return ret; + } + + width = div_ceil(div_ceil(width, upsampling), 1 << (3 * lf_level)); + height = div_ceil(div_ceil(height, upsampling), 1 << (3 * lf_level)); + group_dim = 128 << group_size_shift; + lf_group_dim = group_dim << 3; + num_groups = div_ceil(width, group_dim) * div_ceil(height, group_dim); + num_lf_groups = div_ceil(width, lf_group_dim) * div_ceil(height, lf_group_dim); + if (num_groups == 1 && num_passes == 1) + toc_count = 1; + else + toc_count = 2 + num_lf_groups + num_groups * num_passes; + + // permuted toc + if (get_bits1(gb)) { + JXLEntropyDecoder dec; + uint32_t end, lehmer = 0; + ret = entropy_decoder_init(avctx, gb, &dec, 8); + if (ret < 0) + return ret; + if (get_bits_left(gb) < 0) { + entropy_decoder_close(&dec); + return AVERROR_BUFFER_TOO_SMALL; + } + end = entropy_decoder_read_symbol(gb, &dec, toc_context(toc_count)); + if (end > toc_count) { + entropy_decoder_close(&dec); + return AVERROR_INVALIDDATA; + } + for (uint32_t i = 0; i < end; i++) { + lehmer = entropy_decoder_read_symbol(gb, &dec, toc_context(lehmer)); + if (get_bits_left(gb) < 0) { + entropy_decoder_close(&dec); + return AVERROR_BUFFER_TOO_SMALL; + } + } + entropy_decoder_close(&dec); + } + align_get_bits(gb); + + for (uint32_t i = 0; i < toc_count; i++) { + frame->body_length += 8 * jxl_u32(gb, 0, 1024, 17408, 4211712, 10, 14, 22, 30); + if (get_bits_left(gb) < 0) + return AVERROR_BUFFER_TOO_SMALL; + } + align_get_bits(gb); + + frame->total_length = frame->body_length + get_bits_count(gb) - start_len; + + return 0; +} + +static int skip_boxes(JXLParseContext *ctx, const uint8_t *buf, int buf_size) +{ + GetByteContext gb; + + if (ctx->skip > buf_size) + return AVERROR_BUFFER_TOO_SMALL; + + buf += ctx->skip; + buf_size -= ctx->skip; + bytestream2_init(&gb, buf, buf_size); + + while (1) { + uint64_t size; + int head_size = 4; + + if (bytestream2_peek_le16(&gb) == FF_JPEGXL_CODESTREAM_SIGNATURE_LE) + break; + if (bytestream2_peek_le64(&gb) == FF_JPEGXL_CONTAINER_SIGNATURE_LE) + break; + + if (bytestream2_get_bytes_left(&gb) < 8) + return AVERROR_BUFFER_TOO_SMALL; + + size = bytestream2_get_be32(&gb); + if (size == 1) { + if (bytestream2_get_bytes_left(&gb) < 12) + return AVERROR_BUFFER_TOO_SMALL; + size = bytestream2_get_be64(&gb); + head_size = 12; + } + if (!size) + return AVERROR_INVALIDDATA; + /* invalid ISOBMFF size */ + if (size <= head_size + 4) + return AVERROR_INVALIDDATA; + + ctx->skip += size; + bytestream2_skip(&gb, size - head_size); + if (bytestream2_get_bytes_left(&gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + } + + return 0; +} + +static int try_parse(AVCodecParserContext *s, AVCodecContext *avctx, JXLParseContext *ctx, + const uint8_t *buf, int buf_size) +{ + int ret, cs_buflen, header_skip; + const uint8_t *cs_buffer; + GetBitContext gb; + + if (ctx->skip > buf_size) + return AVERROR_BUFFER_TOO_SMALL; + + buf += ctx->skip; + buf_size -= ctx->skip; + + if (ctx->container || AV_RL64(buf) == FF_JPEGXL_CONTAINER_SIGNATURE_LE) { + ctx->container = 1; + ret = ff_jpegxl_collect_codestream_header(buf, buf_size, ctx->cs_buffer, + sizeof(ctx->cs_buffer), &ctx->copied); + if (ret < 0) + return ret; + ctx->collected_size = ret; + if (!ctx->copied) { + ctx->skip += ret; + return AVERROR_BUFFER_TOO_SMALL; + } + cs_buffer = ctx->cs_buffer; + cs_buflen = FFMIN(sizeof(ctx->cs_buffer), ctx->copied); + } else { + cs_buffer = buf; + cs_buflen = buf_size; + } + + if (!ctx->codestream_length) { + header_skip = ff_jpegxl_parse_codestream_header(cs_buffer, cs_buflen, &ctx->codestream.meta, 0); + if (header_skip < 0) + return header_skip; + ctx->codestream_length = header_skip; + populate_fields(s, avctx, &ctx->codestream.meta); + } + + if (ctx->container) + return ctx->collected_size; + + ret = init_get_bits8(&gb, cs_buffer, cs_buflen); + if (ret < 0) + return ret; + + skip_bits_long(&gb, ctx->codestream_length); + + if (!ctx->skipped_icc && ctx->codestream.meta.have_icc_profile) { + ret = skip_icc_profile(avctx, ctx, &gb); + if (ret < 0) + return ret; + ctx->skipped_icc = 1; + align_get_bits(&gb); + ctx->codestream_length = get_bits_count(&gb); + } + + if (get_bits_left(&gb) <= 0) + return AVERROR_BUFFER_TOO_SMALL; + + while (1) { + ret = parse_frame_header(avctx, ctx, &gb); + if (ret < 0) + return ret; + ctx->codestream_length += ctx->codestream.frame.total_length; + if (ctx->codestream.frame.is_last) + return ctx->codestream_length / 8; + if (get_bits_left(&gb) <= ctx->codestream.frame.body_length) + return AVERROR_BUFFER_TOO_SMALL; + skip_bits_long(&gb, ctx->codestream.frame.body_length); + } +} + +static int jpegxl_parse(AVCodecParserContext *s, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + JXLParseContext *ctx = s->priv_data; + int next = END_NOT_FOUND, ret; + + *poutbuf_size = 0; + *poutbuf = NULL; + + if (!ctx->pc.index) + goto flush; + + if ((!ctx->container || !ctx->codestream_length) && !ctx->next) { + ret = try_parse(s, avctx, ctx, ctx->pc.buffer, ctx->pc.index); + if (ret < 0) + goto flush; + ctx->next = ret; + if (ctx->container) + ctx->skip += ctx->next; + } + + if (ctx->container && ctx->next >= 0) { + ret = skip_boxes(ctx, ctx->pc.buffer, ctx->pc.index); + if (ret < 0) { + if (ret == AVERROR_INVALIDDATA) + ctx->next = -1; + goto flush; + } + ctx->next = ret + ctx->skip; + } + + if (ctx->next >= 0) + next = ctx->next - ctx->pc.index; + +flush: + if (next > buf_size) + next = END_NOT_FOUND; + + ret = ff_combine_frame(&ctx->pc, next, &buf, &buf_size); + if (ret < 0) + return buf_size; + + *poutbuf = buf; + *poutbuf_size = buf_size; + + ctx->codestream_length = 0; + ctx->collected_size = 0; + ctx->container = 0; + ctx->copied = 0; + ctx->skip = 0; + ctx->skipped_icc = 0; + ctx->next = 0; + memset(&ctx->codestream, 0, sizeof(ctx->codestream)); + + return next; +} + +const AVCodecParser ff_jpegxl_parser = { + .codec_ids = { AV_CODEC_ID_JPEGXL }, + .priv_data_size = sizeof(JXLParseContext), + .parser_parse = jpegxl_parse, + .parser_close = ff_parse_close, +}; diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c index 285f81a901..a663b9e253 100644 --- a/libavcodec/parsers.c +++ b/libavcodec/parsers.c @@ -55,6 +55,7 @@ extern const AVCodecParser ff_hevc_parser; extern const AVCodecParser ff_hdr_parser; extern const AVCodecParser ff_ipu_parser; extern const AVCodecParser ff_jpeg2000_parser; +extern const AVCodecParser ff_jpegxl_parser; extern const AVCodecParser ff_misc4_parser; extern const AVCodecParser ff_mjpeg_parser; extern const AVCodecParser ff_mlp_parser;