diff --git a/stream/stream.c b/stream/stream.c
index b48de575ea..fc786ec305 100644
--- a/stream/stream.c
+++ b/stream/stream.c
@@ -30,8 +30,8 @@
 #include
 #include
 
-#include
 #include
+#include "compat/mpbswap.h"
 
 #include "talloc.h"
 
@@ -841,134 +841,89 @@ static int stream_enable_cache(stream_t **stream, int64_t size, int64_t min,
     return res;
 }
 
-/**
- * Helper function to read 16 bits little-endian and advance pointer
- */
-static uint16_t get_le16_inc(const uint8_t **buf)
+// Read a 16 bit word from s. The two bytes are interpreted as big-endian if
+// big_endian is set, and as little-endian otherwise. The return values of
+// stream_read_char() are not checked; read_characters() looks at s->eof
+// after the call instead.
+static uint16_t stream_read_word_endian(stream_t *s, bool big_endian)
 {
-    uint16_t v = AV_RL16(*buf);
-    *buf += 2;
-    return v;
+    unsigned int y = stream_read_char(s);
+    y = (y << 8) | stream_read_char(s);
+    // The first byte ended up in the high half, so y is the big-endian
+    // reading; only the little-endian case needs the swap.
+    if (!big_endian)
+        y = bswap_16(y);
+    return y;
 }
 
-/**
- * Helper function to read 16 bits big-endian and advance pointer
- */
-static uint16_t get_be16_inc(const uint8_t **buf)
+// Read characters from s into dst until the next '\n' (inclusive). In the
+// UTF-16 cases (utf16 == 1: LE, utf16 == 2: BE) the input is converted to
+// UTF-8 and reading also stops once s->eof is set. For any other utf16 value
+// the bytes are copied unchanged and reading also stops when the buffer in s
+// is exhausted.
+// Returns the number of bytes written to dst, or -1 if dst is too small for
+// the line or GET_UTF16 hits its error path.
+static int read_characters(stream_t *s, uint8_t *dst, int dstsize, int utf16)
 {
-    uint16_t v = AV_RB16(*buf);
-    *buf += 2;
-    return v;
-}
-
-/**
- * Find a newline character in buffer
- * \param buf buffer to search
- * \param len amount of bytes to search in buffer, may not overread
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- *              0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static const uint8_t *find_newline(const uint8_t *buf, int len, int utf16)
-{
-    uint32_t c;
-    const uint8_t *end = buf + len;
-    switch (utf16) {
-    case 0:
-        return (uint8_t *)memchr(buf, '\n', len);
-    case 1:
-        while (buf < end - 1) {
-            GET_UTF16(c, buf < end - 1 ? get_le16_inc(&buf) : 0, return NULL;)
-            if (buf <= end && c == '\n')
-                return buf - 1;
-        }
-        break;
-    case 2:
-        while (buf < end - 1) {
-            GET_UTF16(c, buf < end - 1 ? get_be16_inc(&buf) : 0, return NULL;)
-            if (buf <= end && c == '\n')
-                return buf - 1;
-        }
-        break;
-    }
-    return NULL;
-}
-
-#define EMPTY_STMT do{}while(0);
-
-/**
- * Copy a number of bytes, converting to UTF-8 if input is UTF-16
- * \param dst buffer to copy to
- * \param dstsize size of dst buffer
- * \param src buffer to copy from
- * \param len amount of bytes to copy from src
- * \param utf16 chose between UTF-8/ASCII/other and LE and BE UTF-16
- *              0 = UTF-8/ASCII/other, 1 = UTF-16-LE, 2 = UTF-16-BE
- */
-static int copy_characters(uint8_t *dst, int dstsize,
-                           const uint8_t *src, int *len, int utf16)
-{
-    uint32_t c;
-    uint8_t *dst_end = dst + dstsize;
-    const uint8_t *end = src + *len;
-    switch (utf16) {
-    case 0:
-        if (*len > dstsize)
-            *len = dstsize;
-        memcpy(dst, src, *len);
-        return *len;
-    case 1:
-        while (src < end - 1 && dst_end - dst > 8) {
+    if (utf16 == 1 || utf16 == 2) {
+        uint8_t *cur = dst;
+        while (1) {
+            if ((cur - dst) + 8 >= dstsize) // PUT_UTF8 writes max. 8 bytes
+                return -1; // line too long
+            uint32_t c;
             uint8_t tmp;
-            GET_UTF16(c, src < end - 1 ? get_le16_inc(&src) : 0, EMPTY_STMT)
-            PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
+            GET_UTF16(c, stream_read_word_endian(s, utf16 == 2), return -1;)
+            if (s->eof)
+                break; // legitimate EOF; ignore the case of partial reads
+            PUT_UTF8(c, tmp, *cur++ = tmp;)
+            if (c == '\n')
+                break;
         }
-        *len -= end - src;
-        return dstsize - (dst_end - dst);
-    case 2:
-        while (src < end - 1 && dst_end - dst > 8) {
-            uint8_t tmp;
-            GET_UTF16(c, src < end - 1 ? get_be16_inc(&src) : 0, EMPTY_STMT)
-            PUT_UTF8(c, tmp, *dst++ = tmp; EMPTY_STMT)
-        }
-        *len -= end - src;
-        return dstsize - (dst_end - dst);
+        return cur - dst;
+    } else {
+        // Only refill when the buffer has been fully consumed.
+        if (s->buf_pos >= s->buf_len)
+            stream_fill_buffer(s);
+        uint8_t *src = s->buffer + s->buf_pos;
+        int src_len = s->buf_len - s->buf_pos;
+        uint8_t *end = memchr(src, '\n', src_len);
+        int len = end ? end - src + 1 : src_len;
+        if (len > dstsize)
+            return -1; // line too long
+        memcpy(dst, src, len);
+        s->buf_pos += len;
+        return len;
     }
-    return 0;
 }
 
+// Read one line from s into mem, which has room for max bytes (including the
+// terminating \0). UTF-16 input is converted to UTF-8.
+// On error, or if the line is larger than max-1, return NULL and unset s->eof.
+// On EOF, return NULL, and s->eof will be set.
+// Otherwise, return the line (including \n or \r\n at the end of the line).
+// If the return value is non-NULL, it's always the same as mem.
+// utf16: 0: UTF8 or 8 bit legacy, 1: UTF16-LE, 2: UTF16-BE
 unsigned char *stream_read_line(stream_t *s, unsigned char *mem, int max,
                                 int utf16)
 {
-    int len;
-    const unsigned char *end;
-    unsigned char *ptr = mem;
-    if (utf16 == -1)
-        utf16 = 0;
     if (max < 1)
         return NULL;
-    max--; // reserve one for 0-termination
-    do {
-        len = s->buf_len - s->buf_pos;
-        // try to fill the buffer
-        if (len <= 0 &&
-            (!stream_fill_buffer(s) ||
-             (len = s->buf_len - s->buf_pos) <= 0))
-            break;
-        end = find_newline(s->buffer + s->buf_pos, len, utf16);
-        if (end)
-            len = end - (s->buffer + s->buf_pos) + 1;
-        if (len > 0 && max > 0) {
-            int l = copy_characters(ptr, max, s->buffer + s->buf_pos, &len,
-                                    utf16);
-            max -= l;
-            ptr += l;
-            if (!len)
-                break;
+    int read = 0;
+    while (1) {
+        // Reserve 1 byte of mem for the terminating \0.
+        int l = read_characters(s, &mem[read], max - read - 1, utf16);
+        if (l < 0) {
+            MP_VERBOSE(s, "error reading line\n");
+            s->eof = false;
+            return NULL;
         }
-        s->buf_pos += len;
-    } while (!end);
-    ptr[0] = 0;
-    if (s->eof && ptr == mem)
+        read += l;
+        // Stop when nothing more could be read, or when the line is complete.
+        if (l == 0 || (read > 0 && mem[read - 1] == '\n'))
+            break;
+    }
+    mem[read] = '\0';
+    if (s->eof && read == 0) // legitimate EOF
         return NULL;
     return mem;
 }