stream: rewrite url escaping/unescaping functions

The original functions come from 24c6f11c8b, whose commit message says that
they were copied from another project. That project is GPL-licensed and was
written by an unknown author, so there is no hope of relicensing them to
LGPL.

Replace the existing functions with code written by Avi Halachmi. He did
not see the old code, but wrote it based on the function signature and
an extended description of what they should do (http://sprunge.us/edia).
Some additional help was provided by me (in particular the function of
the "ok" parameter and how to implement it - not in the original
ASFRecorder code).

Some of the code is hilariously similar, but these are coincidences. The
name of the variable "c" probably "leaked" from me, but "o" is a true
coincidence.

The code was integrated by me. My only changes were renaming the functions
to the old names, reordering the top-level declarations, renaming
"default_ok" to "url_default_ok", and changing the strings from char* to
char[].

The author of the new code is Avi Halachmi.
This commit is contained in:
wm4 2017-06-13 19:50:25 +02:00
parent 4e663566dd
commit 3bbb6078a5
1 changed files with 48 additions and 36 deletions

View File

@ -122,37 +122,49 @@ static const stream_info_t *const stream_list[] = {
static bool stream_seek_unbuffered(stream_t *s, int64_t newpos);
// Convert one hexadecimal digit character to its value (0-15).
// Upper- and lowercase letters are both accepted.
// Returns -1 if c is not a hex digit.
static int hex2dec(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return 10 + c - 'A';
    if (c >= 'a' && c <= 'f')
        return 10 + c - 'a';
    return -1;
}

// Replace escape sequences in an URL (or a part of an URL).
// Every "%XX" (XX = two hex digits) is replaced by the byte it encodes. The
// string is rewritten in place; the result is never longer than the input
// and is always NUL-terminated. A '%' that is not followed by two hex digits
// is copied through unchanged.
void mp_url_unescape_inplace(char *url)
{
    size_t len = strlen(url);
    size_t o = 0;

    for (size_t i = 0; i < len; i++) {
        char c = url[i];
        // A complete escape needs two more characters after the '%'.
        if (c == '%' && len - i > 2) {
            int msd = hex2dec(url[i + 1]);
            int lsd = hex2dec(url[i + 2]);
            if (msd >= 0 && lsd >= 0) {
                c = (char)(unsigned char)(16 * msd + lsd);
                i += 2; // consume the two digits only for a valid escape
            }
            // On an invalid escape only the '%' itself is emitted, so the
            // following characters are examined again and a valid escape
            // starting right after it (e.g. "%%41") is still decoded.
        }
        url[o++] = c;
    }
    url[o] = '\0';
}
// Digits used when writing a byte as "%XX"; indexed by nibble value (0-15).
static const char hex_digits[] = "0123456789" "ABCDEF";

// Characters that are copied through unescaped by default: ASCII letters,
// decimal digits and "-._~".
static const char url_default_ok[] =
    "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789" "-._~";
// Escape according to http://tools.ietf.org/html/rfc3986#section-2.1
// Only unreserved characters are not escaped.
// The argument ok (if not NULL) is as follows:
@ -160,27 +172,27 @@ void mp_url_unescape_inplace(char *buf)
// ok[0] == '~': do not escape anything but these characters
// (can't override the unreserved characters, which are
// never escaped)
char *mp_url_escape(void *talloc_ctx, const char *s, const char *ok)
char *mp_url_escape(void *talloc_ctx, const char *url, const char *ok)
{
int len = strlen(s);
char *buf = talloc_array(talloc_ctx, char, len * 3 + 1);
int o = 0;
for (int i = 0; i < len; i++) {
unsigned char c = s[i];
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') || strchr("-._~", c) ||
(ok && ((ok[0] != '~') == !!strchr(ok, c))))
{
buf[o++] = c;
char *rv = talloc_size(talloc_ctx, strlen(url) * 3 + 1);
char *out = rv;
bool negate = ok && ok[0] == '~';
for (char c; (c = *url); url++) {
bool as_is = negate ? !strchr(ok + 1, c)
: (strchr(url_default_ok, c) || (ok && strchr(ok, c)));
if (as_is) {
*out++ = c;
} else {
const char hex[] = "0123456789ABCDEF";
buf[o++] = '%';
buf[o++] = hex[c / 16];
buf[o++] = hex[c % 16];
unsigned char v = c;
*out++ = '%';
*out++ = hex_digits[v / 16];
*out++ = hex_digits[v % 16];
}
}
buf[o++] = '\0';
return buf;
*out = 0;
return rv;
}
static stream_t *new_stream(void)