stream: rewrite url escaping/unescaping functions

The original functions come from 24c6f11c8b, which says that these functions were copied from another project. This other project is GPL and was written by an unknown author, so there is no hope to relicense them to LGPL. Replace the existing functions with code written by Avi Halachmi. He did not see the old code, but wrote it based on the function signature and an extended description of what they should do (http://sprunge.us/edia). Some additional help was provided by me (in particular the function of the "ok" parameter and how to implement it - not in the original ASFRecorder code). Some of the code is hilariously similar, but these are coincidences. The name of the variable "c" probably "leaked" from me, but "o" is a true coincidence. The code was integrated by me - my only change is changing the function names to the old ones, moving the order of the top-level declarations, and changing "default_ok" to "url_default_ok", and changing the strings from char* to char[]. The author of the new code is Avi Halachmi.
2017-06-13 19:50:25 +02:00 · 2017-06-13 19:50:25 +02:00 · 3bbb6078a5
parent 4e663566dd
commit 3bbb6078a5
1 changed files with 48 additions and 36 deletions
--- a/stream/stream.c
+++ b/stream/stream.c
@ -122,37 +122,49 @@ static const stream_info_t *const stream_list[] = {

 static bool stream_seek_unbuffered(stream_t *s, int64_t newpos);

-static int from_hex(unsigned char c)
+// return -1 if not hex char
+static int hex2dec(char c)
 {
-    if (c >= 'a' && c <= 'f')
-        return c - 'a' + 10;
-    if (c >= 'A' && c <= 'F')
-        return c - 'A' + 10;
    if (c >= '0' && c <= '9')
        return c - '0';
+    if (c >= 'A' && c <= 'F')
+        return 10 + c - 'A';
+    if (c >= 'a' && c <= 'f')
+        return 10 + c - 'a';
    return -1;
 }

 // Replace escape sequences in an URL (or a part of an URL)
-void mp_url_unescape_inplace(char *buf)
+void mp_url_unescape_inplace(char *url)
 {
-    int len = strlen(buf);
-    int o = 0;
-    for (int i = 0; i < len; i++) {
-        unsigned char c = buf[i];
-        if (c == '%' && i + 2 < len) { //must have 2 more chars
-            int c1 = from_hex(buf[i + 1]);
-            int c2 = from_hex(buf[i + 2]);
-            if (c1 >= 0 && c2 >= 0) {
-                c = c1 * 16 + c2;
-                i = i + 2; //only skip next 2 chars if valid esc
-            }
+    for (int len = strlen(url), i = 0, o = 0; i <= len;) {
+        if ((url[i] != '%') || (i > len - 3)) {  // %NN can't start after len-3
+            url[o++] = url[i++];
+            continue;
+        }
+
+        int msd = hex2dec(url[i + 1]),
+            lsd = hex2dec(url[i + 2]);
+
+        if (msd >= 0 && lsd >= 0) {
+            url[o++] = 16 * msd + lsd;
+            i += 3;
+        } else {
+            url[o++] = url[i++];
+            url[o++] = url[i++];
+            url[o++] = url[i++];
        }
-        buf[o++] = c;
    }
-    buf[o++] = '\0';
 }

+static const char hex_digits[] = "0123456789ABCDEF";
+
+
+static const char url_default_ok[] = "abcdefghijklmnopqrstuvwxyz"
+                                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                     "0123456789"
+                                     "-._~";
+
 // Escape according to http://tools.ietf.org/html/rfc3986#section-2.1
 // Only unreserved characters are not escaped.
 // The argument ok (if not NULL) is as follows:
@ -160,27 +172,27 @@ void mp_url_unescape_inplace(char *buf)
 //      ok[0] == '~': do not escape anything but these characters
 //                    (can't override the unreserved characters, which are
 //                     never escaped)
-char *mp_url_escape(void *talloc_ctx, const char *s, const char *ok)
+char *mp_url_escape(void *talloc_ctx, const char *url, const char *ok)
 {
-    int len = strlen(s);
-    char *buf = talloc_array(talloc_ctx, char, len * 3 + 1);
-    int o = 0;
-    for (int i = 0; i < len; i++) {
-        unsigned char c = s[i];
-        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
-            (c >= '0' && c <= '9') || strchr("-._~", c) ||
-            (ok && ((ok[0] != '~') == !!strchr(ok, c))))
-        {
-            buf[o++] = c;
+    char *rv = talloc_size(talloc_ctx, strlen(url) * 3 + 1);
+    char *out = rv;
+    bool negate = ok && ok[0] == '~';
+
+    for (char c; (c = *url); url++) {
+        bool as_is = negate ? !strchr(ok + 1, c)
+                            : (strchr(url_default_ok, c) || (ok && strchr(ok, c)));
+        if (as_is) {
+            *out++ = c;
        } else {
-            const char hex[] = "0123456789ABCDEF";
-            buf[o++] = '%';
-            buf[o++] = hex[c / 16];
-            buf[o++] = hex[c % 16];
+            unsigned char v = c;
+            *out++ = '%';
+            *out++ = hex_digits[v / 16];
+            *out++ = hex_digits[v % 16];
        }
    }
-    buf[o++] = '\0';
-    return buf;
+
+    *out = 0;
+    return rv;
 }

 static stream_t *new_stream(void)