ffmpeg/libavformat/url.c
Nicolas George 1201687da2 lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose().
Also add and update some tests.

Change the semantics a little, because for filesystem paths
symlinks complicate things.
See the comments in the code for detail.

Fix trac tickets #8813 and #8814.
2020-08-12 16:45:21 +02:00

312 lines
8.7 KiB
C

/*
* URL utility functions
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avformat.h"
#include "internal.h"
#include "config.h"
#include "url.h"
#if CONFIG_NETWORK
#include "network.h"
#endif
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
/**
* @file
* URL utility functions.
*/
/**
 * Assemble a URL of the form proto://authorization@hostname:port<tail>
 * into str, where <tail> is the printf-style expansion of fmt.
 *
 * @param str           destination buffer, always reset to "" first
 * @param size          size of str in bytes
 * @param proto         protocol name; "://" is appended after it; skipped if NULL
 * @param authorization text placed before '@'; skipped if NULL or empty
 * @param hostname      host name; wrapped in [] when getaddrinfo() reports
 *                      it as a numerical IPv6 address
 * @param port          port number, appended as ":%d" only when >= 0
 * @param fmt           printf-style format for the rest of the URL, may be NULL
 * @return the length of the string left in str
 */
int ff_url_join(char *str, int size, const char *proto,
                const char *authorization, const char *hostname,
                int port, const char *fmt, ...)
{
    int bracket_host = 0;
#if CONFIG_NETWORK
    struct addrinfo hints = { 0 }, *ai;
#endif

    str[0] = '\0';

    if (proto)
        av_strlcatf(str, size, "%s://", proto);
    if (authorization && authorization[0])
        av_strlcatf(str, size, "%s@", authorization);

#if CONFIG_NETWORK && defined(AF_INET6)
    /* A hostname that parses as a numerical IPv6 address has to be
     * escaped within [] so its colons are not mistaken for the port
     * separator. AI_NUMERICHOST restricts the lookup to numerical input. */
    hints.ai_flags = AI_NUMERICHOST;
    if (!getaddrinfo(hostname, NULL, &hints, &ai)) {
        bracket_host = ai->ai_family == AF_INET6;
        freeaddrinfo(ai);
    }
#endif

    if (bracket_host) {
        av_strlcat(str, "[", size);
        av_strlcat(str, hostname, size);
        av_strlcat(str, "]", size);
    } else {
        /* Not an IPv6 address, just output the plain string. */
        av_strlcat(str, hostname, size);
    }

    if (port >= 0)
        av_strlcatf(str, size, ":%d", port);

    if (fmt) {
        va_list args;
        size_t used = strlen(str);
        size_t room = size > used ? size - used : 0;

        va_start(args, fmt);
        vsnprintf(str + used, room, fmt, args);
        va_end(args);
    }

    return strlen(str);
}
/**
 * Scan [cur, end) for the first byte that belongs to the set delim.
 *
 * A NUL byte inside the range also stops the scan, because strchr()
 * matches the terminator of delim.
 *
 * @return pointer to the first matching byte, or end if nothing matched
 *         (cur itself when the range is empty)
 */
static const char *find_delim(const char *delim, const char *cur, const char *end)
{
    const char *pos;

    for (pos = cur; pos < end; pos++) {
        if (strchr(delim, *pos))
            break;
    }
    return pos;
}
/**
 * Split a URL into its components without copying it.
 *
 * Every field of uc is set to a pointer into [url, end] marking where
 * the corresponding component starts; uc->url is the start of the
 * string and uc->end its end. A component that is absent starts at the
 * same position as the one that follows it.
 *
 * All delimiter checks are bounded by end, so a byte located at or
 * after end is never inspected, even when end does not point at the
 * NUL terminator.
 *
 * @param uc  structure receiving the component pointers
 * @param url start of the URL, must not be NULL
 * @param end end of the URL, or NULL to use url + strlen(url)
 * @return 0 on success, AVERROR(EINVAL) if a bracketed host is not
 *         closed or is followed by something other than ':'
 */
int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
{
    const char *cur, *aend, *p;

    av_assert0(url);
    if (!end)
        end = url + strlen(url);
    cur = uc->url = url;

    /* scheme */
    uc->scheme = cur;
    p = find_delim(":/", cur, end); /* lavf "schemes" can contain options */
    if (p < end && *p == ':')
        cur = p + 1;

    /* authority */
    uc->authority = cur;
    if (end - cur >= 2 && cur[0] == '/' && cur[1] == '/') {
        cur += 2;
        aend = find_delim("/?#", cur, end);

        /* userinfo */
        uc->userinfo = cur;
        p = find_delim("@", cur, aend);
        if (p < aend && *p == '@')
            cur = p + 1;

        /* host */
        uc->host = cur;
        if (cur < aend && *cur == '[') { /* hello IPv6, thanks for using colons! */
            p = find_delim("]", cur, aend);
            /* the closing bracket must lie inside the authority */
            if (p >= aend || *p != ']')
                return AVERROR(EINVAL);
            /* only a port may follow the closing bracket */
            if (p + 1 < aend && p[1] != ':')
                return AVERROR(EINVAL);
            cur = p + 1;
        } else {
            cur = find_delim(":", cur, aend);
        }

        /* port */
        uc->port = cur;
        cur = aend;
    } else {
        uc->userinfo = uc->host = uc->port = cur;
    }

    /* path */
    uc->path = cur;
    cur = find_delim("?#", cur, end);

    /* query */
    uc->query = cur;
    if (cur < end && *cur == '?')
        cur = find_delim("#", cur, end);

    /* fragment */
    uc->fragment = cur;

    uc->end = end;
    return 0;
}
/**
 * Append the path [in, in_end) to the output being built, one segment
 * at a time, dropping "." segments and letting ".." segments remove the
 * previously written segment.
 *
 * @param root    start of the path in the output buffer; ".." never
 *                rewinds before this point
 * @param out_end limit of the output buffer
 * @param rout    in: current write position; out: updated write
 *                position (only stored on success)
 * @param in      start of the path to append; one leading '/' is skipped
 * @param in_end  end of the path to append
 * @return 0 on success, AVERROR(ENOMEM) if a segment does not fit
 */
static int append_path(char *root, char *out_end, char **rout,
                       const char *in, const char *in_end)
{
    char *dst = *rout;

    if (in < in_end && *in == '/')
        in++; /* already taken care of */

    while (in < in_end) {
        const char *seg_end = find_delim("/", in, in_end);
        const char *resume  = seg_end;

        /* the slash terminating a segment travels with that segment */
        if (seg_end < in_end && *seg_end == '/')
            resume++;

        if (seg_end - in == 1 && in[0] == '.') {
            /* "." changes nothing */
            in = resume;
            continue;
        }

        if (seg_end - in == 2 && in[0] == '.' && in[1] == '.') {
            av_assert1(dst[-1] == '/');
            if (dst - root > 1) {
                /* rewind to just after the previous '/', stopping at root */
                while (dst > root) {
                    dst--;
                    if (dst[-1] == '/')
                        break;
                }
            }
            in = resume;
            continue;
        }

        if (out_end - dst < resume - in)
            return AVERROR(ENOMEM);
        memmove(dst, in, resume - in);
        dst += resume - in;
        in = resume;
    }

    *rout = dst;
    return 0;
}
/**
 * Resolve the URL rel against the URL base and write the result to buf.
 *
 * @param buf  output buffer; on success it holds the NUL-terminated
 *             result, on failure a string starting with "invalid:"
 * @param size size of buf in bytes
 * @param base base URL; NULL is treated as the empty string
 * @param rel  URL to resolve, must not be NULL (ff_url_decompose()
 *             asserts it)
 * @return 0 on success; AVERROR(ENOMEM) if size is 0 (buf is then left
 *         untouched) or the result does not fit; otherwise the negative
 *         error code returned by ff_url_decompose()
 */
int ff_make_absolute_url(char *buf, int size, const char *base,
                         const char *rel)
{
    URLComponents ub, uc;
    char *out, *out_end, *path;
    const char *keep, *base_path_end = NULL;
    int use_base_path, simplify_path = 0, ret;

    /* This is tricky.
       For HTTP, http://server/site/page + ../media/file
       should resolve into http://server/media/file
       but for filesystem access, dir/playlist + ../media/file
       should resolve into dir/../media/file
       because dir could be a symlink, and .. points to
       the actual parent of the target directory.

       We'll consider that URLs with an actual scheme and authority,
       i.e. starting with scheme://, need parent dir simplification,
       while bare paths or pseudo-URLs starting with proto: without
       the double slash do not.

       For real URLs, the processing is similar to the algorithm described
       here:
       https://tools.ietf.org/html/rfc3986#section-5
     */

    if (!size)
        return AVERROR(ENOMEM);
    out = buf;
    out_end = buf + size - 1; /* keep one byte for the terminating NUL */

    if (!base)
        base = "";
    /* The assignment has to be parenthesized: without that, ret would
       receive the result of the "< 0" comparison (0 or 1) instead of
       the error code, and a positive value would be returned on error. */
    if ((ret = ff_url_decompose(&ub, base, NULL)) < 0 ||
        (ret = ff_url_decompose(&uc, rel,  NULL)) < 0)
        goto error;

    /* Find how much of the base is kept: for each component, in order,
       if rel is still empty up to the end of that component, the base
       is kept up to the end of that same component. */
    keep = ub.url;
#define KEEP(component, also) do { \
        if (uc.url_component_end_##component == uc.url && \
            ub.url_component_end_##component > keep) { \
            keep = ub.url_component_end_##component; \
            also \
        } \
    } while (0)
    KEEP(scheme, );
    KEEP(authority_full, simplify_path = 1;);
    KEEP(path,);
    KEEP(query,);
    KEEP(fragment,);
#undef KEEP

    /* Bounded copy into the output; bails out with ENOMEM when full. */
#define COPY(start, end) do { \
        size_t len = end - start; \
        if (len > out_end - out) { \
            ret = AVERROR(ENOMEM);  \
            goto error; \
        } \
        memmove(out, start, len); \
        out += len; \
    } while (0)
    COPY(ub.url, keep);
    COPY(uc.url, uc.path);

    /* The base path is merged in only if it was not already copied as
       part of the kept prefix and rel neither carries anything before
       its path nor has an absolute path. */
    use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
    if (uc.path > uc.url)
        use_base_path = 0;
    if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/')
        use_base_path = 0;
    if (use_base_path) {
        base_path_end = ub.url_component_end_path;
        /* rel has a path: drop the last segment of the base path */
        if (URL_COMPONENT_HAVE(uc, path))
            while (base_path_end > ub.path && base_path_end[-1] != '/')
                base_path_end--;
    }
    if (keep > ub.path)
        simplify_path = 0;
    if (URL_COMPONENT_HAVE(uc, scheme))
        simplify_path = 0;
    if (URL_COMPONENT_HAVE(uc, authority))
        simplify_path = 1;
    /* No path at all, leave it */
    if (!use_base_path && !URL_COMPONENT_HAVE(uc, path))
        simplify_path = 0;

    if (simplify_path) {
        const char *root = "/";
        COPY(root, root + 1);
        path = out;
        if (use_base_path) {
            ret = append_path(path, out_end, &out, ub.path, base_path_end);
            if (ret < 0)
                goto error;
        }
        if (URL_COMPONENT_HAVE(uc, path)) {
            ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path);
            if (ret < 0)
                goto error;
        }
    } else {
        if (use_base_path)
            COPY(ub.path, base_path_end);
        COPY(uc.path, uc.url_component_end_path);
    }

    /* whatever follows the path in rel */
    COPY(uc.url_component_end_path, uc.end);
#undef COPY
    *out = 0;
    return 0;

error:
    snprintf(buf, size, "invalid:%s",
             ret == AVERROR(ENOMEM) ? "truncated" :
             ret == AVERROR(EINVAL) ? "syntax_error" : "");
    return ret;
}
AVIODirEntry *ff_alloc_dir_entry(void)
{
AVIODirEntry *entry = av_mallocz(sizeof(AVIODirEntry));
if (entry) {
entry->type = AVIO_ENTRY_UNKNOWN;
entry->size = -1;
entry->modification_timestamp = -1;
entry->access_timestamp = -1;
entry->status_change_timestamp = -1;
entry->user_id = -1;
entry->group_id = -1;
entry->filemode = -1;
}
return entry;
}