Refactor link parsing regexes to use named groups

For the sake of readability and consistency, and to make further
changes easier, introduce the named groups (?<text>) and (?<url>) to
all link parsing regexes that capture the desired link text and
(optionally) the URL.

The introduction of the named groups additionally simplifies
handleMatches() and makes all calls to it consistent.
This commit is contained in:
Bartłomiej Dach 2019-09-03 23:56:07 +02:00
parent a8f16503e2
commit 24d4f0372c
1 changed file with 9 additions and 9 deletions

View File

@@ -11,16 +11,16 @@ namespace osu.Game.Online.Chat
public static class MessageFormatter
{
// [[Performance Points]] -> wiki:Performance Points (https://osu.ppy.sh/wiki/Performance_Points)
private static readonly Regex wiki_regex = new Regex(@"\[\[([^\]]+)\]\]");
private static readonly Regex wiki_regex = new Regex(@"\[\[(?<text>[^\]]+)\]\]");
// (test)[https://osu.ppy.sh/b/1234] -> test (https://osu.ppy.sh/b/1234)
private static readonly Regex old_link_regex = new Regex(@"\(([^\)]*)\)\[([a-z]+://[^ ]+)\]");
private static readonly Regex old_link_regex = new Regex(@"\((?<text>[^\)]*)\)\[(?<url>[a-z]+://[^ ]+)\]");
// [https://osu.ppy.sh/b/1234 Beatmap [Hard] (poop)] -> Beatmap [Hard] (poop) (https://osu.ppy.sh/b/1234)
private static readonly Regex new_link_regex = new Regex(@"\[([a-z]+://[^ ]+) ((((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]");
private static readonly Regex new_link_regex = new Regex(@"\[(?<url>[a-z]+://[^ ]+) (?<text>(((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]");
// [test](https://osu.ppy.sh/b/1234) -> test (https://osu.ppy.sh/b/1234) aka correct markdown format
private static readonly Regex markdown_link_regex = new Regex(@"\[([^\]]*)\]\(([a-z]+://[^ ]+)\)");
private static readonly Regex markdown_link_regex = new Regex(@"\[(?<text>[^\]]*)\]\((?<url>[a-z]+://[^ ]+)\)");
// advanced, RFC-compatible regular expression that matches any possible URL, *but* allows certain invalid characters that are widely used
// This is in the format (<required>, [optional]):
@@ -59,13 +59,13 @@ private static void handleMatches(Regex regex, string display, string link, Mess
var displayText = string.Format(display,
m.Groups[0],
m.Groups.Count > 1 ? m.Groups[1].Value : "",
m.Groups.Count > 2 ? m.Groups[2].Value : "").Trim();
m.Groups["text"].Value,
m.Groups["url"].Value).Trim();
var linkText = string.Format(link,
m.Groups[0],
m.Groups.Count > 1 ? m.Groups[1].Value : "",
m.Groups.Count > 2 ? m.Groups[2].Value : "").Trim();
m.Groups["text"].Value,
m.Groups["url"].Value).Trim();
if (displayText.Length == 0 || linkText.Length == 0) continue;
@@ -188,7 +188,7 @@ private static MessageFormatterResult format(string toFormat, int startIndex = 0
var result = new MessageFormatterResult(toFormat);
// handle the [link display] format
handleMatches(new_link_regex, "{2}", "{1}", result, startIndex, escapeChars: new[] { '[', ']' });
handleMatches(new_link_regex, "{1}", "{2}", result, startIndex, escapeChars: new[] { '[', ']' });
// handle the standard markdown []() format
handleMatches(markdown_link_regex, "{1}", "{2}", result, startIndex);