gperftools/benchmark/trivialre.h

/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
 * Copyright (c) 2024, gperftools Contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef TRIVIALRE_H_
#define TRIVIALRE_H_

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <functional>
#include <initializer_list>
#include <optional>
#include <string>
#include <string_view>
#include <utility>

namespace trivialre {

// Callback for Matcher. It receives the text left after a successful
// parse and the line-start flag that applies at that position; it
// returns true to accept the parse. See below.
using CB = std::function<bool(std::string_view, bool)>;
// Matcher is a function that gets string and invokes given callback
// with remaining text (i.e. suffix of parsed part) for each
// successful parsing. We're able to express arbitrary trees of regexp
// expressions with this simple abstraction.
using Matcher = std::function<bool(std::string_view str, bool line_start, const CB& cb)>;

// MatchSubstring returns true iff there is substring of `str' that
// matches given matcher.
//
// The matcher is tried at every offset of `str', including the
// one-past-the-end (empty) position, so matchers that accept empty
// text can still succeed at the very end.
inline bool MatchSubstring(const Matcher& m, std::string_view str) {
  const CB succeed = [](std::string_view, bool) { return true; };
  bool line_start = true;
  for (;;) {
    if (m(str, line_start, succeed)) {
      return true;
    }
    // Stop before touching str[0]: indexing or shrinking an empty
    // string_view is undefined behavior.
    if (str.empty()) {
      return false;
    }
    // The next position starts a line iff we're stepping over '\n'.
    line_start = (str.front() == '\n');
    str.remove_prefix(1);
  }
}

// Compiles regexp `str' into a Matcher. The definition is at the
// bottom of this header. Parse errors are reported through
// re_compiler::ErrorPolicy, whose NoteError prints a diagnostic to
// stderr and calls abort().
Matcher CompileREOrDie(std::string_view str);

// --- implementation ---

namespace matchers {

// MatcherBuilder groups the factory functions that create Matchers
// and combine them according to the regexp constructs they stand for
// (literal, sequence, alternative, '*', anchors, single characters).
struct MatcherBuilder {
  using Matcher = trivialre::Matcher;

  // Matcher that parses exactly the text `lit`.
  static Matcher Lit(std::string_view lit);
  // Matcher that parses `left` and then `right` on the remaining text.
  static Matcher Seq(Matcher left, Matcher right);
  // Matcher that parses every matcher of `list` in order (the
  // sequence is folded from the right for efficiency).
  static Matcher SeqMany(std::initializer_list<Matcher> list);
  // Matcher that parses either `left` or `right`.
  static Matcher Alt(Matcher left, Matcher right);
  // Matcher for the '*' operator of regexps: zero or more parsings of
  // `nested`.
  static Matcher Star(Matcher nested);
  // Matcher that consumes nothing and succeeds only when the
  // line-start flag is set.
  static Matcher LineStart();
  // Matcher that consumes nothing and succeeds only at the end of
  // the text or right before a '\n'.
  static Matcher LineEnd();

  // Matcher that parses a single character, provided pred(character)
  // is true.
  template <typename Predicate>
  static Matcher CharP(Predicate pred);

  // Matcher for the '.' operator of regexps: exactly one character
  // that is not a newline.
  static Matcher Dot() {
    return CharP([](char c) -> bool { return c != '\n'; });
  }
  // Matcher that succeeds immediately without consuming any text.
  static Matcher Any() {
    return [](std::string_view rest, bool at_line_start, const CB& next) -> bool {
      return next(rest, at_line_start);
    };
  }
};

// Returns Matcher that succeeds iff the text starts with `lit`; the
// callback then gets the text after the literal.
inline Matcher MatcherBuilder::Lit(std::string_view lit) {
  // Keep an owned copy of the literal. The returned Matcher is stored
  // and invoked later, while `lit` is only a view (the regexp compiler
  // passes slices of the pattern string it was given), so holding on
  // to the view would dangle once that buffer is gone.
  return [lit = std::string(lit)](std::string_view str, bool line_start, const CB& cb) -> bool {
    const size_t sz = lit.size();
    if (str.substr(0, sz) != std::string_view(lit)) {
      return false;
    }
    // An empty literal leaves the line-start flag as it was; otherwise
    // the flag depends on the last consumed character.
    if (sz != 0) {
      line_start = (str[sz - 1] == '\n');
    }
    str.remove_prefix(sz);
    return cb(str, line_start);
  };
}

// Returns Matcher that parses `left` and then, for every way `left`
// can succeed, parses `right` on the remaining text before handing the
// result to the caller's callback.
inline Matcher MatcherBuilder::Seq(Matcher left, Matcher right) {
  return [left = std::move(left), right = std::move(right)](std::string_view str, bool line_start, const CB& cb) -> bool {
    // The continuation is a temporary that lives only for this call to
    // `left`, and matchers in this header invoke their callback only
    // before returning. So it refers to `right` and `cb` by reference
    // (same as Star does) instead of deep-copying two std::function
    // objects on every match step.
    return left(str, line_start, [&right, &cb](std::string_view rest, bool rest_line_start) -> bool {
      return right(rest, rest_line_start, cb);
    });
  };
}

// Chains all matchers of `list` into one sequence. The chain is built
// starting from the last element, i.e. {a, b, c} becomes
// Seq(a, Seq(b, c)). An empty list gives the Any() matcher.
inline Matcher MatcherBuilder::SeqMany(std::initializer_list<Matcher> list) {
  const Matcher* const first = list.begin();
  const Matcher* cursor = list.end();
  if (cursor == first) {
    return Any();
  }
  // Seed with the last element, then prepend the others one by one.
  --cursor;
  Matcher folded = *cursor;
  while (cursor != first) {
    --cursor;
    folded = Seq(*cursor, std::move(folded));
  }
  return folded;
}

// Returns Matcher that first tries `left` and, only if that does not
// lead to an accepted parse, tries `right` on the same text.
inline Matcher MatcherBuilder::Alt(Matcher left, Matcher right) {
  return [left = std::move(left), right = std::move(right)](std::string_view str, bool line_start, const CB& cb) -> bool {
    // Short-circuit keeps `right` from running once `left` succeeded.
    return left(str, line_start, cb) || right(str, line_start, cb);
  };
}

// Returns Matcher implementing the '*' operator: zero or more
// consecutive parsings of `nested`. After each repetition count the
// caller's callback is tried first; only if it rejects do we attempt
// one more parsing of `nested`.
inline Matcher MatcherBuilder::Star(Matcher nested) {
  return [nested = std::move(nested)](std::string_view str, bool line_start, const CB& cb) -> bool {
    CB rec;
    // `rec` refers to itself, `cb` and `nested` by reference; that is
    // fine because it never outlives this invocation.
    rec = [&](std::string_view str, bool line_start) -> bool {
      if (cb(str, line_start)) {
        return true;
      }
      const size_t size_before = str.size();
      return nested(str, line_start, [&rec, size_before](std::string_view rest, bool rest_line_start) -> bool {
        // An iteration that consumed nothing brings us back to the
        // position where `cb` has just failed. Following it could
        // never produce a match and would recurse without bound (e.g.
        // for "(a*)*b" or "^*"), so reject it and let `nested`
        // backtrack to its other alternatives.
        if (rest.size() >= size_before) {
          return false;
        }
        return rec(rest, rest_line_start);
      });
    };
    return rec(str, line_start);
  };
}

// Returns Matcher that consumes exactly one character when the text is
// non-empty and pred(first character) holds. The callback gets the
// rest of the text, with the line-start flag set iff the consumed
// character was '\n'.
template <typename Predicate>
Matcher MatcherBuilder::CharP(Predicate pred) {
  return [pred = std::move(pred)](std::string_view str, bool line_start, const CB& cb) -> bool {
    if (str.empty() || !pred(str[0])) {
      return false;
    }
    const bool consumed_newline = (str[0] == '\n');
    return cb(str.substr(1), consumed_newline);
  };
}

// Returns Matcher for the '^' anchor: it consumes nothing and lets the
// parse continue only when the line-start flag is set.
inline Matcher MatcherBuilder::LineStart() {
  return [](std::string_view str, bool line_start, const CB& cb) -> bool {
    return line_start && cb(str, line_start);
  };
}

// Returns Matcher for the '$' anchor: it lets the parse continue only
// at the end of the text or right before a '\n'.
inline Matcher MatcherBuilder::LineEnd() {
  return [](std::string_view str, bool line_start, const CB& cb) -> bool {
    const bool at_line_end = str.empty() || str.front() == '\n';
    // Note that the '\n' itself is not consumed; the callback sees the
    // text unchanged.
    return at_line_end && cb(str, line_start);
  };
}

}  // namespace matchers

namespace re_compiler {

// ErrorPolicy is the default way of reporting a malformed regexp:
// print a diagnostic to stderr and abort the process.
struct ErrorPolicy {
  // Complete expression being compiled. Set by StartedParsing and used
  // by NoteError to show where in the expression parsing stopped.
  std::string_view original_str;

  // Prints `msg` together with the unparsed remainder `at`, then the
  // whole expression with a "---^" marker under the failing position,
  // and aborts. `at` is expected to be a suffix of original_str (that
  // is what the parsing methods of C pass), since its offset is
  // computed by pointer difference.
  void NoteError(std::string_view msg, std::string_view at) {
    // For our trivial implementation we're only able to crash
    fprintf(stderr, "parse error %.*s, at: %.*s\n", int(msg.size()), msg.data(), int(at.size()), at.data());
    fprintf(stderr, "expression we were parsing:\n%.*s\n", int(original_str.size()), original_str.data());
    // The marker is skipped when the position is 120 or more
    // characters into the expression.
    if (size_t diff = at.data() - original_str.data(); diff < 120) {
      fprintf(stderr, "%s^\n", std::string{}.append(diff, '-').c_str());
    }
    fflush(stderr);
    abort();
  }

  // Remembers the expression that is about to be parsed.
  void StartedParsing(std::string_view str) { original_str = str; }
};

// C is our regexp compiler. It assembles matcher tree from string
// regexp representation. Given builder is used to construct concrete
// matchers, allowing flexibility (see StringTestingBuilder).
//
// Builder must provide a Matcher type and Lit/Seq/Alt/Star/CharP/
// LineStart/LineEnd/Dot/Any functions. ErrorPolicy must provide
// StartedParsing(str) and NoteError(msg, at).
//
// '{' and '}' are treated as special characters but have no handling
// of their own, so unescaped they end up as a parse error.
template <typename Builder, typename ErrorPolicy = re_compiler::ErrorPolicy>
struct C : public ErrorPolicy {
  using Matcher = typename Builder::Matcher;
  // ParseResult is Matcher (or nothing if we parsed empty string) and
  // remaining text.
  using ParseResult = std::pair<std::optional<Matcher>, std::string_view>;

  // Only a reference is kept; the builder must outlive the compiler.
  const Builder& builder;
  explicit C(const Builder& builder) : builder(builder) {}

  // True iff `str` has a character at `index` and it equals `ch`.
  bool IsCharAt(std::string_view str, size_t index, char ch) { return index < str.size() && str[index] == ch; }

  // This is top level parser. It parses alternatives of regex runs.
  // A missing run on either side of '|' stands for the Any matcher.
  ParseResult ParseAlt(std::string_view str) {
    auto [maybe_left, str_l] = ParseRun(str);
    if (IsCharAt(str_l, 0, '|')) {
      if (!maybe_left) {
        maybe_left.emplace(builder.Any());
      }
      auto [maybe_right, str_r] = ParseAlt(str_l.substr(1));
      if (!maybe_right) {
        maybe_right.emplace(builder.Any());
      }
      return {builder.Alt(std::move(maybe_left.value()), std::move(maybe_right.value())), str_r};
    }
    return {std::move(maybe_left), str_l};
  }

  using FnPred = std::function<bool(char)>;
  // Extends *pred so that it also accepts characters accepted by
  // `body` (an empty *pred simply becomes `body`).
  template <typename Body>
  void AddPred(FnPred* pred, Body body) {
    if (!*pred) {
      *pred = std::move(body);
    } else {
      *pred = [old = std::move(*pred), body = std::move(body)](char ch) { return old(ch) || body(ch); };
    }
  }

  // Parses [<set-of-chars>] expression. Note: str is just past
  // opening '[' character). Like every other term, the char-set may be
  // followed by '*', '+' or '?' and by the rest of the run, so the
  // result covers the whole remaining run.
  ParseResult CompileCharSet(std::string_view str) {
    bool negated = false;
    if (IsCharAt(str, 0, '^')) {
      negated = true;
      str.remove_prefix(1);
    }
    FnPred pred;

    while (str.size() > 0 && str[0] != ']') {
      // A backslash escapes the character after it, so it never acts
      // as the lower bound of a range ("[\-a]" is '-' or 'a').
      if (str[0] != '\\' && str.size() > 2 && str[1] == '-' && str[2] != ']') {
        // range
        AddPred(&pred, [a = str[0], b = str[2]](char ch) { return a <= ch && ch <= b; });
        str.remove_prefix(3);
        continue;
      }

      char ch = str[0];

      if (ch == '\\') {
        if (str.size() == 1) {
          // Dangling backslash; reported below as missing ']'.
          break;
        }
        str.remove_prefix(1);
        ch = str[0];
      }

      AddPred(&pred, [ch](char candidate) { return ch == candidate; });

      str.remove_prefix(1);
    }

    if (!IsCharAt(str, 0, ']')) {
      ErrorPolicy::NoteError("failed to spot ] at the end of char-set term", str);
      return {{}, ""};
    }

    if (!pred) {
      // "[]" accepts no character at all, "[^]" accepts any character.
      pred = [negated](char) { return negated; };
    } else if (negated) {
      pred = [pred = std::move(pred)](char candidate) { return !pred(candidate); };
    }
    // Continue with a possible '*'/'+'/'?' and the rest of the run.
    // Returning right here would leave anything after ']' unparsed and
    // make e.g. "[ab]c" or "[a-z]*" fail to compile.
    return MaybeStar(builder.CharP(std::move(pred)), str.substr(1));
  }

  // Parses sequence of literals and groups and groups of '*' and '+'
  // expressions. Stops (returning the unparsed rest) at the first
  // character it cannot handle, e.g. '|' or ')'.
  ParseResult ParseRun(std::string_view str) {
    if (str.size() == 0) {
      return {{}, str};
    }

    static constexpr char kSpecials[] = "()[]{}.*|\\?+^$";
    static constexpr const char* kSpecialsEnd = kSpecials + sizeof(kSpecials) - 1;

    // Length of the leading run of ordinary (non-special) characters.
    size_t i;
    for (i = 0; i < str.size(); i++) {
      char ch = str[i];
      if (std::find(kSpecials, kSpecialsEnd, ch) != kSpecialsEnd) {
        break;
      }
    }

    if (i) {
      // we got literal
      if (i > 1 && (IsCharAt(str, i, '*') || IsCharAt(str, i, '+') || IsCharAt(str, i, '?'))) {
        // only last char of literal char runs will be '*'-ed. So lets
        // be careful
        i--;
      }
      // we got literal. Lets try to concat it with possible '*' and next run
      return MaybeStar(builder.Lit(str.substr(0, i)), str.substr(i));
    }

    char first = str[0];
    if (first == '\\' && str.size() > 1) {
      std::string_view literal;
      if (str[1] == 'n') {
        literal = "\n";
      } else if (str[1] == 't') {
        literal = "\t";
      } else if (str[1] == ' ') {
        literal = " ";
      } else if (auto place = std::find(kSpecials, kSpecialsEnd, str[1]); place != kSpecialsEnd) {
        // Escaped special character. The view points into the static
        // kSpecials array.
        literal = {place, 1};
      } else {
        // Failure to parse
        return {{}, str};
      }
      return MaybeStar(builder.Lit(literal), str.substr(2));
    }
    if (first == '^') {
      return MaybeStar(builder.LineStart(), str.substr(1));
    }
    if (first == '$') {
      return MaybeStar(builder.LineEnd(), str.substr(1));
    }
    if (first == '.') {
      return MaybeStar(builder.Dot(), str.substr(1));
    }
    if (first == '[') {
      // CompileCharSet also parses whatever follows the closing ']'.
      return CompileCharSet(str.substr(1));
    }

    if (first == '(') {
      auto [maybe_nested, new_str] = ParseAlt(str.substr(1));

      if (!IsCharAt(new_str, 0, ')')) {
        ErrorPolicy::NoteError("failed to spot ) at the end of group term", new_str);
        return {{}, ""};
      }

      if (maybe_nested) {
        return MaybeStar(std::move(maybe_nested.value()), new_str.substr(1));
      }

      // empty group. We just ignore it. But lets also handle possible
      // '*' after it (which we also eat)
      if (IsCharAt(new_str, 1, '*')) {
        new_str.remove_prefix(1);
      }
      return ParseRun(new_str.substr(1));
    }

    // Likely '|', ')' or parse error
    return {{}, str};
  }

  // Sequences left then right (or just left if right is missing).
  Matcher MaybeSeq(Matcher left, std::optional<Matcher> right) {
    if (right) {
      return builder.Seq(std::move(left), std::move(*right));
    }
    return left;
  }

  // Builds matcher for '+' expression: one `nested` followed by any
  // number of further `nested`.
  Matcher MakePlus(Matcher nested) {
    Matcher repeated = builder.Star(nested);
    return builder.Seq(std::move(nested), std::move(repeated));
  }

  // Given regex matcher, check if it is followed by '*', '+' or '?'
  // and wrap it if needed, then continue gathering sequence of matches
  // (see ParseRun)
  ParseResult MaybeStar(Matcher left, std::string_view str) {
    // We don't produce actual matching, so there is no difference
    // between lazy and eager matching. But lets support the syntax
    // anyways, by ignoring the laziness marker ('?' right after '*' or
    // '+').
    auto skip_lazy_marker = [&] {
      if (IsCharAt(str, 0, '?')) {
        str.remove_prefix(1);
      }
    };

    if (IsCharAt(str, 0, '*')) {
      left = builder.Star(std::move(left));
      str.remove_prefix(1);
      skip_lazy_marker();
    }
    if (IsCharAt(str, 0, '+')) {
      left = MakePlus(std::move(left));
      str.remove_prefix(1);
      skip_lazy_marker();
    }
    if (IsCharAt(str, 0, '?')) {
      // Optional term: either nothing or `left`.
      left = builder.Alt(builder.Any(), std::move(left));
      str.remove_prefix(1);
    }
    auto [maybe_right, new_str] = ParseRun(str);
    return {MaybeSeq(std::move(left), std::move(maybe_right)), new_str};
  }

  // Compiles the whole expression `str`. Anything left unparsed is
  // reported via ErrorPolicy::NoteError. An empty expression compiles
  // to the Any matcher.
  Matcher CompileOrDie(std::string_view str) {
    ErrorPolicy::StartedParsing(str);
    auto [maybe_m, new_str] = ParseAlt(str);
    if (!new_str.empty()) {
      ErrorPolicy::NoteError("failed to parse entire re string", new_str);
    }
    if (!maybe_m) {
      return builder.Any();
    }
    return std::move(maybe_m.value());
  }
};

}  // namespace re_compiler

// Compiles regexp `str` into a Matcher using the default
// MatcherBuilder and the default (aborting) error policy.
inline Matcher CompileREOrDie(std::string_view str) {
  const matchers::MatcherBuilder builder{};
  re_compiler::C<matchers::MatcherBuilder> compiler(builder);
  return compiler.CompileOrDie(str);
}

}  // namespace trivialre

#endif  // TRIVIALRE_H_
improve benchmarks facility We now support a set of command line flags similar to "abseil" benchmark thingy. I.e. to let people specify a subset of benchmarks or run them longer/shorter as needed. This commit also includes small, portable and very simple regexp facility. It isn't good enough for some production use, but it is plenty good for some testing uses or benchmark selection. 2024-09-07 18:39:25 +00:00			`/* -- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil --`
			`* Copyright (c) 2024, gperftools Contributors`
			`* All rights reserved.`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions are`
			`* met:`
			`*`
			`* * Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* * Redistributions in binary form must reproduce the above`
			`* copyright notice, this list of conditions and the following disclaimer`
			`* in the documentation and/or other materials provided with the`
			`* distribution.`
			`* * Neither the name of Google Inc. nor the names of its`
			`* contributors may be used to endorse or promote products derived from`
			`* this software without specific prior written permission.`
			`*`
			`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`
			`#ifndef TRIVIALRE_H_`
			`#define TRIVIALRE_H_`

			`#include <stdio.h>`
			`#include <stdlib.h>`

			`#include <algorithm>`
			`#include <functional>`
			`#include <initializer_list>`
			`#include <optional>`
			`#include <string>`
			`#include <string_view>`
			`#include <utility>`

			`namespace trivialre {`

			`// Callback for Matcher. See below.`
			`using CB = std::function<bool(std::string_view, bool)>;`
			`// Matcher is a function that gets string and invokes given callback`
			`// with remaining text (i.e. suffix of parsed part) for each`
			`// successful parsing. We're able to express arbitrary trees of regexp`
			`// expressions with this simple abstraction.`
			`using Matcher = std::function<bool(std::string_view str, bool line_start, const CB& cb)>;`

			// MatchSubstring returns true iff there is substring of `str' that
			`// matches given matcher.`
			`inline bool MatchSubstring(const Matcher& m, std::string_view str) {`
			`size_t sz = str.size();`
			`CB succeed = [](std::string_view str, bool line_start) { return true; };`
			`bool line_start = true;`
			`for (size_t i = 0; i <= sz; i++) {`
			`if (m(str, line_start, succeed)) {`
			`return true;`
			`}`
			`line_start = (str[0] == '\n');`
			`str.remove_prefix(1);`
			`}`
			`return false;`
			`}`

			`Matcher CompileREOrDie(std::string_view str);`

			`// --- implementation ---`

			`namespace matchers {`

			`// MatcherBuilder is a collection of functions that combine Matchers`
			`// according to various kinds of regex structures (sequence,`
			`// alternatives, '*' etc).`
			`struct MatcherBuilder {`
			`using Matcher = trivialre::Matcher;`

			`// Returns Matcher that parses given literal.`
			`static Matcher Lit(std::string_view lit);`
			`// Returns Matcher that left then right.`
			`static Matcher Seq(Matcher left, Matcher right);`
			`// Returns Matcher that given parses given sequence of matchers`
			`// (folding from right for efficiency).`
			`static Matcher SeqMany(std::initializer_list<Matcher> list);`
			`// Returns Matcher that parses either left or right.`
			`static Matcher Alt(Matcher left, Matcher right);`
			`// Returns Matcher that matches 0 or more parsings of given nested`
			`// matcher. I.e. implements '*' operator of regexps.`
			`static Matcher Star(Matcher nested);`
			`static Matcher LineStart();`
			`static Matcher LineEnd();`

			`// Returns Matcher that parses one character iff pred(character) is`
			`// true.`
			`template <typename Predicate>`
			`static Matcher CharP(Predicate pred);`

			`// Dot matcher implements '.' operator of regexps. I.e. matches`
			`// exactly one non-newline character.`
			`static Matcher Dot() {`
			`return CharP([](char ch) { return ch != '\n'; });`
			`}`
			`// Any matcher immediately suceeds consuming no text at all.`
			`static Matcher Any() {`
			`return [](std::string_view str, bool line_start, const CB& cb) { return cb(str, line_start); };`
			`}`
			`};`

			`inline Matcher MatcherBuilder::Lit(std::string_view lit) {`
			`return [=](std::string_view str, bool line_start, const CB& cb) -> bool {`
			`auto sz = lit.size();`
			`if (str.substr(0, sz) != lit) {`
			`return false;`
			`}`
			`line_start = (sz == 0) ? line_start : (str[sz - 1] == '\n');`
			`str.remove_prefix(sz);`
			`// printf("Matched prefix %.s (rest: %.s)\n",`
			`// static_cast<int>(lit.size()), lit.data(),`
			`// std::max<int>(str.size(), 6), str.data());`
			`return cb(str, line_start);`
			`};`
			`}`

			`inline Matcher MatcherBuilder::Seq(Matcher left, Matcher right) {`
			`return [left = std::move(left), right = std::move(right)](std::string_view str, bool line_start, const CB& cb) -> bool {`
			`return left(str, line_start, [=](std::string_view str, bool line_start) { return right(str, line_start, cb); });`
			`};`
			`}`

			`inline Matcher MatcherBuilder::SeqMany(std::initializer_list<Matcher> list) {`
			`if (std::empty(list)) {`
			`return Any();`
			`}`
			`auto it = std::rbegin(list);`
			`Matcher rv = *it++;`
			`while (it != std::rend(list)) {`
			`rv = Seq(*it++, std::move(rv));`
			`}`
			`return rv;`
			`}`

			`inline Matcher MatcherBuilder::Alt(Matcher left, Matcher right) {`
			`return [left = std::move(left), right = std::move(right)](std::string_view str, bool line_start, const CB& cb) -> bool {`
			`if (left(str, line_start, cb)) {`
			`return true;`
			`}`
			`return right(str, line_start, cb);`
			`};`
			`}`

			`inline Matcher MatcherBuilder::Star(Matcher nested) {`
			`return [nested = std::move(nested)](std::string_view str, bool line_start, const CB& cb) -> bool {`
			`CB rec;`
			`rec = [&](std::string_view str, bool line_start) -> bool {`
			`if (cb(str, line_start)) {`
			`return true;`
			`}`
			`return nested(str, line_start, rec);`
			`};`
			`return rec(str, line_start);`
			`};`
			`}`

			`template <typename Predicate>`
			`Matcher MatcherBuilder::CharP(Predicate pred) {`
			`return [pred = std::move(pred)](std::string_view str, bool line_start, const CB& cb) -> bool {`
			`if (str.size() && pred(str[0])) {`
			`bool line_start = (str[0] == '\n');`
			`str.remove_prefix(1);`
			`return cb(str, line_start);`
			`}`
			`return false;`
			`};`
			`}`

			`inline Matcher MatcherBuilder::LineStart() {`
			`return [](std::string_view str, bool line_start, const CB& cb) {`
			`if (!line_start) return false;`
			`return cb(str, line_start);`
			`};`
			`}`

			`inline Matcher MatcherBuilder::LineEnd() {`
			`return [](std::string_view str, bool line_start, const CB& cb) {`
			`if (str.size() && str[0] != '\n') {`
			`return false;`
			`}`
			`// Yes, line-end doesn't consume the \n character.`
			`return cb(str, line_start);`
			`};`
			`}`

			`} // namespace matchers`

			`namespace re_compiler {`

			`struct ErrorPolicy {`
			`std::string_view original_str;`

			`void NoteError(std::string_view msg, std::string_view at) {`
			`// For our trivial implementation we're only able to crash`
			`fprintf(stderr, "parse error %.s, at: %.s\n", int(msg.size()), msg.data(), int(at.size()), at.data());`
			`fprintf(stderr, "expression we were parsing:\n%.*s\n", int(original_str.size()), original_str.data());`
			`if (size_t diff = at.data() - original_str.data(); diff < 120) {`
			`fprintf(stderr, "%s^\n", std::string{}.append(diff, '-').c_str());`
			`}`
			`fflush(stderr);`
			`abort();`
			`}`

			`void StartedParsing(std::string_view str) { original_str = str; }`
			`};`

			`// C is our regexp compiler. It assembles matcher tree from string`
			`// regexp representation. Given builder is used to construct concrete`
			`// matchers, allowing flexibility (see StringTestingBuilder).`
			`template <typename Builder, typename ErrorPolicy = re_compiler::ErrorPolicy>`
			`struct C : public ErrorPolicy {`
			`using Matcher = typename Builder::Matcher;`
			`// ParseResult is Matcher (or nothing if we parsed empty string) and`
			`// remaining text.`
			`using ParseResult = std::pair<std::optional<Matcher>, std::string_view>;`

			`const Builder& builder;`
			`explicit C(const Builder& builder) : builder(builder) {}`

			`bool IsCharAt(std::string_view str, size_t index, char ch) { return index < str.size() && str[index] == ch; }`

			`// This is top level parser. It parses alternatives of regex runs.`
			`ParseResult ParseAlt(std::string_view str) {`
			`auto [maybe_left, str_l] = ParseRun(str);`
			`if (IsCharAt(str_l, 0, '\|')) {`
			`if (!maybe_left) {`
			`maybe_left.emplace(builder.Any());`
			`}`
			`auto [maybe_right, str_r] = ParseAlt(str_l.substr(1));`
			`if (!maybe_right) {`
			`maybe_right.emplace(builder.Any());`
			`}`
			`return {builder.Alt(std::move(maybe_left.value()), std::move(maybe_right.value())), str_r};`
			`}`
			`return {std::move(maybe_left), str_l};`
			`}`

			`using FnPred = std::function<bool(char)>;`
			`template <typename Body>`
			`void AddPred(FnPred* pred, Body body) {`
			`if (!*pred) {`
			`*pred = body;`
			`} else {`
			`pred = [old = std::move(pred), body = std::move(body)](char ch) { return old(ch) \|\| body(ch); };`
			`}`
			`}`

			`// Parses [<set-of-chars>] expression. Note: str is just past`
			`// opening '[' character)`
			`ParseResult CompileCharSet(std::string_view str) {`
			`bool negated = false;`
			`if (IsCharAt(str, 0, '^')) {`
			`negated = true;`
			`str.remove_prefix(1);`
			`}`
			`FnPred pred;`

			`while (str.size() > 0 && str[0] != ']') {`
			`if (str.size() > 2 && str[1] == '-' && str[2] != ']') {`
			`// range`
			`AddPred(&pred, [a = str[0], b = str[2]](char ch) { return a <= ch && ch <= b; });`
			`str.remove_prefix(3);`
			`continue;`
			`}`

			`char ch = str[0];`

			`if (ch == '\\') {`
			`if (str.size() == 1) {`
			`break;`
			`}`
			`str.remove_prefix(1);`
			`ch = str[0];`
			`}`

			`AddPred(&pred, [ch](char candidate) { return ch == candidate; });`

			`str.remove_prefix(1);`
			`}`

			`if (!IsCharAt(str, 0, ']')) {`
			`ErrorPolicy::NoteError("failed to spot ] at the end of char-set term", str);`
			`return {{}, ""};`
			`}`

			`if (!pred) {`
			`pred = [negated](char candidate) { return negated; };`
			`} else if (negated) {`
			`pred = [pred = std::move(pred)](char candidate) { return !pred(candidate); };`
			`}`
			`return {builder.CharP(std::move(pred)), str.substr(1)};`
			`}`

			`// Parses sequence of literals and groups and groups of '*' and '+'`
			`// expressions.`
			`ParseResult ParseRun(std::string_view str) {`
			`if (str.size() == 0) {`
			`return {{}, str};`
			`}`

			`static constexpr char kSpecials[] = "()[]{}.*\|\\?+^$";`
			`static constexpr const char* kSpecialsEnd = kSpecials + sizeof(kSpecials) - 1;`

			`size_t i;`
			`for (i = 0; i < str.size(); i++) {`
			`char ch = str[i];`
			`if (std::find(kSpecials, kSpecialsEnd, ch) != kSpecialsEnd) {`
			`break;`
			`}`
			`}`

			`if (i) {`
			`// we got literal`
			`if (i > 1 && (IsCharAt(str, i, '*') \|\| IsCharAt(str, i, '+') \|\| IsCharAt(str, i, '?'))) {`
			`// only last char of literal char runs will be '*'-ed. So lets`
			`// be careful`
			`i--;`
			`}`
			`// we got literal. Lets try to concat it with possible '*' and next run`
			`return MaybeStar(builder.Lit(str.substr(0, i)), str.substr(i));`
			`}`

			`char first = str[0];`
			`if (first == '\\' && str.size() > 1) {`
			`std::string_view literal;`
			`if (str[1] == 'n') {`
			`literal = "\n";`
			`} else if (str[1] == 't') {`
			`literal = "\t";`
			`} else if (str[1] == ' ') {`
			`literal = " ";`
			`} else if (auto place = std::find(kSpecials, kSpecialsEnd, str[1]); place != kSpecialsEnd) {`
			`literal = {place, 1};`
			`} else {`
			`// Failure to parse`
			`return {{}, str};`
			`}`
			`return MaybeStar(builder.Lit(literal), str.substr(2));`
			`}`
			`if (first == '^') {`
			`return MaybeStar(builder.LineStart(), str.substr(1));`
			`}`
			`if (first == '$') {`
			`return MaybeStar(builder.LineEnd(), str.substr(1));`
			`}`
			`if (first == '.') {`
			`return MaybeStar(builder.Dot(), str.substr(1));`
			`}`
			`if (first == '[') {`
			`return CompileCharSet(str.substr(1));`
			`}`

			`if (first == '(') {`
			`auto [maybe_nested, new_str] = ParseAlt(str.substr(1));`

			`if (!IsCharAt(new_str, 0, ')')) {`
			`ErrorPolicy::NoteError("failed to spot ) at the end of group term", new_str);`
			`return {{}, ""};`
			`}`

			`if (maybe_nested) {`
			`return MaybeStar(std::move(maybe_nested.value()), new_str.substr(1));`
			`}`

			`// empty group. We just ignore it. But lets also handle possible`
			`// '*' after it (which we also eat)`
			`if (IsCharAt(new_str, 1, '*')) {`
			`new_str.remove_prefix(1);`
			`}`
			`return ParseRun(new_str.substr(1));`
			`}`

			`// Likely '\|', ')' or parse error`
			`return {{}, str};`
			`}`

			`// Sequences left then right or just left if right is missing).`
			`Matcher MaybeSeq(Matcher left, std::optional<Matcher> right) {`
			`if (right) {`
			`return builder.Seq(std::move(left), std::move(right.value()));`
			`}`
			`return left;`
			`}`

			`// Builds matcher for '+' expression.`
			`Matcher MakePlus(Matcher nested) { return builder.Seq(nested, builder.Star(nested)); }`

			`// Given regex matcher, check if it is followed by '*' or '+' and`
			`// wrap it if needed, then continue gathering sequence of matches`
			`// (see ParseRun)`
			`ParseResult MaybeStar(Matcher left, std::string_view str) {`
			`if (IsCharAt(str, 0, '*')) {`
			`left = builder.Star(std::move(left));`
			`str.remove_prefix(1);`
			`if (IsCharAt(str, 0, '?')) {`
			`// We don't produce actual matching, so there is not`
			`// difference between lazy and eager matching. But lets`
			`// support the syntax anyways, by ignoring lazyness marker`
			`str.remove_prefix(1);`
			`}`
			`}`
			`if (IsCharAt(str, 0, '+')) {`
			`left = MakePlus(std::move(left));`
			`str.remove_prefix(1);`
			`if (IsCharAt(str, 0, '?')) {`
			`// We don't produce actual matching, so there is not`
			`// difference between lazy and eager matching. But lets`
			`// support the syntax anyways, by ignoring lazyness marker`
			`str.remove_prefix(1);`
			`}`
			`}`
			`if (IsCharAt(str, 0, '?')) {`
			`left = builder.Alt(builder.Any(), std::move(left));`
			`str.remove_prefix(1);`
			`}`
			`auto [maybe_right, new_str] = ParseRun(str);`
			`return {MaybeSeq(left, std::move(maybe_right)), new_str};`
			`}`

			`Matcher CompileOrDie(std::string_view str) {`
			`ErrorPolicy::StartedParsing(str);`
			`auto [maybe_m, new_str] = ParseAlt(str);`
			`if (!new_str.empty()) {`
			`ErrorPolicy::NoteError("failed to parse entire re string", new_str);`
			`}`
			`if (!maybe_m) {`
			`return builder.Any();`
			`}`
			`return maybe_m.value();`
			`}`
			`};`

			`} // namespace re_compiler`

			`inline Matcher CompileREOrDie(std::string_view str) { return re_compiler::C<matchers::MatcherBuilder>({}).CompileOrDie(str); }`

			`} // namespace trivialre`

			`#endif // TRIVIALRE_H_`