/*
This file is part of Telegram Desktop,
the official desktop application for the Telegram messaging service.

For license and copyright information please follow this link:
https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
*/
#include "codegen/common/basic_tokenized_file.h"

#include "codegen/common/logging.h"
#include "codegen/common/clean_file_reader.h"
#include "codegen/common/checked_utf8_string.h"

using Token = codegen::common::BasicTokenizedFile::Token;
using Type = Token::Type;

namespace codegen {
namespace common {
namespace {

constexpr int kErrorUnterminatedStringLiteral = 201;
constexpr int kErrorIncorrectUtf8String = 202;
constexpr int kErrorIncorrectToken = 203;
constexpr int kErrorUnexpectedToken = 204;

bool isDigitChar(char ch) {
	return (ch >= '0') && (ch <= '9');
}

bool isNameChar(char ch) {
	return isDigitChar(ch) || ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || (ch == '_');
}

bool isWhitespaceChar(char ch) {
	return (ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t');
}

Token invalidToken() {
	return { Type::Invalid, QString(), ConstUtf8String(nullptr, 0), false };
}

} // namespace

BasicTokenizedFile::BasicTokenizedFile(const QString &filepath) : reader_(filepath) {
}

BasicTokenizedFile::BasicTokenizedFile(const QByteArray &content, const QString &filepath) : reader_(content, filepath) {
}

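// Moves the cursor one token back so the last returned token can be read again.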
bool BasicTokenizedFile::putBack() {
	if (currentToken_ > 0) {
		--currentToken_;
		return true;
	}
	return false;
}

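// Returns the next token of any type, reading a fresh one from the file when the cached list is exhausted.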
Token BasicTokenizedFile::getAnyToken() {
	if (currentToken_ >= tokens_.size()) {
		if (readToken() == Type::Invalid) {
			return invalidToken();
		}
	}
	return tokens_.at(currentToken_++);
}

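// Returns the next token only if it has the requested type; otherwise the token is put back and an invalid token is returned.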
Token BasicTokenizedFile::getToken(Type typeCondition) {
	if (auto token = getAnyToken()) {
		if (token.type == typeCondition) {
			return token;
		}
		putBack();
	}
	return invalidToken();
}

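// Reads the next token, merging adjacent Int and Dot tokens into a single Double token.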
Type BasicTokenizedFile::readToken() {
	auto result = readOneToken(StartWithWhitespace::Allow);

	// Try to read double token.
	if (result == Type::Int) {
		if (readOneToken(StartWithWhitespace::Deny) == Type::Dot) {
			// We got int and dot, so it is double already.
			result = uniteLastTokens(Type::Double);

			// Try to read one more int (after dot).
			if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
				result = uniteLastTokens(Type::Double);
			}
		}
	} else if (result == Type::Dot) {
		if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
			// We got dot and int, so it is double.
			result = uniteLastTokens(Type::Double);
		}
	}
	return result;
}

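// Reads a single token; with StartWithWhitespace::Deny the token is rejected if whitespace precedes it.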
Type BasicTokenizedFile::readOneToken(StartWithWhitespace condition) {
	skipWhitespaces();
	if (tokenStartWhitespace_ && condition == StartWithWhitespace::Deny) {
		return Type::Invalid;
	}
	if (reader_.atEnd()) {
		return Type::Invalid;
	}

	auto ch = reader_.currentChar();
	if (ch == '"') {
		return readString();
	} else if (isNameChar(ch)) {
		return readNameOrNumber();
	}
	return readSingleLetter();
}

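// Stores a token covering the source bytes from tokenStart_ up to the current reader position.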
Type BasicTokenizedFile::saveToken(Type type, const QString &value) {
	ConstUtf8String original = { tokenStart_, reader_.currentPtr() };
	tokens_.push_back({ type, value, original, tokenStartWhitespace_ });
	return type;
}

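// Merges the two most recently stored tokens into one token of the given type.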
Type BasicTokenizedFile::uniteLastTokens(Type type) {
	auto size = tokens_.size();
	if (size < 2) {
		return Type::Invalid;
	}

	auto &token(tokens_[size - 2]);
	auto originalFrom = token.original.data();
	auto originalTill = tokens_.back().original.end();
	token.type = type;
	token.original = { originalFrom, originalTill };
	token.value += tokens_.back().value;
	tokens_.pop_back();
	return type;
}

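// Returns the trimmed UTF-8 text of the "//" comment recorded for the current line.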
QString BasicTokenizedFile::getCurrentLineComment() {
	if (lineNumber_ > singleLineComments_.size()) {
		reader_.logError(kErrorInternal, lineNumber_) << "internal tokenizer error (line number larger than comments list size).";
		failed_ = true;
		return QString();
	}
	auto commentBytes = singleLineComments_[lineNumber_ - 1].mid(2); // Skip "//"
	CheckedUtf8String comment(commentBytes);
	if (!comment.isValid()) {
		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string in the comment.";
		failed_ = true;
		return QString();
	}
	return comment.toString().trimmed();
}

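// Reads a maximal run of name characters; a digits-only run becomes an Int token, anything else a Name token.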
Type BasicTokenizedFile::readNameOrNumber() {
	while (!reader_.atEnd()) {
		if (!isDigitChar(reader_.currentChar())) {
			break;
		}
		reader_.skipChar();
	}
	bool onlyDigits = true;
	while (!reader_.atEnd()) {
		if (!isNameChar(reader_.currentChar())) {
			break;
		}
		onlyDigits = false;
		reader_.skipChar();
	}
	return saveToken(onlyDigits ? Type::Int : Type::Name);
}

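// Reads a double-quoted string literal, expanding the \n, \t, \" and \\ escape sequences.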
Type BasicTokenizedFile::readString() {
	reader_.skipChar();
	auto offset = reader_.currentPtr();

	QByteArray value;
	while (!reader_.atEnd()) {
		auto ch = reader_.currentChar();
		if (ch == '"') {
			if (reader_.currentPtr() > offset) {
				value.append(offset, reader_.currentPtr() - offset);
			}
			break;
		}
		if (ch == '\n') {
			reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
			failed_ = true;
			return Type::Invalid;
		}
		if (ch == '\\') {
			if (reader_.currentPtr() > offset) {
				value.append(offset, reader_.currentPtr() - offset);
			}
			reader_.skipChar();
			ch = reader_.currentChar();
			if (reader_.atEnd() || ch == '\n') {
				reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
				failed_ = true;
				return Type::Invalid;
			}
			offset = reader_.currentPtr() + 1;
			if (ch == 'n') {
				value.append('\n');
			} else if (ch == 't') {
				value.append('\t');
			} else if (ch == '"') {
				value.append('"');
			} else if (ch == '\\') {
				value.append('\\');
			}
		}
		reader_.skipChar();
	}
	if (reader_.atEnd()) {
		reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
		failed_ = true;
		return Type::Invalid;
	}
	CheckedUtf8String checked(value);
	if (!checked.isValid()) {
		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string literal.";
		failed_ = true;
		return Type::Invalid;
	}
	reader_.skipChar();
	return saveToken(Type::String, checked.toString());
}

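// Reads a one-character token by looking the character up in the singleLetterTokens_ map.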
Type BasicTokenizedFile::readSingleLetter() {
	auto type = singleLetterTokens_.value(reader_.currentChar(), Type::Invalid);
	if (type == Type::Invalid) {
		reader_.logError(kErrorIncorrectToken, lineNumber_) << "incorrect token '" << reader_.currentChar() << "'";
		return Type::Invalid;
	}

	reader_.skipChar();
	return saveToken(type);
}

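// Skips whitespace before the next token, counting newlines and remembering whether any whitespace was skipped.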
void BasicTokenizedFile::skipWhitespaces() {
	if (reader_.atEnd()) return;

	auto ch = reader_.currentChar();
	tokenStartWhitespace_ = isWhitespaceChar(ch);
	if (tokenStartWhitespace_) {
		do {
			if (ch == '\n') {
				++lineNumber_;
			}
			reader_.skipChar();
			ch = reader_.currentChar();
		} while (!reader_.atEnd() && isWhitespaceChar(ch));
	}
	tokenStart_ = reader_.currentPtr();
}

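// Writes a human-readable name of a token type to the log stream.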
LogStream operator<<(LogStream &&stream, BasicTokenizedFile::Token::Type type) {
	const char *value = "'invalid'";
	switch (type) {
	case Type::Invalid: break;
	case Type::Int: value = "'int'"; break;
	case Type::Double: value = "'double'"; break;
	case Type::String: value = "'string'"; break;
	case Type::LeftParenthesis: value = "'('"; break;
	case Type::RightParenthesis: value = "')'"; break;
	case Type::LeftBrace: value = "'{'"; break;
	case Type::RightBrace: value = "'}'"; break;
	case Type::LeftBracket: value = "'['"; break;
	case Type::RightBracket: value = "']'"; break;
	case Type::Colon: value = "':'"; break;
	case Type::Semicolon: value = "';'"; break;
	case Type::Comma: value = "','"; break;
	case Type::Dot: value = "'.'"; break;
	case Type::Number: value = "'#'"; break;
	case Type::Plus: value = "'+'"; break;
	case Type::Minus: value = "'-'"; break;
	case Type::Equals: value = "'='"; break;
	case Type::Name: value = "'identifier'"; break;
	}
	return std::forward<LogStream>(stream) << value;
}

LogStream BasicTokenizedFile::logError(int code) const {
	return reader_.logError(code, lineNumber_);
}

LogStream BasicTokenizedFile::logErrorUnexpectedToken() const {
	if (currentToken_ < tokens_.size()) {
		auto token = tokens_.at(currentToken_).original.toStdString();
		return logError(kErrorUnexpectedToken) << "unexpected token '" << token << "', expected ";
	}
	return logError(kErrorUnexpectedToken) << "unexpected token, expected ";
}

BasicTokenizedFile::~BasicTokenizedFile() = default;

} // namespace common
} // namespace codegen