/*
This file is part of Telegram Desktop,
the official desktop version of Telegram messaging app, see https://telegram.org

Telegram Desktop is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

It is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.

Full license: https://github.com/telegramdesktop/tdesktop/blob/master/LICENSE
Copyright (c) 2014-2017 John Preston, https://desktop.telegram.org
*/
#include "codegen/common/basic_tokenized_file.h"

#include <utility>

#include "codegen/common/logging.h"
#include "codegen/common/clean_file_reader.h"
#include "codegen/common/checked_utf8_string.h"

using Token = codegen::common::BasicTokenizedFile::Token;
using Type = Token::Type;

namespace codegen {
namespace common {
namespace {

// Error codes passed to the reader's logError() by this tokenizer.
constexpr int kErrorUnterminatedStringLiteral = 201;
constexpr int kErrorIncorrectUtf8String = 202;
constexpr int kErrorIncorrectToken = 203;
constexpr int kErrorUnexpectedToken = 204;

// True for ASCII decimal digits '0'..'9'.
bool isDigitChar(char ch) {
	return (ch >= '0') && (ch <= '9');
}

// True for characters allowed in a name: ASCII letters, digits and '_'.
bool isNameChar(char ch) {
	return isDigitChar(ch)
		|| ((ch >= 'a') && (ch <= 'z'))
		|| ((ch >= 'A') && (ch <= 'Z'))
		|| (ch == '_');
}

// True for the characters skipped between tokens: LF, CR, space and tab.
bool isWhitespaceChar(char ch) {
	return (ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t');
}

// Builds the token value that is returned when no valid token is available.
Token invalidToken() {
	return { Type::Invalid, QString(), ConstUtf8String(nullptr, 0), false };
}

} // namespace

// Creates a tokenizer whose reader is constructed from a file path.
BasicTokenizedFile::BasicTokenizedFile(const QString &filepath) : reader_(filepath) {
}

// Creates a tokenizer whose reader is constructed from in-memory content
// and the file path that is associated with it.
BasicTokenizedFile::BasicTokenizedFile(const QByteArray &content, const QString &filepath) : reader_(content, filepath) {
}

// Steps the token cursor one position back.
// Returns false (and does nothing) when the cursor is already at the start.
bool BasicTokenizedFile::putBack() {
	if (currentToken_ > 0) {
		--currentToken_;
		return true;
	}
	return false;
}

// Returns the token under the cursor and advances the cursor.
// When the cursor is past the already tokenized part, more input is tokenized
// first; if that fails an invalid token is returned and the cursor stays.
Token BasicTokenizedFile::getAnyToken() {
	if (currentToken_ >= tokens_.size()) {
		if (readToken() == Type::Invalid) {
			return invalidToken();
		}
	}
	return tokens_.at(currentToken_++);
}

// Returns the next token only if it has the requested type.
// A token of a different type is put back and an invalid token is returned.
Token BasicTokenizedFile::getToken(Type typeCondition) {
	if (auto token = getAnyToken()) {
		if (token.type == typeCondition) {
			return token;
		}
		putBack();
	}
	return invalidToken();
}

// Reads the next token, gluing "int", "int.", "int.int" and ".int" sequences
// (written without whitespace in between) into a single Double token.
// Lookahead tokens that do not take part in a double stay in tokens_ and are
// returned by later getAnyToken() calls.
Type BasicTokenizedFile::readToken() {
	auto result = readOneToken(StartWithWhitespace::Allow);

	// Try to read double token.
	if (result == Type::Int) {
		if (readOneToken(StartWithWhitespace::Deny) == Type::Dot) {
			// We got int and dot, so it is double already.
			result = uniteLastTokens(Type::Double);

			// Try to read one more int (after dot).
			if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
				result = uniteLastTokens(Type::Double);
			}
		}
	} else if (result == Type::Dot) {
		if (readOneToken(StartWithWhitespace::Deny) == Type::Int) {
			// We got dot and int, so it is double.
			result = uniteLastTokens(Type::Double);
		}
	}
	return result;
}

// Reads exactly one raw token (string, name / number or single letter token).
// With StartWithWhitespace::Deny nothing is read if the token would be
// preceded by whitespace. Returns Type::Invalid when no token was saved.
Type BasicTokenizedFile::readOneToken(StartWithWhitespace condition) {
	// A denied lookahead must not consume anything: if the whitespace were
	// skipped here, the next regular read would start right at the token and
	// record it as having no leading whitespace (and lineNumber_ would already
	// point past the token that is still being processed).
	if (condition == StartWithWhitespace::Deny
		&& !reader_.atEnd()
		&& isWhitespaceChar(reader_.currentChar())) {
		return Type::Invalid;
	}

	skipWhitespaces();
	if (reader_.atEnd()) {
		return Type::Invalid;
	}

	auto ch = reader_.currentChar();
	if (ch == '"') {
		return readString();
	} else if (isNameChar(ch)) {
		return readNameOrNumber();
	}
	return readSingleLetter();
}

// Appends a token of the given type and value to tokens_. The original text
// is the range from tokenStart_ to the current reader position.
// Returns the type that was passed in.
Type BasicTokenizedFile::saveToken(Type type, const QString &value) {
	ConstUtf8String original = { tokenStart_, reader_.currentPtr() };
	tokens_.push_back({ type, value, original, tokenStartWhitespace_ });
	return type;
}

// Merges the last two saved tokens into one token of the given type:
// the original text range is extended and the values are concatenated.
// Returns Type::Invalid if fewer than two tokens are saved.
Type BasicTokenizedFile::uniteLastTokens(Type type) {
	auto size = tokens_.size();
	if (size < 2) {
		return Type::Invalid;
	}

	auto &token(tokens_[size - 2]);
	auto originalFrom = token.original.data();
	auto originalTill = tokens_.back().original.end();
	token.type = type;
	token.original = { originalFrom, originalTill };
	token.value += tokens_.back().value;
	tokens_.pop_back();
	return type;
}

// Returns the trimmed text of the single line comment stored for the current
// line, without the leading "//". Logs an error, sets failed_ and returns an
// empty string if the line number is out of range or the comment bytes are
// not valid UTF-8.
QString BasicTokenizedFile::getCurrentLineComment() {
	if (lineNumber_ > singleLineComments_.size()) {
		reader_.logError(kErrorInternal, lineNumber_) << "internal tokenizer error (line number larger than comments list size).";
		failed_ = true;
		return QString();
	}
	auto commentBytes = singleLineComments_[lineNumber_ - 1].mid(2); // Skip "//"
	CheckedUtf8String comment(commentBytes);
	if (!comment.isValid()) {
		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string in the comment.";
		failed_ = true;
		return QString();
	}
	return comment.toString().trimmed();
}

// Reads a run of name characters. The token is an Int when the run consists
// of digits only and a Name otherwise (including runs that start with digits).
Type BasicTokenizedFile::readNameOrNumber() {
	// Leading digits do not decide anything yet.
	while (!reader_.atEnd() && isDigitChar(reader_.currentChar())) {
		reader_.skipChar();
	}

	// Any name character found after the leading digits makes it a name.
	bool onlyDigits = true;
	while (!reader_.atEnd() && isNameChar(reader_.currentChar())) {
		onlyDigits = false;
		reader_.skipChar();
	}
	return saveToken(onlyDigits ? Type::Int : Type::Name);
}

// Reads a double quoted string literal starting at the opening quote.
// The escapes \n, \t, \" and \\ are decoded, a backslash followed by any
// other character drops both characters. A line break or the end of input
// before the closing quote, as well as invalid UTF-8 content, is reported
// as an error: failed_ is set and Type::Invalid is returned.
Type BasicTokenizedFile::readString() {
	auto unterminated = [this] {
		reader_.logError(kErrorUnterminatedStringLiteral, lineNumber_) << "unterminated string literal.";
		failed_ = true;
		return Type::Invalid;
	};

	reader_.skipChar(); // Skip the opening quote.

	// Start of the not yet copied run of plain (unescaped) bytes.
	auto offset = reader_.currentPtr();
	QByteArray value;
	auto appendPlainRun = [&] {
		if (reader_.currentPtr() > offset) {
			value.append(offset, reader_.currentPtr() - offset);
		}
	};

	while (!reader_.atEnd()) {
		auto ch = reader_.currentChar();
		if (ch == '"') {
			appendPlainRun();
			break;
		}
		if (ch == '\n') {
			return unterminated();
		}
		if (ch == '\\') {
			appendPlainRun();
			reader_.skipChar();
			// Check for the end of input before asking for the escaped char.
			if (reader_.atEnd()) {
				return unterminated();
			}
			ch = reader_.currentChar();
			if (ch == '\n') {
				return unterminated();
			}
			// The next plain run starts right after the escaped char.
			offset = reader_.currentPtr() + 1;
			if (ch == 'n') {
				value.append('\n');
			} else if (ch == 't') {
				value.append('\t');
			} else if (ch == '"') {
				value.append('"');
			} else if (ch == '\\') {
				value.append('\\');
			}
		}
		reader_.skipChar();
	}
	if (reader_.atEnd()) {
		// The loop ended without finding the closing quote.
		return unterminated();
	}

	CheckedUtf8String checked(value);
	if (!checked.isValid()) {
		reader_.logError(kErrorIncorrectUtf8String, lineNumber_) << "incorrect UTF-8 string literal.";
		failed_ = true;
		return Type::Invalid;
	}
	reader_.skipChar(); // Skip the closing quote.
	return saveToken(Type::String, checked.toString());
}

// Reads a token that consists of a single character, looked up in
// singleLetterTokens_. An unknown character is logged and left unread.
// NOTE(review): unlike the other error paths this one does not set failed_;
// kept as is, confirm whether that is intentional.
Type BasicTokenizedFile::readSingleLetter() {
	auto type = singleLetterTokens_.value(reader_.currentChar(), Type::Invalid);
	if (type == Type::Invalid) {
		reader_.logError(kErrorIncorrectToken, lineNumber_) << "incorrect token '" << reader_.currentChar() << "'";
		return Type::Invalid;
	}

	reader_.skipChar();
	return saveToken(type);
}

// Skips whitespace before a token, counting line breaks in lineNumber_.
// Remembers whether any whitespace was found (tokenStartWhitespace_) and
// where the token starts (tokenStart_). Does nothing at the end of input.
void BasicTokenizedFile::skipWhitespaces() {
	if (reader_.atEnd()) return;

	tokenStartWhitespace_ = isWhitespaceChar(reader_.currentChar());

	// atEnd() is checked before each currentChar() call, so the reader is
	// never asked for a character past the end of input.
	while (!reader_.atEnd() && isWhitespaceChar(reader_.currentChar())) {
		if (reader_.currentChar() == '\n') {
			++lineNumber_;
		}
		reader_.skipChar();
	}
	tokenStart_ = reader_.currentPtr();
}

// Writes a quoted human readable description of a token type to a log stream.
LogStream operator<<(LogStream &&stream, BasicTokenizedFile::Token::Type type) {
	const char *value = "'invalid'";
	switch (type) {
	case Type::Invalid: break;
	case Type::Int: value = "'int'"; break;
	case Type::Double: value = "'double'"; break;
	case Type::String: value = "'string'"; break;
	case Type::LeftParenthesis: value = "'('"; break;
	case Type::RightParenthesis: value = "')'"; break;
	case Type::LeftBrace: value = "'{'"; break;
	case Type::RightBrace: value = "'}'"; break;
	case Type::LeftBracket: value = "'['"; break;
	case Type::RightBracket: value = "']'"; break;
	case Type::Colon: value = "':'"; break;
	case Type::Semicolon: value = "';'"; break;
	case Type::Comma: value = "','"; break;
	case Type::Dot: value = "'.'"; break;
	case Type::Number: value = "'#'"; break;
	case Type::Plus: value = "'+'"; break;
	case Type::Minus: value = "'-'"; break;
	case Type::Equals: value = "'='"; break;
	case Type::Name: value = "'identifier'"; break;
	}
	// std::forward cannot deduce its template argument, it must be explicit.
	return std::forward<LogStream>(stream) << value;
}

// Starts an error message with the given code at the current line number.
LogStream BasicTokenizedFile::logError(int code) const {
	return reader_.logError(code, lineNumber_);
}

// Starts an "unexpected token" error message. The caller is expected to
// append the description of what was expected. The text of the token under
// the cursor is included when there is one.
LogStream BasicTokenizedFile::logErrorUnexpectedToken() const {
	if (currentToken_ < tokens_.size()) {
		auto token = tokens_.at(currentToken_).original.toStdString();
		return logError(kErrorUnexpectedToken) << "unexpected token '" << token << "', expected ";
	}
	return logError(kErrorUnexpectedToken) << "unexpected token, expected ";
}

BasicTokenizedFile::~BasicTokenizedFile() = default;

} // namespace common
} // namespace codegen