342 lines
9.3 KiB
C++
342 lines
9.3 KiB
C++
/*
|
|
This file is part of Telegram Desktop,
|
|
the official desktop application for the Telegram messaging service.
|
|
|
|
For license and copyright information please follow this link:
|
|
https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
|
|
*/
|
|
#include "codegen/lang/parsed_file.h"
|
|
|
|
#include <iostream>
|
|
#include <set>
|
|
#include <QtCore/QMap>
|
|
#include <QtCore/QDir>
|
|
#include <QtCore/QRegularExpression>
|
|
#include "codegen/common/basic_tokenized_file.h"
|
|
#include "codegen/common/logging.h"
|
|
|
|
namespace codegen {
|
|
namespace lang {
|
|
namespace {
|
|
|
|
using BasicToken = codegen::common::BasicTokenizedFile::Token;
|
|
using BasicType = BasicToken::Type;
|
|
|
|
constexpr int kErrorBadString = 806;
|
|
|
|
bool ValidateAnsiString(const QString &value) {
|
|
for (auto ch : value) {
|
|
if (ch.unicode() > 127) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool ValidateKey(const QString &key) {
|
|
static const auto validator = QRegularExpression("^[a-z0-9_.-]+(#(one|other))?$", QRegularExpression::CaseInsensitiveOption);
|
|
if (!validator.match(key).hasMatch()) {
|
|
return false;
|
|
}
|
|
if (key.indexOf("__") >= 0) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool ValidateTag(const QString &tag) {
|
|
static const auto validator = QRegularExpression("^[a-z0-9_]+$", QRegularExpression::CaseInsensitiveOption);
|
|
if (!validator.match(tag).hasMatch()) {
|
|
return false;
|
|
}
|
|
if (tag.indexOf("__") >= 0) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
QString PrepareCommandString(int index) {
|
|
static const QChar TextCommand(0x0010);
|
|
static const QChar TextCommandLangTag(0x0020);
|
|
auto result = QString(4, TextCommand);
|
|
result[1] = TextCommandLangTag;
|
|
result[2] = QChar(0x0020 + ushort(index));
|
|
return result;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
const std::array<QString, kPluralPartCount> kPluralParts = { {
|
|
"zero",
|
|
"one",
|
|
"two",
|
|
"few",
|
|
"many",
|
|
"other",
|
|
} };
|
|
|
|
const std::array<QString, kPluralTagsCount> kPluralTags = { {
|
|
"count",
|
|
"count_short",
|
|
"count_decimal",
|
|
} };
|
|
|
|
QString ComputePluralKey(const QString &base, int index) {
|
|
return base + "__plural" + QString::number(index);
|
|
}
|
|
|
|
ParsedFile::ParsedFile(const Options &options)
|
|
: filePath_(options.inputPath)
|
|
, file_(filePath_)
|
|
, options_(options) {
|
|
}
|
|
|
|
bool ParsedFile::read() {
|
|
if (!file_.read()) {
|
|
return false;
|
|
}
|
|
|
|
do {
|
|
if (auto keyToken = file_.getToken(BasicType::String)) {
|
|
if (ValidateKey(keyToken.value)) {
|
|
if (auto equals = file_.getToken(BasicType::Equals)) {
|
|
if (auto valueToken = file_.getToken(BasicType::String)) {
|
|
assertNextToken(BasicType::Semicolon);
|
|
addEntity(keyToken.value, valueToken.value);
|
|
continue;
|
|
} else {
|
|
logErrorUnexpectedToken() << "string value for '" << keyToken.value.toStdString() << "' key";
|
|
}
|
|
} else {
|
|
logErrorUnexpectedToken() << "'=' for '" << keyToken.value.toStdString() << "' key";
|
|
}
|
|
} else {
|
|
logErrorUnexpectedToken() << "string key name (/^[a-z0-9_.-]+(#(one|other))?$/i)";
|
|
}
|
|
}
|
|
if (file_.atEnd()) {
|
|
break;
|
|
}
|
|
logErrorUnexpectedToken() << "ansi string key name";
|
|
} while (!failed());
|
|
|
|
fillPluralTags();
|
|
|
|
return !failed();
|
|
}
|
|
|
|
void ParsedFile::fillPluralTags() {
|
|
auto count = result_.entries.size();
|
|
for (auto i = 0; i != count;) {
|
|
auto &baseEntry = result_.entries[i];
|
|
if (baseEntry.keyBase.isEmpty()) {
|
|
++i;
|
|
continue;
|
|
}
|
|
logAssert(i + kPluralPartCount < count);
|
|
|
|
// Accumulate all tags from all plural variants.
|
|
auto tags = std::vector<LangPack::Tag>();
|
|
for (auto j = i; j != i + kPluralPartCount; ++j) {
|
|
if (tags.empty()) {
|
|
tags = result_.entries[j].tags;
|
|
} else {
|
|
for (auto &tag : result_.entries[j].tags) {
|
|
if (std::find(tags.begin(), tags.end(), tag) == tags.end()) {
|
|
tags.push_back(tag);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
logAssert(!tags.empty());
|
|
logAssert(tags.front().tag == kPluralTags[0]);
|
|
|
|
// Set this tags list to all plural variants.
|
|
for (auto j = i; j != i + kPluralPartCount; ++j) {
|
|
result_.entries[j].tags = tags;
|
|
}
|
|
|
|
i += kPluralPartCount;
|
|
}
|
|
}
|
|
|
|
BasicToken ParsedFile::assertNextToken(BasicToken::Type type) {
|
|
auto result = file_.getToken(type);
|
|
if (!result) {
|
|
logErrorUnexpectedToken() << type;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
common::LogStream ParsedFile::logErrorBadString() {
|
|
return logError(kErrorBadString);
|
|
}
|
|
|
|
QString ParsedFile::extractTagsData(const QString &value, LangPack *to) {
|
|
auto tagStart = value.indexOf('{');
|
|
if (tagStart < 0) {
|
|
return value;
|
|
}
|
|
|
|
auto tagEnd = 0;
|
|
auto finalValue = QString();
|
|
finalValue.reserve(value.size() * 2);
|
|
while (tagStart >= 0) {
|
|
if (tagStart > tagEnd) {
|
|
finalValue.append(value.midRef(tagEnd, tagStart - tagEnd));
|
|
}
|
|
++tagStart;
|
|
tagEnd = value.indexOf('}', tagStart);
|
|
if (tagEnd < 0) {
|
|
logErrorBadString() << "unexpected end of value, end of tag expected.";
|
|
return value;
|
|
}
|
|
finalValue.append(extractTagData(value.mid(tagStart, tagEnd - tagStart), to));
|
|
++tagEnd;
|
|
tagStart = value.indexOf('{', tagEnd);
|
|
}
|
|
if (tagEnd < value.size()) {
|
|
finalValue.append(value.midRef(tagEnd));
|
|
}
|
|
return finalValue;
|
|
}
|
|
|
|
QString ParsedFile::extractTagData(const QString &tagText, LangPack *to) {
|
|
auto numericPart = tagText.indexOf(':');
|
|
auto tag = (numericPart > 0) ? tagText.mid(0, numericPart) : tagText;
|
|
if (!ValidateTag(tag)) {
|
|
logErrorBadString() << "bad tag characters: '" << tagText.toStdString() << "'";
|
|
return QString();
|
|
}
|
|
for (auto &previousTag : to->tags) {
|
|
if (previousTag.tag == tag) {
|
|
logErrorBadString() << "duplicate found for tag '" << tagText.toStdString() << "'";
|
|
return QString();
|
|
}
|
|
}
|
|
auto index = 0;
|
|
auto tagIndex = result_.tags.size();
|
|
for (auto &alreadyTag : result_.tags) {
|
|
if (alreadyTag.tag == tag) {
|
|
tagIndex = index;
|
|
break;
|
|
}
|
|
++index;
|
|
}
|
|
if (tagIndex == result_.tags.size()) {
|
|
result_.tags.push_back({ tag });
|
|
}
|
|
if (numericPart > 0) {
|
|
auto numericParts = tagText.mid(numericPart + 1).split('|');
|
|
if (numericParts.size() != 3) {
|
|
logErrorBadString() << "bad option count for plural key part in tag: '" << tagText.toStdString() << "'";
|
|
return QString();
|
|
}
|
|
auto index = 0;
|
|
for (auto &part : numericParts) {
|
|
auto numericPartEntry = LangPack::Entry();
|
|
numericPartEntry.key = tag + QString::number(index++);
|
|
if (part.indexOf('#') != part.lastIndexOf('#')) {
|
|
logErrorBadString() << "bad option for plural key part in tag: '" << tagText.toStdString() << "', too many '#'.";
|
|
return QString();
|
|
}
|
|
numericPartEntry.value = part.replace('#', PrepareCommandString(tagIndex));
|
|
to->entries.push_back(numericPartEntry);
|
|
}
|
|
}
|
|
to->tags.push_back({ tag });
|
|
return PrepareCommandString(tagIndex);
|
|
}
|
|
|
|
void ParsedFile::addEntity(QString key, const QString &value) {
|
|
auto pluralPartOffset = key.indexOf('#');
|
|
auto pluralIndex = -1;
|
|
if (pluralPartOffset >= 0) {
|
|
auto pluralPart = key.mid(pluralPartOffset + 1);
|
|
pluralIndex = std::find(kPluralParts.begin(), kPluralParts.end(), pluralPart) - kPluralParts.begin();
|
|
if (pluralIndex < 0 || pluralIndex >= kPluralParts.size()) {
|
|
logErrorBadString() << "bad plural part for key '" << key.toStdString() << "': '" << pluralPart.toStdString() << "'";
|
|
return;
|
|
}
|
|
key = key.mid(0, pluralPartOffset);
|
|
}
|
|
auto checkKey = [this](const QString &key) {
|
|
for (auto &entry : result_.entries) {
|
|
if (entry.key == key) {
|
|
if (entry.keyBase.isEmpty() || !entry.tags.empty()) {
|
|
// Empty tags in plural entry means it was not encountered yet.
|
|
logErrorBadString() << "duplicate found for key '" << key.toStdString() << "'";
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
if (!checkKey(key)) {
|
|
return;
|
|
}
|
|
auto tagsData = LangPack();
|
|
auto entry = LangPack::Entry();
|
|
entry.key = key;
|
|
entry.value = extractTagsData(value, &tagsData);
|
|
entry.tags = tagsData.tags;
|
|
if (pluralIndex >= 0) {
|
|
logAssert(tagsData.entries.empty());
|
|
|
|
entry.keyBase = entry.key;
|
|
entry.key = ComputePluralKey(entry.keyBase, pluralIndex);
|
|
if (!checkKey(entry.key)) {
|
|
return;
|
|
}
|
|
auto baseIndex = -1;
|
|
auto alreadyCount = result_.entries.size();
|
|
for (auto i = 0; i != alreadyCount; ++i) {
|
|
if (result_.entries[i].keyBase == entry.keyBase) {
|
|
// This is not the first appearance of this plural key.
|
|
baseIndex = i;
|
|
break;
|
|
}
|
|
}
|
|
if (baseIndex < 0) {
|
|
baseIndex = result_.entries.size();
|
|
for (auto i = 0; i != kPluralPartCount; ++i) {
|
|
auto addingEntry = LangPack::Entry();
|
|
addingEntry.keyBase = entry.keyBase;
|
|
addingEntry.key = ComputePluralKey(entry.keyBase, i);
|
|
result_.entries.push_back(addingEntry);
|
|
}
|
|
}
|
|
auto entryIndex = baseIndex + pluralIndex;
|
|
logAssert(entryIndex < result_.entries.size());
|
|
auto &realEntry = result_.entries[entryIndex];
|
|
logAssert(realEntry.key == entry.key);
|
|
realEntry.value = entry.value;
|
|
|
|
// Add all new tags to the existing ones.
|
|
realEntry.tags = std::vector<LangPack::Tag>(1, LangPack::Tag{ kPluralTags[0] });
|
|
|
|
for (auto &tag : entry.tags) {
|
|
if (std::find(realEntry.tags.begin(), realEntry.tags.end(), tag) == realEntry.tags.end()) {
|
|
realEntry.tags.push_back(tag);
|
|
}
|
|
}
|
|
} else {
|
|
result_.entries.push_back(entry);
|
|
for (auto &tag : entry.tags) {
|
|
const auto plural = std::find(std::begin(kPluralTags), std::end(kPluralTags), tag.tag);
|
|
if (plural != std::end(kPluralTags)) {
|
|
logErrorBadString() << "plural tag '" << tag.tag.toStdString() << "' used in non-plural key '" << key.toStdString() << "'";
|
|
}
|
|
}
|
|
for (auto &tagEntry : tagsData.entries) {
|
|
auto taggedEntry = LangPack::Entry();
|
|
taggedEntry.key = key + "__" + tagEntry.key;
|
|
taggedEntry.value = tagEntry.value;
|
|
result_.entries.push_back(taggedEntry);
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace lang
|
|
} // namespace codegen
|