/* This file is part of Telegram Desktop, the official desktop application for the Telegram messaging service. For license and copyright information please follow this link: https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL */ #include "emoji_suggestions.h" #include #include "emoji_suggestions_data.h" #ifndef Expects #include #define Expects(condition) assert(condition) #endif // Expects namespace Ui { namespace Emoji { namespace internal { namespace { checksum Crc32Table[256]; class Crc32Initializer { public: Crc32Initializer() { checksum poly = 0x04C11DB7U; for (auto i = 0; i != 256; ++i) { Crc32Table[i] = reflect(i, 8) << 24; for (auto j = 0; j != 8; ++j) { Crc32Table[i] = (Crc32Table[i] << 1) ^ (Crc32Table[i] & (1 << 31) ? poly : 0); } Crc32Table[i] = reflect(Crc32Table[i], 32); } } private: checksum reflect(checksum val, char ch) { checksum result = 0; for (int i = 1; i < (ch + 1); ++i) { if (val & 1) { result |= 1 << (ch - i); } val >>= 1; } return result; } }; } // namespace checksum countChecksum(const void *data, std::size_t size) { static Crc32Initializer InitTable; auto buffer = static_cast(data); auto result = checksum(0xFFFFFFFFU); for (auto i = std::size_t(0); i != size; ++i) { result = (result >> 8) ^ Crc32Table[(result & 0xFFU) ^ buffer[i]]; } return (result ^ 0xFFFFFFFFU); } } // namespace internal namespace { class string_span { public: string_span() = default; string_span(const utf16string *data, std::size_t size) : begin_(data), size_(size) { } string_span(const std::vector &data) : begin_(data.data()), size_(data.size()) { } string_span(const string_span &other) = default; string_span &operator=(const string_span &other) = default; const utf16string *begin() const { return begin_; } const utf16string *end() const { return begin_ + size_; } std::size_t size() const { return size_; } string_span subspan(std::size_t offset, std::size_t size) { return string_span(begin_ + offset, size); } private: const utf16string *begin_ = nullptr; std::size_t size_ = 0; }; bool IsNumber(utf16char ch) { return (ch >= '0' && ch <= '9'); } bool IsLetterOrNumber(utf16char ch) { return (ch >= 'a' && ch <= 'z') || IsNumber(ch); } using Replacement = internal::Replacement; class Completer { public: Completer(utf16string query); std::vector resolve(); private: struct Result { const Replacement *replacement; int wordsUsed; }; static std::vector NormalizeQuery(utf16string query); void addResult(const Replacement *replacement); bool isDuplicateOfLastResult(const Replacement *replacement) const; bool isBetterThanLastResult(const Replacement *replacement) const; void processInitialList(); void filterInitialList(); void initWordsTracking(); bool matchQueryForCurrentItem(); bool matchQueryTailStartingFrom(int position); string_span findWordsStartingWith(utf16char ch); int findEqualCharsCount(int position, const utf16string *word); std::vector prepareResult(); bool startsWithQuery(utf16string word); bool isExactMatch(utf16string replacement); std::vector _result; utf16string _initialQuery; const std::vector _query; const utf16char *_queryBegin = nullptr; int _querySize = 0; const std::vector *_initialList = nullptr; string_span _currentItemWords; int _currentItemWordsUsedCount = 0; class UsedWordGuard { public: UsedWordGuard(std::vector &map, int index); UsedWordGuard(const UsedWordGuard &other) = delete; UsedWordGuard(UsedWordGuard &&other); UsedWordGuard &operator=(const UsedWordGuard &other) = delete; UsedWordGuard &operator=(UsedWordGuard &&other) = delete; explicit operator bool() const; ~UsedWordGuard(); private: std::vector &_map; int _index = 0; small _guarded = 0; }; std::vector _currentItemWordsUsedMap; }; Completer::UsedWordGuard::UsedWordGuard(std::vector &map, int index) : _map(map), _index(index) { Expects(_map.size() > _index); if (!_map[_index]) { _guarded = _map[_index] = 1; } } Completer::UsedWordGuard::UsedWordGuard(UsedWordGuard &&other) : _map(other._map), _index(other._index), _guarded(other._guarded) { other._guarded = 0; } Completer::UsedWordGuard::operator bool() const { return _guarded; } Completer::UsedWordGuard::~UsedWordGuard() { if (_guarded) { _map[_index] = 0; } } Completer::Completer(utf16string query) : _initialQuery(query), _query(NormalizeQuery(query)) { } // Remove all non-letters-or-numbers. // Leave '-' and '+' only if they're followed by a number or // at the end of the query (so it is possibly followed by a number). std::vector Completer::NormalizeQuery(utf16string query) { auto result = std::vector(); result.reserve(query.size()); auto copyFrom = query.data(); auto e = copyFrom + query.size(); auto copyTo = result.data(); for (auto i = query.data(); i != e; ++i) { if (IsLetterOrNumber(*i)) { continue; } else if (*i == '-' || *i == '+') { if (i + 1 == e || IsNumber(*(i + 1))) { continue; } } if (i > copyFrom) { result.resize(result.size() + (i - copyFrom)); memcpy(copyTo, copyFrom, (i - copyFrom) * sizeof(utf16char)); copyTo += (i - copyFrom); } copyFrom = i + 1; } if (e > copyFrom) { result.resize(result.size() + (e - copyFrom)); memcpy(copyTo, copyFrom, (e - copyFrom) * sizeof(utf16char)); copyTo += (e - copyFrom); } return result; } std::vector Completer::resolve() { _queryBegin = _query.data(); _querySize = _query.size(); if (!_querySize) { return std::vector(); } _initialList = Ui::Emoji::internal::GetReplacements(*_queryBegin); if (!_initialList) { return std::vector(); } _result.reserve(_initialList->size()); processInitialList(); return prepareResult(); } bool Completer::isDuplicateOfLastResult(const Replacement *item) const { if (_result.empty()) { return false; } return (_result.back().replacement->emoji == item->emoji); } bool Completer::isBetterThanLastResult(const Replacement *item) const { Expects(!_result.empty()); auto &last = _result.back(); if (_currentItemWordsUsedCount < last.wordsUsed) { return true; } auto firstCharOfQuery = _query[0]; auto firstCharAfterColonLast = last.replacement->replacement[1]; auto firstCharAfterColonCurrent = item->replacement[1]; auto goodLast = (firstCharAfterColonLast == firstCharOfQuery); auto goodCurrent = (firstCharAfterColonCurrent == firstCharOfQuery); return !goodLast && goodCurrent; } void Completer::addResult(const Replacement *item) { if (!isDuplicateOfLastResult(item)) { _result.push_back({ item, _currentItemWordsUsedCount }); } else if (isBetterThanLastResult(item)) { _result.back() = { item, _currentItemWordsUsedCount }; } } void Completer::processInitialList() { if (_querySize > 1) { filterInitialList(); } else { _currentItemWordsUsedCount = 1; for (auto item : *_initialList) { addResult(item); } } } void Completer::initWordsTracking() { auto maxWordsCount = 0; for (auto item : *_initialList) { auto wordsCount = item->words.size(); if (maxWordsCount < wordsCount) { maxWordsCount = wordsCount; } } _currentItemWordsUsedMap = std::vector(maxWordsCount, 0); } void Completer::filterInitialList() { initWordsTracking(); for (auto item : *_initialList) { _currentItemWords = string_span(item->words); _currentItemWordsUsedCount = 1; if (matchQueryForCurrentItem()) { addResult(item); } _currentItemWordsUsedCount = 0; } } bool Completer::matchQueryForCurrentItem() { Expects(_currentItemWords.size() != 0); if (_currentItemWords.size() < 2) { return startsWithQuery(*_currentItemWords.begin()); } return matchQueryTailStartingFrom(0); } bool Completer::startsWithQuery(utf16string word) { if (word.size() < _query.size()) { return false; } for (auto i = std::size_t(0), size = _query.size(); i != size; ++i) { if (word[i] != _query[i]) { return false; } } return true; } bool Completer::isExactMatch(utf16string replacement) { if (replacement.size() != _initialQuery.size() + 1) { return false; } for (auto i = std::size_t(0), size = _initialQuery.size(); i != size; ++i) { if (replacement[i] != _initialQuery[i]) { return false; } } return true; } bool Completer::matchQueryTailStartingFrom(int position) { auto charsLeftToMatch = (_querySize - position); if (!charsLeftToMatch) { return true; } auto firstCharToMatch = *(_queryBegin + position); auto foundWords = findWordsStartingWith(firstCharToMatch); for (auto word = foundWords.begin(), foundWordsEnd = word + foundWords.size(); word != foundWordsEnd; ++word) { auto wordIndex = word - _currentItemWords.begin(); if (auto guard = UsedWordGuard(_currentItemWordsUsedMap, wordIndex)) { ++_currentItemWordsUsedCount; auto equalCharsCount = findEqualCharsCount(position, word); for (auto check = equalCharsCount; check != 0; --check) { if (matchQueryTailStartingFrom(position + check)) { return true; } } --_currentItemWordsUsedCount; } } return false; } int Completer::findEqualCharsCount(int position, const utf16string *word) { auto charsLeft = (_querySize - position); auto wordBegin = word->data(); auto wordSize = word->size(); auto possibleEqualCharsCount = (charsLeft > wordSize ? wordSize : charsLeft); for (auto equalTill = 1; equalTill != possibleEqualCharsCount; ++equalTill) { auto wordCh = *(wordBegin + equalTill); auto queryCh = *(_queryBegin + position + equalTill); if (wordCh != queryCh) { return equalTill; } } return possibleEqualCharsCount; } std::vector Completer::prepareResult() { const auto firstCharOfQuery = _query[0]; const auto reorder = [&](auto &&predicate) { std::stable_partition( std::begin(_result), std::end(_result), std::forward(predicate)); }; reorder([&](Result &result) { const auto firstCharAfterColon = result.replacement->replacement[1]; return (firstCharAfterColon == firstCharOfQuery); }); reorder([](Result &result) { return (result.wordsUsed < 2); }); reorder([](Result &result) { return (result.wordsUsed < 3); }); reorder([&](Result &result) { return isExactMatch(result.replacement->replacement); }); auto result = std::vector(); result.reserve(_result.size()); for (auto &item : _result) { result.emplace_back( item.replacement->emoji, item.replacement->replacement, item.replacement->replacement); } return result; } string_span Completer::findWordsStartingWith(utf16char ch) { auto begin = std::lower_bound( std::begin(_currentItemWords), std::end(_currentItemWords), ch, [](utf16string word, utf16char ch) { return word[0] < ch; }); auto end = std::upper_bound( std::begin(_currentItemWords), std::end(_currentItemWords), ch, [](utf16char ch, utf16string word) { return ch < word[0]; }); return _currentItemWords.subspan( begin - _currentItemWords.begin(), end - begin); } } // namespace std::vector GetSuggestions(utf16string query) { return Completer(query).resolve(); } int GetSuggestionMaxLength() { return internal::kReplacementMaxLength; } } // namespace Emoji } // namespace Ui