tdesktop/Telegram/ThirdParty/emoji_suggestions/emoji_suggestions.cpp
2018-01-03 13:23:14 +03:00

435 lines
11 KiB
C++

/*
This file is part of Telegram Desktop,
the official desktop application for the Telegram messaging service.
For license and copyright information please follow this link:
https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
*/
#include "emoji_suggestions.h"
#include <algorithm>
#include "emoji_suggestions_data.h"
#ifndef Expects
#include <cassert>
#define Expects(condition) assert(condition)
#endif // Expects
namespace Ui {
namespace Emoji {
namespace internal {
namespace {
checksum Crc32Table[256];
class Crc32Initializer {
public:
Crc32Initializer() {
checksum poly = 0x04C11DB7U;
for (auto i = 0; i != 256; ++i) {
Crc32Table[i] = reflect(i, 8) << 24;
for (auto j = 0; j != 8; ++j) {
Crc32Table[i] = (Crc32Table[i] << 1) ^ (Crc32Table[i] & (1 << 31) ? poly : 0);
}
Crc32Table[i] = reflect(Crc32Table[i], 32);
}
}
private:
checksum reflect(checksum val, char ch) {
checksum result = 0;
for (int i = 1; i < (ch + 1); ++i) {
if (val & 1) {
result |= 1 << (ch - i);
}
val >>= 1;
}
return result;
}
};
} // namespace
checksum countChecksum(const void *data, std::size_t size) {
static Crc32Initializer InitTable;
auto buffer = static_cast<const unsigned char*>(data);
auto result = checksum(0xFFFFFFFFU);
for (auto i = std::size_t(0); i != size; ++i) {
result = (result >> 8) ^ Crc32Table[(result & 0xFFU) ^ buffer[i]];
}
return (result ^ 0xFFFFFFFFU);
}
} // namespace internal
namespace {
class string_span {
public:
string_span() = default;
string_span(const utf16string *data, std::size_t size) : begin_(data), size_(size) {
}
string_span(const std::vector<utf16string> &data) : begin_(data.data()), size_(data.size()) {
}
string_span(const string_span &other) = default;
string_span &operator=(const string_span &other) = default;
const utf16string *begin() const {
return begin_;
}
const utf16string *end() const {
return begin_ + size_;
}
std::size_t size() const {
return size_;
}
string_span subspan(std::size_t offset, std::size_t size) {
return string_span(begin_ + offset, size);
}
private:
const utf16string *begin_ = nullptr;
std::size_t size_ = 0;
};
bool IsNumber(utf16char ch) {
return (ch >= '0' && ch <= '9');
}
bool IsLetterOrNumber(utf16char ch) {
return (ch >= 'a' && ch <= 'z') || IsNumber(ch);
}
using Replacement = internal::Replacement;
class Completer {
public:
Completer(utf16string query);
std::vector<Suggestion> resolve();
private:
struct Result {
const Replacement *replacement;
int wordsUsed;
};
static std::vector<utf16char> NormalizeQuery(utf16string query);
void addResult(const Replacement *replacement);
bool isDuplicateOfLastResult(const Replacement *replacement) const;
bool isBetterThanLastResult(const Replacement *replacement) const;
void processInitialList();
void filterInitialList();
void initWordsTracking();
bool matchQueryForCurrentItem();
bool matchQueryTailStartingFrom(int position);
string_span findWordsStartingWith(utf16char ch);
int findEqualCharsCount(int position, const utf16string *word);
std::vector<Suggestion> prepareResult();
bool startsWithQuery(utf16string word);
bool isExactMatch(utf16string replacement);
std::vector<Result> _result;
utf16string _initialQuery;
const std::vector<utf16char> _query;
const utf16char *_queryBegin = nullptr;
int _querySize = 0;
const std::vector<const Replacement*> *_initialList = nullptr;
string_span _currentItemWords;
int _currentItemWordsUsedCount = 0;
class UsedWordGuard {
public:
UsedWordGuard(std::vector<small> &map, int index);
UsedWordGuard(const UsedWordGuard &other) = delete;
UsedWordGuard(UsedWordGuard &&other);
UsedWordGuard &operator=(const UsedWordGuard &other) = delete;
UsedWordGuard &operator=(UsedWordGuard &&other) = delete;
explicit operator bool() const;
~UsedWordGuard();
private:
std::vector<small> &_map;
int _index = 0;
small _guarded = 0;
};
std::vector<small> _currentItemWordsUsedMap;
};
Completer::UsedWordGuard::UsedWordGuard(std::vector<small> &map, int index) : _map(map), _index(index) {
Expects(_map.size() > _index);
if (!_map[_index]) {
_guarded = _map[_index] = 1;
}
}
Completer::UsedWordGuard::UsedWordGuard(UsedWordGuard &&other) : _map(other._map), _index(other._index), _guarded(other._guarded) {
other._guarded = 0;
}
Completer::UsedWordGuard::operator bool() const {
return _guarded;
}
Completer::UsedWordGuard::~UsedWordGuard() {
if (_guarded) {
_map[_index] = 0;
}
}
Completer::Completer(utf16string query) : _initialQuery(query), _query(NormalizeQuery(query)) {
}
// Remove all non-letters-or-numbers.
// Leave '-' and '+' only if they're followed by a number or
// at the end of the query (so it is possibly followed by a number).
std::vector<utf16char> Completer::NormalizeQuery(utf16string query) {
auto result = std::vector<utf16char>();
result.reserve(query.size());
auto copyFrom = query.data();
auto e = copyFrom + query.size();
auto copyTo = result.data();
for (auto i = query.data(); i != e; ++i) {
if (IsLetterOrNumber(*i)) {
continue;
} else if (*i == '-' || *i == '+') {
if (i + 1 == e || IsNumber(*(i + 1))) {
continue;
}
}
if (i > copyFrom) {
result.resize(result.size() + (i - copyFrom));
memcpy(copyTo, copyFrom, (i - copyFrom) * sizeof(utf16char));
copyTo += (i - copyFrom);
}
copyFrom = i + 1;
}
if (e > copyFrom) {
result.resize(result.size() + (e - copyFrom));
memcpy(copyTo, copyFrom, (e - copyFrom) * sizeof(utf16char));
copyTo += (e - copyFrom);
}
return result;
}
std::vector<Suggestion> Completer::resolve() {
_queryBegin = _query.data();
_querySize = _query.size();
if (!_querySize) {
return std::vector<Suggestion>();
}
_initialList = Ui::Emoji::internal::GetReplacements(*_queryBegin);
if (!_initialList) {
return std::vector<Suggestion>();
}
_result.reserve(_initialList->size());
processInitialList();
return prepareResult();
}
bool Completer::isDuplicateOfLastResult(const Replacement *item) const {
if (_result.empty()) {
return false;
}
return (_result.back().replacement->emoji == item->emoji);
}
bool Completer::isBetterThanLastResult(const Replacement *item) const {
Expects(!_result.empty());
auto &last = _result.back();
if (_currentItemWordsUsedCount < last.wordsUsed) {
return true;
}
auto firstCharOfQuery = _query[0];
auto firstCharAfterColonLast = last.replacement->replacement[1];
auto firstCharAfterColonCurrent = item->replacement[1];
auto goodLast = (firstCharAfterColonLast == firstCharOfQuery);
auto goodCurrent = (firstCharAfterColonCurrent == firstCharOfQuery);
return !goodLast && goodCurrent;
}
void Completer::addResult(const Replacement *item) {
if (!isDuplicateOfLastResult(item)) {
_result.push_back({ item, _currentItemWordsUsedCount });
} else if (isBetterThanLastResult(item)) {
_result.back() = { item, _currentItemWordsUsedCount };
}
}
void Completer::processInitialList() {
if (_querySize > 1) {
filterInitialList();
} else {
_currentItemWordsUsedCount = 1;
for (auto item : *_initialList) {
addResult(item);
}
}
}
void Completer::initWordsTracking() {
auto maxWordsCount = 0;
for (auto item : *_initialList) {
auto wordsCount = item->words.size();
if (maxWordsCount < wordsCount) {
maxWordsCount = wordsCount;
}
}
_currentItemWordsUsedMap = std::vector<small>(maxWordsCount, 0);
}
void Completer::filterInitialList() {
initWordsTracking();
for (auto item : *_initialList) {
_currentItemWords = string_span(item->words);
_currentItemWordsUsedCount = 1;
if (matchQueryForCurrentItem()) {
addResult(item);
}
_currentItemWordsUsedCount = 0;
}
}
bool Completer::matchQueryForCurrentItem() {
Expects(_currentItemWords.size() != 0);
if (_currentItemWords.size() < 2) {
return startsWithQuery(*_currentItemWords.begin());
}
return matchQueryTailStartingFrom(0);
}
bool Completer::startsWithQuery(utf16string word) {
if (word.size() < _query.size()) {
return false;
}
for (auto i = std::size_t(0), size = _query.size(); i != size; ++i) {
if (word[i] != _query[i]) {
return false;
}
}
return true;
}
bool Completer::isExactMatch(utf16string replacement) {
if (replacement.size() != _initialQuery.size() + 1) {
return false;
}
for (auto i = std::size_t(0), size = _initialQuery.size(); i != size; ++i) {
if (replacement[i] != _initialQuery[i]) {
return false;
}
}
return true;
}
bool Completer::matchQueryTailStartingFrom(int position) {
auto charsLeftToMatch = (_querySize - position);
if (!charsLeftToMatch) {
return true;
}
auto firstCharToMatch = *(_queryBegin + position);
auto foundWords = findWordsStartingWith(firstCharToMatch);
for (auto word = foundWords.begin(), foundWordsEnd = word + foundWords.size(); word != foundWordsEnd; ++word) {
auto wordIndex = word - _currentItemWords.begin();
if (auto guard = UsedWordGuard(_currentItemWordsUsedMap, wordIndex)) {
++_currentItemWordsUsedCount;
auto equalCharsCount = findEqualCharsCount(position, word);
for (auto check = equalCharsCount; check != 0; --check) {
if (matchQueryTailStartingFrom(position + check)) {
return true;
}
}
--_currentItemWordsUsedCount;
}
}
return false;
}
int Completer::findEqualCharsCount(int position, const utf16string *word) {
auto charsLeft = (_querySize - position);
auto wordBegin = word->data();
auto wordSize = word->size();
auto possibleEqualCharsCount = (charsLeft > wordSize ? wordSize : charsLeft);
for (auto equalTill = 1; equalTill != possibleEqualCharsCount; ++equalTill) {
auto wordCh = *(wordBegin + equalTill);
auto queryCh = *(_queryBegin + position + equalTill);
if (wordCh != queryCh) {
return equalTill;
}
}
return possibleEqualCharsCount;
}
std::vector<Suggestion> Completer::prepareResult() {
auto firstCharOfQuery = _query[0];
auto reorder = [&](auto &&predicate) {
std::stable_partition(
std::begin(_result),
std::end(_result),
std::forward<decltype(predicate)>(predicate));
};
reorder([firstCharOfQuery](Result &result) {
auto firstCharAfterColon = result.replacement->replacement[1];
return (firstCharAfterColon == firstCharOfQuery);
});
reorder([](Result &result) {
return (result.wordsUsed < 2);
});
reorder([](Result &result) {
return (result.wordsUsed < 3);
});
reorder([&](Result &result) {
return isExactMatch(result.replacement->replacement);
});
auto result = std::vector<Suggestion>();
result.reserve(_result.size());
for (auto &item : _result) {
result.emplace_back(
item.replacement->emoji,
item.replacement->replacement,
item.replacement->replacement);
}
return result;
}
string_span Completer::findWordsStartingWith(utf16char ch) {
auto begin = std::lower_bound(
std::begin(_currentItemWords),
std::end(_currentItemWords),
ch,
[](utf16string word, utf16char ch) { return word[0] < ch; });
auto end = std::upper_bound(
std::begin(_currentItemWords),
std::end(_currentItemWords),
ch,
[](utf16char ch, utf16string word) { return ch < word[0]; });
return _currentItemWords.subspan(
begin - _currentItemWords.begin(),
end - begin);
}
} // namespace
std::vector<Suggestion> GetSuggestions(utf16string query) {
return Completer(query).resolve();
}
int GetSuggestionMaxLength() {
return internal::kReplacementMaxLength;
}
} // namespace Emoji
} // namespace Ui