/* This file is part of Telegram Desktop, the official desktop version of Telegram messaging app, see https://telegram.org Telegram Desktop is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. It is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. In addition, as a special exception, the copyright holders give permission to link the code of portions of this program with the OpenSSL library. Full license: https://github.com/telegramdesktop/tdesktop/blob/master/LICENSE Copyright (c) 2014-2017 John Preston, https://desktop.telegram.org */ #include "ui/text/text_entity.h" #include "auth_session.h" #include "lang/lang_tag.h" namespace TextUtilities { namespace { QString ExpressionDomain() { // Matches any domain name, containing at least one '.', including "file.txt". 
return QString::fromUtf8("(?|'\"\\[\\]\\{\\}\\~\\!\\?\\%\\^\\(\\)\\-\\+=\\x10") + additional;
}

// Pattern for a hashtag: start of text or one separator character, then '#'
// followed by 2..64 word characters, then a non-word character or end of text.
// The characters "`", "*" and "/" are passed to ExpressionSeparators() as the
// additional separators.
QString ExpressionHashtag() {
	return qsl("(^|[") + ExpressionSeparators(qsl("`\\*/")) + qsl("])#[\\w]{2,64}([\\W]|$)");
}

// Pattern for a mention: start of text or one separator character, then '@'
// followed by 1..32 characters from [A-Za-z_0-9], then a non-word character
// or end of text.
QString ExpressionMention() {
	return qsl("(^|[") + ExpressionSeparators(qsl("`\\*/")) + qsl("])@[A-Za-z_0-9]{1,32}([\\W]|$)");
}

// Pattern for a bot command: start of text or one separator character, then
// '/' followed by 1..64 characters from [A-Za-z_0-9], an optional "@name"
// suffix of 5..32 such characters, then a non-word character or end of text.
// Unlike the hashtag / mention patterns, '/' is not passed as an additional
// separator here.
QString ExpressionBotCommand() {
	return qsl("(^|[") + ExpressionSeparators(qsl("`\\*")) + qsl("])/[A-Za-z_0-9]{1,64}(@[A-Za-z_0-9]{5,32})?([\\W]|$)");
}

// Pattern for markdown bold: "**", a lazy run of at least one character
// (newlines included), "**" again. Both sides must be bounded by a separator
// character or by the start / end of the text.
QString ExpressionMarkdownBold() {
	auto separators = ExpressionSeparators(qsl("`/"));
	return qsl("(^|[") + separators + qsl("])(\\*\\*)[\\s\\S]+?(\\*\\*)([") + separators + qsl("]|$)");
}

// Pattern for markdown italic: same shape as the bold pattern, but delimited
// by "__" and with "*" added to the additional separators.
QString ExpressionMarkdownItalic() {
	auto separators = ExpressionSeparators(qsl("`\\*/"));
	return qsl("(^|[") + separators + qsl("])(__)[\\s\\S]+?(__)([") + separators + qsl("]|$)");
}

// Pattern for inline monospace: a single backtick, a lazy run of at least one
// non-newline character, a closing backtick; bounded by separators.
QString ExpressionMarkdownMonoInline() { // code
	auto separators = ExpressionSeparators(qsl("\\*/"));
	return qsl("(^|[") + separators + qsl("])(`)[^\\n]+?(`)([") + separators + qsl("]|$)");
}

// Pattern for a monospace block: three backticks (a fourth one is optional),
// a lazy run of at least one character (newlines included), and the same
// closing fence; bounded by separators.
QString ExpressionMarkdownMonoBlock() { // pre
	auto separators = ExpressionSeparators(qsl("\\*/"));
	return qsl("(^|[") + separators + qsl("])(````?)[\\s\\S]+?(````?)([") + separators + qsl("]|$)");
}

// Compiles a pattern with QRegularExpression::UseUnicodePropertiesOption set.
QRegularExpression CreateRegExp(const QString &expression) {
	return QRegularExpression(expression, QRegularExpression::UseUnicodePropertiesOption);
}

// Builds the set of accepted URL schemes. Each scheme is stored as the
// hashCrc32() value of its raw QChar buffer (size() * sizeof(QChar) bytes).
// NOTE(review): the element type of QSet is not visible in this view.
QSet CreateValidProtocols() {
	auto result = QSet();
	auto addOne = [&result](const QString &string) {
		result.insert(hashCrc32(string.constData(), string.size() * sizeof(QChar)));
	};

	addOne(qsl("itmss")); // itunes
	addOne(qsl("http"));
	addOne(qsl("https"));
	addOne(qsl("ftp"));
	addOne(qsl("tg")); // local urls

	return result;
}

// Builds the set of accepted top-level domains, stored the same way as the
// protocols above: as hashCrc32() values of each string's raw QChar buffer.
QSet CreateValidTopDomains() {
	auto result = QSet();
	auto addOne = [&result](const QString &string) {
		result.insert(hashCrc32(string.constData(), string.size() * sizeof(QChar)));
	};

	// Two-letter domains.
	addOne(qsl("ac")); addOne(qsl("ad")); addOne(qsl("ae")); addOne(qsl("af"));
	addOne(qsl("ag")); addOne(qsl("ai")); addOne(qsl("al")); addOne(qsl("am"));
	addOne(qsl("an")); addOne(qsl("ao")); addOne(qsl("aq")); addOne(qsl("ar"));
	addOne(qsl("as")); addOne(qsl("at")); addOne(qsl("au")); addOne(qsl("aw"));
	addOne(qsl("ax")); addOne(qsl("az")); addOne(qsl("ba")); addOne(qsl("bb"));
	addOne(qsl("bd")); addOne(qsl("be")); addOne(qsl("bf")); addOne(qsl("bg"));
	addOne(qsl("bh")); addOne(qsl("bi")); addOne(qsl("bj")); addOne(qsl("bm"));
	addOne(qsl("bn")); addOne(qsl("bo")); addOne(qsl("br")); addOne(qsl("bs"));
	addOne(qsl("bt")); addOne(qsl("bv")); addOne(qsl("bw")); addOne(qsl("by"));
	addOne(qsl("bz")); addOne(qsl("ca")); addOne(qsl("cc")); addOne(qsl("cd"));
	addOne(qsl("cf")); addOne(qsl("cg")); addOne(qsl("ch")); addOne(qsl("ci"));
	addOne(qsl("ck")); addOne(qsl("cl")); addOne(qsl("cm")); addOne(qsl("cn"));
	addOne(qsl("co")); addOne(qsl("cr")); addOne(qsl("cu")); addOne(qsl("cv"));
	addOne(qsl("cx")); addOne(qsl("cy")); addOne(qsl("cz")); addOne(qsl("de"));
	addOne(qsl("dj")); addOne(qsl("dk")); addOne(qsl("dm")); addOne(qsl("do"));
	addOne(qsl("dz")); addOne(qsl("ec")); addOne(qsl("ee")); addOne(qsl("eg"));
	addOne(qsl("eh")); addOne(qsl("er")); addOne(qsl("es")); addOne(qsl("et"));
	addOne(qsl("eu")); addOne(qsl("fi")); addOne(qsl("fj")); addOne(qsl("fk"));
	addOne(qsl("fm")); addOne(qsl("fo")); addOne(qsl("fr")); addOne(qsl("ga"));
	addOne(qsl("gd")); addOne(qsl("ge")); addOne(qsl("gf")); addOne(qsl("gg"));
	addOne(qsl("gh")); addOne(qsl("gi")); addOne(qsl("gl")); addOne(qsl("gm"));
	addOne(qsl("gn")); addOne(qsl("gp")); addOne(qsl("gq")); addOne(qsl("gr"));
	addOne(qsl("gs")); addOne(qsl("gt")); addOne(qsl("gu")); addOne(qsl("gw"));
	addOne(qsl("gy")); addOne(qsl("hk")); addOne(qsl("hm")); addOne(qsl("hn"));
	addOne(qsl("hr")); addOne(qsl("ht")); addOne(qsl("hu")); addOne(qsl("id"));
	addOne(qsl("ie")); addOne(qsl("il")); addOne(qsl("im")); addOne(qsl("in"));
	addOne(qsl("io")); addOne(qsl("iq")); addOne(qsl("ir")); addOne(qsl("is"));
	addOne(qsl("it"));
	addOne(qsl("je")); addOne(qsl("jm")); addOne(qsl("jo")); addOne(qsl("jp"));
	addOne(qsl("ke")); addOne(qsl("kg")); addOne(qsl("kh")); addOne(qsl("ki"));
	addOne(qsl("km")); addOne(qsl("kn")); addOne(qsl("kp")); addOne(qsl("kr"));
	addOne(qsl("kw")); addOne(qsl("ky")); addOne(qsl("kz")); addOne(qsl("la"));
	addOne(qsl("lb")); addOne(qsl("lc")); addOne(qsl("li")); addOne(qsl("lk"));
	addOne(qsl("lr")); addOne(qsl("ls")); addOne(qsl("lt")); addOne(qsl("lu"));
	addOne(qsl("lv")); addOne(qsl("ly")); addOne(qsl("ma")); addOne(qsl("mc"));
	addOne(qsl("md")); addOne(qsl("me")); addOne(qsl("mg")); addOne(qsl("mh"));
	addOne(qsl("mk")); addOne(qsl("ml")); addOne(qsl("mm")); addOne(qsl("mn"));
	addOne(qsl("mo")); addOne(qsl("mp")); addOne(qsl("mq")); addOne(qsl("mr"));
	addOne(qsl("ms")); addOne(qsl("mt")); addOne(qsl("mu")); addOne(qsl("mv"));
	addOne(qsl("mw")); addOne(qsl("mx")); addOne(qsl("my")); addOne(qsl("mz"));
	addOne(qsl("na")); addOne(qsl("nc")); addOne(qsl("ne")); addOne(qsl("nf"));
	addOne(qsl("ng")); addOne(qsl("ni")); addOne(qsl("nl")); addOne(qsl("no"));
	addOne(qsl("np")); addOne(qsl("nr")); addOne(qsl("nu")); addOne(qsl("nz"));
	addOne(qsl("om")); addOne(qsl("pa")); addOne(qsl("pe")); addOne(qsl("pf"));
	addOne(qsl("pg")); addOne(qsl("ph")); addOne(qsl("pk")); addOne(qsl("pl"));
	addOne(qsl("pm")); addOne(qsl("pn")); addOne(qsl("pr")); addOne(qsl("ps"));
	addOne(qsl("pt")); addOne(qsl("pw")); addOne(qsl("py")); addOne(qsl("qa"));
	addOne(qsl("re")); addOne(qsl("ro")); addOne(qsl("ru")); addOne(qsl("rs"));
	addOne(qsl("rw")); addOne(qsl("sa")); addOne(qsl("sb")); addOne(qsl("sc"));
	addOne(qsl("sd")); addOne(qsl("se")); addOne(qsl("sg")); addOne(qsl("sh"));
	addOne(qsl("si")); addOne(qsl("sj")); addOne(qsl("sk")); addOne(qsl("sl"));
	addOne(qsl("sm")); addOne(qsl("sn")); addOne(qsl("so")); addOne(qsl("sr"));
	addOne(qsl("ss")); addOne(qsl("st")); addOne(qsl("su")); addOne(qsl("sv"));
	addOne(qsl("sx")); addOne(qsl("sy")); addOne(qsl("sz")); addOne(qsl("tc"));
	addOne(qsl("td"));
	addOne(qsl("tf")); addOne(qsl("tg")); addOne(qsl("th")); addOne(qsl("tj"));
	addOne(qsl("tk")); addOne(qsl("tl")); addOne(qsl("tm")); addOne(qsl("tn"));
	addOne(qsl("to")); addOne(qsl("tp")); addOne(qsl("tr")); addOne(qsl("tt"));
	addOne(qsl("tv")); addOne(qsl("tw")); addOne(qsl("tz")); addOne(qsl("ua"));
	addOne(qsl("ug")); addOne(qsl("uk")); addOne(qsl("um")); addOne(qsl("us"));
	addOne(qsl("uy")); addOne(qsl("uz")); addOne(qsl("va")); addOne(qsl("vc"));
	addOne(qsl("ve")); addOne(qsl("vg")); addOne(qsl("vi")); addOne(qsl("vn"));
	addOne(qsl("vu")); addOne(qsl("wf")); addOne(qsl("ws")); addOne(qsl("ye"));
	addOne(qsl("yt")); addOne(qsl("yu")); addOne(qsl("za")); addOne(qsl("zm"));
	addOne(qsl("zw"));

	// Longer ASCII domains.
	addOne(qsl("arpa")); addOne(qsl("aero")); addOne(qsl("asia")); addOne(qsl("biz"));
	addOne(qsl("cat")); addOne(qsl("com")); addOne(qsl("coop")); addOne(qsl("info"));
	addOne(qsl("int")); addOne(qsl("jobs")); addOne(qsl("mobi")); addOne(qsl("museum"));
	addOne(qsl("name")); addOne(qsl("net")); addOne(qsl("org")); addOne(qsl("post"));
	addOne(qsl("pro")); addOne(qsl("tel")); addOne(qsl("travel")); addOne(qsl("xxx"));
	addOne(qsl("edu")); addOne(qsl("gov")); addOne(qsl("mil")); addOne(qsl("local"));

	// Domains written with the "xn--" prefix.
	addOne(qsl("xn--lgbbat1ad8j"));
	addOne(qsl("xn--54b7fta0cc"));
	addOne(qsl("xn--fiqs8s"));
	addOne(qsl("xn--fiqz9s"));
	addOne(qsl("xn--wgbh1c"));
	addOne(qsl("xn--node"));
	addOne(qsl("xn--j6w193g"));
	addOne(qsl("xn--h2brj9c"));
	addOne(qsl("xn--mgbbh1a71e"));
	addOne(qsl("xn--fpcrj9c3d"));
	addOne(qsl("xn--gecrj9c"));
	addOne(qsl("xn--s9brj9c"));
	addOne(qsl("xn--xkc2dl3a5ee0h"));
	addOne(qsl("xn--45brj9c"));
	addOne(qsl("xn--mgba3a4f16a"));
	addOne(qsl("xn--mgbayh7gpa"));
	addOne(qsl("xn--80ao21a"));
	addOne(qsl("xn--mgbx4cd0ab"));
	addOne(qsl("xn--l1acc"));
	addOne(qsl("xn--mgbc0a9azcg"));
	addOne(qsl("xn--mgb9awbf"));
	addOne(qsl("xn--mgbai9azgqp6j"));
	addOne(qsl("xn--ygbi2ammx"));
	addOne(qsl("xn--wgbl6a"));
	addOne(qsl("xn--p1ai"));
	addOne(qsl("xn--mgberp4a5d4ar"));
	addOne(qsl("xn--90a3ac"));
addOne(qsl("xn--yfro4i67o")); addOne(qsl("xn--clchc0ea0b2g2a9gcd")); addOne(qsl("xn--3e0b707e")); addOne(qsl("xn--fzc2c9e2c")); addOne(qsl("xn--xkc2al3hye2a")); addOne(qsl("xn--mgbtf8fl")); addOne(qsl("xn--kprw13d")); addOne(qsl("xn--kpry57d")); addOne(qsl("xn--o3cw4h")); addOne(qsl("xn--pgbs0dh")); addOne(qsl("xn--j1amh")); addOne(qsl("xn--mgbaam7a8h")); addOne(qsl("xn--mgb2ddes")); addOne(qsl("xn--ogbpf8fl")); addOne(QString::fromUtf8("\xd1\x80\xd1\x84")); return result; } // accent char list taken from https://github.com/aristus/accent-folding inline QChar RemoveOneAccent(uint32 code) { switch (code) { case 7834: return QChar(97); case 193: return QChar(97); case 225: return QChar(97); case 192: return QChar(97); case 224: return QChar(97); case 258: return QChar(97); case 259: return QChar(97); case 7854: return QChar(97); case 7855: return QChar(97); case 7856: return QChar(97); case 7857: return QChar(97); case 7860: return QChar(97); case 7861: return QChar(97); case 7858: return QChar(97); case 7859: return QChar(97); case 194: return QChar(97); case 226: return QChar(97); case 7844: return QChar(97); case 7845: return QChar(97); case 7846: return QChar(97); case 7847: return QChar(97); case 7850: return QChar(97); case 7851: return QChar(97); case 7848: return QChar(97); case 7849: return QChar(97); case 461: return QChar(97); case 462: return QChar(97); case 197: return QChar(97); case 229: return QChar(97); case 506: return QChar(97); case 507: return QChar(97); case 196: return QChar(97); case 228: return QChar(97); case 478: return QChar(97); case 479: return QChar(97); case 195: return QChar(97); case 227: return QChar(97); case 550: return QChar(97); case 551: return QChar(97); case 480: return QChar(97); case 481: return QChar(97); case 260: return QChar(97); case 261: return QChar(97); case 256: return QChar(97); case 257: return QChar(97); case 7842: return QChar(97); case 7843: return QChar(97); case 512: return QChar(97); case 513: return 
QChar(97); case 514: return QChar(97); case 515: return QChar(97); case 7840: return QChar(97); case 7841: return QChar(97); case 7862: return QChar(97); case 7863: return QChar(97); case 7852: return QChar(97); case 7853: return QChar(97); case 7680: return QChar(97); case 7681: return QChar(97); case 570: return QChar(97); case 11365: return QChar(97); case 508: return QChar(97); case 509: return QChar(97); case 482: return QChar(97); case 483: return QChar(97); case 7682: return QChar(98); case 7683: return QChar(98); case 7684: return QChar(98); case 7685: return QChar(98); case 7686: return QChar(98); case 7687: return QChar(98); case 579: return QChar(98); case 384: return QChar(98); case 7532: return QChar(98); case 385: return QChar(98); case 595: return QChar(98); case 386: return QChar(98); case 387: return QChar(98); case 262: return QChar(99); case 263: return QChar(99); case 264: return QChar(99); case 265: return QChar(99); case 268: return QChar(99); case 269: return QChar(99); case 266: return QChar(99); case 267: return QChar(99); case 199: return QChar(99); case 231: return QChar(99); case 7688: return QChar(99); case 7689: return QChar(99); case 571: return QChar(99); case 572: return QChar(99); case 391: return QChar(99); case 392: return QChar(99); case 597: return QChar(99); case 270: return QChar(100); case 271: return QChar(100); case 7690: return QChar(100); case 7691: return QChar(100); case 7696: return QChar(100); case 7697: return QChar(100); case 7692: return QChar(100); case 7693: return QChar(100); case 7698: return QChar(100); case 7699: return QChar(100); case 7694: return QChar(100); case 7695: return QChar(100); case 272: return QChar(100); case 273: return QChar(100); case 7533: return QChar(100); case 393: return QChar(100); case 598: return QChar(100); case 394: return QChar(100); case 599: return QChar(100); case 395: return QChar(100); case 396: return QChar(100); case 545: return QChar(100); case 240: return QChar(100); 
case 201: return QChar(101); case 399: return QChar(101); case 398: return QChar(101); case 477: return QChar(101); case 233: return QChar(101); case 200: return QChar(101); case 232: return QChar(101); case 276: return QChar(101); case 277: return QChar(101); case 202: return QChar(101); case 234: return QChar(101); case 7870: return QChar(101); case 7871: return QChar(101); case 7872: return QChar(101); case 7873: return QChar(101); case 7876: return QChar(101); case 7877: return QChar(101); case 7874: return QChar(101); case 7875: return QChar(101); case 282: return QChar(101); case 283: return QChar(101); case 203: return QChar(101); case 235: return QChar(101); case 7868: return QChar(101); case 7869: return QChar(101); case 278: return QChar(101); case 279: return QChar(101); case 552: return QChar(101); case 553: return QChar(101); case 7708: return QChar(101); case 7709: return QChar(101); case 280: return QChar(101); case 281: return QChar(101); case 274: return QChar(101); case 275: return QChar(101); case 7702: return QChar(101); case 7703: return QChar(101); case 7700: return QChar(101); case 7701: return QChar(101); case 7866: return QChar(101); case 7867: return QChar(101); case 516: return QChar(101); case 517: return QChar(101); case 518: return QChar(101); case 519: return QChar(101); case 7864: return QChar(101); case 7865: return QChar(101); case 7878: return QChar(101); case 7879: return QChar(101); case 7704: return QChar(101); case 7705: return QChar(101); case 7706: return QChar(101); case 7707: return QChar(101); case 582: return QChar(101); case 583: return QChar(101); case 602: return QChar(101); case 605: return QChar(101); case 7710: return QChar(102); case 7711: return QChar(102); case 7534: return QChar(102); case 401: return QChar(102); case 402: return QChar(102); case 500: return QChar(103); case 501: return QChar(103); case 286: return QChar(103); case 287: return QChar(103); case 284: return QChar(103); case 285: return 
QChar(103); case 486: return QChar(103); case 487: return QChar(103); case 288: return QChar(103); case 289: return QChar(103); case 290: return QChar(103); case 291: return QChar(103); case 7712: return QChar(103); case 7713: return QChar(103); case 484: return QChar(103); case 485: return QChar(103); case 403: return QChar(103); case 608: return QChar(103); case 292: return QChar(104); case 293: return QChar(104); case 542: return QChar(104); case 543: return QChar(104); case 7718: return QChar(104); case 7719: return QChar(104); case 7714: return QChar(104); case 7715: return QChar(104); case 7720: return QChar(104); case 7721: return QChar(104); case 7716: return QChar(104); case 7717: return QChar(104); case 7722: return QChar(104); case 7723: return QChar(104); case 817: return QChar(104); case 7830: return QChar(104); case 294: return QChar(104); case 295: return QChar(104); case 11367: return QChar(104); case 11368: return QChar(104); case 205: return QChar(105); case 237: return QChar(105); case 204: return QChar(105); case 236: return QChar(105); case 300: return QChar(105); case 301: return QChar(105); case 206: return QChar(105); case 238: return QChar(105); case 463: return QChar(105); case 464: return QChar(105); case 207: return QChar(105); case 239: return QChar(105); case 7726: return QChar(105); case 7727: return QChar(105); case 296: return QChar(105); case 297: return QChar(105); case 304: return QChar(105); case 302: return QChar(105); case 303: return QChar(105); case 298: return QChar(105); case 299: return QChar(105); case 7880: return QChar(105); case 7881: return QChar(105); case 520: return QChar(105); case 521: return QChar(105); case 522: return QChar(105); case 523: return QChar(105); case 7882: return QChar(105); case 7883: return QChar(105); case 7724: return QChar(105); case 7725: return QChar(105); case 305: return QChar(105); case 407: return QChar(105); case 616: return QChar(105); case 308: return QChar(106); case 309: return 
QChar(106); case 780: return QChar(106); case 496: return QChar(106); case 567: return QChar(106); case 584: return QChar(106); case 585: return QChar(106); case 669: return QChar(106); case 607: return QChar(106); case 644: return QChar(106); case 7728: return QChar(107); case 7729: return QChar(107); case 488: return QChar(107); case 489: return QChar(107); case 310: return QChar(107); case 311: return QChar(107); case 7730: return QChar(107); case 7731: return QChar(107); case 7732: return QChar(107); case 7733: return QChar(107); case 408: return QChar(107); case 409: return QChar(107); case 11369: return QChar(107); case 11370: return QChar(107); case 313: return QChar(97); case 314: return QChar(108); case 317: return QChar(108); case 318: return QChar(108); case 315: return QChar(108); case 316: return QChar(108); case 7734: return QChar(108); case 7735: return QChar(108); case 7736: return QChar(108); case 7737: return QChar(108); case 7740: return QChar(108); case 7741: return QChar(108); case 7738: return QChar(108); case 7739: return QChar(108); case 321: return QChar(108); case 322: return QChar(108); case 803: return QChar(108); case 319: return QChar(108); case 320: return QChar(108); case 573: return QChar(108); case 410: return QChar(108); case 11360: return QChar(108); case 11361: return QChar(108); case 11362: return QChar(108); case 619: return QChar(108); case 620: return QChar(108); case 621: return QChar(108); case 564: return QChar(108); case 7742: return QChar(109); case 7743: return QChar(109); case 7744: return QChar(109); case 7745: return QChar(109); case 7746: return QChar(109); case 7747: return QChar(109); case 625: return QChar(109); case 323: return QChar(110); case 324: return QChar(110); case 504: return QChar(110); case 505: return QChar(110); case 327: return QChar(110); case 328: return QChar(110); case 209: return QChar(110); case 241: return QChar(110); case 7748: return QChar(110); case 7749: return QChar(110); case 325: 
return QChar(110); case 326: return QChar(110); case 7750: return QChar(110); case 7751: return QChar(110); case 7754: return QChar(110); case 7755: return QChar(110); case 7752: return QChar(110); case 7753: return QChar(110); case 413: return QChar(110); case 626: return QChar(110); case 544: return QChar(110); case 414: return QChar(110); case 627: return QChar(110); case 565: return QChar(110); case 776: return QChar(116); case 211: return QChar(111); case 243: return QChar(111); case 210: return QChar(111); case 242: return QChar(111); case 334: return QChar(111); case 335: return QChar(111); case 212: return QChar(111); case 244: return QChar(111); case 7888: return QChar(111); case 7889: return QChar(111); case 7890: return QChar(111); case 7891: return QChar(111); case 7894: return QChar(111); case 7895: return QChar(111); case 7892: return QChar(111); case 7893: return QChar(111); case 465: return QChar(111); case 466: return QChar(111); case 214: return QChar(111); case 246: return QChar(111); case 554: return QChar(111); case 555: return QChar(111); case 336: return QChar(111); case 337: return QChar(111); case 213: return QChar(111); case 245: return QChar(111); case 7756: return QChar(111); case 7757: return QChar(111); case 7758: return QChar(111); case 7759: return QChar(111); case 556: return QChar(111); case 557: return QChar(111); case 558: return QChar(111); case 559: return QChar(111); case 560: return QChar(111); case 561: return QChar(111); case 216: return QChar(111); case 248: return QChar(111); case 510: return QChar(111); case 511: return QChar(111); case 490: return QChar(111); case 491: return QChar(111); case 492: return QChar(111); case 493: return QChar(111); case 332: return QChar(111); case 333: return QChar(111); case 7762: return QChar(111); case 7763: return QChar(111); case 7760: return QChar(111); case 7761: return QChar(111); case 7886: return QChar(111); case 7887: return QChar(111); case 524: return QChar(111); case 525: 
return QChar(111); case 526: return QChar(111); case 527: return QChar(111); case 416: return QChar(111); case 417: return QChar(111); case 7898: return QChar(111); case 7899: return QChar(111); case 7900: return QChar(111); case 7901: return QChar(111); case 7904: return QChar(111); case 7905: return QChar(111); case 7902: return QChar(111); case 7903: return QChar(111); case 7906: return QChar(111); case 7907: return QChar(111); case 7884: return QChar(111); case 7885: return QChar(111); case 7896: return QChar(111); case 7897: return QChar(111); case 415: return QChar(111); case 629: return QChar(111); case 7764: return QChar(112); case 7765: return QChar(112); case 7766: return QChar(112); case 7767: return QChar(112); case 11363: return QChar(112); case 420: return QChar(112); case 421: return QChar(112); case 771: return QChar(112); case 672: return QChar(113); case 586: return QChar(113); case 587: return QChar(113); case 340: return QChar(114); case 341: return QChar(114); case 344: return QChar(114); case 345: return QChar(114); case 7768: return QChar(114); case 7769: return QChar(114); case 342: return QChar(114); case 343: return QChar(114); case 528: return QChar(114); case 529: return QChar(114); case 530: return QChar(114); case 531: return QChar(114); case 7770: return QChar(114); case 7771: return QChar(114); case 7772: return QChar(114); case 7773: return QChar(114); case 7774: return QChar(114); case 7775: return QChar(114); case 588: return QChar(114); case 589: return QChar(114); case 7538: return QChar(114); case 636: return QChar(114); case 11364: return QChar(114); case 637: return QChar(114); case 638: return QChar(114); case 7539: return QChar(114); case 223: return QChar(115); case 346: return QChar(115); case 347: return QChar(115); case 7780: return QChar(115); case 7781: return QChar(115); case 348: return QChar(115); case 349: return QChar(115); case 352: return QChar(115); case 353: return QChar(115); case 7782: return QChar(115); 
case 7783: return QChar(115); case 7776: return QChar(115); case 7777: return QChar(115); case 7835: return QChar(115); case 350: return QChar(115); case 351: return QChar(115); case 7778: return QChar(115); case 7779: return QChar(115); case 7784: return QChar(115); case 7785: return QChar(115); case 536: return QChar(115); case 537: return QChar(115); case 642: return QChar(115); case 809: return QChar(115); case 222: return QChar(116); case 254: return QChar(116); case 356: return QChar(116); case 357: return QChar(116); case 7831: return QChar(116); case 7786: return QChar(116); case 7787: return QChar(116); case 354: return QChar(116); case 355: return QChar(116); case 7788: return QChar(116); case 7789: return QChar(116); case 538: return QChar(116); case 539: return QChar(116); case 7792: return QChar(116); case 7793: return QChar(116); case 7790: return QChar(116); case 7791: return QChar(116); case 358: return QChar(116); case 359: return QChar(116); case 574: return QChar(116); case 11366: return QChar(116); case 7541: return QChar(116); case 427: return QChar(116); case 428: return QChar(116); case 429: return QChar(116); case 430: return QChar(116); case 648: return QChar(116); case 566: return QChar(116); case 218: return QChar(117); case 250: return QChar(117); case 217: return QChar(117); case 249: return QChar(117); case 364: return QChar(117); case 365: return QChar(117); case 219: return QChar(117); case 251: return QChar(117); case 467: return QChar(117); case 468: return QChar(117); case 366: return QChar(117); case 367: return QChar(117); case 220: return QChar(117); case 252: return QChar(117); case 471: return QChar(117); case 472: return QChar(117); case 475: return QChar(117); case 476: return QChar(117); case 473: return QChar(117); case 474: return QChar(117); case 469: return QChar(117); case 470: return QChar(117); case 368: return QChar(117); case 369: return QChar(117); case 360: return QChar(117); case 361: return QChar(117); case 
7800: return QChar(117); case 7801: return QChar(117); case 370: return QChar(117); case 371: return QChar(117); case 362: return QChar(117); case 363: return QChar(117); case 7802: return QChar(117); case 7803: return QChar(117); case 7910: return QChar(117); case 7911: return QChar(117); case 532: return QChar(117); case 533: return QChar(117); case 534: return QChar(117); case 535: return QChar(117); case 431: return QChar(117); case 432: return QChar(117); case 7912: return QChar(117); case 7913: return QChar(117); case 7914: return QChar(117); case 7915: return QChar(117); case 7918: return QChar(117); case 7919: return QChar(117); case 7916: return QChar(117); case 7917: return QChar(117); case 7920: return QChar(117); case 7921: return QChar(117); case 7908: return QChar(117); case 7909: return QChar(117); case 7794: return QChar(117); case 7795: return QChar(117); case 7798: return QChar(117); case 7799: return QChar(117); case 7796: return QChar(117); case 7797: return QChar(117); case 580: return QChar(117); case 649: return QChar(117); case 7804: return QChar(118); case 7805: return QChar(118); case 7806: return QChar(118); case 7807: return QChar(118); case 434: return QChar(118); case 651: return QChar(118); case 7810: return QChar(119); case 7811: return QChar(119); case 7808: return QChar(119); case 7809: return QChar(119); case 372: return QChar(119); case 373: return QChar(119); case 778: return QChar(121); case 7832: return QChar(119); case 7812: return QChar(119); case 7813: return QChar(119); case 7814: return QChar(119); case 7815: return QChar(119); case 7816: return QChar(119); case 7817: return QChar(119); case 7820: return QChar(120); case 7821: return QChar(120); case 7818: return QChar(120); case 7819: return QChar(120); case 221: return QChar(121); case 253: return QChar(121); case 7922: return QChar(121); case 7923: return QChar(121); case 374: return QChar(121); case 375: return QChar(121); case 7833: return QChar(121); case 376: 
return QChar(121); case 255: return QChar(121); case 7928: return QChar(121); case 7929: return QChar(121); case 7822: return QChar(121); case 7823: return QChar(121); case 562: return QChar(121); case 563: return QChar(121); case 7926: return QChar(121); case 7927: return QChar(121); case 7924: return QChar(121); case 7925: return QChar(121); case 655: return QChar(121); case 590: return QChar(121); case 591: return QChar(121); case 435: return QChar(121); case 436: return QChar(121); case 377: return QChar(122); case 378: return QChar(122); case 7824: return QChar(122); case 7825: return QChar(122); case 381: return QChar(122); case 382: return QChar(122); case 379: return QChar(122); case 380: return QChar(122); case 7826: return QChar(122); case 7827: return QChar(122); case 7828: return QChar(122); case 7829: return QChar(122); case 437: return QChar(122); case 438: return QChar(122); case 548: return QChar(122); case 549: return QChar(122); case 656: return QChar(122); case 657: return QChar(122); case 11371: return QChar(122); case 11372: return QChar(122); case 494: return QChar(122); case 495: return QChar(122); case 442: return QChar(122); case 65298: return QChar(50); case 65302: return QChar(54); case 65314: return QChar(66); case 65318: return QChar(70); case 65322: return QChar(74); case 65326: return QChar(78); case 65330: return QChar(82); case 65334: return QChar(86); case 65338: return QChar(90); case 65346: return QChar(98); case 65350: return QChar(102); case 65354: return QChar(106); case 65358: return QChar(110); case 65362: return QChar(114); case 65366: return QChar(118); case 65370: return QChar(122); case 65297: return QChar(49); case 65301: return QChar(53); case 65305: return QChar(57); case 65313: return QChar(65); case 65317: return QChar(69); case 65321: return QChar(73); case 65325: return QChar(77); case 65329: return QChar(81); case 65333: return QChar(85); case 65337: return QChar(89); case 65345: return QChar(97); case 65349: 
return QChar(101); case 65353: return QChar(105); case 65357: return QChar(109); case 65361: return QChar(113); case 65365: return QChar(117); case 65369: return QChar(121); case 65296: return QChar(48); case 65300: return QChar(52); case 65304: return QChar(56); case 65316: return QChar(68); case 65320: return QChar(72); case 65324: return QChar(76); case 65328: return QChar(80); case 65332: return QChar(84); case 65336: return QChar(88); case 65348: return QChar(100); case 65352: return QChar(104); case 65356: return QChar(108); case 65360: return QChar(112); case 65364: return QChar(116); case 65368: return QChar(120); case 65299: return QChar(51); case 65303: return QChar(55); case 65315: return QChar(67); case 65319: return QChar(71); case 65323: return QChar(75); case 65327: return QChar(79); case 65331: return QChar(83); case 65335: return QChar(87); case 65347: return QChar(99); case 65351: return QChar(103); case 65355: return QChar(107); case 65359: return QChar(111); case 65363: return QChar(115); case 65367: return QChar(119); case 1105: return QChar(1077); default: break; } return QChar(0); } const QRegularExpression &RegExpWordSplit() { static const auto result = QRegularExpression (qsl("[\\@\\s\\-\\+\\(\\)\\[\\]\\{\\}\\<\\>\\,\\.\\:\\!\\_\\;\\\"\\'\\x0]")); return result; } } // namespace const QRegularExpression &RegExpDomain() { static const auto result = CreateRegExp(ExpressionDomain()); return result; } const QRegularExpression &RegExpDomainExplicit() { static const auto result = CreateRegExp(ExpressionDomainExplicit()); return result; } const QRegularExpression &RegExpMailNameAtEnd() { static const auto result = CreateRegExp(ExpressionMailNameAtEnd()); return result; } const QRegularExpression &RegExpHashtag() { static const auto result = CreateRegExp(ExpressionHashtag()); return result; } const QRegularExpression &RegExpMention() { static const auto result = CreateRegExp(ExpressionMention()); return result; } const QRegularExpression 
&RegExpBotCommand() { static const auto result = CreateRegExp(ExpressionBotCommand()); return result; } const QRegularExpression &RegExpMarkdownBold() { static const auto result = CreateRegExp(ExpressionMarkdownBold()); return result; } const QRegularExpression &RegExpMarkdownItalic() { static const auto result = CreateRegExp(ExpressionMarkdownItalic()); return result; } const QRegularExpression &RegExpMarkdownMonoInline() { static const auto result = CreateRegExp(ExpressionMarkdownMonoInline()); return result; } const QRegularExpression &RegExpMarkdownMonoBlock() { static const auto result = CreateRegExp(ExpressionMarkdownMonoBlock()); return result; } bool IsValidProtocol(const QString &protocol) { static const auto list = CreateValidProtocols(); return list.contains(hashCrc32(protocol.constData(), protocol.size() * sizeof(QChar))); } bool IsValidTopDomain(const QString &protocol) { static const auto list = CreateValidTopDomains(); return list.contains(hashCrc32(protocol.constData(), protocol.size() * sizeof(QChar))); } QString Clean(const QString &text) { auto result = text; for (auto s = text.unicode(), ch = s, e = text.unicode() + text.size(); ch != e; ++ch) { if (*ch == TextCommand) { result[int(ch - s)] = QChar::Space; } } return result; } QString EscapeForRichParsing(const QString &text) { QString result; result.reserve(text.size()); auto s = text.constData(), ch = s; for (const QChar *e = s + text.size(); ch != e; ++ch) { if (*ch == TextCommand) { if (ch > s) result.append(s, ch - s); result.append(QChar::Space); s = ch + 1; continue; } if (ch->unicode() == '\\' || ch->unicode() == '[') { if (ch > s) result.append(s, ch - s); result.append('\\'); s = ch; continue; } } if (ch > s) result.append(s, ch - s); return result; } QString SingleLine(const QString &text) { auto result = text; auto s = text.unicode(), ch = s, e = text.unicode() + text.size(); // Trim. 
while (s < e && chIsTrimmed(*s)) { ++s; } while (s < e && chIsTrimmed(*(e - 1))) { --e; } if (e - s != text.size()) { result = text.mid(s - text.unicode(), e - s); } for (auto ch = s; ch != e; ++ch) { if (chIsNewline(*ch) || *ch == TextCommand) { result[int(ch - s)] = QChar::Space; } } return result; } QString RemoveAccents(const QString &text) { auto result = text; auto copying = false; auto i = 0; for (auto s = text.unicode(), ch = s, e = text.unicode() + text.size(); ch != e; ++ch, ++i) { if (ch->unicode() < 128) { if (copying) result[i] = *ch; continue; } if (chIsDiac(*ch)) { copying = true; --i; continue; } if (ch->isHighSurrogate() && ch + 1 < e && (ch + 1)->isLowSurrogate()) { auto noAccent = RemoveOneAccent(QChar::surrogateToUcs4(*ch, *(ch + 1))); if (noAccent.unicode() > 0) { copying = true; result[i] = noAccent; } else { if (copying) result[i] = *ch; ++ch, ++i; if (copying) result[i] = *ch; } } else { auto noAccent = RemoveOneAccent(ch->unicode()); if (noAccent.unicode() > 0 && noAccent != *ch) { result[i] = noAccent; } else if (copying) { result[i] = *ch; } } } return (i < result.size()) ? result.mid(0, i) : result; } QStringList PrepareSearchWords(const QString &query, const QRegularExpression *SplitterOverride) { auto clean = RemoveAccents(query.trimmed().toLower()); auto result = QStringList(); if (!clean.isEmpty()) { auto list = clean.split(SplitterOverride ? 
*SplitterOverride : RegExpWordSplit(), QString::SkipEmptyParts); auto size = list.size(); result.reserve(list.size()); for_const (auto &word, list) { auto trimmed = word.trimmed(); if (!trimmed.isEmpty()) { result.push_back(trimmed); } } } return result; } bool CutPart(TextWithEntities &sending, TextWithEntities &left, int32 limit) { if (left.text.isEmpty() || !limit) return false; int32 currentEntity = 0, goodEntity = currentEntity, entityCount = left.entities.size(); bool goodInEntity = false, goodCanBreakEntity = false; int32 s = 0, half = limit / 2, goodLevel = 0; for (const QChar *start = left.text.constData(), *ch = start, *end = left.text.constEnd(), *good = ch; ch != end; ++ch, ++s) { while (currentEntity < entityCount && ch >= start + left.entities[currentEntity].offset() + left.entities[currentEntity].length()) { ++currentEntity; } if (s > half) { bool inEntity = (currentEntity < entityCount) && (ch > start + left.entities[currentEntity].offset()) && (ch < start + left.entities[currentEntity].offset() + left.entities[currentEntity].length()); EntityInTextType entityType = (currentEntity < entityCount) ? left.entities[currentEntity].type() : EntityInTextInvalid; bool canBreakEntity = (entityType == EntityInTextPre || entityType == EntityInTextCode); int32 noEntityLevel = inEntity ? 
0 : 1; auto markGoodAsLevel = [&](int newLevel) { if (goodLevel > newLevel) { return; } goodLevel = newLevel; good = ch; goodEntity = currentEntity; goodInEntity = inEntity; goodCanBreakEntity = canBreakEntity; }; if (inEntity && !canBreakEntity) { markGoodAsLevel(0); } else { if (chIsNewline(*ch)) { if (inEntity) { if (ch + 1 < end && chIsNewline(*(ch + 1))) { markGoodAsLevel(12); } else { markGoodAsLevel(11); } } else if (ch + 1 < end && chIsNewline(*(ch + 1))) { markGoodAsLevel(15); } else if (currentEntity < entityCount && ch + 1 == start + left.entities[currentEntity].offset() && left.entities[currentEntity].type() == EntityInTextPre) { markGoodAsLevel(14); } else if (currentEntity > 0 && ch == start + left.entities[currentEntity - 1].offset() + left.entities[currentEntity - 1].length() && left.entities[currentEntity - 1].type() == EntityInTextPre) { markGoodAsLevel(14); } else { markGoodAsLevel(13); } } else if (chIsSpace(*ch)) { if (chIsSentenceEnd(*(ch - 1))) { markGoodAsLevel(9 + noEntityLevel); } else if (chIsSentencePartEnd(*(ch - 1))) { markGoodAsLevel(7 + noEntityLevel); } else { markGoodAsLevel(5 + noEntityLevel); } } else if (chIsWordSeparator(*(ch - 1))) { markGoodAsLevel(3 + noEntityLevel); } else { markGoodAsLevel(1 + noEntityLevel); } } } int elen = 0; if (auto e = Ui::Emoji::Find(ch, end, &elen)) { for (int i = 0; i < elen; ++i, ++ch, ++s) { if (ch->isHighSurrogate() && i + 1 < elen && (ch + 1)->isLowSurrogate()) { ++ch; ++i; } } --ch; --s; } else if (ch->isHighSurrogate() && ch + 1 < end && (ch + 1)->isLowSurrogate()) { ++ch; } if (s >= limit) { sending.text = left.text.mid(0, good - start); left.text = left.text.mid(good - start); if (goodInEntity) { if (goodCanBreakEntity) { sending.entities = left.entities.mid(0, goodEntity + 1); sending.entities.back().updateTextEnd(good - start); left.entities = left.entities.mid(goodEntity); for (auto &entity : left.entities) { entity.shiftLeft(good - start); } } else { sending.entities = 
left.entities.mid(0, goodEntity); left.entities = left.entities.mid(goodEntity + 1); } } else { sending.entities = left.entities.mid(0, goodEntity); left.entities = left.entities.mid(goodEntity); for (auto &entity : left.entities) { entity.shiftLeft(good - start); } } return true; } } sending.text = left.text; left.text = QString(); sending.entities = left.entities; left.entities = EntitiesInText(); return true; } bool textcmdStartsLink(const QChar *start, int32 len, int32 commandOffset) { if (commandOffset + 2 < len) { if (*(start + commandOffset + 1) == TextCommandLinkIndex) { return (*(start + commandOffset + 2) != 0); } return (*(start + commandOffset + 1) != TextCommandLinkText); } return false; } bool checkTagStartInCommand(const QChar *start, int32 len, int32 tagStart, int32 &commandOffset, bool &commandIsLink, bool &inLink) { bool inCommand = false; const QChar *commandEnd = start + commandOffset; while (commandOffset < len && tagStart > commandOffset) { // skip commands, evaluating are we in link or not commandEnd = textSkipCommand(start + commandOffset, start + len); if (commandEnd > start + commandOffset) { if (tagStart < (commandEnd - start)) { inCommand = true; break; } for (commandOffset = commandEnd - start; commandOffset < len; ++commandOffset) { if (*(start + commandOffset) == TextCommand) { inLink = commandIsLink; commandIsLink = textcmdStartsLink(start, len, commandOffset); break; } } if (commandOffset >= len) { inLink = commandIsLink; commandIsLink = false; } } else { break; } } if (inCommand) { commandOffset = commandEnd - start; } return inCommand; } EntitiesInText EntitiesFromMTP(const QVector &entities) { auto result = EntitiesInText(); if (!entities.isEmpty()) { result.reserve(entities.size()); for_const (auto &entity, entities) { switch (entity.type()) { case mtpc_messageEntityUrl: { auto &d = entity.c_messageEntityUrl(); result.push_back(EntityInText(EntityInTextUrl, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityTextUrl: { 
auto &d = entity.c_messageEntityTextUrl(); result.push_back(EntityInText(EntityInTextCustomUrl, d.voffset.v, d.vlength.v, Clean(qs(d.vurl)))); } break; case mtpc_messageEntityEmail: { auto &d = entity.c_messageEntityEmail(); result.push_back(EntityInText(EntityInTextEmail, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityHashtag: { auto &d = entity.c_messageEntityHashtag(); result.push_back(EntityInText(EntityInTextHashtag, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityMention: { auto &d = entity.c_messageEntityMention(); result.push_back(EntityInText(EntityInTextMention, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityMentionName: { auto &d = entity.c_messageEntityMentionName(); auto data = [&d] { if (auto user = App::userLoaded(peerFromUser(d.vuser_id))) { return MentionNameDataFromFields({ d.vuser_id.v, user->accessHash() }); } return MentionNameDataFromFields(d.vuser_id.v); }; result.push_back(EntityInText(EntityInTextMentionName, d.voffset.v, d.vlength.v, data())); } break; case mtpc_inputMessageEntityMentionName: { auto &d = entity.c_inputMessageEntityMentionName(); auto data = ([&d]() -> QString { if (d.vuser_id.type() == mtpc_inputUserSelf) { return MentionNameDataFromFields(Auth().userId()); } else if (d.vuser_id.type() == mtpc_inputUser) { auto &user = d.vuser_id.c_inputUser(); return MentionNameDataFromFields({ user.vuser_id.v, user.vaccess_hash.v }); } return QString(); })(); if (!data.isEmpty()) { result.push_back(EntityInText(EntityInTextMentionName, d.voffset.v, d.vlength.v, data)); } } break; case mtpc_messageEntityBotCommand: { auto &d = entity.c_messageEntityBotCommand(); result.push_back(EntityInText(EntityInTextBotCommand, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityBold: { auto &d = entity.c_messageEntityBold(); result.push_back(EntityInText(EntityInTextBold, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityItalic: { auto &d = entity.c_messageEntityItalic(); 
result.push_back(EntityInText(EntityInTextItalic, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityCode: { auto &d = entity.c_messageEntityCode(); result.push_back(EntityInText(EntityInTextCode, d.voffset.v, d.vlength.v)); } break; case mtpc_messageEntityPre: { auto &d = entity.c_messageEntityPre(); result.push_back(EntityInText(EntityInTextPre, d.voffset.v, d.vlength.v, Clean(qs(d.vlanguage)))); } break; } } } return result; } MTPVector EntitiesToMTP(const EntitiesInText &entities, ConvertOption option) { auto v = QVector(); v.reserve(entities.size()); for_const (auto &entity, entities) { if (entity.length() <= 0) continue; if (option == ConvertOption::SkipLocal && entity.type() != EntityInTextBold && entity.type() != EntityInTextItalic && entity.type() != EntityInTextCode && entity.type() != EntityInTextPre && entity.type() != EntityInTextMentionName) { continue; } auto offset = MTP_int(entity.offset()); auto length = MTP_int(entity.length()); switch (entity.type()) { case EntityInTextUrl: v.push_back(MTP_messageEntityUrl(offset, length)); break; case EntityInTextCustomUrl: v.push_back(MTP_messageEntityTextUrl(offset, length, MTP_string(entity.data()))); break; case EntityInTextEmail: v.push_back(MTP_messageEntityEmail(offset, length)); break; case EntityInTextHashtag: v.push_back(MTP_messageEntityHashtag(offset, length)); break; case EntityInTextMention: v.push_back(MTP_messageEntityMention(offset, length)); break; case EntityInTextMentionName: { auto inputUser = ([](const QString &data) -> MTPInputUser { auto fields = MentionNameDataToFields(data); if (fields.userId == Auth().userId()) { return MTP_inputUserSelf(); } else if (fields.userId) { return MTP_inputUser(MTP_int(fields.userId), MTP_long(fields.accessHash)); } return MTP_inputUserEmpty(); })(entity.data()); if (inputUser.type() != mtpc_inputUserEmpty) { v.push_back(MTP_inputMessageEntityMentionName(offset, length, inputUser)); } } break; case EntityInTextBotCommand: 
v.push_back(MTP_messageEntityBotCommand(offset, length)); break; case EntityInTextBold: v.push_back(MTP_messageEntityBold(offset, length)); break; case EntityInTextItalic: v.push_back(MTP_messageEntityItalic(offset, length)); break; case EntityInTextCode: v.push_back(MTP_messageEntityCode(offset, length)); break; case EntityInTextPre: v.push_back(MTP_messageEntityPre(offset, length, MTP_string(entity.data()))); break; } } return MTP_vector(std::move(v)); } struct MarkdownPart { MarkdownPart() = default; MarkdownPart(EntityInTextType type) : type(type), outerStart(-1) { } EntityInTextType type = EntityInTextInvalid; int outerStart = 0; int innerStart = 0; int innerEnd = 0; int outerEnd = 0; bool addNewlineBefore = false; bool addNewlineAfter = false; }; MarkdownPart GetMarkdownPart(EntityInTextType type, const QString &text, int matchFromOffset, bool rich) { auto result = MarkdownPart(); auto regexp = [type] { switch (type) { case EntityInTextBold: return RegExpMarkdownBold(); case EntityInTextItalic: return RegExpMarkdownItalic(); case EntityInTextCode: return RegExpMarkdownMonoInline(); case EntityInTextPre: return RegExpMarkdownMonoBlock(); } Unexpected("Type in GetMardownPart()"); }; if (matchFromOffset > 1) { // If matchFromOffset is after some separator that is allowed to // start our markdown tag the tag itself will start where we want it. // So we allow to see this separator and make a match. 
--matchFromOffset; } auto match = regexp().match(text, matchFromOffset); if (!match.hasMatch()) { return result; } result.outerStart = match.capturedStart(); result.outerEnd = match.capturedEnd(); if (!match.capturedRef(1).isEmpty()) { ++result.outerStart; } if (!match.capturedRef(4).isEmpty()) { --result.outerEnd; } result.innerStart = result.outerStart + match.capturedLength(2); result.innerEnd = result.outerEnd - match.capturedLength(3); result.type = type; return result; } void AdjustMarkdownPrePart(MarkdownPart &result, const TextWithEntities &text, bool rich) { auto start = text.text.constData(); auto length = text.text.size(); auto lastEntityBeforeEnd = 0; auto firstEntityInsideStart = result.innerEnd; auto lastEntityInsideEnd = result.innerStart; auto firstEntityAfterStart = length; for_const (auto &entity, text.entities) { if (entity.offset() < result.outerStart) { lastEntityBeforeEnd = entity.offset() + entity.length(); } else if (entity.offset() >= result.outerEnd) { firstEntityAfterStart = entity.offset(); break; } else if (entity.offset() >= result.innerStart) { accumulate_min(firstEntityInsideStart, entity.offset()); lastEntityInsideEnd = entity.offset() + entity.length(); } } while (result.outerStart > lastEntityBeforeEnd && chIsSpace(*(start + result.outerStart - 1), rich) && !chIsNewline(*(start + result.outerStart - 1))) { --result.outerStart; } result.addNewlineBefore = (result.outerStart > 0 && !chIsNewline(*(start + result.outerStart - 1))); for (auto testInnerStart = result.innerStart; testInnerStart < firstEntityInsideStart; ++testInnerStart) { if (chIsNewline(*(start + testInnerStart))) { result.innerStart = testInnerStart + 1; break; } else if (!chIsSpace(*(start + testInnerStart))) { break; } } for (auto testInnerEnd = result.innerEnd; lastEntityInsideEnd < testInnerEnd;) { --testInnerEnd; if (chIsNewline(*(start + testInnerEnd))) { result.innerEnd = testInnerEnd; break; } else if (!chIsSpace(*(start + testInnerEnd))) { break; } } while 
(result.outerEnd < firstEntityAfterStart && chIsSpace(*(start + result.outerEnd)) && !chIsNewline(*(start + result.outerEnd))) { ++result.outerEnd; } result.addNewlineAfter = (result.outerEnd < length && !chIsNewline(*(start + result.outerEnd))); } void ParseMarkdown(TextWithEntities &result, bool rich) { if (result.empty()) { return; } auto newResult = TextWithEntities(); MarkdownPart computedParts[4] = { { EntityInTextBold }, { EntityInTextItalic }, { EntityInTextPre }, { EntityInTextCode }, }; auto existingEntityIndex = 0; auto existingEntitiesCount = result.entities.size(); auto existingEntityShiftLeft = 0; auto copyFromOffset = 0; auto matchFromOffset = 0; auto length = result.text.size(); auto nextCommandOffset = rich ? 0 : length; auto inLink = false; auto commandIsLink = false; const auto start = result.text.constData(); for (; matchFromOffset < length;) { if (nextCommandOffset <= matchFromOffset) { for (nextCommandOffset = matchFromOffset; nextCommandOffset != length; ++nextCommandOffset) { if (*(start + nextCommandOffset) == TextCommand) { inLink = commandIsLink; commandIsLink = textcmdStartsLink(start, length, nextCommandOffset); break; } } if (nextCommandOffset >= length) { inLink = commandIsLink; commandIsLink = false; } } auto part = MarkdownPart(); auto checkType = [&part, &result, matchFromOffset, rich](MarkdownPart &computedPart) { if (computedPart.type == EntityInTextInvalid) { return; } if (matchFromOffset > computedPart.outerStart) { computedPart = GetMarkdownPart(computedPart.type, result.text, matchFromOffset, rich); if (computedPart.type == EntityInTextInvalid) { return; } } if (part.type == EntityInTextInvalid || part.outerStart > computedPart.outerStart) { part = computedPart; } }; for (auto &computedPart : computedParts) { checkType(computedPart); } if (part.type == EntityInTextInvalid) { break; } // Check if start sequence intersects a command. 
auto inCommand = checkTagStartInCommand(start, length, part.outerStart, nextCommandOffset, commandIsLink, inLink); if (inCommand || inLink) { matchFromOffset = nextCommandOffset; continue; } // Check if start or end sequences intersect any existing entity. auto intersectedEntityEnd = 0; for_const (auto &entity, result.entities) { if (qMin(part.innerStart, entity.offset() + entity.length()) > qMax(part.outerStart, entity.offset()) || qMin(part.outerEnd, entity.offset() + entity.length()) > qMax(part.innerEnd, entity.offset())) { intersectedEntityEnd = entity.offset() + entity.length(); break; } } if (intersectedEntityEnd > 0) { matchFromOffset = qMax(part.innerStart, intersectedEntityEnd); continue; } if (part.type == EntityInTextPre) { AdjustMarkdownPrePart(part, result, rich); } if (newResult.text.isEmpty()) newResult.text.reserve(result.text.size()); for (; existingEntityIndex < existingEntitiesCount && result.entities[existingEntityIndex].offset() < part.innerStart; ++existingEntityIndex) { auto &entity = result.entities[existingEntityIndex]; newResult.entities.push_back(entity); newResult.entities.back().shiftLeft(existingEntityShiftLeft); } if (part.outerStart > copyFromOffset) { newResult.text.append(start + copyFromOffset, part.outerStart - copyFromOffset); } if (part.addNewlineBefore) newResult.text.append('\n'); existingEntityShiftLeft += (part.innerStart - part.outerStart) - (part.addNewlineBefore ? 
1 : 0); auto entityStart = newResult.text.size(); auto entityLength = part.innerEnd - part.innerStart; newResult.entities.push_back(EntityInText(part.type, entityStart, entityLength)); for (; existingEntityIndex < existingEntitiesCount && result.entities[existingEntityIndex].offset() <= part.innerEnd; ++existingEntityIndex) { auto &entity = result.entities[existingEntityIndex]; newResult.entities.push_back(entity); newResult.entities.back().shiftLeft(existingEntityShiftLeft); } newResult.text.append(start + part.innerStart, entityLength); if (part.addNewlineAfter) newResult.text.append('\n'); existingEntityShiftLeft += (part.outerEnd - part.innerEnd) - (part.addNewlineAfter ? 1 : 0); copyFromOffset = matchFromOffset = part.outerEnd; } if (!newResult.empty()) { newResult.text.append(start + copyFromOffset, length - copyFromOffset); for (; existingEntityIndex < existingEntitiesCount; ++existingEntityIndex) { auto &entity = result.entities[existingEntityIndex]; newResult.entities.push_back(entity); newResult.entities.back().shiftLeft(existingEntityShiftLeft); } result = std::move(newResult); } } // Some code is duplicated in flattextarea.cpp! void ParseEntities(TextWithEntities &result, int32 flags, bool rich) { if (flags & TextParseMarkdown) { // parse markdown entities (bold, italic, code and pre) ParseMarkdown(result, rich); } auto newEntities = EntitiesInText(); bool withHashtags = (flags & TextParseHashtags); bool withMentions = (flags & TextParseMentions); bool withBotCommands = (flags & TextParseBotCommands); int existingEntityIndex = 0, existingEntitiesCount = result.entities.size(); int existingEntityEnd = 0; int32 len = result.text.size(), commandOffset = rich ? 
0 : len; bool inLink = false, commandIsLink = false; const QChar *start = result.text.constData(), *end = start + result.text.size(); for (int32 offset = 0, matchOffset = offset, mentionSkip = 0; offset < len;) { if (commandOffset <= offset) { for (commandOffset = offset; commandOffset < len; ++commandOffset) { if (*(start + commandOffset) == TextCommand) { inLink = commandIsLink; commandIsLink = textcmdStartsLink(start, len, commandOffset); break; } } } auto mDomain = RegExpDomain().match(result.text, matchOffset); auto mExplicitDomain = RegExpDomainExplicit().match(result.text, matchOffset); auto mHashtag = withHashtags ? RegExpHashtag().match(result.text, matchOffset) : QRegularExpressionMatch(); auto mMention = withMentions ? RegExpMention().match(result.text, qMax(mentionSkip, matchOffset)) : QRegularExpressionMatch(); auto mBotCommand = withBotCommands ? RegExpBotCommand().match(result.text, matchOffset) : QRegularExpressionMatch(); EntityInTextType lnkType = EntityInTextUrl; int32 lnkStart = 0, lnkLength = 0; int32 domainStart = mDomain.hasMatch() ? mDomain.capturedStart() : INT_MAX, domainEnd = mDomain.hasMatch() ? mDomain.capturedEnd() : INT_MAX, explicitDomainStart = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedStart() : INT_MAX, explicitDomainEnd = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedEnd() : INT_MAX, hashtagStart = mHashtag.hasMatch() ? mHashtag.capturedStart() : INT_MAX, hashtagEnd = mHashtag.hasMatch() ? mHashtag.capturedEnd() : INT_MAX, mentionStart = mMention.hasMatch() ? mMention.capturedStart() : INT_MAX, mentionEnd = mMention.hasMatch() ? mMention.capturedEnd() : INT_MAX, botCommandStart = mBotCommand.hasMatch() ? mBotCommand.capturedStart() : INT_MAX, botCommandEnd = mBotCommand.hasMatch() ? 
mBotCommand.capturedEnd() : INT_MAX; if (mHashtag.hasMatch()) { if (!mHashtag.capturedRef(1).isEmpty()) { ++hashtagStart; } if (!mHashtag.capturedRef(2).isEmpty()) { --hashtagEnd; } } while (mMention.hasMatch()) { if (!mMention.capturedRef(1).isEmpty()) { ++mentionStart; } if (!mMention.capturedRef(2).isEmpty()) { --mentionEnd; } if (!(start + mentionStart + 1)->isLetter() || !(start + mentionEnd - 1)->isLetterOrNumber()) { mentionSkip = mentionEnd; mMention = RegExpMention().match(result.text, qMax(mentionSkip, matchOffset)); if (mMention.hasMatch()) { mentionStart = mMention.capturedStart(); mentionEnd = mMention.capturedEnd(); } else { mentionStart = INT_MAX; mentionEnd = INT_MAX; } } else { break; } } if (mBotCommand.hasMatch()) { if (!mBotCommand.capturedRef(1).isEmpty()) { ++botCommandStart; } if (!mBotCommand.capturedRef(3).isEmpty()) { --botCommandEnd; } } if (!mDomain.hasMatch() && !mExplicitDomain.hasMatch() && !mHashtag.hasMatch() && !mMention.hasMatch() && !mBotCommand.hasMatch()) { break; } if (explicitDomainStart < domainStart) { domainStart = explicitDomainStart; domainEnd = explicitDomainEnd; mDomain = mExplicitDomain; } if (mentionStart < hashtagStart && mentionStart < domainStart && mentionStart < botCommandStart) { bool inCommand = checkTagStartInCommand(start, len, mentionStart, commandOffset, commandIsLink, inLink); if (inCommand || inLink) { offset = matchOffset = commandOffset; continue; } lnkType = EntityInTextMention; lnkStart = mentionStart; lnkLength = mentionEnd - mentionStart; } else if (hashtagStart < domainStart && hashtagStart < botCommandStart) { bool inCommand = checkTagStartInCommand(start, len, hashtagStart, commandOffset, commandIsLink, inLink); if (inCommand || inLink) { offset = matchOffset = commandOffset; continue; } lnkType = EntityInTextHashtag; lnkStart = hashtagStart; lnkLength = hashtagEnd - hashtagStart; } else if (botCommandStart < domainStart) { bool inCommand = checkTagStartInCommand(start, len, botCommandStart, 
commandOffset, commandIsLink, inLink); if (inCommand || inLink) { offset = matchOffset = commandOffset; continue; } lnkType = EntityInTextBotCommand; lnkStart = botCommandStart; lnkLength = botCommandEnd - botCommandStart; } else { auto inCommand = checkTagStartInCommand(start, len, domainStart, commandOffset, commandIsLink, inLink); if (inCommand || inLink) { offset = matchOffset = commandOffset; continue; } auto protocol = mDomain.captured(1).toLower(); auto topDomain = mDomain.captured(3).toLower(); auto isProtocolValid = protocol.isEmpty() || IsValidProtocol(protocol); auto isTopDomainValid = !protocol.isEmpty() || IsValidTopDomain(topDomain); if (protocol.isEmpty() && domainStart > offset + 1 && *(start + domainStart - 1) == QChar('@')) { auto forMailName = result.text.mid(offset, domainStart - offset - 1); auto mMailName = RegExpMailNameAtEnd().match(forMailName); if (mMailName.hasMatch()) { auto mailStart = offset + mMailName.capturedStart(); if (mailStart < offset) { mailStart = offset; } lnkType = EntityInTextEmail; lnkStart = mailStart; lnkLength = domainEnd - mailStart; } } if (lnkType == EntityInTextUrl && !lnkLength) { if (!isProtocolValid || !isTopDomainValid) { matchOffset = domainEnd; continue; } lnkStart = domainStart; QStack parenth; const QChar *domainEnd = start + mDomain.capturedEnd(), *p = domainEnd; for (; p < end; ++p) { QChar ch(*p); if (chIsLinkEnd(ch)) break; // link finished if (chIsAlmostLinkEnd(ch)) { const QChar *endTest = p + 1; while (endTest < end && chIsAlmostLinkEnd(*endTest)) { ++endTest; } if (endTest >= end || chIsLinkEnd(*endTest)) { break; // link finished at p } p = endTest; ch = *p; } if (ch == '(' || ch == '[' || ch == '{' || ch == '<') { parenth.push(p); } else if (ch == ')' || ch == ']' || ch == '}' || ch == '>') { if (parenth.isEmpty()) break; const QChar *q = parenth.pop(), open(*q); if ((ch == ')' && open != '(') || (ch == ']' && open != '[') || (ch == '}' && open != '{') || (ch == '>' && open != '<')) { p = q; 
break; } } } if (p > domainEnd) { // check, that domain ended if (domainEnd->unicode() != '/' && domainEnd->unicode() != '?') { matchOffset = domainEnd - start; continue; } } lnkLength = (p - start) - lnkStart; } } for (; existingEntityIndex < existingEntitiesCount && result.entities[existingEntityIndex].offset() <= lnkStart; ++existingEntityIndex) { auto &entity = result.entities[existingEntityIndex]; accumulate_max(existingEntityEnd, entity.offset() + entity.length()); newEntities.push_back(entity); } if (lnkStart >= existingEntityEnd) { result.entities.push_back(EntityInText(lnkType, lnkStart, lnkLength)); } offset = matchOffset = lnkStart + lnkLength; } if (!newEntities.isEmpty()) { for (; existingEntityIndex < existingEntitiesCount; ++existingEntityIndex) { auto &entity = result.entities[existingEntityIndex]; newEntities.push_back(entity); } result.entities = newEntities; } } QString ApplyEntities(const TextWithEntities &text) { if (text.entities.isEmpty()) return text.text; QMultiMap closingTags; QMap tags; tags.insert(EntityInTextCode, qsl("`")); tags.insert(EntityInTextPre, qsl("```")); tags.insert(EntityInTextBold, qsl("**")); tags.insert(EntityInTextItalic, qsl("__")); constexpr auto kLargestOpenCloseLength = 6; QString result; int32 size = text.text.size(); const QChar *b = text.text.constData(), *already = b, *e = b + size; auto entity = text.entities.cbegin(), end = text.entities.cend(); auto skipTillRelevantAndGetTag = [&entity, &end, size, &tags] { while (entity != end) { if (entity->length() <= 0 || entity->offset() >= size) { ++entity; continue; } auto it = tags.constFind(entity->type()); if (it == tags.cend()) { ++entity; continue; } return it.value(); } return QString(); }; auto tag = skipTillRelevantAndGetTag(); while (entity != end || !closingTags.isEmpty()) { auto nextOpenEntity = (entity == end) ? (size + 1) : entity->offset(); auto nextCloseEntity = closingTags.isEmpty() ? 
(size + 1) : closingTags.cbegin().key(); if (nextOpenEntity <= nextCloseEntity) { if (result.isEmpty()) result.reserve(text.text.size() + text.entities.size() * kLargestOpenCloseLength); const QChar *offset = b + nextOpenEntity; if (offset > already) { result.append(already, offset - already); already = offset; } result.append(tag); closingTags.insert(qMin(entity->offset() + entity->length(), size), tag); ++entity; tag = skipTillRelevantAndGetTag(); } else { const QChar *offset = b + nextCloseEntity; if (offset > already) { result.append(already, offset - already); already = offset; } result.append(closingTags.cbegin().value()); closingTags.erase(closingTags.begin()); } } if (result.isEmpty()) { return text.text; } const QChar *offset = b + size; if (offset > already) { result.append(already, offset - already); } return result; } void MoveStringPart(TextWithEntities &result, int to, int from, int count) { if (!count) return; if (to != from) { auto start = result.text.data(); memmove(start + to, start + from, count * sizeof(QChar)); for (auto &entity : result.entities) { if (entity.offset() >= from + count) break; if (entity.offset() + entity.length() <= from) continue; if (entity.offset() >= from) { entity.extendToLeft(from - to); } if (entity.offset() + entity.length() <= from + count) { entity.shrinkFromRight(from - to); } } } } void MovePartAndGoForward(TextWithEntities &result, int &to, int &from, int count) { if (!count) return; MoveStringPart(result, to, from, count); to += count; from += count; } void ReplaceStringWithChar(const QLatin1String &from, QChar to, TextWithEntities &result, bool checkSpace = false) { Expects(from.size() > 1); auto len = from.size(), s = result.text.size(), offset = 0, length = 0; auto i = result.entities.begin(), e = result.entities.end(); for (auto start = result.text.data(); offset < s;) { auto nextOffset = result.text.indexOf(from, offset); if (nextOffset < 0) { MovePartAndGoForward(result, length, offset, s - offset); break; } 
if (checkSpace) { bool spaceBefore = (nextOffset > 0) && (start + nextOffset - 1)->isSpace(); bool spaceAfter = (nextOffset + len < s) && (start + nextOffset + len)->isSpace(); if (!spaceBefore && !spaceAfter) { MovePartAndGoForward(result, length, offset, nextOffset - offset + len + 1); continue; } } auto skip = false; for (; i != e; ++i) { // find and check next finishing entity if (i->offset() + i->length() > nextOffset) { skip = (i->offset() < nextOffset + len); break; } } if (skip) { MovePartAndGoForward(result, length, offset, nextOffset - offset + len); continue; } MovePartAndGoForward(result, length, offset, nextOffset - offset); *(start + length) = to; ++length; offset += len; } if (length < s) result.text.resize(length); } void PrepareForSending(TextWithEntities &result, int32 flags) { ApplyServerCleaning(result); if (flags) { ParseEntities(result, flags); } ReplaceStringWithChar(qstr("--"), QChar(8212), result, true); ReplaceStringWithChar(qstr("<<"), QChar(171), result); ReplaceStringWithChar(qstr(">>"), QChar(187), result); if (cReplaceEmojis()) { Ui::Emoji::ReplaceInText(result); } Trim(result); } // Replace bad symbols with space and remove '\r'. void ApplyServerCleaning(TextWithEntities &result) { auto len = result.text.size(); // Replace tabs with two spaces. 
if (auto tabs = std::count(result.text.cbegin(), result.text.cend(), '\t')) { auto replacement = qsl(" "); auto replacementLength = replacement.size(); auto shift = (replacementLength - 1); result.text.resize(len + shift * tabs); for (auto i = len, movedTill = len, to = result.text.size(); i > 0; --i) { if (result.text[i - 1] == '\t') { auto toMove = movedTill - i; to -= toMove; MoveStringPart(result, to, i, toMove); to -= replacementLength; memcpy(result.text.data() + to, replacement.constData(), replacementLength * sizeof(QChar)); movedTill = i - 1; } } len = result.text.size(); } auto to = 0; auto from = 0; auto start = result.text.data(); for (auto ch = start, end = start + len; ch < end; ++ch) { if (ch->unicode() == '\r') { MovePartAndGoForward(result, to, from, (ch - start) - from); ++from; } else if (chReplacedBySpace(*ch)) { *ch = ' '; } } MovePartAndGoForward(result, to, from, len - from); if (to < len) result.text.resize(to); } void Trim(TextWithEntities &result) { auto foundNotTrimmedChar = false; // right trim for (auto s = result.text.data(), e = s + result.text.size(), ch = e; ch != s;) { --ch; if (!chIsTrimmed(*ch)) { if (ch + 1 < e) { auto l = ch + 1 - s; for (auto &entity : result.entities) { entity.updateTextEnd(l); } result.text.resize(l); } foundNotTrimmedChar = true; break; } } if (!foundNotTrimmedChar) { result = TextWithEntities(); return; } auto firstMonospaceOffset = EntityInText::firstMonospaceOffset(result.entities, result.text.size()); // left trim for (auto s = result.text.data(), ch = s, e = s + result.text.size(); ch != e; ++ch) { if (!chIsTrimmed(*ch) || (ch - s) == firstMonospaceOffset) { if (ch > s) { auto l = ch - s; for (auto &entity : result.entities) { entity.shiftLeft(l); } result.text = result.text.mid(l); } break; } } } } // namespace TextUtilities namespace Lang { TextWithEntities ReplaceTag::Call(TextWithEntities &&original, ushort tag, const TextWithEntities &replacement) { auto replacementPosition = 
FindTagReplacementPosition(original.text, tag); if (replacementPosition < 0) { return std::move(original); } auto result = TextWithEntities(); result.text = ReplaceTag::Replace(std::move(original.text), replacement.text, replacementPosition); auto originalEntitiesCount = original.entities.size(); auto replacementEntitiesCount = replacement.entities.size(); if (originalEntitiesCount != 0 || replacementEntitiesCount != 0) { result.entities.reserve(originalEntitiesCount + replacementEntitiesCount); auto replacementEnd = replacementPosition + replacement.text.size(); auto replacementEntity = replacement.entities.cbegin(); auto addReplacementEntitiesUntil = [&replacementEntity, &replacement, &result, replacementPosition, replacementEnd](int untilPosition) { while (replacementEntity != replacement.entities.cend()) { auto newOffset = replacementPosition + replacementEntity->offset(); if (newOffset >= untilPosition) { return; } auto newEnd = newOffset + replacementEntity->length(); newOffset = snap(newOffset, replacementPosition, replacementEnd); newEnd = snap(newEnd, replacementPosition, replacementEnd); if (auto newLength = newEnd - newOffset) { result.entities.push_back(EntityInText(replacementEntity->type(), newOffset, newLength, replacementEntity->data())); } ++replacementEntity; } }; for_const (auto &entity, original.entities) { // Transform the entity by the replacement. auto offset = entity.offset(); auto end = offset + entity.length(); if (offset > replacementPosition) { offset = offset + replacement.text.size() - kTagReplacementSize; } if (end > replacementPosition) { end = end + replacement.text.size() - kTagReplacementSize; } offset = snap(offset, 0, result.text.size()); end = snap(end, 0, result.text.size()); // Add all replacement entities that start before the current original entity. addReplacementEntitiesUntil(offset); // Add a modified original entity. 
if (auto length = end - offset) { result.entities.push_back(EntityInText(entity.type(), offset, length, entity.data())); } } // Add the remaining replacement entities. addReplacementEntitiesUntil(result.text.size()); } return result; } } // namespace Lang