optimize HTML entities lookup

Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
Ivailo Monev 2021-03-22 12:38:50 +02:00
parent d52a9e5386
commit 764a1f6a5c

View file

@ -39,285 +39,278 @@ QT_BEGIN_NAMESPACE
// see also tst_qtextdocumentfragment.cpp // see also tst_qtextdocumentfragment.cpp
// Named HTML character entity table, shown here as a side-by-side diff: the
// left column is the removed `entities[]` (const char *name) and the right
// column is the added `entitiesTbl[]` (QLatin1String name). Both map an
// entity name to a 16-bit code and hold MAX_ENTITY (258) rows.
// Rows are ordered by name in byte order (uppercase sorts before lowercase,
// e.g. "dArr" precedes "dagger"). The removed lookup passes this array to
// qBinaryFind(), so keep the ordering if a sorted search is ever restored.
// NOTE(review): "and" is 0x22a5, the same code as "perp", and "or" is 0x22a6;
// HTML 4.01 lists and = U+2227 and or = U+2228 -- confirm, and check whether
// tst_qtextdocumentfragment.cpp (referenced above) needs the same correction.
#define MAX_ENTITY 258 #define MAX_ENTITY 258
static const struct QTextHtmlEntity { const char *name; quint16 code; } entities[MAX_ENTITY]= { static const struct entitiesTblData {
{ "AElig", 0x00c6 }, const QLatin1String name;
{ "AMP", 38 }, quint16 code;
{ "Aacute", 0x00c1 }, } entitiesTbl[MAX_ENTITY]= {
{ "Acirc", 0x00c2 }, { QLatin1String("AElig"), 0x00c6 },
{ "Agrave", 0x00c0 }, { QLatin1String("AMP"), 38 },
{ "Alpha", 0x0391 }, { QLatin1String("Aacute"), 0x00c1 },
{ "Aring", 0x00c5 }, { QLatin1String("Acirc"), 0x00c2 },
{ "Atilde", 0x00c3 }, { QLatin1String("Agrave"), 0x00c0 },
{ "Auml", 0x00c4 }, { QLatin1String("Alpha"), 0x0391 },
{ "Beta", 0x0392 }, { QLatin1String("Aring"), 0x00c5 },
{ "Ccedil", 0x00c7 }, { QLatin1String("Atilde"), 0x00c3 },
{ "Chi", 0x03a7 }, { QLatin1String("Auml"), 0x00c4 },
{ "Dagger", 0x2021 }, { QLatin1String("Beta"), 0x0392 },
{ "Delta", 0x0394 }, { QLatin1String("Ccedil"), 0x00c7 },
{ "ETH", 0x00d0 }, { QLatin1String("Chi"), 0x03a7 },
{ "Eacute", 0x00c9 }, { QLatin1String("Dagger"), 0x2021 },
{ "Ecirc", 0x00ca }, { QLatin1String("Delta"), 0x0394 },
{ "Egrave", 0x00c8 }, { QLatin1String("ETH"), 0x00d0 },
{ "Epsilon", 0x0395 }, { QLatin1String("Eacute"), 0x00c9 },
{ "Eta", 0x0397 }, { QLatin1String("Ecirc"), 0x00ca },
{ "Euml", 0x00cb }, { QLatin1String("Egrave"), 0x00c8 },
{ "GT", 62 }, { QLatin1String("Epsilon"), 0x0395 },
{ "Gamma", 0x0393 }, { QLatin1String("Eta"), 0x0397 },
{ "Iacute", 0x00cd }, { QLatin1String("Euml"), 0x00cb },
{ "Icirc", 0x00ce }, { QLatin1String("GT"), 62 },
{ "Igrave", 0x00cc }, { QLatin1String("Gamma"), 0x0393 },
{ "Iota", 0x0399 }, { QLatin1String("Iacute"), 0x00cd },
{ "Iuml", 0x00cf }, { QLatin1String("Icirc"), 0x00ce },
{ "Kappa", 0x039a }, { QLatin1String("Igrave"), 0x00cc },
{ "LT", 60 }, { QLatin1String("Iota"), 0x0399 },
{ "Lambda", 0x039b }, { QLatin1String("Iuml"), 0x00cf },
{ "Mu", 0x039c }, { QLatin1String("Kappa"), 0x039a },
{ "Ntilde", 0x00d1 }, { QLatin1String("LT"), 60 },
{ "Nu", 0x039d }, { QLatin1String("Lambda"), 0x039b },
{ "OElig", 0x0152 }, { QLatin1String("Mu"), 0x039c },
{ "Oacute", 0x00d3 }, { QLatin1String("Ntilde"), 0x00d1 },
{ "Ocirc", 0x00d4 }, { QLatin1String("Nu"), 0x039d },
{ "Ograve", 0x00d2 }, { QLatin1String("OElig"), 0x0152 },
{ "Omega", 0x03a9 }, { QLatin1String("Oacute"), 0x00d3 },
{ "Omicron", 0x039f }, { QLatin1String("Ocirc"), 0x00d4 },
{ "Oslash", 0x00d8 }, { QLatin1String("Ograve"), 0x00d2 },
{ "Otilde", 0x00d5 }, { QLatin1String("Omega"), 0x03a9 },
{ "Ouml", 0x00d6 }, { QLatin1String("Omicron"), 0x039f },
{ "Phi", 0x03a6 }, { QLatin1String("Oslash"), 0x00d8 },
{ "Pi", 0x03a0 }, { QLatin1String("Otilde"), 0x00d5 },
{ "Prime", 0x2033 }, { QLatin1String("Ouml"), 0x00d6 },
{ "Psi", 0x03a8 }, { QLatin1String("Phi"), 0x03a6 },
{ "QUOT", 34 }, { QLatin1String("Pi"), 0x03a0 },
{ "Rho", 0x03a1 }, { QLatin1String("Prime"), 0x2033 },
{ "Scaron", 0x0160 }, { QLatin1String("Psi"), 0x03a8 },
{ "Sigma", 0x03a3 }, { QLatin1String("QUOT"), 34 },
{ "THORN", 0x00de }, { QLatin1String("Rho"), 0x03a1 },
{ "Tau", 0x03a4 }, { QLatin1String("Scaron"), 0x0160 },
{ "Theta", 0x0398 }, { QLatin1String("Sigma"), 0x03a3 },
{ "Uacute", 0x00da }, { QLatin1String("THORN"), 0x00de },
{ "Ucirc", 0x00db }, { QLatin1String("Tau"), 0x03a4 },
{ "Ugrave", 0x00d9 }, { QLatin1String("Theta"), 0x0398 },
{ "Upsilon", 0x03a5 }, { QLatin1String("Uacute"), 0x00da },
{ "Uuml", 0x00dc }, { QLatin1String("Ucirc"), 0x00db },
{ "Xi", 0x039e }, { QLatin1String("Ugrave"), 0x00d9 },
{ "Yacute", 0x00dd }, { QLatin1String("Upsilon"), 0x03a5 },
{ "Yuml", 0x0178 }, { QLatin1String("Uuml"), 0x00dc },
{ "Zeta", 0x0396 }, { QLatin1String("Xi"), 0x039e },
{ "aacute", 0x00e1 }, { QLatin1String("Yacute"), 0x00dd },
{ "acirc", 0x00e2 }, { QLatin1String("Yuml"), 0x0178 },
{ "acute", 0x00b4 }, { QLatin1String("Zeta"), 0x0396 },
{ "aelig", 0x00e6 }, { QLatin1String("aacute"), 0x00e1 },
{ "agrave", 0x00e0 }, { QLatin1String("acirc"), 0x00e2 },
{ "alefsym", 0x2135 }, { QLatin1String("acute"), 0x00b4 },
{ "alpha", 0x03b1 }, { QLatin1String("aelig"), 0x00e6 },
{ "amp", 38 }, { QLatin1String("agrave"), 0x00e0 },
{ "and", 0x22a5 }, { QLatin1String("alefsym"), 0x2135 },
{ "ang", 0x2220 }, { QLatin1String("alpha"), 0x03b1 },
{ "apos", 0x0027 }, { QLatin1String("amp"), 38 },
{ "aring", 0x00e5 }, { QLatin1String("and"), 0x22a5 },
{ "asymp", 0x2248 }, { QLatin1String("ang"), 0x2220 },
{ "atilde", 0x00e3 }, { QLatin1String("apos"), 0x0027 },
{ "auml", 0x00e4 }, { QLatin1String("aring"), 0x00e5 },
{ "bdquo", 0x201e }, { QLatin1String("asymp"), 0x2248 },
{ "beta", 0x03b2 }, { QLatin1String("atilde"), 0x00e3 },
{ "brvbar", 0x00a6 }, { QLatin1String("auml"), 0x00e4 },
{ "bull", 0x2022 }, { QLatin1String("bdquo"), 0x201e },
{ "cap", 0x2229 }, { QLatin1String("beta"), 0x03b2 },
{ "ccedil", 0x00e7 }, { QLatin1String("brvbar"), 0x00a6 },
{ "cedil", 0x00b8 }, { QLatin1String("bull"), 0x2022 },
{ "cent", 0x00a2 }, { QLatin1String("cap"), 0x2229 },
{ "chi", 0x03c7 }, { QLatin1String("ccedil"), 0x00e7 },
{ "circ", 0x02c6 }, { QLatin1String("cedil"), 0x00b8 },
{ "clubs", 0x2663 }, { QLatin1String("cent"), 0x00a2 },
{ "cong", 0x2245 }, { QLatin1String("chi"), 0x03c7 },
{ "copy", 0x00a9 }, { QLatin1String("circ"), 0x02c6 },
{ "crarr", 0x21b5 }, { QLatin1String("clubs"), 0x2663 },
{ "cup", 0x222a }, { QLatin1String("cong"), 0x2245 },
{ "curren", 0x00a4 }, { QLatin1String("copy"), 0x00a9 },
{ "dArr", 0x21d3 }, { QLatin1String("crarr"), 0x21b5 },
{ "dagger", 0x2020 }, { QLatin1String("cup"), 0x222a },
{ "darr", 0x2193 }, { QLatin1String("curren"), 0x00a4 },
{ "deg", 0x00b0 }, { QLatin1String("dArr"), 0x21d3 },
{ "delta", 0x03b4 }, { QLatin1String("dagger"), 0x2020 },
{ "diams", 0x2666 }, { QLatin1String("darr"), 0x2193 },
{ "divide", 0x00f7 }, { QLatin1String("deg"), 0x00b0 },
{ "eacute", 0x00e9 }, { QLatin1String("delta"), 0x03b4 },
{ "ecirc", 0x00ea }, { QLatin1String("diams"), 0x2666 },
{ "egrave", 0x00e8 }, { QLatin1String("divide"), 0x00f7 },
{ "empty", 0x2205 }, { QLatin1String("eacute"), 0x00e9 },
{ "emsp", 0x2003 }, { QLatin1String("ecirc"), 0x00ea },
{ "ensp", 0x2002 }, { QLatin1String("egrave"), 0x00e8 },
{ "epsilon", 0x03b5 }, { QLatin1String("empty"), 0x2205 },
{ "equiv", 0x2261 }, { QLatin1String("emsp"), 0x2003 },
{ "eta", 0x03b7 }, { QLatin1String("ensp"), 0x2002 },
{ "eth", 0x00f0 }, { QLatin1String("epsilon"), 0x03b5 },
{ "euml", 0x00eb }, { QLatin1String("equiv"), 0x2261 },
{ "euro", 0x20ac }, { QLatin1String("eta"), 0x03b7 },
{ "exist", 0x2203 }, { QLatin1String("eth"), 0x00f0 },
{ "fnof", 0x0192 }, { QLatin1String("euml"), 0x00eb },
{ "forall", 0x2200 }, { QLatin1String("euro"), 0x20ac },
{ "frac12", 0x00bd }, { QLatin1String("exist"), 0x2203 },
{ "frac14", 0x00bc }, { QLatin1String("fnof"), 0x0192 },
{ "frac34", 0x00be }, { QLatin1String("forall"), 0x2200 },
{ "frasl", 0x2044 }, { QLatin1String("frac12"), 0x00bd },
{ "gamma", 0x03b3 }, { QLatin1String("frac14"), 0x00bc },
{ "ge", 0x2265 }, { QLatin1String("frac34"), 0x00be },
{ "gt", 62 }, { QLatin1String("frasl"), 0x2044 },
{ "hArr", 0x21d4 }, { QLatin1String("gamma"), 0x03b3 },
{ "harr", 0x2194 }, { QLatin1String("ge"), 0x2265 },
{ "hearts", 0x2665 }, { QLatin1String("gt"), 62 },
{ "hellip", 0x2026 }, { QLatin1String("hArr"), 0x21d4 },
{ "iacute", 0x00ed }, { QLatin1String("harr"), 0x2194 },
{ "icirc", 0x00ee }, { QLatin1String("hearts"), 0x2665 },
{ "iexcl", 0x00a1 }, { QLatin1String("hellip"), 0x2026 },
{ "igrave", 0x00ec }, { QLatin1String("iacute"), 0x00ed },
{ "image", 0x2111 }, { QLatin1String("icirc"), 0x00ee },
{ "infin", 0x221e }, { QLatin1String("iexcl"), 0x00a1 },
{ "int", 0x222b }, { QLatin1String("igrave"), 0x00ec },
{ "iota", 0x03b9 }, { QLatin1String("image"), 0x2111 },
{ "iquest", 0x00bf }, { QLatin1String("infin"), 0x221e },
{ "isin", 0x2208 }, { QLatin1String("int"), 0x222b },
{ "iuml", 0x00ef }, { QLatin1String("iota"), 0x03b9 },
{ "kappa", 0x03ba }, { QLatin1String("iquest"), 0x00bf },
{ "lArr", 0x21d0 }, { QLatin1String("isin"), 0x2208 },
{ "lambda", 0x03bb }, { QLatin1String("iuml"), 0x00ef },
{ "lang", 0x2329 }, { QLatin1String("kappa"), 0x03ba },
{ "laquo", 0x00ab }, { QLatin1String("lArr"), 0x21d0 },
{ "larr", 0x2190 }, { QLatin1String("lambda"), 0x03bb },
{ "lceil", 0x2308 }, { QLatin1String("lang"), 0x2329 },
{ "ldquo", 0x201c }, { QLatin1String("laquo"), 0x00ab },
{ "le", 0x2264 }, { QLatin1String("larr"), 0x2190 },
{ "lfloor", 0x230a }, { QLatin1String("lceil"), 0x2308 },
{ "lowast", 0x2217 }, { QLatin1String("ldquo"), 0x201c },
{ "loz", 0x25ca }, { QLatin1String("le"), 0x2264 },
{ "lrm", 0x200e }, { QLatin1String("lfloor"), 0x230a },
{ "lsaquo", 0x2039 }, { QLatin1String("lowast"), 0x2217 },
{ "lsquo", 0x2018 }, { QLatin1String("loz"), 0x25ca },
{ "lt", 60 }, { QLatin1String("lrm"), 0x200e },
{ "macr", 0x00af }, { QLatin1String("lsaquo"), 0x2039 },
{ "mdash", 0x2014 }, { QLatin1String("lsquo"), 0x2018 },
{ "micro", 0x00b5 }, { QLatin1String("lt"), 60 },
{ "middot", 0x00b7 }, { QLatin1String("macr"), 0x00af },
{ "minus", 0x2212 }, { QLatin1String("mdash"), 0x2014 },
{ "mu", 0x03bc }, { QLatin1String("micro"), 0x00b5 },
{ "nabla", 0x2207 }, { QLatin1String("middot"), 0x00b7 },
{ "nbsp", 0x00a0 }, { QLatin1String("minus"), 0x2212 },
{ "ndash", 0x2013 }, { QLatin1String("mu"), 0x03bc },
{ "ne", 0x2260 }, { QLatin1String("nabla"), 0x2207 },
{ "ni", 0x220b }, { QLatin1String("nbsp"), 0x00a0 },
{ "not", 0x00ac }, { QLatin1String("ndash"), 0x2013 },
{ "notin", 0x2209 }, { QLatin1String("ne"), 0x2260 },
{ "nsub", 0x2284 }, { QLatin1String("ni"), 0x220b },
{ "ntilde", 0x00f1 }, { QLatin1String("not"), 0x00ac },
{ "nu", 0x03bd }, { QLatin1String("notin"), 0x2209 },
{ "oacute", 0x00f3 }, { QLatin1String("nsub"), 0x2284 },
{ "ocirc", 0x00f4 }, { QLatin1String("ntilde"), 0x00f1 },
{ "oelig", 0x0153 }, { QLatin1String("nu"), 0x03bd },
{ "ograve", 0x00f2 }, { QLatin1String("oacute"), 0x00f3 },
{ "oline", 0x203e }, { QLatin1String("ocirc"), 0x00f4 },
{ "omega", 0x03c9 }, { QLatin1String("oelig"), 0x0153 },
{ "omicron", 0x03bf }, { QLatin1String("ograve"), 0x00f2 },
{ "oplus", 0x2295 }, { QLatin1String("oline"), 0x203e },
{ "or", 0x22a6 }, { QLatin1String("omega"), 0x03c9 },
{ "ordf", 0x00aa }, { QLatin1String("omicron"), 0x03bf },
{ "ordm", 0x00ba }, { QLatin1String("oplus"), 0x2295 },
{ "oslash", 0x00f8 }, { QLatin1String("or"), 0x22a6 },
{ "otilde", 0x00f5 }, { QLatin1String("ordf"), 0x00aa },
{ "otimes", 0x2297 }, { QLatin1String("ordm"), 0x00ba },
{ "ouml", 0x00f6 }, { QLatin1String("oslash"), 0x00f8 },
{ "para", 0x00b6 }, { QLatin1String("otilde"), 0x00f5 },
{ "part", 0x2202 }, { QLatin1String("otimes"), 0x2297 },
{ "percnt", 0x0025 }, { QLatin1String("ouml"), 0x00f6 },
{ "permil", 0x2030 }, { QLatin1String("para"), 0x00b6 },
{ "perp", 0x22a5 }, { QLatin1String("part"), 0x2202 },
{ "phi", 0x03c6 }, { QLatin1String("percnt"), 0x0025 },
{ "pi", 0x03c0 }, { QLatin1String("permil"), 0x2030 },
{ "piv", 0x03d6 }, { QLatin1String("perp"), 0x22a5 },
{ "plusmn", 0x00b1 }, { QLatin1String("phi"), 0x03c6 },
{ "pound", 0x00a3 }, { QLatin1String("pi"), 0x03c0 },
{ "prime", 0x2032 }, { QLatin1String("piv"), 0x03d6 },
{ "prod", 0x220f }, { QLatin1String("plusmn"), 0x00b1 },
{ "prop", 0x221d }, { QLatin1String("pound"), 0x00a3 },
{ "psi", 0x03c8 }, { QLatin1String("prime"), 0x2032 },
{ "quot", 34 }, { QLatin1String("prod"), 0x220f },
{ "rArr", 0x21d2 }, { QLatin1String("prop"), 0x221d },
{ "radic", 0x221a }, { QLatin1String("psi"), 0x03c8 },
{ "rang", 0x232a }, { QLatin1String("quot"), 34 },
{ "raquo", 0x00bb }, { QLatin1String("rArr"), 0x21d2 },
{ "rarr", 0x2192 }, { QLatin1String("radic"), 0x221a },
{ "rceil", 0x2309 }, { QLatin1String("rang"), 0x232a },
{ "rdquo", 0x201d }, { QLatin1String("raquo"), 0x00bb },
{ "real", 0x211c }, { QLatin1String("rarr"), 0x2192 },
{ "reg", 0x00ae }, { QLatin1String("rceil"), 0x2309 },
{ "rfloor", 0x230b }, { QLatin1String("rdquo"), 0x201d },
{ "rho", 0x03c1 }, { QLatin1String("real"), 0x211c },
{ "rlm", 0x200f }, { QLatin1String("reg"), 0x00ae },
{ "rsaquo", 0x203a }, { QLatin1String("rfloor"), 0x230b },
{ "rsquo", 0x2019 }, { QLatin1String("rho"), 0x03c1 },
{ "sbquo", 0x201a }, { QLatin1String("rlm"), 0x200f },
{ "scaron", 0x0161 }, { QLatin1String("rsaquo"), 0x203a },
{ "sdot", 0x22c5 }, { QLatin1String("rsquo"), 0x2019 },
{ "sect", 0x00a7 }, { QLatin1String("sbquo"), 0x201a },
{ "shy", 0x00ad }, { QLatin1String("scaron"), 0x0161 },
{ "sigma", 0x03c3 }, { QLatin1String("sdot"), 0x22c5 },
{ "sigmaf", 0x03c2 }, { QLatin1String("sect"), 0x00a7 },
{ "sim", 0x223c }, { QLatin1String("shy"), 0x00ad },
{ "spades", 0x2660 }, { QLatin1String("sigma"), 0x03c3 },
{ "sub", 0x2282 }, { QLatin1String("sigmaf"), 0x03c2 },
{ "sube", 0x2286 }, { QLatin1String("sim"), 0x223c },
{ "sum", 0x2211 }, { QLatin1String("spades"), 0x2660 },
{ "sup", 0x2283 }, { QLatin1String("sub"), 0x2282 },
{ "sup1", 0x00b9 }, { QLatin1String("sube"), 0x2286 },
{ "sup2", 0x00b2 }, { QLatin1String("sum"), 0x2211 },
{ "sup3", 0x00b3 }, { QLatin1String("sup"), 0x2283 },
{ "supe", 0x2287 }, { QLatin1String("sup1"), 0x00b9 },
{ "szlig", 0x00df }, { QLatin1String("sup2"), 0x00b2 },
{ "tau", 0x03c4 }, { QLatin1String("sup3"), 0x00b3 },
{ "there4", 0x2234 }, { QLatin1String("supe"), 0x2287 },
{ "theta", 0x03b8 }, { QLatin1String("szlig"), 0x00df },
{ "thetasym", 0x03d1 }, { QLatin1String("tau"), 0x03c4 },
{ "thinsp", 0x2009 }, { QLatin1String("there4"), 0x2234 },
{ "thorn", 0x00fe }, { QLatin1String("theta"), 0x03b8 },
{ "tilde", 0x02dc }, { QLatin1String("thetasym"), 0x03d1 },
{ "times", 0x00d7 }, { QLatin1String("thinsp"), 0x2009 },
{ "trade", 0x2122 }, { QLatin1String("thorn"), 0x00fe },
{ "uArr", 0x21d1 }, { QLatin1String("tilde"), 0x02dc },
{ "uacute", 0x00fa }, { QLatin1String("times"), 0x00d7 },
{ "uarr", 0x2191 }, { QLatin1String("trade"), 0x2122 },
{ "ucirc", 0x00fb }, { QLatin1String("uArr"), 0x21d1 },
{ "ugrave", 0x00f9 }, { QLatin1String("uacute"), 0x00fa },
{ "uml", 0x00a8 }, { QLatin1String("uarr"), 0x2191 },
{ "upsih", 0x03d2 }, { QLatin1String("ucirc"), 0x00fb },
{ "upsilon", 0x03c5 }, { QLatin1String("ugrave"), 0x00f9 },
{ "uuml", 0x00fc }, { QLatin1String("uml"), 0x00a8 },
{ "weierp", 0x2118 }, { QLatin1String("upsih"), 0x03d2 },
{ "xi", 0x03be }, { QLatin1String("upsilon"), 0x03c5 },
{ "yacute", 0x00fd }, { QLatin1String("uuml"), 0x00fc },
{ "yen", 0x00a5 }, { QLatin1String("weierp"), 0x2118 },
{ "yuml", 0x00ff }, { QLatin1String("xi"), 0x03be },
{ "zeta", 0x03b6 }, { QLatin1String("yacute"), 0x00fd },
{ "zwj", 0x200d }, { QLatin1String("yen"), 0x00a5 },
{ "zwnj", 0x200c } { QLatin1String("yuml"), 0x00ff },
{ QLatin1String("zeta"), 0x03b6 },
{ QLatin1String("zwj"), 0x200d },
{ QLatin1String("zwnj"), 0x200c }
}; };
// resolveEntity(), shown as a side-by-side diff.
// Left column (removed): two operator< overloads that compare a QString with
// QLatin1String(entity.name), followed by a resolveEntity() that calls
// qBinaryFind() over [&entities[0], &entities[MAX_ENTITY]) and returns
// QChar() when the result equals `end`, otherwise the matched row's code.
// Right column (added): a resolveEntity() that walks entitiesTbl[] from index
// 0 and returns the code of the first row whose name equals `entity`, or
// QChar() when no row matches.
// NOTE(review): the added loop compares against up to MAX_ENTITY names per
// lookup, while the removed version searched the name-sorted table with
// qBinaryFind() -- confirm with a measurement that this is a net speed-up.
static inline bool operator<(const QString &entityStr, const QTextHtmlEntity &entity) static inline QChar resolveEntity(const QString &entity)
{ {
return entityStr < QLatin1String(entity.name); for (qint16 i = 0; i < MAX_ENTITY; i++) {
} if (entity == entitiesTbl[i].name) {
return entitiesTbl[i].code;
static inline bool operator<(const QTextHtmlEntity &entity, const QString &entityStr) }
{ }
return QLatin1String(entity.name) < entityStr; return QChar();
}
static QChar resolveEntity(const QString &entity)
{
const QTextHtmlEntity *start = &entities[0];
const QTextHtmlEntity *end = &entities[MAX_ENTITY];
const QTextHtmlEntity *e = qBinaryFind(start, end, entity);
if (e == end)
return QChar();
return e->code;
} }
static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = { static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {