better rich text detection from Qt::mightBeRichText()

Because Katana uses its own markup for text, the first tag may not be a valid
HTML tag, but there may be valid HTML tags after the first one.

Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
Ivailo Monev 2023-08-17 18:06:05 +03:00
parent 0c717a4fb8
commit e1f487bd60

View file

@ -1919,12 +1919,14 @@ static const qint16 elementsTblSize = 59;
*/ */
bool Qt::mightBeRichText(const QString& text) bool Qt::mightBeRichText(const QString& text)
{ {
if (text.isEmpty()) if (text.isEmpty()) {
return false; return false;
int start = 0; }
while (start < text.length() && text.at(start).isSpace()) int start = 0;
while (start < text.length() && text.at(start).isSpace()) {
++start; ++start;
}
// skip a leading <?xml ... ?> as for example with xhtml // skip a leading <?xml ... ?> as for example with xhtml
if (text.mid(start, 5) == QLatin1String("<?xml")) { if (text.mid(start, 5) == QLatin1String("<?xml")) {
@ -1938,40 +1940,49 @@ bool Qt::mightBeRichText(const QString& text)
++start; ++start;
} }
while (start < text.length() && text.at(start).isSpace()) while (start < text.length() && text.at(start).isSpace()) {
++start; ++start;
}
} }
if (text.mid(start, 5).toLower() == QLatin1String("<!doc")) if (text.mid(start, 5).toLower() == QLatin1String("<!doc")) {
return true; return true;
}
int open = start; int open = start;
while (open < text.length() && text.at(open) != QLatin1Char('<') while (open < text.length() && text.at(open) != QLatin1Char('<')
&& text.at(open) != QLatin1Char('\n')) { && text.at(open) != QLatin1Char('\n')) {
if (text.at(open) == QLatin1Char('&') && text.mid(open+1,3) == QLatin1String("lt;")) if (text.at(open) == QLatin1Char('&') && text.mid(open+1,3) == QLatin1String("lt;")) {
return true; // support desperate attempt of user to see <...> // support desperate attempt of user to see <...>
return true;
}
++open; ++open;
} }
if (open < text.length() && text.at(open) == QLatin1Char('<')) {
const int close = text.indexOf(QLatin1Char('>'), open); int tagstart = text.indexOf(QLatin1Char('<'), start);
if (close > -1) { while (tagstart > -1) {
const int tagclose = text.indexOf(QLatin1Char('>'), tagstart);
if (tagclose > -1) {
QByteArray tag; QByteArray tag;
for (int i = open+1; i < close; ++i) { for (int i = tagstart+1; i < tagclose; ++i) {
if (text[i].isDigit() || text[i].isLetter()) if (text[i].isDigit() || text[i].isLetter()) {
tag += text[i].toLatin1(); tag += text[i].toLatin1();
else if (!tag.isEmpty() && text[i].isSpace()) } else if (!tag.isEmpty() && text[i].isSpace()) {
break; break;
else if (!tag.isEmpty() && text[i] == QLatin1Char('/') && i + 1 == close) } else if (!tag.isEmpty() && text[i] == QLatin1Char('/') && i + 1 == tagclose) {
break; break;
else if (!text[i].isSpace() && (!tag.isEmpty() || text[i] != QLatin1Char('!'))) } else if (!text[i].isSpace() && (!tag.isEmpty() || text[i] != QLatin1Char('!'))) {
return false; // that's not a tag // that's not a tag
goto nexttag;
}
} }
for (qint16 i = 0; i < elementsTblSize; i++) { for (qint16 i = 0; i < elementsTblSize; i++) {
if (qstricmp(tag.constData(), elementsTbl[i]) == 0) { if (qstricmp(tag.constData(), elementsTbl[i]) == 0) {
return true; return true;
} }
} }
return false;
} }
nexttag:
tagstart = text.indexOf(QLatin1Char('<'), tagstart + 1);
} }
return false; return false;
} }