ICU text codec optimizations

Signed-off-by: Ivailo Monev <xakepa10@laimg.moc>
This commit is contained in:
Ivailo Monev 2019-11-23 01:33:16 +00:00
parent 82f1d03b02
commit b3dc4b3e3c

View file

@ -38,11 +38,6 @@
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
static inline bool qTextCodecNameMatch(const char *n, const char *h)
{
return ucnv_compareNames(n, h) == 0;
}
/* The list below is generated from http://www.iana.org/assignments/character-sets/ /* The list below is generated from http://www.iana.org/assignments/character-sets/
using the snippet of code below: using the snippet of code below:
@ -329,6 +324,9 @@ static const char mibToNameTable[] =
"windows-1258\0" "windows-1258\0"
"TIS-620\0"; "TIS-620\0";
static const char *nullchar = "\0";
static const char *questionmarkchar = "?";
/// \threadsafe /// \threadsafe
QList<QByteArray> QIcuCodec::availableCodecs() QList<QByteArray> QIcuCodec::availableCodecs()
{ {
@ -391,14 +389,14 @@ UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
UErrorCode error = U_ZERO_ERROR; UErrorCode error = U_ZERO_ERROR;
state->d = ucnv_open(m_name, &error); state->d = ucnv_open(m_name, &error);
if (Q_UNLIKELY(U_FAILURE(error))) if (Q_UNLIKELY(U_FAILURE(error)))
qWarning("getConverter(state) ucnv_open failed %s %s", m_name, u_errorName(error)); qWarning("QIcuCodec::getConverter: ucnv_open(%s) failed %s", m_name, u_errorName(error));
if (state->d) { if (state->d) {
error = U_ZERO_ERROR; error = U_ZERO_ERROR;
ucnv_setSubstChars(static_cast<UConverter *>(state->d), ucnv_setSubstChars(static_cast<UConverter *>(state->d),
state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?", 1, &error); state->flags & QTextCodec::ConvertInvalidToNull ? nullchar : questionmarkchar, 1, &error);
if (Q_UNLIKELY(U_FAILURE(error))) if (Q_UNLIKELY(U_FAILURE(error)))
qWarning("getConverter(state) ucnv_setSubstChars failed %s %s", m_name, u_errorName(error)); qWarning("QIcuCodec::getConverter: ucnv_setSubstChars(%s) failed %s", m_name, u_errorName(error));
conv = static_cast<UConverter *>(state->d); conv = static_cast<UConverter *>(state->d);
} }
@ -409,13 +407,13 @@ UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
UErrorCode error = U_ZERO_ERROR; UErrorCode error = U_ZERO_ERROR;
conv = ucnv_open(m_name, &error); conv = ucnv_open(m_name, &error);
if (Q_UNLIKELY(U_FAILURE(error))) if (Q_UNLIKELY(U_FAILURE(error)))
qWarning("getConverter(no state) ucnv_open failed %s %s", m_name, u_errorName(error)); qWarning("QIcuCodec::getConverter: ucnv_open(%s) failed %s", m_name, u_errorName(error));
if (conv) { if (conv) {
error = U_ZERO_ERROR; error = U_ZERO_ERROR;
ucnv_setSubstChars(conv, "?", 1, &error); ucnv_setSubstChars(conv, questionmarkchar, 1, &error);
if (Q_UNLIKELY(U_FAILURE(error))) if (Q_UNLIKELY(U_FAILURE(error)))
qWarning("getConverter(no state) ucnv_setSubstChars failed %s %s", m_name, u_errorName(error)); qWarning("QIcuCodec::getConverter: ucnv_setSubstChars(%s) failed %s", m_name, u_errorName(error));
} }
} }
return conv; return conv;
@ -425,13 +423,16 @@ QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::C
{ {
UConverter *conv = getConverter(state); UConverter *conv = getConverter(state);
QString string(length + 2, Qt::Uninitialized); // for small strings pre-allocate 2x the length, else increment by 2 to be
// able to hold at least one more surrogate pair
const bool small = (length < 100);
QString string(small ? (length * 2) : (length + 2), Qt::Uninitialized);
bool failed = false; bool failed = false;
const char *end = chars + length; const char *end = chars + length;
int convertedChars = 0; int convertedChars = 0;
UErrorCode error = U_ZERO_ERROR; UErrorCode error = U_ZERO_ERROR;
while (1) { while (chars != end) {
UChar *uc = reinterpret_cast<UChar *>(string.data()); UChar *uc = reinterpret_cast<UChar *>(string.data());
const UChar *ucEnd = uc + string.length(); const UChar *ucEnd = uc + string.length();
uc += convertedChars; uc += convertedChars;
@ -440,15 +441,14 @@ QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::C
&chars, end, &chars, end,
0, false, &error); 0, false, &error);
if (Q_UNLIKELY(U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR)) { if (Q_UNLIKELY(U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR)) {
qWarning("convertToUnicode failed: %s", u_errorName(error)); qWarning("QIcuCodec::convertToUnicode: failed %s", u_errorName(error));
failed = true; failed = true;
break; break;
} }
convertedChars = uc - reinterpret_cast<const UChar *>(string.constData()); convertedChars = uc - reinterpret_cast<const UChar *>(string.constData());
if (chars >= end) if (!small)
break; string.resize(string.length() + 2);
string.resize(string.length()*2);
} }
string.resize(convertedChars); string.resize(convertedChars);
@ -468,7 +468,6 @@ QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::C
return string; return string;
} }
QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const
{ {
UConverter *conv = getConverter(state); UConverter *conv = getConverter(state);
@ -481,7 +480,7 @@ QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QText
const UChar *uc = reinterpret_cast<const UChar *>(unicode); const UChar *uc = reinterpret_cast<const UChar *>(unicode);
const UChar *end = uc + length; const UChar *end = uc + length;
int convertedChars = 0; int convertedChars = 0;
while (1) { while (uc != end) {
char *ch = (char *)string.data(); char *ch = (char *)string.data();
char *chEnd = ch + string.length(); char *chEnd = ch + string.length();
ch += convertedChars; ch += convertedChars;
@ -490,14 +489,11 @@ QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QText
&uc, end, &uc, end,
0, false, &error); 0, false, &error);
if (Q_UNLIKELY(U_FAILURE(error))) { if (Q_UNLIKELY(U_FAILURE(error))) {
qWarning("convertFromUnicode failed: %s", u_errorName(error)); qWarning("QIcuCodec::convertFromUnicode: failed %s", u_errorName(error));
failed = true; failed = true;
break; break;
} }
convertedChars = ch - string.data(); convertedChars = ch - string.data();
if (uc >= end)
break;
string.resize(string.length()*2);
} }
string.resize(convertedChars); string.resize(convertedChars);
@ -524,7 +520,6 @@ QByteArray QIcuCodec::name() const
return m_name; return m_name;
} }
QList<QByteArray> QIcuCodec::aliases() const QList<QByteArray> QIcuCodec::aliases() const
{ {
UErrorCode error = U_ZERO_ERROR; UErrorCode error = U_ZERO_ERROR;
@ -543,11 +538,10 @@ QList<QByteArray> QIcuCodec::aliases() const
return aliases; return aliases;
} }
int QIcuCodec::mibEnum() const int QIcuCodec::mibEnum() const
{ {
for (int i = 0; i < mibToNameSize; ++i) { for (int i = 0; i < mibToNameSize; ++i) {
if (qTextCodecNameMatch(m_name, (mibToNameTable + mibToName[i].index))) if (ucnv_compareNames(m_name, (mibToNameTable + mibToName[i].index)) == 0)
return mibToName[i].mib; return mibToName[i].mib;
} }