rework ICU codec to avoid reallocations during conversion

This commit is contained in:
Ivailo Monev 2019-11-23 21:52:42 +00:00
parent 2b483c823f
commit 7bd3616658

View file

@ -419,93 +419,63 @@ UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
return conv;
}
// Decodes `length` bytes starting at `src` into a QString using the ICU
// converter returned by getConverter(state).
//
// The output buffer is sized once up front so the conversion never has to
// grow the string. Afterwards the string is trimmed to the length returned by
// ucnv_toUChars(), clamped to the buffer size, so uninitialized storage is
// never handed back to the caller - not even when the conversion fails.
//
// On failure a warning is logged and, when `state` is given,
// state->invalidChars receives the number of offending input bytes as
// reported by ucnv_getInvalidChars() (0 if that query fails as well).
//
// When `state` is null the converter is closed before returning.
//
// NOTE(review): ICU documents ucnv_toUChars() as a whole-string helper that is
// not meant for streaming conversion - confirm that callers passing `state`
// do not rely on partial sequences being carried over between calls.
QString QIcuCodec::convertToUnicode(const char *src, int length, QTextCodec::ConverterState *state) const
{
    UConverter *conv = getConverter(state);

    // enough space to hold BOM, each char as surrogate pair and terminator
    const int capacity = 1 + (length * 2) + 2;
    QString string(capacity, Qt::Uninitialized);
    UChar *dest = reinterpret_cast<UChar *>(string.data());

    UErrorCode error = U_ZERO_ERROR;
    const int convresult = ucnv_toUChars(conv, dest, capacity, src, length, &error);
    if (Q_UNLIKELY(U_FAILURE(error))) {
        qWarning("QIcuCodec::convertToUnicode: failed %s", u_errorName(error));
        if (state) {
            error = U_ZERO_ERROR;
            char errorbytes[10];
            // in: capacity of errorbytes, out: number of invalid bytes; passing
            // a capacity of 0 makes ICU bail out with U_INDEX_OUTOFBOUNDS_ERROR
            // and the invalid length would never be reported
            int8_t invalidlen = sizeof(errorbytes);
            ucnv_getInvalidChars(conv,
                                 errorbytes,
                                 &invalidlen,
                                 &error);
            state->invalidChars = U_FAILURE(error) ? 0 : invalidlen;
        }
    }

    // the returned length can exceed the buffer on overflow (preflight
    // length), so clamp it before trimming
    int resultlen = convresult;
    if (resultlen < 0)
        resultlen = 0;
    else if (resultlen > capacity)
        resultlen = capacity;
    string.resize(resultlen);

    if (!state)
        ucnv_close(conv);
    return string;
}
QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const
{
UConverter *conv = getConverter(state);
int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
QByteArray string(requiredLength, Qt::Uninitialized);
bool failed = false;
int maxbytes = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
QByteArray string(maxbytes, Qt::Uninitialized);
const UChar *src = reinterpret_cast<const UChar *>(unicode);
char *dest = reinterpret_cast<char *>(string.data());
UErrorCode error = U_ZERO_ERROR;
const UChar *uc = reinterpret_cast<const UChar *>(unicode);
const UChar *end = uc + length;
int convertedChars = 0;
while (uc != end) {
char *ch = (char *)string.data();
char *chEnd = ch + string.length();
ch += convertedChars;
ucnv_fromUnicode(conv,
&ch, chEnd,
&uc, end,
0, false, &error);
if (Q_UNLIKELY(U_FAILURE(error))) {
qWarning("QIcuCodec::convertFromUnicode: failed %s", u_errorName(error));
failed = true;
break;
const int convresult = ucnv_fromUChars(conv, dest, string.length(), src, length, &error);
if (Q_UNLIKELY(U_FAILURE(error))) {
qWarning("QIcuCodec::convertFromUnicode: failed %s", u_errorName(error));
if (state) {
error = U_ZERO_ERROR;
char errorbytes[10];
int8_t invalidlen = 0;
ucnv_getInvalidChars(conv,
errorbytes,
&invalidlen,
&error);
state->invalidChars = invalidlen;
}
convertedChars = ch - string.data();
}
string.resize(convertedChars);
if (failed && state) {
error = U_ZERO_ERROR;
char errorbytes[10];
int8_t invalidlen = 0;
ucnv_getInvalidChars(conv,
errorbytes,
&invalidlen,
&error);
state->invalidChars = invalidlen;
} else {
string.resize(convresult);
}
if (!state)