kde-playground/kdepimlibs/kimap/rfccodecs.cpp
2015-04-14 21:49:29 +00:00

660 lines
18 KiB
C++

/**********************************************************************
*
* rfccodecs.cpp - handler for various rfc/mime encodings
* Copyright (C) 2000 s.carstens@gmx.de
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*********************************************************************/
/**
* @file
* This file is part of the IMAP support library and defines the
* RfcCodecs class.
*
* @brief
* Defines the RfcCodecs class.
*
* @author Sven Carstens
*/
#include "rfccodecs.h"
#include <ctype.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <QtCore/QTextCodec>
#include <QtCore/QBuffer>
#include <QtCore/QRegExp>
#include <QtCore/QByteArray>
#include <QtCore/QLatin1Char>
#include <kcodecs.h>
using namespace KIMAP;
// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
//@cond PRIVATE
// Alphabet used for the base64 sections of encoded IMAP folder names.
// Note that the last character is ',' where standard base64 uses '/'.
// encodeImapFolderName() indexes this table with a 6-bit value;
// decodeImapFolderName() builds its reverse lookup table from it.
static const unsigned char base64chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
// Marker stored in the reverse lookup table for bytes that are not part of
// the alphabet above (valid alphabet indexes are 0..63).
#define UNDEFINED 64
// NOTE(review): not referenced by any code visible in this file section.
#define MAXLINE 76
// The 16 characters (plus the terminating NUL, hence [17]) that
// encodeRFC2047String() and encodeRFC2231String() always hex-escape, in
// addition to bytes with the high bit set.
static const char especials[17] = "()<>@,;:\"/[]?.= ";
/* UTF16 definitions */
// Mask selecting the 10 payload bits carried by one surrogate.
#define UTF16MASK 0x03FFUL
// Number of payload bits per surrogate.
#define UTF16SHIFT 10
// First code point that has to be expressed as a surrogate pair.
#define UTF16BASE 0x10000UL
// Range of high (leading) surrogates.
#define UTF16HIGHSTART 0xD800UL
#define UTF16HIGHEND 0xDBFFUL
// Range of low (trailing) surrogates.
#define UTF16LOSTART 0xDC00UL
#define UTF16LOEND 0xDFFFUL
//@endcond
//-----------------------------------------------------------------------------
/*
 * Converts an IMAP folder name from modified UTF-7 to UTF-8.
 *
 * Bytes outside an "&...-" section are copied verbatim and the sequence "&-"
 * yields a literal '&'. Inside a section the modified base64 payload is
 * unpacked into UTF-16 code units, surrogate pairs are combined into one code
 * point, and every resulting code point is appended to the output as UTF-8.
 * Decoding of a section stops at the first byte that is not part of the
 * modified base64 alphabet; a '-' directly after the section is swallowed.
 *
 * @param inSrc folder name in modified UTF-7
 * @return the decoded folder name as UTF-8 bytes
 */
QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc )
{
  unsigned char c, bitcount;
  unsigned long ucs4, utf16, bitbuf;
  unsigned char base64[256], utf8[4];
  unsigned int srcPtr = 0;
  QByteArray dst;
  const char *src = inSrc.constData();
  const unsigned int srcLen = inSrc.length();

  /* initialize modified base64 decoding table; only the 64 alphabet
     characters are mapped, the string's terminating NUL stays UNDEFINED */
  memset( base64, UNDEFINED, sizeof( base64 ) );
  for ( unsigned int k = 0; k < sizeof( base64chars ) - 1; ++k ) {
    base64[base64chars[k]] = static_cast<unsigned char>( k );
  }

  /* loop until end of string */
  while ( srcPtr < srcLen ) {
    c = src[srcPtr++];

    /* never look at a byte behind the end of the input */
    const bool dashFollows = ( srcPtr < srcLen && src[srcPtr] == '-' );

    /* deal with literal characters and &- */
    if ( c != '&' || dashFollows ) {
      /* encode literally */
      dst += static_cast<char>( c );
      /* skip over the '-' if this is an &- sequence */
      if ( c == '&' ) {
        srcPtr++;
      }
      continue;
    }

    /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 */
    bitbuf = 0;
    bitcount = 0;
    ucs4 = 0;
    while ( srcPtr < srcLen ) {
      const unsigned char sextet =
        base64[static_cast<unsigned char>( src[srcPtr] )];
      if ( sextet == UNDEFINED ) {
        break;
      }
      ++srcPtr;
      bitbuf = ( bitbuf << 6 ) | sextet;
      bitcount += 6;

      /* enough bits for a UTF-16 character? */
      if ( bitcount < 16 ) {
        continue;
      }
      bitcount -= 16;
      utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;

      /* convert UTF16 to UCS4 */
      if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
        /* high surrogate: remember its bits and wait for the low one */
        ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
        continue;
      } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
        ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
      } else {
        ucs4 = utf16;
      }

      /* convert UTF-16 range of UCS4 to UTF-8 */
      unsigned int utf8len;
      if ( ucs4 <= 0x7fUL ) {
        utf8[0] = ucs4;
        utf8len = 1;
      } else if ( ucs4 <= 0x7ffUL ) {
        utf8[0] = 0xc0 | ( ucs4 >> 6 );
        utf8[1] = 0x80 | ( ucs4 & 0x3f );
        utf8len = 2;
      } else if ( ucs4 <= 0xffffUL ) {
        utf8[0] = 0xe0 | ( ucs4 >> 12 );
        utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
        utf8[2] = 0x80 | ( ucs4 & 0x3f );
        utf8len = 3;
      } else {
        utf8[0] = 0xf0 | ( ucs4 >> 18 );
        utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
        utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
        utf8[3] = 0x80 | ( ucs4 & 0x3f );
        utf8len = 4;
      }

      /* copy it */
      for ( unsigned int k = 0; k < utf8len; ++k ) {
        dst += static_cast<char>( utf8[k] );
      }
    }

    /* skip over trailing '-' in modified UTF-7 encoding */
    if ( srcPtr < srcLen && src[srcPtr] == '-' ) {
      ++srcPtr;
    }
  }
  return dst;
}
// QString convenience overload: the name is converted to UTF-8, decoded by
// the QByteArray overload and the UTF-8 result is turned back into a QString.
QString KIMAP::decodeImapFolderName( const QString &inSrc )
{
  const QByteArray utf7Name = inSrc.toUtf8();
  const QByteArray utf8Name = decodeImapFolderName( utf7Name );
  return QString::fromUtf8( utf8Name.data() );
}
//-----------------------------------------------------------------------------
// Returns a copy of src in which every double quote and every backslash is
// preceded by a backslash.
QByteArray KIMAP::quoteIMAP( const QByteArray &src )
{
  const int srcSize = src.length();
  QByteArray quoted;
  // worst case: every single byte needs an escape
  quoted.reserve( 2 * srcSize );
  for ( int pos = 0; pos < srcSize; ++pos ) {
    const char ch = src.at( pos );
    switch ( ch ) {
    case '"':
    case '\\':
      quoted += '\\';
      break;
    default:
      break;
    }
    quoted += ch;
  }
  // give back the part of the reservation that was not needed
  quoted.squeeze();
  return quoted;
}
// QString variant: returns a copy of src in which every double quote and
// every backslash is preceded by a backslash.
QString KIMAP::quoteIMAP( const QString &src )
{
  const QLatin1Char doubleQuote( '"' );
  const QLatin1Char backslash( '\\' );
  const int srcSize = src.length();
  QString quoted;
  // worst case: every single character needs an escape
  quoted.reserve( 2 * srcSize );
  for ( int pos = 0; pos < srcSize; ++pos ) {
    const QChar ch = src.at( pos );
    if ( ch == doubleQuote || ch == backslash ) {
      quoted += backslash;
    }
    quoted += ch;
  }
  // deliberately no squeeze() here - unnecessary and slow
  return quoted;
}
//-----------------------------------------------------------------------------
// QString convenience overload: the name is converted to UTF-8, encoded by
// the QByteArray overload and the result is turned back into a QString.
QString KIMAP::encodeImapFolderName( const QString &inSrc )
{
  const QByteArray utf8Name = inSrc.toUtf8();
  const QByteArray utf7Name = encodeImapFolderName( utf8Name );
  return QString::fromUtf8( utf7Name.data() );
}
/*
 * Converts a UTF-8 folder name to modified UTF-7 and quotes the result.
 *
 * Bytes in the range ' '..'~' are copied verbatim ('&' becomes "&-").
 * Everything else is collected into code points, converted to UTF-16 (using
 * a surrogate pair where necessary) and emitted as modified base64 between
 * '&' and '-'. The finished string is passed through quoteIMAP().
 *
 * @param inSrc folder name as UTF-8 bytes
 * @return the quoted, modified UTF-7 encoded folder name
 */
QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc )
{
  unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
  unsigned int ucs4, bitbuf;
  QByteArray src = inSrc;
  QByteArray dst;
  int srcPtr = 0;

  utf7mode = 0;   // non-zero while inside an "&...-" section
  utf8total = 0;  // length of the UTF-8 sequence being collected, 0 if none
  bitstogo = 0;   // number of bits in bitbuf not yet written as base64
  utf8pos = 0;    // bytes of the current UTF-8 sequence consumed so far
  bitbuf = 0;
  ucs4 = 0;

  while ( srcPtr < src.length () ) {
    c = (unsigned char)src[srcPtr++];

    /* normal character? */
    if ( c >= ' ' && c <= '~' ) {
      /* switch out of UTF-7 mode */
      if ( utf7mode ) {
        if ( bitstogo ) {
          /* flush the remaining bits, padded with zero bits */
          dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
          bitstogo = 0;
        }
        dst += '-';
        utf7mode = 0;
      }
      dst += static_cast<char>( c );
      /* encode '&' as '&-' */
      if ( c == '&' ) {
        dst += '-';
      }
      continue;
    }

    /* switch to UTF-7 mode */
    if ( !utf7mode ) {
      dst += '&';
      utf7mode = 1;
    }

    /* Encode US-ASCII characters as themselves */
    if ( c < 0x80 ) {
      ucs4 = c;
      utf8total = 1;
    } else if ( utf8total ) {
      /* save UTF8 bits into UCS4 */
      ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
      if ( ++utf8pos < utf8total ) {
        continue;
      }
    } else {
      utf8pos = 1;
      if ( c < 0xE0 ) {
        utf8total = 2;
        ucs4 = c & 0x1F;
      } else if ( c < 0xF0 ) {
        utf8total = 3;
        ucs4 = c & 0x0F;
      } else {
        /* NOTE: can't convert UTF8 sequences longer than 4 */
        utf8total = 4;
        /* the lead byte 11110xxx carries three payload bits; masking with
           0x03 would drop the top one and break U+100000..U+10FFFF */
        ucs4 = c & 0x07;
      }
      continue;
    }

    /* loop to split ucs4 into two utf16 chars if necessary */
    utf8total = 0;
    do
    {
      if ( ucs4 >= UTF16BASE ) {
        /* emit the high surrogate now, the low one in the next round */
        ucs4 -= UTF16BASE;
        bitbuf =
          ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
        ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
        utf16flag = 1;
      } else {
        bitbuf = ( bitbuf << 16 ) | ucs4;
        utf16flag = 0;
      }
      bitstogo += 16;
      /* spew out base64 */
      while ( bitstogo >= 6 ) {
        bitstogo -= 6;
        dst +=
          base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
      }
    }
    while ( utf16flag );
  }

  /* if in UTF-7 mode, finish in ASCII */
  if ( utf7mode ) {
    if ( bitstogo ) {
      dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
    }
    dst += '-';
  }
  return quoteIMAP( dst );
}
//-----------------------------------------------------------------------------
// Looks up the text codec for a charset name. The name is lower-cased and
// every "windows" in it is replaced by "cp" before it is handed to
// QTextCodec::codecForName(). An empty name yields 0.
QTextCodec *KIMAP::codecForName( const QString &str )
{
  if ( !str.isEmpty () ) {
    QString codecName = str.toLower ();
    codecName.replace ( QLatin1String("windows"), QLatin1String("cp") );
    return QTextCodec::codecForName ( codecName.toLatin1 () );
  }
  return 0;
}
//-----------------------------------------------------------------------------
// Convenience overload for callers that are not interested in the charset:
// forwards to the two-argument overload and drops the reported charset.
const QString KIMAP::decodeRFC2047String( const QString &str )
{
  QString ignoredCharset;
  const QString decoded = decodeRFC2047String( str, ignoredCharset );
  return decoded;
}
//-----------------------------------------------------------------------------
// Convenience overload for callers that are not interested in the language:
// forwards to the three-argument overload and drops the reported language.
const QString KIMAP::decodeRFC2047String( const QString &str,
                                          QString &charset )
{
  QString ignoredLanguage;
  const QString decoded = decodeRFC2047String( str, charset, ignoredLanguage );
  return decoded;
}
//-----------------------------------------------------------------------------
/*
 * Decodes the "=?charset[*language]?Q|B?payload?=" encoded words in str.
 *
 * Text outside encoded words is copied unchanged. A sequence that starts
 * with "=?" but is not a well-formed encoded word (or is longer than the
 * internal limit of 200 characters) is copied literally. If a codec can be
 * found for the charset that was seen last, the complete result is converted
 * with it; otherwise the result is interpreted as Latin-1.
 *
 * @param str      the text to decode
 * @param charset  receives the charset name of the last encoded word whose
 *                 charset field could be parsed; untouched otherwise
 * @param language receives the language suffix (the part after '*') of that
 *                 charset field if it had one; untouched otherwise
 * @return the decoded text, or str itself if it contains no "=?"
 */
const QString KIMAP::decodeRFC2047String( const QString &str,
                                          QString &charset,
                                          QString &language )
{
  //do we have a rfc string
  if ( !str.contains( QLatin1String("=?") ) ) {
    return str;
  }

  // FIXME get rid of the conversion?
  QByteArray aStr = str.toLatin1 (); // QString.length() means Unicode chars
  QByteArray result;
  char *pos, *beg, *end, *mid = 0;
  QByteArray cstr;
  char encoding = 0;
  bool valid;
  const int maxLen = 200;
  int i; // characters of the current encoded word consumed so far

  for ( pos = aStr.data (); *pos; pos++ ) {
    if ( pos[0] != '=' || pos[1] != '?' ) {
      result += *pos;
      continue;
    }
    beg = pos + 2;
    end = beg;
    valid = true;

    // parse charset name
    // (the <ctype.h> functions must not be fed negative char values)
    for ( i = 2, pos += 2;
          i < maxLen &&
          ( *pos != '?' &&
            ( ispunct( static_cast<unsigned char>( *pos ) ) ||
              isalnum( static_cast<unsigned char>( *pos ) ) ) );
          i++ ) {
      pos++;
    }
    if ( *pos != '?' || i < 4 || i >= maxLen ) {
      valid = false;
    } else {
      // exactly the bytes between "=?" and the next '?', without that '?'
      charset = QLatin1String( QByteArray( beg, static_cast<int>( pos - beg ) ) );
      int pt = charset.lastIndexOf( QLatin1Char('*') );
      if ( pt != -1 ) {
        // save language for later usage
        language = charset.right( charset.length () - pt - 1 );
        // tie off language as defined in rfc2047
        charset.truncate( pt );
      }

      // get encoding and check delimiting question marks; pos[2] is only
      // looked at when pos[1] is a real character, so a string that ends
      // right here is never read past its terminator
      encoding = pos[1] ? toupper( static_cast<unsigned char>( pos[1] ) ) : 0;
      if ( ( encoding != 'Q' && encoding != 'B' ) || pos[2] != '?' ) {
        valid = false;
      } else {
        pos += 3;
        i += 3;
      }
      // kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
    }

    if ( valid ) {
      mid = pos;
      // search for end of encoded part
      while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) {
        i++;
        pos++;
      }
      if ( i >= maxLen || !*pos ) {
        valid = false;
      } else {
        end = pos + 2;//end now points to the first char after the encoded string
      }
    }

    if ( valid ) {
      // the payload is everything between the second '?' and the closing "?="
      cstr = QByteArray( mid, static_cast<int>( pos - mid ) );
      if ( encoding == 'Q' ) {
        // decode quoted printable text; '_' stands for a space
        cstr.replace( '_', ' ' );
        // kDebug() << "before QP '"
        // << cstr << "'";
        cstr = KCodecs::quotedPrintableDecode( cstr );
        // kDebug() << "after QP '"
        // << cstr << "'";
      } else {
        // decode base64 text
        cstr = QByteArray::fromBase64( cstr );
      }
      result += cstr;
      pos = end - 1; // the loop increment moves on to end
    } else {
      // kDebug() << "invalid";
      // copy the "=?" literally and continue right behind it
      pos = beg - 2;
      result += *pos++;
      result += *pos;
    }
  }

  // NOTE(review): the codec of the last charset seen is applied to the whole
  // result, including text that was never part of an encoded word.
  if ( !charset.isEmpty () ) {
    QTextCodec *aCodec = codecForName( QLatin1String(charset.toLatin1 ()) );
    if ( aCodec ) {
      // kDebug() << "Codec is" << aCodec->name();
      return aCodec->toUnicode( result );
    }
  }
  return QLatin1String(result);
}
//-----------------------------------------------------------------------------
// QString convenience overload: the text is converted to Latin-1, encoded by
// the QByteArray overload and the result is returned as a QString.
const QString KIMAP::encodeRFC2047String( const QString &str )
{
  const QByteArray latin1Text = str.toLatin1();
  const QByteArray encodedText = encodeRFC2047String( latin1Text );
  return QLatin1String(encodedText);
}
//-----------------------------------------------------------------------------
/*
 * Encodes Latin-1 text with "=?iso-8859-1?q?...?=" encoded words.
 *
 * Text without any byte >= 0x80 is returned unchanged. Otherwise everything
 * up to the start of the word containing the first such byte is copied
 * literally, and the following stretch is written as one encoded word in
 * which bytes >= 0x80 and the characters listed in especials are replaced by
 * "=XX". This is repeated until the input is used up. Processing stops at
 * the first NUL byte of the input.
 *
 * The result is built by appending, so the returned array is exactly as long
 * as the encoded text.
 *
 * @param str Latin-1 encoded text
 * @return the encoded text
 */
const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
{
  if ( str.isEmpty () ) {
    return str;
  }

  const signed char *latin =
    reinterpret_cast<const signed char *>
    ( str.data() ), *l, *start, *stop;
  char hexcode;
  int numQuotes, i;
  QByteArray result;
  // rough guess of the final size, the array grows on demand
  result.reserve( 3 * str.length() / 2 );

  while ( *latin ) {
    // look for the first byte that needs encoding; start follows the last
    // space seen before it
    l = latin;
    start = latin;
    while ( *l ) {
      if ( *l == 32 ) {
        start = l + 1;
      }
      if ( *l < 0 ) {
        break;
      }
      l++;
    }

    if ( !*l ) {
      // nothing left that needs encoding: copy the remainder literally
      while ( *latin ) {
        result += static_cast<char>( *latin );
        latin++;
      }
      continue;
    }

    // decide where the encoded word has to end
    numQuotes = 1;
    while ( *l ) {
      /* The encoded word must be limited to 75 character */
      for ( i = 0; i < 16; ++i ) {
        if ( *l == especials[i] ) {
          numQuotes++;
        }
      }
      if ( *l < 0 ) {
        numQuotes++;
      }
      /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
      if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
        break;
      }
      l++;
    }
    if ( *l ) {
      // prefer to end the encoded word at the last space before l
      stop = l - 1;
      while ( stop >= start && *stop != 32 ) {
        stop--;
      }
      if ( stop <= start ) {
        stop = l;
      }
    } else {
      stop = l;
    }

    // literal text in front of the encoded word
    while ( latin < start ) {
      result += static_cast<char>( *latin );
      latin++;
    }

    result += "=?iso-8859-1?q?";
    while ( latin < stop ) {
      bool quote = ( *latin < 0 );
      for ( i = 0; i < 16; ++i ) {
        if ( *latin == especials[i] ) {
          quote = true;
        }
      }
      if ( quote ) {
        // "=XX" with upper-case hex digits
        result += '=';
        hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
        if ( hexcode >= 58 ) {
          hexcode += 7;
        }
        result += hexcode;
        hexcode = ( *latin & 0x0F ) + 48;
        if ( hexcode >= 58 ) {
          hexcode += 7;
        }
        result += hexcode;
      } else {
        result += static_cast<char>( *latin );
      }
      latin++;
    }
    result += "?=";
  }
  return result;
}
//-----------------------------------------------------------------------------
/*
 * Percent-encodes a parameter value in the style of RFC 2231.
 *
 * The text is converted to Latin-1. If that contains no byte >= 0x80 the
 * input is returned unchanged. Otherwise every byte >= 0x80, every character
 * listed in especials and the characters '%', '\'' and '*' are replaced by
 * "%XX" (upper-case hex). The latter three have to be escaped because '%'
 * introduces an escape and '\'' separates charset, language and value, so
 * decodeRFC2231String() could not recover them otherwise. Only the escaped
 * value is produced, without a charset'language' prefix. Processing stops at
 * the first NUL character.
 *
 * @param str the parameter value
 * @return the escaped value, or str itself if nothing had to be encoded
 */
const QString KIMAP::encodeRFC2231String( const QString &str )
{
  if ( str.isEmpty () ) {
    return str;
  }

  // the byte array owns the Latin-1 copy, no manual allocation needed
  const QByteArray latin = str.toLatin1 ();
  const char *const first = latin.constData ();
  const char *l = first;

  // anything to encode at all?
  while ( *l && static_cast<unsigned char>( *l ) < 0x80 ) {
    l++;
  }
  if ( !*l ) {
    return str;
  }

  static const char hexDigits[] = "0123456789ABCDEF";
  QByteArray result;
  for ( l = first; *l; l++ ) {
    const unsigned char byte = static_cast<unsigned char>( *l );
    bool quote = byte >= 0x80 || *l == '%' || *l == '\'' || *l == '*';
    for ( int i = 0; i < 16 && !quote; ++i ) {
      if ( *l == especials[i] ) {
        quote = true;
      }
    }
    if ( quote ) {
      result += '%';
      result += hexDigits[byte >> 4];
      result += hexDigits[byte & 0x0F];
    } else {
      result += *l;
    }
  }
  return QLatin1String(result);
}
//-----------------------------------------------------------------------------
const QString KIMAP::decodeRFC2231String( const QString &str )
{
int p = str.indexOf ( QLatin1Char('\'') );
//see if it is an rfc string
if ( p < 0 ) {
return str;
}
int l = str.lastIndexOf( QLatin1Char('\'') );
//second is language
if ( p >= l ) {
return str;
}
//first is charset or empty
//QString charset = str.left ( p );
QString st = str.mid ( l + 1 );
//QString language = str.mid ( p + 1, l - p - 1 );
//kDebug() << "Charset:" << charset << "Language:" << language;
char ch, ch2;
p = 0;
while ( p < (int) st.length () ) {
if ( st.at( p ) == 37 ) {
ch = st.at( p + 1 ).toLatin1 () - 48;
if ( ch > 16 ) {
ch -= 7;
}
ch2 = st.at( p + 2 ).toLatin1 () - 48;
if ( ch2 > 16 ) {
ch2 -= 7;
}
st.replace( p, 1, ch * 16 + ch2 );
st.remove ( p + 1, 2 );
}
p++;
}
return st;
}