kde-playground/kdepimlibs/kmime/kmime_header_parsing.cpp
2015-04-14 21:49:29 +00:00

2270 lines
60 KiB
C++

/* -*- c++ -*-
kmime_header_parsing.cpp
KMime, the KDE Internet mail/usenet news message library.
Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "kmime_header_parsing.h"
#include "kmime_codecs.h"
#include "kmime_headerfactory_p.h"
#include "kmime_headers.h"
#include "kmime_util.h"
#include "kmime_util_p.h"
#include "kmime_dateformatter.h"
#include "kmime_warning.h"
#include <kglobal.h>
#include <kcharsets.h>
#include <QtCore/QTextCodec>
#include <QtCore/QMap>
#include <QtCore/QStringList>
#include <QtCore/QUrl>
#include <ctype.h> // for isdigit
#include <cassert>
using namespace KMime;
using namespace KMime::Types;
namespace KMime {
namespace Types {
// QUrl::fromAce is extremely expensive, so only use it when necessary.
// Fortunately, the presence of IDNA is readily detected with a substring match...
static inline QString QUrl_fromAce_wrapper( const QString & domain )
{
if ( domain.contains( QLatin1String( "xn--" ) ) ) {
return QUrl::fromAce( domain.toLatin1() );
} else {
return domain;
}
}
static QString addr_spec_as_string( const AddrSpec & as, bool pretty )
{
if ( as.isEmpty() ) {
return QString();
}
static QChar dotChar = QLatin1Char( '.' );
static QChar backslashChar = QLatin1Char( '\\' );
static QChar quoteChar = QLatin1Char( '"' );
bool needsQuotes = false;
QString result;
result.reserve( as.localPart.length() + as.domain.length() + 1 );
for ( int i = 0 ; i < as.localPart.length() ; ++i ) {
const QChar ch = as.localPart.at( i );
if ( ch == dotChar || isAText( ch.toLatin1() ) ) {
result += ch;
} else {
needsQuotes = true;
if ( ch == backslashChar || ch == quoteChar ) {
result += backslashChar;
}
result += ch;
}
}
const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ;
if ( needsQuotes ) {
result = quoteChar + result + quoteChar;
}
if ( dom.isEmpty() ) {
return result;
} else {
result += QLatin1Char( '@' );
result += dom;
return result;
}
}
QString AddrSpec::asString() const
{
return addr_spec_as_string( *this, false );
}
QString AddrSpec::asPrettyString() const
{
return addr_spec_as_string( *this, true );
}
bool AddrSpec::isEmpty() const
{
return localPart.isEmpty() && domain.isEmpty();
}
QByteArray Mailbox::address() const
{
QByteArray result;
const QString asString = addr_spec_as_string( mAddrSpec, false );
if ( !asString.isEmpty() ) {
result = asString.toLatin1();
}
return result;
//return mAddrSpec.asString().toLatin1();
}
AddrSpec Mailbox::addrSpec() const
{
return mAddrSpec;
}
QString Mailbox::name() const
{
return mDisplayName;
}
void Mailbox::setAddress( const AddrSpec &addr )
{
mAddrSpec = addr;
}
void Mailbox::setAddress( const QByteArray &addr )
{
const char *cursor = addr.constData();
if ( !HeaderParsing::parseAngleAddr( cursor,
cursor + addr.length(), mAddrSpec ) ) {
if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
mAddrSpec ) ) {
kWarning() << "Invalid address";
return;
}
}
}
void Mailbox::setName( const QString &name )
{
mDisplayName = removeBidiControlChars( name );
}
void Mailbox::setNameFrom7Bit( const QByteArray &name,
const QByteArray &defaultCharset )
{
QByteArray cs;
setName( decodeRFC2047String( name, cs, defaultCharset, false ) );
}
bool Mailbox::hasAddress() const
{
return !mAddrSpec.isEmpty();
}
bool Mailbox::hasName() const
{
return !mDisplayName.isEmpty();
}
QString Mailbox::prettyAddress() const
{
return prettyAddress( QuoteNever );
}
QString Mailbox::prettyAddress( Quoting quoting ) const
{
if ( !hasName() ) {
return QLatin1String( address() );
}
QString s = name();
if ( quoting != QuoteNever ) {
addQuotes( s, quoting == QuoteAlways /*bool force*/ );
}
if ( hasAddress() ) {
s += QLatin1String( " <" ) + QLatin1String( address() ) + QLatin1Char( '>' );
}
return s;
}
void Mailbox::fromUnicodeString( const QString &s )
{
from7BitString( encodeRFC2047Sentence( s, "utf-8" ) );
}
void Mailbox::from7BitString( const QByteArray &s )
{
const char *cursor = s.constData();
HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
}
QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
{
if ( !hasName() ) {
return address();
}
QByteArray rv;
if ( isUsAscii( name() ) ) {
QByteArray tmp = name().toLatin1();
addQuotes( tmp, false );
rv += tmp;
} else {
rv += encodeRFC2047String( name(), encCharset, true );
}
if ( hasAddress() ) {
rv += " <" + address() + '>';
}
return rv;
}
} // namespace Types
namespace HeaderParsing {
// parse the encoded-word (scursor points to after the initial '=')
bool parseEncodedWord( const char* &scursor, const char * const send,
QString &result, QByteArray &language,
QByteArray &usedCS, const QByteArray &defaultCS,
bool forceCS )
{
// make sure the caller already did a bit of the work.
assert( *( scursor - 1 ) == '=' );
//
// STEP 1:
// scan for the charset/language portion of the encoded-word
//
char ch = *scursor++;
if ( ch != '?' ) {
// kDebug() << "first";
//KMIME_WARN_PREMATURE_END_OF( EncodedWord );
return false;
}
// remember start of charset (ie. just after the initial "=?") and
// language (just after the first '*') fields:
const char * charsetStart = scursor;
const char * languageStart = 0;
// find delimiting '?' (and the '*' separating charset and language
// tags, if any):
for ( ; scursor != send ; scursor++ ) {
if ( *scursor == '?' ) {
break;
} else if ( *scursor == '*' && languageStart == 0 ) {
languageStart = scursor + 1;
}
}
// not found? can't be an encoded-word!
if ( scursor == send || *scursor != '?' ) {
// kDebug() << "second";
KMIME_WARN_PREMATURE_END_OF( EncodedWord );
return false;
}
// extract the language information, if any (if languageStart is 0,
// language will be null, too):
QByteArray maybeLanguage( languageStart, scursor - languageStart );
// extract charset information (keep in mind: the size given to the
// ctor is one off due to the \0 terminator):
QByteArray maybeCharset( charsetStart,
( languageStart ? languageStart - 1 : scursor ) - charsetStart );
//
// STEP 2:
// scan for the encoding portion of the encoded-word
//
// remember start of encoding (just _after_ the second '?'):
scursor++;
const char * encodingStart = scursor;
// find next '?' (ending the encoding tag):
for ( ; scursor != send ; scursor++ ) {
if ( *scursor == '?' ) {
break;
}
}
// not found? Can't be an encoded-word!
if ( scursor == send || *scursor != '?' ) {
// kDebug() << "third";
KMIME_WARN_PREMATURE_END_OF( EncodedWord );
return false;
}
// extract the encoding information:
QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
// kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
// << "\"; language == \"" << maybeLanguage
// << "\"; encoding == \"" << maybeEncoding << "\"";
//
// STEP 3:
// scan for encoded-text portion of encoded-word
//
// remember start of encoded-text (just after the third '?'):
scursor++;
const char * encodedTextStart = scursor;
// find the '?=' sequence (ending the encoded-text):
for ( ; scursor != send ; scursor++ ) {
if ( *scursor == '?' ) {
if ( scursor + 1 != send ) {
if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore
KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
continue;
}
else { // yep, found a '?=' sequence
scursor += 2;
break;
}
}
else { // The '?' is the last char, but we need a '=' after it!
KMIME_WARN_PREMATURE_END_OF( EncodedWord );
return false;
}
}
}
if ( *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' ||
scursor < encodedTextStart + 2 ) {
KMIME_WARN_PREMATURE_END_OF( EncodedWord );
return false;
}
// set end sentinel for encoded-text:
const char * const encodedTextEnd = scursor - 2;
//
// STEP 4:
// setup decoders for the transfer encoding and the charset
//
// try if there's a codec for the encoding found:
Codec * codec = Codec::codecForName( maybeEncoding );
if ( !codec ) {
KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
return false;
}
// get an instance of a corresponding decoder:
Decoder * dec = codec->makeDecoder();
assert( dec );
// try if there's a (text)codec for the charset found:
bool matchOK = false;
QTextCodec *textCodec = 0;
if ( forceCS || maybeCharset.isEmpty() ) {
textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
usedCS = cachedCharset( defaultCS );
} else {
textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK );
if ( !matchOK ) { //no suitable codec found => use default charset
textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
usedCS = cachedCharset( defaultCS );
} else {
usedCS = cachedCharset( maybeCharset );
}
}
if ( !matchOK || !textCodec ) {
KMIME_WARN_UNKNOWN( Charset, maybeCharset );
delete dec;
return false;
};
// kDebug() << "mimeName(): \"" << textCodec->name() << "\"";
// allocate a temporary buffer to store the 8bit text:
int encodedTextLength = encodedTextEnd - encodedTextStart;
QByteArray buffer;
buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
char *bbegin = buffer.data();
char *bend = bbegin + buffer.length();
//
// STEP 5:
// do the actual decoding
//
if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
<< encodedTextLength << ")\nresult may be truncated";
}
result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );
// kDebug() << "result now: \"" << result << "\"";
// cleanup:
delete dec;
language = maybeLanguage;
return true;
}
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
while ( scursor != send &&
( *scursor == ' ' || *scursor == '\n' ||
*scursor == '\t' || *scursor == '\r' ) )
scursor++;
}
bool parseAtom( const char * &scursor, const char * const send,
QString &result, bool allow8Bit )
{
QPair<const char*, int> maybeResult;
if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
result += QString::fromLatin1( maybeResult.first, maybeResult.second );
return true;
}
return false;
}
bool parseAtom( const char * &scursor, const char * const send,
QPair<const char*,int> &result, bool allow8Bit )
{
bool success = false;
const char *start = scursor;
while ( scursor != send ) {
signed char ch = *scursor++;
if ( ch > 0 && isAText( ch ) ) {
// AText: OK
success = true;
} else if ( allow8Bit && ch < 0 ) {
// 8bit char: not OK, but be tolerant.
KMIME_WARN_8BIT( ch );
success = true;
} else {
// CTL or special - marking the end of the atom:
// re-set sursor to point to the offending
// char and return:
scursor--;
break;
}
}
result.first = start;
result.second = scursor - start;
return success;
}
// FIXME: Remove this and the other parseToken() method. add a new one where "result" is a
// QByteArray.
bool parseToken( const char * &scursor, const char * const send,
QString &result, bool allow8Bit )
{
QPair<const char*, int> maybeResult;
if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
result += QString::fromLatin1( maybeResult.first, maybeResult.second );
return true;
}
return false;
}
bool parseToken( const char * &scursor, const char * const send,
QPair<const char*,int> &result, bool allow8Bit )
{
bool success = false;
const char * start = scursor;
while ( scursor != send ) {
signed char ch = *scursor++;
if ( ch > 0 && isTText( ch ) ) {
// TText: OK
success = true;
} else if ( allow8Bit && ch < 0 ) {
// 8bit char: not OK, but be tolerant.
KMIME_WARN_8BIT( ch );
success = true;
} else {
// CTL or tspecial - marking the end of the atom:
// re-set sursor to point to the offending
// char and return:
scursor--;
break;
}
}
result.first = start;
result.second = scursor - start;
return success;
}
#define READ_ch_OR_FAIL if ( scursor == send ) { \
KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
return false; \
} else { \
ch = *scursor++; \
}
// known issues:
//
// - doesn't handle quoted CRLF
// FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't
// know about encodings yet!
bool parseGenericQuotedString( const char* &scursor, const char * const send,
QString &result, bool isCRLF,
const char openChar, const char closeChar )
{
char ch;
// We are in a quoted-string or domain-literal or comment and the
// cursor points to the first char after the openChar.
// We will apply unfolding and quoted-pair removal.
// We return when we either encounter the end or unescaped openChar
// or closeChar.
assert( *( scursor - 1 ) == openChar || *( scursor - 1 ) == closeChar );
while ( scursor != send ) {
ch = *scursor++;
if ( ch == closeChar || ch == openChar ) {
// end of quoted-string or another opening char:
// let caller decide what to do.
return true;
}
switch ( ch ) {
case '\\': // quoted-pair
// misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
READ_ch_OR_FAIL;
KMIME_WARN_IF_8BIT( ch );
result += QLatin1Char( ch );
break;
case '\r':
// ###
// The case of lonely '\r' is easy to solve, as they're
// not part of Unix Line-ending conventions.
// But I see a problem if we are given Unix-native
// line-ending-mails, where we cannot determine anymore
// whether a given '\n' was part of a CRLF or was occurring
// on it's own.
READ_ch_OR_FAIL;
if ( ch != '\n' ) {
// CR on it's own...
KMIME_WARN_LONE( CR );
result += QLatin1Char( '\r' );
scursor--; // points to after the '\r' again
} else {
// CRLF encountered.
// lookahead: check for folding
READ_ch_OR_FAIL;
if ( ch == ' ' || ch == '\t' ) {
// correct folding;
// position cursor behind the CRLF WSP (unfolding)
// and add the WSP to the result
result += QLatin1Char( ch );
} else {
// this is the "shouldn't happen"-case. There is a CRLF
// inside a quoted-string without it being part of FWS.
// We take it verbatim.
KMIME_WARN_NON_FOLDING( CRLF );
result += QLatin1String( "\r\n" );
// the cursor is decremented again, so's we need not
// duplicate the whole switch here. "ch" could've been
// everything (incl. openChar or closeChar).
scursor--;
}
}
break;
case '\n':
// Note: CRLF has been handled above already!
// ### LF needs special treatment, depending on whether isCRLF
// is true (we can be sure a lonely '\n' was meant this way) or
// false ('\n' alone could have meant LF or CRLF in the original
// message. This parser assumes CRLF iff the LF is followed by
// either WSP (folding) or NULL (premature end of quoted-string;
// Should be fixed, since NULL is allowed as per rfc822).
READ_ch_OR_FAIL;
if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
// folding
// correct folding
result += QLatin1Char( ch );
} else {
// non-folding
KMIME_WARN_LONE( LF );
result += QLatin1Char( '\n' );
// pos is decremented, so's we need not duplicate the whole
// switch here. ch could've been everything (incl. <">, "\").
scursor--;
}
break;
case '=':
{
// ### Work around broken clients that send encoded words in quoted-strings
// For example, older KMail versions.
if ( scursor == send ) {
break;
}
const char *oldscursor = scursor;
QString tmp;
QByteArray lang, charset;
if ( *scursor++ == '?' ) {
--scursor;
if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
result += tmp;
break;
} else {
scursor = oldscursor;
}
} else {
scursor = oldscursor;
}
// fall through
}
default:
KMIME_WARN_IF_8BIT( ch );
result += QLatin1Char( ch );
}
}
return false;
}
// known issues:
//
// - doesn't handle encoded-word inside comments.
bool parseComment( const char* &scursor, const char * const send,
QString &result, bool isCRLF, bool reallySave )
{
int commentNestingDepth = 1;
const char *afterLastClosingParenPos = 0;
QString maybeCmnt;
const char *oldscursor = scursor;
assert( *( scursor - 1 ) == '(' );
while ( commentNestingDepth ) {
QString cmntPart;
if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
assert( *( scursor - 1 ) == ')' || *( scursor - 1 ) == '(' );
// see the kdoc for above function for the possible conditions
// we have to check:
switch ( *( scursor - 1 ) ) {
case ')':
if ( reallySave ) {
// add the chunk that's now surely inside the comment.
result += maybeCmnt;
result += cmntPart;
if ( commentNestingDepth > 1 ) {
// don't add the outermost ')'...
result += QLatin1Char( ')' );
}
maybeCmnt.clear();
}
afterLastClosingParenPos = scursor;
--commentNestingDepth;
break;
case '(':
if ( reallySave ) {
// don't add to "result" yet, because we might find that we
// are already outside the (broken) comment...
maybeCmnt += cmntPart;
maybeCmnt += QLatin1Char( '(' );
}
++commentNestingDepth;
break;
default: assert( 0 );
} // switch
} else {
// !parseGenericQuotedString, ie. premature end
if ( afterLastClosingParenPos ) {
scursor = afterLastClosingParenPos;
} else {
scursor = oldscursor;
}
return false;
}
} // while
return true;
}
// known issues: none.
bool parsePhrase( const char* &scursor, const char * const send,
QString &result, bool isCRLF )
{
enum {
None, Phrase, Atom, EncodedWord, QuotedString
} found = None;
QString tmp;
QByteArray lang, charset;
const char *successfullyParsed = 0;
// only used by the encoded-word branch
const char *oldscursor;
// used to suppress whitespace between adjacent encoded-words
// (rfc2047, 6.2):
bool lastWasEncodedWord = false;
while ( scursor != send ) {
char ch = *scursor++;
switch ( ch ) {
case '.': // broken, but allow for intorop's sake
if ( found == None ) {
--scursor;
return false;
} else {
if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
result += QLatin1String( ". " );
} else {
result += QLatin1Char( '.' );
}
successfullyParsed = scursor;
}
break;
case '"': // quoted-string
tmp.clear();
if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
successfullyParsed = scursor;
assert( *( scursor - 1 ) == '"' );
switch ( found ) {
case None:
found = QuotedString;
break;
case Phrase:
case Atom:
case EncodedWord:
case QuotedString:
found = Phrase;
result += QLatin1Char( ' ' ); // rfc822, 3.4.4
break;
default:
assert( 0 );
}
lastWasEncodedWord = false;
result += tmp;
} else {
// premature end of quoted string.
// What to do? Return leading '"' as special? Return as quoted-string?
// We do the latter if we already found something, else signal failure.
if ( found == None ) {
return false;
} else {
result += QLatin1Char( ' ' ); // rfc822, 3.4.4
result += tmp;
return true;
}
}
break;
case '(': // comment
// parse it, but ignore content:
tmp.clear();
if ( parseComment( scursor, send, tmp, isCRLF,
false /*don't bother with the content*/ ) ) {
successfullyParsed = scursor;
lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
} else {
if ( found == None ) {
return false;
} else {
scursor = successfullyParsed;
return true;
}
}
break;
case '=': // encoded-word
tmp.clear();
oldscursor = scursor;
lang.clear();
charset.clear();
if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
successfullyParsed = scursor;
switch ( found ) {
case None:
found = EncodedWord;
break;
case Phrase:
case EncodedWord:
case Atom:
case QuotedString:
if ( !lastWasEncodedWord ) {
result += QLatin1Char( ' ' ); // rfc822, 3.4.4
}
found = Phrase;
break;
default: assert( 0 );
}
lastWasEncodedWord = true;
result += tmp;
break;
} else {
// parse as atom:
scursor = oldscursor;
}
// fall though...
default: //atom
tmp.clear();
scursor--;
if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
successfullyParsed = scursor;
switch ( found ) {
case None:
found = Atom;
break;
case Phrase:
case Atom:
case EncodedWord:
case QuotedString:
found = Phrase;
result += QLatin1Char( ' ' ); // rfc822, 3.4.4
break;
default:
assert( 0 );
}
lastWasEncodedWord = false;
result += tmp;
} else {
if ( found == None ) {
return false;
} else {
scursor = successfullyParsed;
return true;
}
}
}
eatWhiteSpace( scursor, send );
}
return found != None;
}
// FIXME: This should probably by QByteArray &result instead?
bool parseDotAtom( const char* &scursor, const char * const send,
QString &result, bool isCRLF )
{
eatCFWS( scursor, send, isCRLF );
// always points to just after the last atom parsed:
const char *successfullyParsed;
QString tmp;
if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
return false;
}
result += tmp;
successfullyParsed = scursor;
while ( scursor != send ) {
// end of header or no '.' -> return
if ( scursor == send || *scursor != '.' ) {
return true;
}
scursor++; // eat '.'
if ( scursor == send || !isAText( *scursor ) ) {
// end of header or no AText, but this time following a '.'!:
// reset cursor to just after last successfully parsed char and
// return:
scursor = successfullyParsed;
return true;
}
// try to parse the next atom:
QString maybeAtom;
if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
scursor = successfullyParsed;
return true;
}
result += QLatin1Char( '.' );
result += maybeAtom;
successfullyParsed = scursor;
}
scursor = successfullyParsed;
return true;
}
void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
{
QString dummy;
while ( scursor != send ) {
const char *oldscursor = scursor;
char ch = *scursor++;
switch ( ch ) {
case ' ':
case '\t': // whitespace
case '\r':
case '\n': // folding
continue;
case '(': // comment
if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
continue;
}
scursor = oldscursor;
return;
default:
scursor = oldscursor;
return;
}
}
}
bool parseDomain( const char* &scursor, const char * const send,
QString &result, bool isCRLF )
{
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// domain := dot-atom / domain-literal / atom *("." atom)
//
// equivalent to:
// domain = dot-atom / domain-literal,
// since parseDotAtom does allow CFWS between atoms and dots
if ( *scursor == '[' ) {
// domain-literal:
QString maybeDomainLiteral;
// eat '[':
scursor++;
while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
isCRLF, '[', ']' ) ) {
if ( scursor == send ) {
// end of header: check for closing ']':
if ( *( scursor - 1 ) == ']' ) {
// OK, last char was ']':
result = maybeDomainLiteral;
return true;
} else {
// not OK, domain-literal wasn't closed:
return false;
}
}
// we hit openChar in parseGenericQuotedString.
// include it in maybeDomainLiteral and keep on parsing:
if ( *( scursor - 1 ) == '[' ) {
maybeDomainLiteral += QLatin1Char( '[' );
continue;
}
// OK, real end of domain-literal:
result = maybeDomainLiteral;
return true;
}
} else {
// dot-atom:
QString maybeDotAtom;
if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
result = maybeDotAtom;
// Domain may end with '.', if so preserve it'
if ( scursor != send && *scursor == '.' ) {
result += QLatin1Char( '.' );
scursor++;
}
return true;
}
}
return false;
}
bool parseObsRoute( const char* &scursor, const char* const send,
QStringList &result, bool isCRLF, bool save )
{
while ( scursor != send ) {
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// empty entry:
if ( *scursor == ',' ) {
scursor++;
if ( save ) {
result.append( QString() );
}
continue;
}
// empty entry ending the list:
if ( *scursor == ':' ) {
scursor++;
if ( save ) {
result.append( QString() );
}
return true;
}
// each non-empty entry must begin with '@':
if ( *scursor != '@' ) {
return false;
} else {
scursor++;
}
QString maybeDomain;
if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
return false;
}
if ( save ) {
result.append( maybeDomain );
}
// eat the following (optional) comma:
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
if ( *scursor == ':' ) {
scursor++;
return true;
}
if ( *scursor == ',' ) {
scursor++;
}
}
return false;
}
bool parseAddrSpec( const char* &scursor, const char * const send,
AddrSpec &result, bool isCRLF )
{
//
// STEP 1:
// local-part := dot-atom / quoted-string / word *("." word)
//
// this is equivalent to:
// local-part := word *("." word)
QString maybeLocalPart;
QString tmp;
while ( scursor != send ) {
// first, eat any whitespace
eatCFWS( scursor, send, isCRLF );
char ch = *scursor++;
switch ( ch ) {
case '.': // dot
maybeLocalPart += QLatin1Char( '.' );
break;
case '@':
goto SAW_AT_SIGN;
break;
case '"': // quoted-string
tmp.clear();
if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
maybeLocalPart += tmp;
} else {
return false;
}
break;
default: // atom
scursor--; // re-set scursor to point to ch again
tmp.clear();
if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
maybeLocalPart += tmp;
} else {
return false; // parseAtom can only fail if the first char is non-atext.
}
break;
}
}
return false;
//
// STEP 2:
// domain
//
SAW_AT_SIGN:
assert( *( scursor - 1 ) == '@' );
QString maybeDomain;
if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
return false;
}
result.localPart = maybeLocalPart;
result.domain = maybeDomain;
return true;
}
bool parseAngleAddr( const char* &scursor, const char * const send,
AddrSpec &result, bool isCRLF )
{
// first, we need an opening angle bracket:
eatCFWS( scursor, send, isCRLF );
if ( scursor == send || *scursor != '<' ) {
return false;
}
scursor++; // eat '<'
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
if ( *scursor == '@' || *scursor == ',' ) {
// obs-route: parse, but ignore:
KMIME_WARN << "obsolete source route found! ignoring.";
QStringList dummy;
if ( !parseObsRoute( scursor, send, dummy,
isCRLF, false /* don't save */ ) ) {
return false;
}
// angle-addr isn't complete until after the '>':
if ( scursor == send ) {
return false;
}
}
// parse addr-spec:
AddrSpec maybeAddrSpec;
if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
if ( scursor == send || *scursor != '>' ) {
return false;
}
scursor++;
result = maybeAddrSpec;
return true;
}
static QString stripQuotes( const QString &input )
{
const QLatin1Char quotes( '"' );
if ( input.startsWith( quotes ) && input.endsWith( quotes ) ) {
QString stripped( input.mid( 1, input.size() - 2 ) );
return stripped;
} else {
return input;
}
}
bool parseMailbox( const char* &scursor, const char * const send,
Mailbox &result, bool isCRLF )
{
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
AddrSpec maybeAddrSpec;
QString maybeDisplayName;
// first, try if it's a vanilla addr-spec:
const char * oldscursor = scursor;
if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
result.setAddress( maybeAddrSpec );
// check for the obsolete form of display-name (as comment):
eatWhiteSpace( scursor, send );
if ( scursor != send && *scursor == '(' ) {
scursor++;
if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
return false;
}
}
result.setName( stripQuotes( maybeDisplayName ) );
return true;
}
scursor = oldscursor;
// second, see if there's a display-name:
if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
// failed: reset cursor, note absent display-name
maybeDisplayName.clear();
scursor = oldscursor;
} else {
// succeeded: eat CFWS
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
}
// third, parse the angle-addr:
if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
return false;
}
if ( maybeDisplayName.isNull() ) {
// check for the obsolete form of display-name (as comment):
eatWhiteSpace( scursor, send );
if ( scursor != send && *scursor == '(' ) {
scursor++;
if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
return false;
}
}
}
result.setName( stripQuotes( maybeDisplayName ) );
result.setAddress( maybeAddrSpec );
return true;
}
bool parseGroup( const char* &scursor, const char * const send,
Address &result, bool isCRLF )
{
// group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
//
// equivalent to:
// group := display-name ":" [ obs-mbox-list ] ";"
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// get display-name:
QString maybeDisplayName;
if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
return false;
}
// get ":":
eatCFWS( scursor, send, isCRLF );
if ( scursor == send || *scursor != ':' ) {
return false;
}
// KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
// automatically calls removeBidiControlChars
result.displayName = removeBidiControlChars( maybeDisplayName );
// get obs-mbox-list (may contain empty entries):
scursor++;
while ( scursor != send ) {
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// empty entry:
if ( *scursor == ',' ) {
scursor++;
continue;
}
// empty entry ending the list:
if ( *scursor == ';' ) {
scursor++;
return true;
}
Mailbox maybeMailbox;
if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
return false;
}
result.mailboxList.append( maybeMailbox );
eatCFWS( scursor, send, isCRLF );
// premature end:
if ( scursor == send ) {
return false;
}
// regular end of the list:
if ( *scursor == ';' ) {
scursor++;
return true;
}
// eat regular list entry separator:
if ( *scursor == ',' ) {
scursor++;
}
}
return false;
}
bool parseAddress( const char* &scursor, const char * const send,
Address &result, bool isCRLF )
{
// address := mailbox / group
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// first try if it's a single mailbox:
Mailbox maybeMailbox;
const char * oldscursor = scursor;
if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
// yes, it is:
result.displayName.clear();
result.mailboxList.append( maybeMailbox );
return true;
}
scursor = oldscursor;
Address maybeAddress;
// no, it's not a single mailbox. Try if it's a group:
if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
return false;
}
result = maybeAddress;
return true;
}
bool parseAddressList( const char* &scursor, const char * const send,
AddressList &result, bool isCRLF )
{
while ( scursor != send ) {
eatCFWS( scursor, send, isCRLF );
// end of header: this is OK.
if ( scursor == send ) {
return true;
}
// empty entry: ignore:
if ( *scursor == ',' ) {
scursor++;
continue;
}
// broken clients might use ';' as list delimiter, accept that as well
if ( *scursor == ';' ) {
scursor++;
continue;
}
// parse one entry
Address maybeAddress;
if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
return false;
}
result.append( maybeAddress );
eatCFWS( scursor, send, isCRLF );
// end of header: this is OK.
if ( scursor == send ) {
return true;
}
// comma separating entries: eat it.
if ( *scursor == ',' ) {
scursor++;
}
}
return true;
}
static QString asterisk = QString::fromLatin1( "*0*", 1 );
static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
//static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );
// FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work
// on byte arrays, not strings! The result parameter should be a simple
// QPair<QByteArray,QByteArray>, which is the attribute name and the value.
bool parseParameter( const char* &scursor, const char * const send,
QPair<QString,QStringOrQPair> &result, bool isCRLF )
{
// parameter = regular-parameter / extended-parameter
// regular-parameter = regular-parameter-name "=" value
// extended-parameter =
// value = token / quoted-string
//
// note that rfc2231 handling is out of the scope of this function.
// Therefore we return the attribute as QString and the value as
// (start,length) tupel if we see that the value is encoded
// (trailing asterisk), for parseParameterList to decode...
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
//
// parse the parameter name:
//
// FIXME: maybeAttribute should be a QByteArray
QString maybeAttribute;
if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
// premature end: not OK (haven't seen '=' yet).
if ( scursor == send || *scursor != '=' ) {
return false;
}
scursor++; // eat '='
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
// don't choke on attribute=, meaning the value was omitted:
if ( maybeAttribute.endsWith( asterisk ) ) {
KMIME_WARN << "attribute ends with \"*\", but value is empty!"
"Chopping away \"*\".";
maybeAttribute.truncate( maybeAttribute.length() - 1 );
}
result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
return true;
}
const char * oldscursor = scursor;
//
// parse the parameter value:
//
QStringOrQPair maybeValue;
if ( *scursor == '"' ) {
// value is a quoted-string:
scursor++;
if ( maybeAttribute.endsWith( asterisk ) ) {
// attributes ending with "*" designate extended-parameters,
// which cannot have quoted-strings as values. So we remove the
// trailing "*" to not confuse upper layers.
KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
"Chopping away \"*\".";
maybeAttribute.truncate( maybeAttribute.length() - 1 );
}
if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
scursor = oldscursor;
result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
return false; // this case needs further processing by upper layers!!
}
} else {
// value is a token:
if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
scursor = oldscursor;
result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
return false; // this case needs further processing by upper layers!!
}
}
result = qMakePair( maybeAttribute.toLower(), maybeValue );
return true;
}
// FIXME: Get rid of QStringOrQPair: Use a simply QMap<QByteArray, QByteArray> for "result"
// instead!
bool parseRawParameterList( const char* &scursor, const char * const send,
QMap<QString,QStringOrQPair> &result,
bool isCRLF )
{
// we use parseParameter() consecutively to obtain a map of raw
// attributes to raw values. "Raw" here means that we don't do
// rfc2231 decoding and concatenation. This is left to
// parseParameterList(), which will call this function.
//
// The main reason for making this chunk of code a separate
// (private) method is that we can deal with broken parameters
// _here_ and leave the rfc2231 handling solely to
// parseParameterList(), which will still be enough work.
while ( scursor != send ) {
eatCFWS( scursor, send, isCRLF );
// empty entry ending the list: OK.
if ( scursor == send ) {
return true;
}
// empty list entry: ignore.
if ( *scursor == ';' ) {
scursor++;
continue;
}
QPair<QString, QStringOrQPair> maybeParameter;
if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
// we need to do a bit of work if the attribute is not
// NULL. These are the cases marked with "needs further
// processing" in parseParameter(). Specifically, parsing of the
// token or the quoted-string, which should represent the value,
// failed. We take the easy way out and simply search for the
// next ';' to start parsing again. (Another option would be to
// take the text between '=' and ';' as value)
if ( maybeParameter.first.isNull() ) {
return false;
}
while ( scursor != send ) {
if ( *scursor++ == ';' ) {
goto IS_SEMICOLON;
}
}
// scursor == send case: end of list.
return true;
IS_SEMICOLON:
// *scursor == ';' case: parse next entry.
continue;
}
// successful parsing brings us here:
result.insert( maybeParameter.first, maybeParameter.second );
eatCFWS( scursor, send, isCRLF );
// end of header: ends list.
if ( scursor == send ) {
return true;
}
// regular separator: eat it.
if ( *scursor == ';' ) {
scursor++;
}
}
return true;
}
static void decodeRFC2231Value( Codec* &rfc2231Codec,
QTextCodec* &textcodec,
bool isContinuation, QString &value,
QPair<const char*,int> &source, QByteArray& charset )
{
//
// parse the raw value into (charset,language,text):
//
const char * decBegin = source.first;
const char * decCursor = decBegin;
const char * decEnd = decCursor + source.second;
if ( !isContinuation ) {
// find the first single quote
while ( decCursor != decEnd ) {
if ( *decCursor == '\'' ) {
break;
} else {
decCursor++;
}
}
if ( decCursor == decEnd ) {
// there wasn't a single single quote at all!
// take the whole value to be in latin-1:
KMIME_WARN << "No charset in extended-initial-value."
"Assuming \"iso-8859-1\".";
value += QString::fromLatin1( decBegin, source.second );
return;
}
charset = QByteArray( decBegin, decCursor - decBegin );
const char * oldDecCursor = ++decCursor;
// find the second single quote (we ignore the language tag):
while ( decCursor != decEnd ) {
if ( *decCursor == '\'' ) {
break;
} else {
decCursor++;
}
}
if ( decCursor == decEnd ) {
KMIME_WARN << "No language in extended-initial-value."
"Trying to recover.";
decCursor = oldDecCursor;
} else {
decCursor++;
}
// decCursor now points to the start of the
// "extended-other-values":
//
// get the decoders:
//
bool matchOK = false;
textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK );
if ( !matchOK ) {
textcodec = 0;
KMIME_WARN_UNKNOWN( Charset, charset );
}
}
if ( !rfc2231Codec ) {
rfc2231Codec = Codec::codecForName( "x-kmime-rfc2231" );
assert( rfc2231Codec );
}
if ( !textcodec ) {
value += QString::fromLatin1( decCursor, decEnd - decCursor );
return;
}
Decoder * dec = rfc2231Codec->makeDecoder();
assert( dec );
//
// do the decoding:
//
QByteArray buffer;
buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
QByteArray::Iterator bit = buffer.begin();
QByteArray::ConstIterator bend = buffer.end();
if ( !dec->decode( decCursor, decEnd, bit, bend ) ) {
KMIME_WARN << rfc2231Codec->name()
<< "codec lies about its maxDecodedSizeFor()" << endl
<< "result may be truncated";
}
value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
// kDebug() << "value now: \"" << value << "\"";
// cleanup:
delete dec;
}
// known issues:
// - permutes rfc2231 continuations when the total number of parts
// exceeds 10 (other-sections then becomes *xy, ie. two digits)
bool parseParameterListWithCharset( const char* &scursor,
const char * const send,
QMap<QString,QString> &result,
QByteArray& charset, bool isCRLF )
{
// parse the list into raw attribute-value pairs:
QMap<QString, QStringOrQPair> rawParameterList;
if ( !parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
return false;
}
if ( rawParameterList.isEmpty() ) {
return true;
}
// decode rfc 2231 continuations and alternate charset encoding:
// NOTE: this code assumes that what QMapIterator delivers is sorted
// by the key!
Codec * rfc2231Codec = 0;
QTextCodec * textcodec = 0;
QString attribute;
QString value;
enum Mode {
NoMode = 0x0, Continued = 0x1, Encoded = 0x2
};
enum EncodingMode {
NoEncoding,
RFC2047,
RFC2231
};
QMap<QString, QStringOrQPair>::Iterator it, end = rawParameterList.end();
for ( it = rawParameterList.begin() ; it != end ; ++it ) {
if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
//
// new attribute:
//
// store the last attribute/value pair in the result map now:
if ( !attribute.isNull() ) {
result.insert( attribute, value );
}
// and extract the information from the new raw attribute:
value.clear();
attribute = it.key();
int mode = NoMode;
EncodingMode encodingMode = NoEncoding;
// is the value rfc2331-encoded?
if ( attribute.endsWith( asterisk ) ) {
attribute.truncate( attribute.length() - 1 );
mode |= Encoded;
encodingMode = RFC2231;
}
// is the value rfc2047-encoded?
if ( !( *it ).qstring.isNull() && ( *it ).qstring.contains( QLatin1String( "=?" ) ) ) {
mode |= Encoded;
encodingMode = RFC2047;
}
// is the value continued?
if ( attribute.endsWith( asteriskZero ) ) {
attribute.truncate( attribute.length() - 2 );
mode |= Continued;
}
//
// decode if necessary:
//
if ( mode & Encoded ) {
if ( encodingMode == RFC2231 ) {
decodeRFC2231Value( rfc2231Codec, textcodec,
false, /* isn't continuation */
value, ( *it ).qpair, charset );
}
else if ( encodingMode == RFC2047 ) {
value += decodeRFC2047String( ( *it ).qstring.toLatin1(), charset );
}
} else {
// not encoded.
if ( ( *it ).qpair.first ) {
value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
} else {
value += ( *it ).qstring;
}
}
//
// shortcut-processing when the value isn't encoded:
//
if ( !( mode & Continued ) ) {
// save result already:
result.insert( attribute, value );
// force begin of a new attribute:
attribute.clear();
}
} else { // it.key().startsWith( attribute )
//
// continuation
//
// ignore the section and trust QMap to have sorted the keys:
if ( it.key().endsWith( asterisk ) ) {
// encoded
decodeRFC2231Value( rfc2231Codec, textcodec,
true, /* is continuation */
value, ( *it ).qpair, charset );
} else {
// not encoded
if ( ( *it ).qpair.first ) {
value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
} else {
value += ( *it ).qstring;
}
}
}
}
// write last attr/value pair:
if ( !attribute.isNull() ) {
result.insert( attribute, value );
}
return true;
}
bool parseParameterList( const char* &scursor, const char * const send,
QMap<QString,QString> &result, bool isCRLF )
{
QByteArray charset;
return parseParameterListWithCharset( scursor, send, result, charset, isCRLF );
}
static const char * const stdDayNames[] = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
static bool parseDayName( const char* &scursor, const char * const send )
{
// check bounds:
if ( send - scursor < 3 ) {
return false;
}
for ( int i = 0 ; i < stdDayNamesLen ; ++i ) {
if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
scursor += 3;
// kDebug() << "found" << stdDayNames[i];
return true;
}
}
return false;
}
static const char * const stdMonthNames[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
static const int stdMonthNamesLen =
sizeof stdMonthNames / sizeof *stdMonthNames;
static bool parseMonthName( const char* &scursor, const char * const send,
int &result )
{
// check bounds:
if ( send - scursor < 3 ) {
return false;
}
for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
scursor += 3;
return true;
}
}
// not found:
return false;
}
static const struct {
const char * tzName;
long int secsEastOfGMT;
} timeZones[] = {
// rfc 822 timezones:
{ "GMT", 0 },
{ "UT", 0 },
{ "EDT", -4*3600 },
{ "EST", -5*3600 },
{ "MST", -5*3600 },
{ "CST", -6*3600 },
{ "MDT", -6*3600 },
{ "MST", -7*3600 },
{ "PDT", -7*3600 },
{ "PST", -8*3600 },
// common, non-rfc-822 zones:
{ "CET", 1*3600 },
{ "MET", 1*3600 },
{ "UTC", 0 },
{ "CEST", 2*3600 },
{ "BST", 1*3600 },
// rfc 822 military timezones:
{ "Z", 0 },
{ "A", -1*3600 },
{ "B", -2*3600 },
{ "C", -3*3600 },
{ "D", -4*3600 },
{ "E", -5*3600 },
{ "F", -6*3600 },
{ "G", -7*3600 },
{ "H", -8*3600 },
{ "I", -9*3600 },
// J is not used!
{ "K", -10*3600 },
{ "L", -11*3600 },
{ "M", -12*3600 },
{ "N", 1*3600 },
{ "O", 2*3600 },
{ "P", 3*3600 },
{ "Q", 4*3600 },
{ "R", 5*3600 },
{ "S", 6*3600 },
{ "T", 7*3600 },
{ "U", 8*3600 },
{ "V", 9*3600 },
{ "W", 10*3600 },
{ "X", 11*3600 },
{ "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
static bool parseAlphaNumericTimeZone( const char* &scursor,
const char * const send,
long int &secsEastOfGMT,
bool &timeZoneKnown )
{
// allow the timezone to be wrapped in quotes; bug 260761
if ( *scursor == '"' ) {
scursor++;
if ( scursor == send ) {
return false;
}
}
QPair<const char*, int> maybeTimeZone( 0, 0 );
if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
return false;
}
for ( int i = 0 ; i < timeZonesLen ; ++i ) {
if ( qstrnicmp( timeZones[i].tzName,
maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
scursor += maybeTimeZone.second;
secsEastOfGMT = timeZones[i].secsEastOfGMT;
timeZoneKnown = true;
if ( *scursor == '"' ) {
scursor++;
}
return true;
}
}
// don't choke just because we don't happen to know the time zone
KMIME_WARN_UNKNOWN( time zone,
QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
secsEastOfGMT = 0;
timeZoneKnown = false;
return true;
}
// parse a number and return the number of digits parsed:
int parseDigits( const char* &scursor, const char * const send, int &result )
{
result = 0;
int digits = 0;
for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) {
result *= 10;
result += int( *scursor - '0' );
}
return digits;
}
static bool parseTimeOfDay( const char* &scursor, const char * const send,
int &hour, int &min, int &sec, bool isCRLF=false )
{
// time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
//
// 2DIGIT representing "hour":
//
if ( !parseDigits( scursor, send, hour ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
if ( scursor == send || *scursor != ':' ) {
return false;
}
scursor++; // eat ':'
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
//
// 2DIGIT representing "minute":
//
if ( !parseDigits( scursor, send, min ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return true; // seconds are optional
}
//
// let's see if we have a 2DIGIT representing "second":
//
if ( *scursor == ':' ) {
// yepp, there are seconds:
scursor++; // eat ':'
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
if ( !parseDigits( scursor, send, sec ) ) {
return false;
}
} else {
sec = 0;
}
return true;
}
bool parseTime( const char* &scursor, const char * send,
int &hour, int &min, int &sec, long int &secsEastOfGMT,
bool &timeZoneKnown, bool isCRLF )
{
// time := time-of-day CFWS ( zone / obs-zone )
//
// obs-zone := "UT" / "GMT" /
// "EST" / "EDT" / ; -0500 / -0400
// "CST" / "CDT" / ; -0600 / -0500
// "MST" / "MDT" / ; -0700 / -0600
// "PST" / "PDT" / ; -0800 / -0700
// "A"-"I" / "a"-"i" /
// "K"-"Z" / "k"-"z"
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
// there might be no timezone but a year following
if ( ( scursor == send ) || isdigit( *scursor ) ) {
timeZoneKnown = false;
secsEastOfGMT = 0;
return true; // allow missing timezone
}
timeZoneKnown = true;
if ( *scursor == '+' || *scursor == '-' ) {
// remember and eat '-'/'+':
const char sign = *scursor++;
// numerical timezone:
int maybeTimeZone;
if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
return false;
}
secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
if ( sign == '-' ) {
secsEastOfGMT *= -1;
if ( secsEastOfGMT == 0 ) {
timeZoneKnown = false; // -0000 means indetermined tz
}
}
} else {
// maybe alphanumeric timezone:
if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
return false;
}
}
return true;
}
bool parseDateTime( const char* &scursor, const char * const send,
KDateTime &result, bool isCRLF )
{
// Parsing date-time; strict mode:
//
// date-time := [ [CFWS] day-name [CFWS] "," ] ; wday
// (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
// time
//
// day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
// month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
// "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
result = KDateTime();
QDateTime maybeDateTime;
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
//
// let's see if there's a day-of-week:
//
if ( parseDayName( scursor, send ) ) {
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// day-name should be followed by ',' but we treat it as optional:
if ( *scursor == ',' ) {
scursor++; // eat ','
eatCFWS( scursor, send, isCRLF );
}
}
int maybeMonth = -1;
bool asctimeFormat = false;
// ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
if ( !isdigit( *scursor ) && parseMonthName( scursor, send, maybeMonth ) ) {
asctimeFormat = true;
eatCFWS( scursor, send, isCRLF );
}
//
// 1*2DIGIT representing "day" (of month):
//
int maybeDay;
if ( !parseDigits( scursor, send, maybeDay ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// ignore ","; bug 54098
if ( *scursor == ',' ) {
scursor++;
}
//
// month-name:
//
if ( !asctimeFormat && !parseMonthName( scursor, send, maybeMonth ) ) {
return false;
}
if ( scursor == send ) {
return false;
}
assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
++maybeMonth; // 0-11 -> 1-12
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
// check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
bool timeAfterYear = true;
if ( ( send - scursor > 3 ) && ( ( scursor[1] == ':' ) || ( scursor[2] == ':' ) ) ) {
timeAfterYear = false; // first read time, then year
}
//
// 2*DIGIT representing "year":
//
int maybeYear = 0;
if ( timeAfterYear && !parseDigits( scursor, send, maybeYear ) ) {
return false;
}
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
//
// time
//
int maybeHour, maybeMinute, maybeSecond;
long int secsEastOfGMT;
bool timeZoneKnown = true;
if ( !parseTime( scursor, send,
maybeHour, maybeMinute, maybeSecond,
secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
return false;
}
// in asctime() the year follows the time
if ( !timeAfterYear ) {
eatCFWS( scursor, send, isCRLF );
if ( scursor == send ) {
return false;
}
if ( !parseDigits( scursor, send, maybeYear ) ) {
return false;
}
}
// RFC 2822 4.3 processing:
if ( maybeYear < 50 ) {
maybeYear += 2000;
} else if ( maybeYear < 1000 ) {
maybeYear += 1900;
}
// else keep as is
if ( maybeYear < 1900 ) {
return false; // rfc2822, 3.3
}
maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
if ( !maybeDateTime.isValid() ) {
return false;
}
result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
if ( !result.isValid() ) {
return false;
}
return true;
}
Headers::Base *extractFirstHeader( QByteArray &head )
{
int endOfFieldBody = 0;
bool folded = false;
Headers::Base *header = 0;
int startOfFieldBody = head.indexOf( ':' );
const int endOfFieldHeader = startOfFieldBody;
if ( startOfFieldBody > -1 ) { //there is another header
startOfFieldBody++; //skip the ':'
if ( head[startOfFieldBody] == ' ' ) { // skip the space after the ':', if there
startOfFieldBody++;
}
endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded );
QByteArray rawType = head.left( endOfFieldHeader );
QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody );
if ( folded ) {
rawFieldBody = unfoldHeader( rawFieldBody );
}
// We might get an invalid mail without a field name, don't crash on that.
if ( !rawType.isEmpty() ) {
header = HeaderFactory::self()->createHeader( rawType );
}
if ( !header ) {
//kWarning() << "Returning Generic header of type" << rawType;
header = new Headers::Generic( rawType.constData() );
}
header->from7BitString( rawFieldBody );
head.remove( 0, endOfFieldBody + 1 );
} else {
head.clear();
}
return header;
}
void extractHeaderAndBody( const QByteArray &content, QByteArray &header, QByteArray &body )
{
header.clear();
body.clear();
// empty header
if ( content.startsWith( '\n' ) ) {
body = content.right( content.length() - 1 );
return;
}
int pos = content.indexOf( "\n\n", 0 );
if ( pos > -1 ) {
header = content.left( ++pos ); //header *must* end with "\n" !!
body = content.mid( pos + 1, content.length() - pos - 1 );
} else {
header = content;
}
}
Headers::Base::List parseHeaders( const QByteArray &head )
{
Headers::Base::List ret;
Headers::Base *h;
QByteArray copy = head;
while ( ( h = extractFirstHeader( copy ) ) ) {
ret << h;
}
return ret;
}
} // namespace HeaderParsing
} // namespace KMime