kde-playground/kdepimlibs/kmime/kmime_header_parsing.cpp

/*  -*- c++ -*-
    kmime_header_parsing.cpp

    KMime, the KDE Internet mail/usenet news message library.
    Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

#include "kmime_header_parsing.h"

#include "kmime_codecs.h"
#include "kmime_headerfactory_p.h"
#include "kmime_headers.h"
#include "kmime_util.h"
#include "kmime_util_p.h"
#include "kmime_dateformatter.h"
#include "kmime_warning.h"

#include <kglobal.h>
#include <kcharsets.h>

#include <QtCore/QTextCodec>
#include <QtCore/QMap>
#include <QtCore/QStringList>
#include <QtCore/QUrl>

#include <ctype.h> // for isdigit
#include <cassert>

using namespace KMime;
using namespace KMime::Types;

namespace KMime {

namespace Types {

// QUrl::fromAce is extremely expensive, so only use it when necessary.
// Fortunately, the presence of IDNA is readily detected with a substring match...
static inline QString QUrl_fromAce_wrapper( const QString & domain )
{
  // Domains without the "xn--" marker are handed back unchanged, so the
  // costly QUrl::fromAce() call is only made when the marker is present.
  const bool hasAceMarker = domain.contains( QLatin1String( "xn--" ) );
  return hasAceMarker ? QUrl::fromAce( domain.toLatin1() ) : domain;
}

// Renders an AddrSpec as "local-part@domain".
//
// The local part is wrapped in double quotes when it contains any char that
// is neither '.' nor atext; inside it, '\' and '"' are backslash-escaped.
// With pretty == true the domain is passed through QUrl_fromAce_wrapper().
// An empty AddrSpec yields a null QString; an empty domain yields the
// local part alone (no '@').
static QString addr_spec_as_string( const AddrSpec & as, bool pretty )
{
  if ( as.isEmpty() ) {
    return QString();
  }

  const QChar dot = QLatin1Char( '.' );
  const QChar backslash = QLatin1Char( '\\' );
  const QChar doubleQuote = QLatin1Char( '"' );

  const QString &local = as.localPart;
  QString out;
  out.reserve( local.length() + as.domain.length() + 1 );

  bool mustQuote = false;
  for ( int pos = 0 ; pos < local.length() ; ++pos ) {
    const QChar c = local.at( pos );
    const bool plain = ( c == dot ) || isAText( c.toLatin1() );
    if ( !plain ) {
      mustQuote = true;
      // only these two need an escape inside a quoted-string
      if ( c == backslash || c == doubleQuote ) {
        out += backslash;
      }
    }
    out += c;
  }

  if ( mustQuote ) {
    out.prepend( doubleQuote );
    out.append( doubleQuote );
  }

  const QString host = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain;
  if ( !host.isEmpty() ) {
    out += QLatin1Char( '@' );
    out += host;
  }
  return out;
}

// String form of this addr-spec with the domain emitted exactly as stored.
QString AddrSpec::asString() const
{
  const bool pretty = false;
  return addr_spec_as_string( *this, pretty );
}

// String form of this addr-spec with the domain run through
// QUrl_fromAce_wrapper() for display.
QString AddrSpec::asPrettyString() const
{
  const bool pretty = true;
  return addr_spec_as_string( *this, pretty );
}

// True only when both the local part and the domain are empty.
bool AddrSpec::isEmpty() const
{
  if ( !localPart.isEmpty() ) {
    return false;
  }
  return domain.isEmpty();
}

// Returns the (non-pretty) addr-spec as Latin-1 bytes, or a null
// QByteArray when the addr-spec renders to an empty string.
QByteArray Mailbox::address() const
{
  const QString text = addr_spec_as_string( mAddrSpec, false );
  if ( text.isEmpty() ) {
    return QByteArray();
  }
  return text.toLatin1();
}

// Returns a copy of the stored address specification.
AddrSpec Mailbox::addrSpec() const
{
  return mAddrSpec;
}

// Returns the stored display name.
QString Mailbox::name() const
{
  return mDisplayName;
}

// Replaces the stored address specification with addr (no validation).
void Mailbox::setAddress( const AddrSpec &addr )
{
  mAddrSpec = addr;
}

// Sets the address from its textual form.
//
// addr is first tried as an angle-addr ("<local@domain>"); if that fails,
// parsing continues as a plain addr-spec from wherever the first attempt
// left the cursor. If neither succeeds, a warning is logged.
void Mailbox::setAddress( const QByteArray &addr )
{
  const char *cursor = addr.constData();
  // Compute the end sentinel exactly once: parseAngleAddr() takes the cursor
  // by reference and may advance it even when it fails, so deriving the end
  // from the moved cursor would place it beyond the buffer.
  const char * const end = cursor + addr.length();

  if ( HeaderParsing::parseAngleAddr( cursor, end, mAddrSpec ) ) {
    return;
  }
  if ( !HeaderParsing::parseAddrSpec( cursor, end, mAddrSpec ) ) {
    kWarning() << "Invalid address";
  }
}

// Sets the display name to the result of removeBidiControlChars( name ).
void Mailbox::setName( const QString &name )
{
  mDisplayName = removeBidiControlChars( name );
}

void Mailbox::setNameFrom7Bit( const QByteArray &name,
                               const QByteArray &defaultCharset )
{
  QByteArray cs;
  setName( decodeRFC2047String( name, cs, defaultCharset, false ) );
}

// True if the stored address specification is not empty
// (see AddrSpec::isEmpty()).
bool Mailbox::hasAddress() const
{
  return !mAddrSpec.isEmpty();
}

// True if a non-empty display name is stored.
bool Mailbox::hasName() const
{
  return !mDisplayName.isEmpty();
}

// Convenience overload: same as prettyAddress( QuoteNever ).
QString Mailbox::prettyAddress() const
{
  return prettyAddress( QuoteNever );
}

// Builds a display string of the form "Name <address>".
//
// Without a display name only the address is returned. With one, the name
// is passed through addQuotes() unless quoting is QuoteNever (quotes are
// forced for QuoteAlways), and " <address>" is appended when an address
// is present.
QString Mailbox::prettyAddress( Quoting quoting ) const
{
  if ( hasName() ) {
    QString pretty = name();
    if ( quoting != QuoteNever ) {
      const bool force = ( quoting == QuoteAlways );
      addQuotes( pretty, force );
    }

    if ( hasAddress() ) {
      const QByteArray addr = address();
      pretty += QLatin1String( " <" );
      pretty += QLatin1String( addr );
      pretty += QLatin1Char( '>' );
    }
    return pretty;
  }

  return QLatin1String( address() );
}

void Mailbox::fromUnicodeString( const QString &s )
{
  from7BitString( encodeRFC2047Sentence( s, "utf-8" ) );
}

// Parses s as a mailbox into this object. The boolean result of
// HeaderParsing::parseMailbox() is not inspected.
void Mailbox::from7BitString( const QByteArray &s )
{
  const char *pos = s.constData();
  const char * const end = pos + s.length();
  HeaderParsing::parseMailbox( pos, end, *this );
}

// Serialises the mailbox as "name <address>".
//
// A US-ASCII display name is emitted as-is (quoted by addQuotes() where that
// function deems it necessary); any other name is encoded with
// encodeRFC2047String() using encCharset. Without a display name only the
// address is returned.
QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
{
  if ( !hasName() ) {
    return address();
  }

  QByteArray out;
  if ( !isUsAscii( name() ) ) {
    out += encodeRFC2047String( name(), encCharset, true );
  } else {
    QByteArray plainName = name().toLatin1();
    addQuotes( plainName, false );
    out += plainName;
  }

  if ( hasAddress() ) {
    out += " <";
    out += address();
    out += '>';
  }
  return out;
}

} // namespace Types

namespace HeaderParsing {

// parse the encoded-word (scursor points to after the initial '=')
// Decodes one encoded-word of the form
//   "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
//
// scursor must point just behind the leading '='; send is the end sentinel.
// On success, result is assigned the decoded text, language the language tag
// (null if there was none), usedCS the cachedCharset() of the charset that
// was actually used, and scursor points just behind the closing "?=".
// defaultCS is used when forceCS is set, when no charset is given, or when
// no codec matches the given charset.
// On failure false is returned; scursor may already have been advanced, so
// callers that want to retry must restore it themselves.
bool parseEncodedWord( const char* &scursor, const char * const send,
                       QString &result, QByteArray &language,
                       QByteArray &usedCS, const QByteArray &defaultCS,
                       bool forceCS )
{
  // make sure the caller already did a bit of the work.
  assert( *( scursor - 1 ) == '=' );

  // The '=' was the last char of the input: there is nothing to look at,
  // and reading *scursor would go past the end sentinel.
  if ( scursor == send ) {
    return false;
  }

  //
  // STEP 1:
  // scan for the charset/language portion of the encoded-word
  //

  char ch = *scursor++;

  if ( ch != '?' ) {
    // kDebug() << "first";
    //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
    return false;
  }

  // remember start of charset (ie. just after the initial "=?") and
  // language (just after the first '*') fields:
  const char * charsetStart = scursor;
  const char * languageStart = 0;

  // find delimiting '?' (and the '*' separating charset and language
  // tags, if any):
  for ( ; scursor != send ; scursor++ ) {
    if ( *scursor == '?' ) {
      break;
    } else if ( *scursor == '*' && languageStart == 0 ) {
      languageStart = scursor + 1;
    }
  }

  // not found? can't be an encoded-word!
  if ( scursor == send || *scursor != '?' ) {
    // kDebug() << "second";
    KMIME_WARN_PREMATURE_END_OF( EncodedWord );
    return false;
  }

  // extract the language information, if any. Without a '*' the language
  // stays null; no pointer arithmetic is done on the null languageStart.
  QByteArray maybeLanguage;
  if ( languageStart ) {
    maybeLanguage = QByteArray( languageStart, scursor - languageStart );
  }
  // extract charset information: everything up to the '*' (if there is
  // a language tag) or up to the delimiting '?' otherwise:
  QByteArray maybeCharset( charsetStart,
                           ( languageStart ? languageStart - 1 : scursor ) - charsetStart );

  //
  // STEP 2:
  // scan for the encoding portion of the encoded-word
  //

  // remember start of encoding (just _after_ the second '?'):
  scursor++;
  const char * encodingStart = scursor;

  // find next '?' (ending the encoding tag):
  for ( ; scursor != send ; scursor++ ) {
    if ( *scursor == '?' ) {
      break;
    }
  }

  // not found? Can't be an encoded-word!
  if ( scursor == send || *scursor != '?' ) {
    // kDebug() << "third";
    KMIME_WARN_PREMATURE_END_OF( EncodedWord );
    return false;
  }

  // extract the encoding information:
  QByteArray maybeEncoding( encodingStart, scursor - encodingStart );

  // kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
  //         << "\"; language == \"" << maybeLanguage
  //         << "\"; encoding == \"" << maybeEncoding << "\"";

  //
  // STEP 3:
  // scan for encoded-text portion of encoded-word
  //

  // remember start of encoded-text (just after the third '?'):
  scursor++;
  const char * encodedTextStart = scursor;

  // find the '?=' sequence (ending the encoded-text):
  for ( ; scursor != send ; scursor++ ) {
    if ( *scursor == '?' ) {
      if ( scursor + 1 != send ) {
        if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore
          KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
          continue;
        }
        else { // yep, found a '?=' sequence
          scursor += 2;
          break;
        }
      }
      else { // The '?' is the last char, but we need a '=' after it!
        KMIME_WARN_PREMATURE_END_OF( EncodedWord );
        return false;
      }
    }
  }

  // The loop may also have ended by running into send. Check the distance
  // first so that the two chars inspected are known to lie at or behind
  // encodedTextStart, then verify they really are the closing "?=":
  if ( scursor < encodedTextStart + 2 ||
       *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' ) {
    KMIME_WARN_PREMATURE_END_OF( EncodedWord );
    return false;
  }

  // set end sentinel for encoded-text:
  const char * const encodedTextEnd = scursor - 2;

  //
  // STEP 4:
  // setup decoders for the transfer encoding and the charset
  //

  // try if there's a codec for the encoding found:
  Codec * codec = Codec::codecForName( maybeEncoding );
  if ( !codec ) {
    KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
    return false;
  }

  // get an instance of a corresponding decoder (deleted on every
  // return path below):
  Decoder * dec = codec->makeDecoder();
  assert( dec );

  // try if there's a (text)codec for the charset found:
  bool matchOK = false;
  QTextCodec *textCodec = 0;
  if ( forceCS || maybeCharset.isEmpty() ) {
    textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
    usedCS = cachedCharset( defaultCS );
  } else {
    textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK );
    if ( !matchOK ) {  //no suitable codec found => use default charset
      textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK );
      usedCS = cachedCharset( defaultCS );
    } else {
      usedCS = cachedCharset( maybeCharset );
    }
  }

  if ( !matchOK || !textCodec ) {
    KMIME_WARN_UNKNOWN( Charset, maybeCharset );
    delete dec;
    return false;
  }

  // kDebug() << "mimeName(): \"" << textCodec->name() << "\"";

  // allocate a temporary buffer to store the 8bit text:
  int encodedTextLength = encodedTextEnd - encodedTextStart;
  QByteArray buffer;
  buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
  char *bbegin = buffer.data();
  char *bend = bbegin + buffer.length();

  //
  // STEP 5:
  // do the actual decoding
  //

  if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
    KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
               << encodedTextLength << ")\nresult may be truncated";
  }

  // bbegin now marks the end of what was written to the buffer:
  result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );

  // kDebug() << "result now: \"" << result << "\"";
  // cleanup:
  delete dec;
  language = maybeLanguage;

  return true;
}

// Advances scursor past any run of ' ', '\t', '\r' and '\n', stopping at
// the first other char or at send, whichever comes first.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      break;   // whitespace: keep skipping
    default:
      return;  // first non-whitespace char: leave the cursor on it
    }
  }
}

bool parseAtom( const char * &scursor, const char * const send,
                QString &result, bool allow8Bit )
{
  QPair<const char*, int> maybeResult;

  if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
    result += QString::fromLatin1( maybeResult.first, maybeResult.second );
    return true;
  }

  return false;
}

bool parseAtom( const char * &scursor, const char * const send,
                QPair<const char*,int> &result, bool allow8Bit )
{
  bool success = false;
  const char *start = scursor;

  while ( scursor != send ) {
    signed char ch = *scursor++;
    if ( ch > 0 && isAText( ch ) ) {
      // AText: OK
      success = true;
    } else if ( allow8Bit && ch < 0 ) {
      // 8bit char: not OK, but be tolerant.
      KMIME_WARN_8BIT( ch );
      success = true;
    } else {
      // CTL or special - marking the end of the atom:
      // re-set sursor to point to the offending
      // char and return:
      scursor--;
      break;
    }
  }
  result.first = start;
  result.second = scursor - start;
  return success;
}

// FIXME: Remove this and the other parseToken() method. add a new one where "result" is a
//        QByteArray.
bool parseToken( const char * &scursor, const char * const send,
                 QString &result, bool allow8Bit )
{
  QPair<const char*, int> maybeResult;

  if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
    result += QString::fromLatin1( maybeResult.first, maybeResult.second );
    return true;
  }

  return false;
}

bool parseToken( const char * &scursor, const char * const send,
                 QPair<const char*,int> &result, bool allow8Bit )
{
  bool success = false;
  const char * start = scursor;

  while ( scursor != send ) {
    signed char ch = *scursor++;
    if ( ch > 0 && isTText( ch ) ) {
      // TText: OK
      success = true;
    } else if ( allow8Bit && ch < 0 ) {
      // 8bit char: not OK, but be tolerant.
      KMIME_WARN_8BIT( ch );
      success = true;
    } else {
      // CTL or tspecial - marking the end of the atom:
      // re-set sursor to point to the offending
      // char and return:
      scursor--;
      break;
    }
  }
  result.first = start;
  result.second = scursor - start;
  return success;
}

// Helper for parseGenericQuotedString(): reads the next char into the local
// variable "ch" and advances scursor. If scursor has already reached send,
// it emits a premature-end warning and makes the enclosing function return
// false instead. Expects "scursor", "send" and "ch" to be in scope where
// the macro is used.
#define READ_ch_OR_FAIL if ( scursor == send ) {        \
    KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
    return false;                                       \
  } else {                                              \
    ch = *scursor++;                                    \
  }

// known issues:
//
// - doesn't handle quoted CRLF

// FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't
//        know about encodings yet!
// Parses the inside of a quoted-string, domain-literal or comment.
//
// scursor must point to the first char after openChar (or after a previously
// returned openChar/closeChar). Text is appended to result with unfolding
// and quoted-pair removal applied; encoded-words found inside are decoded
// (workaround for broken senders).
// Returns true as soon as an unescaped openChar or closeChar has been
// consumed (scursor points just behind it; the caller inspects
// *(scursor - 1) to tell which one it was). Returns false if send is
// reached first; whatever was collected so far remains in result.
bool parseGenericQuotedString( const char* &scursor, const char * const send,
                               QString &result, bool isCRLF,
                               const char openChar, const char closeChar )
{
  char ch;
  // We are in a quoted-string or domain-literal or comment and the
  // cursor points to the first char after the openChar.
  // We will apply unfolding and quoted-pair removal.
  // We return when we either encounter the end or unescaped openChar
  // or closeChar.

  assert( *( scursor - 1 ) == openChar || *( scursor - 1 ) == closeChar );

  while ( scursor != send ) {
    ch = *scursor++;

    if ( ch == closeChar || ch == openChar ) {
      // end of quoted-string or another opening char:
      // let caller decide what to do.
      return true;
    }

    switch ( ch ) {
    case '\\':      // quoted-pair
      // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
      READ_ch_OR_FAIL;
      KMIME_WARN_IF_8BIT( ch );
      result += QLatin1Char( ch );
      break;
    case '\r':
      // ###
      // The case of lonely '\r' is easy to solve, as they're
      // not part of Unix Line-ending conventions.
      // But I see a problem if we are given Unix-native
      // line-ending-mails, where we cannot determine anymore
      // whether a given '\n' was part of a CRLF or was occurring
      // on its own.
      READ_ch_OR_FAIL;
      if ( ch != '\n' ) {
        // CR on its own...
        KMIME_WARN_LONE( CR );
        result += QLatin1Char( '\r' );
        scursor--; // points to after the '\r' again
      } else {
        // CRLF encountered.
        // lookahead: check for folding
        READ_ch_OR_FAIL;
        if ( ch == ' ' || ch == '\t' ) {
          // correct folding;
          // position cursor behind the CRLF WSP (unfolding)
          // and add the WSP to the result
          result += QLatin1Char( ch );
        } else {
          // this is the "shouldn't happen"-case. There is a CRLF
          // inside a quoted-string without it being part of FWS.
          // We take it verbatim.
          KMIME_WARN_NON_FOLDING( CRLF );
          result += QLatin1String( "\r\n" );
          // the cursor is decremented again, so we need not
          // duplicate the whole switch here. "ch" could've been
          // everything (incl. openChar or closeChar).
          scursor--;
        }
      }
      break;
    case '\n':
      // Note: CRLF has been handled above already!
      // ### LF needs special treatment, depending on whether isCRLF
      // is true (we can be sure a lonely '\n' was meant this way) or
      // false ('\n' alone could have meant LF or CRLF in the original
      // message. This parser assumes CRLF iff the LF is followed by
      // either WSP (folding) or NULL (premature end of quoted-string;
      // Should be fixed, since NULL is allowed as per rfc822).
      READ_ch_OR_FAIL;
      if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
        // folding
        // correct folding
        result += QLatin1Char( ch );
      } else {
        // non-folding
        KMIME_WARN_LONE( LF );
        result += QLatin1Char( '\n' );
        // pos is decremented, so we need not duplicate the whole
        // switch here. ch could've been everything (incl. <">, "\").
        scursor--;
      }
      break;
    case '=':
    {
      // ### Work around broken clients that send encoded words in quoted-strings
      //     For example, older KMail versions.
      // Only attempt the decode when a '?' really follows. In every other
      // case - including a '=' that is the very last char of the input -
      // the '=' is ordinary text and is appended by the default branch.
      if ( scursor != send && *scursor == '?' ) {
        const char *oldscursor = scursor;
        QString tmp;
        QByteArray lang, charset;
        if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
          result += tmp;
          break;
        }
        // not an encoded-word after all: rewind and treat '=' literally
        scursor = oldscursor;
      }
      // fall through
    }
    default:
      KMIME_WARN_IF_8BIT( ch );
      result += QLatin1Char( ch );
    }
  }

  return false;
}

// known issues:
//
// - doesn't handle encoded-word inside comments.

bool parseComment( const char* &scursor, const char * const send,
                   QString &result, bool isCRLF, bool reallySave )
{
  int commentNestingDepth = 1;
  const char *afterLastClosingParenPos = 0;
  QString maybeCmnt;
  const char *oldscursor = scursor;

  assert( *( scursor - 1 ) == '(' );

  while ( commentNestingDepth ) {
    QString cmntPart;
    if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
      assert( *( scursor - 1 ) == ')' || *( scursor - 1 ) == '(' );
      // see the kdoc for above function for the possible conditions
      // we have to check:
      switch ( *( scursor - 1 ) ) {
      case ')':
        if ( reallySave ) {
          // add the chunk that's now surely inside the comment.
          result += maybeCmnt;
          result += cmntPart;
          if ( commentNestingDepth > 1 ) {
            // don't add the outermost ')'...
            result += QLatin1Char( ')' );
          }
          maybeCmnt.clear();
        }
        afterLastClosingParenPos = scursor;
        --commentNestingDepth;
        break;
      case '(':
        if ( reallySave ) {
          // don't add to "result" yet, because we might find that we
          // are already outside the (broken) comment...
          maybeCmnt += cmntPart;
          maybeCmnt += QLatin1Char( '(' );
        }
        ++commentNestingDepth;
        break;
      default: assert( 0 );
      } // switch
    } else {
      // !parseGenericQuotedString, ie. premature end
      if ( afterLastClosingParenPos ) {
        scursor = afterLastClosingParenPos;
      } else {
        scursor = oldscursor;
      }
      return false;
    }
  } // while

  return true;
}

// known issues: none.

// Parses a phrase: a sequence of atoms, quoted-strings and encoded-words.
//
// Comments between words are parsed and discarded. For interoperability a
// '.' is accepted once at least one word has been found. Words are appended
// to result separated by a single space, except that no space is inserted
// between two encoded-words that are separated by whitespace only.
// Returns true if at least one word was found. When parsing stops at
// something that is not part of a phrase, scursor is reset to just behind
// the last successfully parsed element.
bool parsePhrase( const char* &scursor, const char * const send,
                  QString &result, bool isCRLF )
{
  // what has been recognised so far; becomes Phrase from the second word on
  enum {
    None, Phrase, Atom, EncodedWord, QuotedString
  } found = None;

  QString tmp;
  QByteArray lang, charset;
  // position just behind the last element that was parsed successfully
  const char *successfullyParsed = 0;
  // only used by the encoded-word branch
  const char *oldscursor;
  // used to suppress whitespace between adjacent encoded-words
  // (rfc2047, 6.2):
  bool lastWasEncodedWord = false;

  while ( scursor != send ) {
    char ch = *scursor++;
    switch ( ch ) {
    case '.': // broken, but allow for interop's sake
      if ( found == None ) {
        // a phrase must not start with '.': un-read it and give up
        --scursor;
        return false;
      } else {
        if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
          result += QLatin1String( ". " );
        } else {
          result += QLatin1Char( '.' );
        }
        successfullyParsed = scursor;
      }
      break;
    case '"': // quoted-string
      tmp.clear();
      if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
        successfullyParsed = scursor;
        assert( *( scursor - 1 ) == '"' );
        switch ( found ) {
        case None:
          found = QuotedString;
          break;
        case Phrase:
        case Atom:
        case EncodedWord:
        case QuotedString:
          found = Phrase;
          result += QLatin1Char( ' ' ); // rfc822, 3.4.4
          break;
        default:
          assert( 0 );
        }
        lastWasEncodedWord = false;
        result += tmp;
      } else {
        // premature end of quoted string.
        // What to do? Return leading '"' as special? Return as quoted-string?
        // We do the latter if we already found something, else signal failure.
        if ( found == None ) {
          return false;
        } else {
          result += QLatin1Char( ' ' ); // rfc822, 3.4.4
          result += tmp;
          return true;
        }
      }
      break;
    case '(': // comment
      // parse it, but ignore content:
      tmp.clear();
      if ( parseComment( scursor, send, tmp, isCRLF,
                         false /*don't bother with the content*/ ) ) {
        successfullyParsed = scursor;
        lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
      } else {
        if ( found == None ) {
          return false;
        } else {
          scursor = successfullyParsed;
          return true;
        }
      }
      break;
    case '=': // encoded-word
      tmp.clear();
      oldscursor = scursor;
      lang.clear();
      charset.clear();
      if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
        successfullyParsed = scursor;
        switch ( found ) {
        case None:
          found = EncodedWord;
          break;
        case Phrase:
        case EncodedWord:
        case Atom:
        case QuotedString:
          if ( !lastWasEncodedWord ) {
            result += QLatin1Char( ' ' ); // rfc822, 3.4.4
          }
          found = Phrase;
          break;
        default: assert( 0 );
        }
        lastWasEncodedWord = true;
        result += tmp;
        break;
      } else {
        // parse as atom:
        scursor = oldscursor;
      }
      // fall through...

    default: //atom
      tmp.clear();
      // step back onto the char just read so parseAtom() sees it
      scursor--;
      if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
        successfullyParsed = scursor;
        switch ( found ) {
        case None:
          found = Atom;
          break;
        case Phrase:
        case Atom:
        case EncodedWord:
        case QuotedString:
          found = Phrase;
          result += QLatin1Char( ' ' ); // rfc822, 3.4.4
          break;
        default:
          assert( 0 );
        }
        lastWasEncodedWord = false;
        result += tmp;
      } else {
        if ( found == None ) {
          return false;
        } else {
          scursor = successfullyParsed;
          return true;
        }
      }
    }
    // skip the whitespace separating this element from the next one
    eatWhiteSpace( scursor, send );
  }

  return found != None;
}

// FIXME: This should probably be QByteArray &result instead?
bool parseDotAtom( const char* &scursor, const char * const send,
                   QString &result, bool isCRLF )
{
  eatCFWS( scursor, send, isCRLF );

  // always points to just after the last atom parsed:
  const char *successfullyParsed;

  QString tmp;
  if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
    return false;
  }
  result += tmp;
  successfullyParsed = scursor;

  while ( scursor != send ) {

    // end of header or no '.' -> return
    if ( scursor == send || *scursor != '.' ) {
      return true;
    }
    scursor++; // eat '.'

    if ( scursor == send || !isAText( *scursor ) ) {
      // end of header or no AText, but this time following a '.'!:
      // reset cursor to just after last successfully parsed char and
      // return:
      scursor = successfullyParsed;
      return true;
    }

    // try to parse the next atom:
    QString maybeAtom;
    if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
      scursor = successfullyParsed;
      return true;
    }

    result += QLatin1Char( '.' );
    result += maybeAtom;
    successfullyParsed = scursor;
  }

  scursor = successfullyParsed;
  return true;
}

void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
{
  QString dummy;

  while ( scursor != send ) {
    const char *oldscursor = scursor;

    char ch = *scursor++;

    switch ( ch ) {
    case ' ':
    case '\t': // whitespace
    case '\r':
    case '\n': // folding
      continue;

    case '(': // comment
      if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
        continue;
      }
      scursor = oldscursor;
      return;

    default:
      scursor = oldscursor;
      return;
    }
  }
}

bool parseDomain( const char* &scursor, const char * const send,
                  QString &result, bool isCRLF )
{
  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  // domain := dot-atom / domain-literal / atom *("." atom)
  //
  // equivalent to:
  // domain = dot-atom / domain-literal,
  // since parseDotAtom does allow CFWS between atoms and dots

  if ( *scursor == '[' ) {
    // domain-literal:
    QString maybeDomainLiteral;
    // eat '[':
    scursor++;
    while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
                                      isCRLF, '[', ']' ) ) {
      if ( scursor == send ) {
        // end of header: check for closing ']':
        if ( *( scursor - 1 ) == ']' ) {
          // OK, last char was ']':
          result = maybeDomainLiteral;
          return true;
        } else {
          // not OK, domain-literal wasn't closed:
          return false;
        }
      }
      // we hit openChar in parseGenericQuotedString.
      // include it in maybeDomainLiteral and keep on parsing:
      if ( *( scursor - 1 ) == '[' ) {
        maybeDomainLiteral += QLatin1Char( '[' );
        continue;
      }
      // OK, real end of domain-literal:
      result = maybeDomainLiteral;
      return true;
    }
  } else {
    // dot-atom:
    QString maybeDotAtom;
    if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
      result = maybeDotAtom;
      // Domain may end with '.', if so preserve it'
      if ( scursor != send && *scursor == '.' ) {
        result += QLatin1Char( '.' );
        scursor++;
      }
      return true;
    }
  }
  return false;
}

bool parseObsRoute( const char* &scursor, const char* const send,
                    QStringList &result, bool isCRLF, bool save )
{
  while ( scursor != send ) {
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }

    // empty entry:
    if ( *scursor == ',' ) {
      scursor++;
      if ( save ) {
        result.append( QString() );
      }
      continue;
    }

    // empty entry ending the list:
    if ( *scursor == ':' ) {
      scursor++;
      if ( save ) {
        result.append( QString() );
      }
      return true;
    }

    // each non-empty entry must begin with '@':
    if ( *scursor != '@' ) {
      return false;
    } else {
      scursor++;
    }

    QString maybeDomain;
    if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
      return false;
    }
    if ( save ) {
      result.append( maybeDomain );
    }

    // eat the following (optional) comma:
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }
    if ( *scursor == ':' ) {
      scursor++;
      return true;
    }
    if ( *scursor == ',' ) {
      scursor++;
    }
  }

  return false;
}

// Parses an addr-spec: local-part "@" domain.
//
// The local part may be made up of atoms, quoted-strings and dots, with
// CFWS allowed in between; its pieces are concatenated. On success,
// result.localPart and result.domain are set and scursor points behind the
// domain. On failure false is returned and result is left untouched.
bool parseAddrSpec( const char* &scursor, const char * const send,
                    AddrSpec &result, bool isCRLF )
{
  //
  // STEP 1:
  // local-part := dot-atom / quoted-string / word *("." word)
  //
  // this is equivalent to:
  // local-part := word *("." word)

  QString maybeLocalPart;
  QString tmp;
  bool sawAtSign = false;

  while ( !sawAtSign && scursor != send ) {
    // first, eat any whitespace
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      // Only CFWS was left: stop here rather than reading the char at
      // send, which could move the cursor past the end sentinel.
      break;
    }

    char ch = *scursor++;
    switch ( ch ) {
    case '.': // dot
      maybeLocalPart += QLatin1Char( '.' );
      break;

    case '@':
      sawAtSign = true;
      break;

    case '"': // quoted-string
      tmp.clear();
      if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
        maybeLocalPart += tmp;
      } else {
        return false;
      }
      break;

    default: // atom
      scursor--; // re-set scursor to point to ch again
      tmp.clear();
      if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
        maybeLocalPart += tmp;
      } else {
        return false; // parseAtom can only fail if the first char is non-atext.
      }
      break;
    }
  }

  if ( !sawAtSign ) {
    // ran out of input before the '@'
    return false;
  }

  //
  // STEP 2:
  // domain
  //

  assert( *( scursor - 1 ) == '@' );

  QString maybeDomain;
  if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
    return false;
  }

  result.localPart = maybeLocalPart;
  result.domain = maybeDomain;

  return true;
}

bool parseAngleAddr( const char* &scursor, const char * const send,
                     AddrSpec &result, bool isCRLF )
{
  // first, we need an opening angle bracket:
  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send || *scursor != '<' ) {
    return false;
  }
  scursor++; // eat '<'

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  if ( *scursor == '@' || *scursor == ',' ) {
    // obs-route: parse, but ignore:
    KMIME_WARN << "obsolete source route found! ignoring.";
    QStringList dummy;
    if ( !parseObsRoute( scursor, send, dummy,
                         isCRLF, false /* don't save */ ) ) {
      return false;
    }
    // angle-addr isn't complete until after the '>':
    if ( scursor == send ) {
      return false;
    }
  }

  // parse addr-spec:
  AddrSpec maybeAddrSpec;
  if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send || *scursor != '>' ) {
    return false;
  }
  scursor++;

  result = maybeAddrSpec;
  return true;

}

// Returns input without its first and last char if it both starts and
// ends with a double quote; otherwise returns input unchanged.
static QString stripQuotes( const QString &input )
{
  const QLatin1Char dquote( '"' );
  const bool wrapped = input.startsWith( dquote ) && input.endsWith( dquote );
  if ( !wrapped ) {
    return input;
  }
  return input.mid( 1, input.size() - 2 );
}

bool parseMailbox( const char* &scursor, const char * const send,
                   Mailbox &result, bool isCRLF )
{
  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  AddrSpec maybeAddrSpec;
  QString maybeDisplayName;

  // first, try if it's a vanilla addr-spec:
  const char * oldscursor = scursor;
  if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
    result.setAddress( maybeAddrSpec );
    // check for the obsolete form of display-name (as comment):
    eatWhiteSpace( scursor, send );
    if ( scursor != send && *scursor == '(' ) {
      scursor++;
      if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
        return false;
      }
    }
    result.setName( stripQuotes( maybeDisplayName ) );
    return true;
  }
  scursor = oldscursor;

  // second, see if there's a display-name:
  if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
    // failed: reset cursor, note absent display-name
    maybeDisplayName.clear();
    scursor = oldscursor;
  } else {
    // succeeded: eat CFWS
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }
  }

  // third, parse the angle-addr:
  if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
    return false;
  }

  if ( maybeDisplayName.isNull() ) {
    // check for the obsolete form of display-name (as comment):
    eatWhiteSpace( scursor, send );
    if ( scursor != send && *scursor == '(' ) {
      scursor++;
      if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
        return false;
      }
    }
  }

  result.setName( stripQuotes( maybeDisplayName ) );
  result.setAddress( maybeAddrSpec );
  return true;
}

bool parseGroup( const char* &scursor, const char * const send,
                 Address &result, bool isCRLF )
{
  // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
  //
  // equivalent to:
  // group   := display-name ":" [ obs-mbox-list ] ";"

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  // get display-name:
  QString maybeDisplayName;
  if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
    return false;
  }

  // get ":":
  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send || *scursor != ':' ) {
    return false;
  }

  // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
  //            automatically calls removeBidiControlChars
  result.displayName = removeBidiControlChars( maybeDisplayName );

  // get obs-mbox-list (may contain empty entries):
  scursor++;
  while ( scursor != send ) {
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }

    // empty entry:
    if ( *scursor == ',' ) {
      scursor++;
      continue;
    }

    // empty entry ending the list:
    if ( *scursor == ';' ) {
      scursor++;
      return true;
    }

    Mailbox maybeMailbox;
    if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
      return false;
    }
    result.mailboxList.append( maybeMailbox );

    eatCFWS( scursor, send, isCRLF );
    // premature end:
    if ( scursor == send ) {
      return false;
    }
    // regular end of the list:
    if ( *scursor == ';' ) {
      scursor++;
      return true;
    }
    // eat regular list entry separator:
    if ( *scursor == ',' ) {
      scursor++;
    }
  }
  return false;
}

bool parseAddress( const char* &scursor, const char * const send,
                   Address &result, bool isCRLF )
{
  // address       := mailbox / group

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  // first try if it's a single mailbox:
  Mailbox maybeMailbox;
  const char * oldscursor = scursor;
  if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
    // yes, it is:
    result.displayName.clear();
    result.mailboxList.append( maybeMailbox );
    return true;
  }
  scursor = oldscursor;

  Address maybeAddress;

  // no, it's not a single mailbox. Try if it's a group:
  if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
    return false;
  }

  result = maybeAddress;
  return true;
}

bool parseAddressList( const char* &scursor, const char * const send,
                       AddressList &result, bool isCRLF )
{
  while ( scursor != send ) {
    eatCFWS( scursor, send, isCRLF );
    // end of header: this is OK.
    if ( scursor == send ) {
      return true;
    }
    // empty entry: ignore:
    if ( *scursor == ',' ) {
      scursor++;
      continue;
    }
    // broken clients might use ';' as list delimiter, accept that as well
    if ( *scursor == ';' ) {
      scursor++;
      continue;
    }

    // parse one entry
    Address maybeAddress;
    if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
      return false;
    }
    result.append( maybeAddress );

    eatCFWS( scursor, send, isCRLF );
    // end of header: this is OK.
    if ( scursor == send ) {
      return true;
    }
    // comma separating entries: eat it.
    if ( *scursor == ',' ) {
      scursor++;
    }
  }
  return true;
}

// Suffixes used to recognise RFC 2231 parameter names. Both are built from
// prefixes of the same literal "*0*":
//   asterisk     == "*"   (first character only)
//   asteriskZero == "*0"  (first two characters)
static QString asterisk = QString::fromLatin1( "*0*", 1 );
static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
//static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );

// FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work
//        on byte arrays, not strings! The result parameter should be a simple
//        QPair<QByteArray,QByteArray>, which is the attribute name and the value.
bool parseParameter( const char* &scursor, const char * const send,
                     QPair<QString,QStringOrQPair> &result, bool isCRLF )
{
  // parameter = regular-parameter / extended-parameter
  // regular-parameter = regular-parameter-name "=" value
  // extended-parameter =
  // value = token / quoted-string
  //
  // note that rfc2231 handling is out of the scope of this function.
  // Therefore we return the attribute as QString and the value as
  // (start,length) tupel if we see that the value is encoded
  // (trailing asterisk), for parseParameterList to decode...

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  //
  // parse the parameter name:
  //
  // FIXME: maybeAttribute should be a QByteArray
  QString maybeAttribute;
  if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  // premature end: not OK (haven't seen '=' yet).
  if ( scursor == send || *scursor != '=' ) {
    return false;
  }
  scursor++; // eat '='

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    // don't choke on attribute=, meaning the value was omitted:
    if ( maybeAttribute.endsWith( asterisk ) ) {
      KMIME_WARN << "attribute ends with \"*\", but value is empty!"
        "Chopping away \"*\".";
      maybeAttribute.truncate( maybeAttribute.length() - 1 );
    }
    result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
    return true;
  }

  const char * oldscursor = scursor;

  //
  // parse the parameter value:
  //
  QStringOrQPair maybeValue;
  if ( *scursor == '"' ) {
    // value is a quoted-string:
    scursor++;
    if ( maybeAttribute.endsWith( asterisk ) ) {
      // attributes ending with "*" designate extended-parameters,
      // which cannot have quoted-strings as values. So we remove the
      // trailing "*" to not confuse upper layers.
      KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
        "Chopping away \"*\".";
      maybeAttribute.truncate( maybeAttribute.length() - 1 );
    }

    if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
      scursor = oldscursor;
      result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
      return false; // this case needs further processing by upper layers!!
    }
  } else {
    // value is a token:
    if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
      scursor = oldscursor;
      result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
      return false; // this case needs further processing by upper layers!!
    }
  }

  result = qMakePair( maybeAttribute.toLower(), maybeValue );
  return true;
}

// FIXME: Get rid of QStringOrQPair: Use a simply QMap<QByteArray, QByteArray> for "result"
//        instead!
bool parseRawParameterList( const char* &scursor, const char * const send,
                            QMap<QString,QStringOrQPair> &result,
                            bool isCRLF )
{
  // we use parseParameter() consecutively to obtain a map of raw
  // attributes to raw values. "Raw" here means that we don't do
  // rfc2231 decoding and concatenation. This is left to
  // parseParameterList(), which will call this function.
  //
  // The main reason for making this chunk of code a separate
  // (private) method is that we can deal with broken parameters
  // _here_ and leave the rfc2231 handling solely to
  // parseParameterList(), which will still be enough work.

  while ( scursor != send ) {
    eatCFWS( scursor, send, isCRLF );
    // empty entry ending the list: OK.
    if ( scursor == send ) {
      return true;
    }
    // empty list entry: ignore.
    if ( *scursor == ';' ) {
      scursor++;
      continue;
    }

    QPair<QString, QStringOrQPair> maybeParameter;
    if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
      // we need to do a bit of work if the attribute is not
      // NULL. These are the cases marked with "needs further
      // processing" in parseParameter(). Specifically, parsing of the
      // token or the quoted-string, which should represent the value,
      // failed. We take the easy way out and simply search for the
      // next ';' to start parsing again. (Another option would be to
      // take the text between '=' and ';' as value)
      if ( maybeParameter.first.isNull() ) {
        return false;
      }
      while ( scursor != send ) {
        if ( *scursor++ == ';' ) {
          goto IS_SEMICOLON;
        }
      }
      // scursor == send case: end of list.
      return true;
    IS_SEMICOLON:
      // *scursor == ';' case: parse next entry.
      continue;
    }
    // successful parsing brings us here:
    result.insert( maybeParameter.first, maybeParameter.second );

    eatCFWS( scursor, send, isCRLF );
    // end of header: ends list.
    if ( scursor == send ) {
      return true;
    }
    // regular separator: eat it.
    if ( *scursor == ';' ) {
      scursor++;
    }
  }
  return true;
}

// Decodes one raw rfc2231 value, given as (start,length) in source, and
// appends the decoded text to value.
//
// For an initial value (isContinuation == false) the input is expected to
// look like charset'language'text: charset is stored in the charset
// parameter and used to look up textcodec, the language tag is skipped. For
// a continuation the whole input is text and the textcodec passed in is
// reused. rfc2231Codec is looked up on first use and kept in the caller's
// variable. If no text codec is available, the text is appended as Latin-1
// without any decoding.
static void decodeRFC2231Value( Codec* &rfc2231Codec,
                                QTextCodec* &textcodec,
                                bool isContinuation, QString &value,
                                QPair<const char*,int> &source, QByteArray& charset )
{
  const char * const rawBegin = source.first;
  const char * rawEnd = rawBegin + source.second;
  const char * cursor = rawBegin;

  if ( !isContinuation ) {
    //
    // split the raw value into (charset,language,text):
    //

    // advance to the first single quote, which terminates the charset:
    for ( ; cursor != rawEnd && *cursor != '\'' ; ++cursor ) {
    }

    if ( cursor == rawEnd ) {
      // no single quote anywhere: take the whole value to be latin-1.
      KMIME_WARN << "No charset in extended-initial-value."
        "Assuming \"iso-8859-1\".";
      value += QString::fromLatin1( rawBegin, source.second );
      return;
    }

    charset = QByteArray( rawBegin, cursor - rawBegin );

    ++cursor; // step over the first quote
    const char * const afterCharset = cursor;

    // advance to the second single quote (the language tag is ignored):
    for ( ; cursor != rawEnd && *cursor != '\'' ; ++cursor ) {
    }

    if ( cursor != rawEnd ) {
      ++cursor; // step over the second quote
    } else {
      KMIME_WARN << "No language in extended-initial-value."
        "Trying to recover.";
      cursor = afterCharset;
    }

    // cursor now points to the start of the "extended-other-values".

    //
    // look up the text codec for the charset:
    //
    bool matchOK = false;
    textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK );
    if ( !matchOK ) {
      textcodec = 0;
      KMIME_WARN_UNKNOWN( Charset, charset );
    }
  }

  if ( !rfc2231Codec ) {
    rfc2231Codec = Codec::codecForName( "x-kmime-rfc2231" );
    assert( rfc2231Codec );
  }

  if ( !textcodec ) {
    // nothing to convert with: append the remaining bytes as they are.
    value += QString::fromLatin1( cursor, rawEnd - cursor );
    return;
  }

  Decoder * decoder = rfc2231Codec->makeDecoder();
  assert( decoder );

  //
  // decode into a buffer, then convert the buffer to unicode:
  //
  QByteArray buffer;
  buffer.resize( rfc2231Codec->maxDecodedSizeFor( rawEnd - cursor ) );
  QByteArray::Iterator out = buffer.begin();
  QByteArray::ConstIterator outEnd = buffer.end();

  if ( !decoder->decode( cursor, rawEnd, out, outEnd ) ) {
    KMIME_WARN << rfc2231Codec->name()
               << "codec lies about its maxDecodedSizeFor()" << endl
               << "result may be truncated";
  }

  value += textcodec->toUnicode( buffer.begin(), out - buffer.begin() );

  // the decoder was created above and is owned by this function:
  delete decoder;
}

// Parses a complete parameter list and stores the decoded attribute/value
// pairs in result. The raw pairs come from parseRawParameterList(); this
// function then
//  - decodes rfc2231-encoded values (attribute name ending in "*"),
//  - decodes quoted-string values containing "=?" as rfc2047 encoded-words,
//  - concatenates rfc2231 continuations (attribute names ending in "*0",
//    followed by further keys starting with the same attribute name).
// The charset argument is handed on to decodeRFC2231Value() and
// decodeRFC2047String(). Returns false only if parseRawParameterList()
// fails.
//
// known issues:
//  - permutes rfc2231 continuations when the total number of parts
//    exceeds 10 (other-sections then becomes *xy, ie. two digits)
//  - a continuation is recognised by a plain startsWith() on the key, so any
//    key that merely begins with the current attribute name is appended too

bool parseParameterListWithCharset( const char* &scursor,
                                                const char * const send,
                                                QMap<QString,QString> &result,
                                                QByteArray& charset, bool isCRLF )
{
// parse the list into raw attribute-value pairs:
  QMap<QString, QStringOrQPair> rawParameterList;
  if ( !parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
    return false;
  }

  if ( rawParameterList.isEmpty() ) {
    return true;
  }

  // decode rfc 2231 continuations and alternate charset encoding:

  // NOTE: this code assumes that what QMapIterator delivers is sorted
  // by the key!

  // rfc2231Codec and textcodec are filled in by decodeRFC2231Value() and
  // reused for subsequent values.
  Codec * rfc2231Codec = 0;
  QTextCodec * textcodec = 0;
  // attribute/value currently being assembled; a null attribute means
  // "nothing pending".
  QString attribute;
  QString value;
  enum Mode {
    NoMode = 0x0, Continued = 0x1, Encoded = 0x2
  };

  enum EncodingMode {
    NoEncoding,
    RFC2047,
    RFC2231
  };

  QMap<QString, QStringOrQPair>::Iterator it, end = rawParameterList.end();

  for ( it = rawParameterList.begin() ; it != end ; ++it ) {
    if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
      //
      // new attribute:
      //

      // store the last attribute/value pair in the result map now:
      if ( !attribute.isNull() ) {
        result.insert( attribute, value );
      }
      // and extract the information from the new raw attribute:
      value.clear();
      attribute = it.key();
      int mode = NoMode;
      EncodingMode encodingMode = NoEncoding;

      // is the value rfc2231-encoded?
      if ( attribute.endsWith( asterisk ) ) {
        attribute.truncate( attribute.length() - 1 );
        mode |= Encoded;
        encodingMode = RFC2231;
      }
      // is the value rfc2047-encoded?
      // (if both tests match, RFC2047 wins because it is assigned last)
      if ( !( *it ).qstring.isNull() && ( *it ).qstring.contains( QLatin1String( "=?" ) ) ) {
        mode |= Encoded;
        encodingMode = RFC2047;
      }
      // is the value continued?
      // (checked after the trailing "*" has been removed above)
      if ( attribute.endsWith( asteriskZero ) ) {
        attribute.truncate( attribute.length() - 2 );
        mode |= Continued;
      }
      //
      // decode if necessary:
      //
      if ( mode & Encoded ) {
        if ( encodingMode == RFC2231 ) {
          decodeRFC2231Value( rfc2231Codec, textcodec,
                              false, /* isn't continuation */
                              value, ( *it ).qpair, charset );
        }
        else if ( encodingMode == RFC2047 ) {
          value += decodeRFC2047String( ( *it ).qstring.toLatin1(), charset );
        }
      } else {
        // not encoded.
        // a token value lives in qpair, a quoted-string value in qstring:
        if ( ( *it ).qpair.first ) {
          value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
        } else {
          value += ( *it ).qstring;
        }
      }

      //
      // shortcut-processing when the value isn't continued:
      //

      if ( !( mode & Continued ) ) {
        // save result already:
        result.insert( attribute, value );
        // force begin of a new attribute:
        attribute.clear();
      }
    } else { // it.key().startsWith( attribute )
      //
      // continuation
      //

      // ignore the section and trust QMap to have sorted the keys:
      if ( it.key().endsWith( asterisk ) ) {
        // encoded
        decodeRFC2231Value( rfc2231Codec, textcodec,
                            true, /* is continuation */
                            value, ( *it ).qpair, charset );
      } else {
        // not encoded
        if ( ( *it ).qpair.first ) {
          value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second );
        } else {
          value += ( *it ).qstring;
        }
      }
    }
  }

  // write last attr/value pair:
  if ( !attribute.isNull() ) {
    result.insert( attribute, value );
  }

  return true;
}


bool parseParameterList( const char* &scursor, const char * const send,
                         QMap<QString,QString> &result, bool isCRLF )
{
  QByteArray charset;
  return parseParameterListWithCharset( scursor, send, result, charset, isCRLF );
}

// Three-letter day names, starting with Sunday.
static const char * const stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// Number of entries in stdDayNames.
static const int stdDayNamesLen =
  sizeof( stdDayNames ) / sizeof( stdDayNames[0] );

// Checks whether the next three characters are one of stdDayNames (compared
// case-insensitively). If so, the cursor is moved behind them and true is
// returned; otherwise the cursor stays where it is.
static bool parseDayName( const char* &scursor, const char * const send )
{
  // a day name needs three characters:
  if ( send - scursor < 3 ) {
    return false;
  }

  int day = 0;
  while ( day < stdDayNamesLen &&
          qstrnicmp( scursor, stdDayNames[day], 3 ) != 0 ) {
    ++day;
  }
  if ( day == stdDayNamesLen ) {
    return false; // no day name matched
  }

  scursor += 3;
  return true;
}

// Three-letter month names; the array index is the zero-based month number.
static const char * const stdMonthNames[] = {
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec"
};
// Number of entries in stdMonthNames.
static const int stdMonthNamesLen =
  sizeof( stdMonthNames ) / sizeof( stdMonthNames[0] );

// Checks whether the next three characters are one of stdMonthNames
// (compared case-insensitively). On a match, result is set to the index of
// the month (0-11), the cursor is moved behind the name and true is
// returned. If fewer than three characters are left, result is not touched;
// if no name matches, result ends up as stdMonthNamesLen.
static bool parseMonthName( const char* &scursor, const char * const send,
                            int &result )
{
  // a month name needs three characters:
  if ( send - scursor < 3 ) {
    return false;
  }

  int month = 0;
  while ( month < stdMonthNamesLen &&
          qstrnicmp( scursor, stdMonthNames[month], 3 ) != 0 ) {
    ++month;
  }
  result = month;

  if ( month == stdMonthNamesLen ) {
    // not found
    return false;
  }

  scursor += 3;
  return true;
}

// Table of alphabetic time zone names and their offsets from GMT in seconds
// (positive values are east of GMT). It is searched front to back by
// parseAlphaNumericTimeZone().
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  // This entry used to be a second "MST", which left CDT unknown and, being
  // found first, made MST resolve to -5h instead of -7h.
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// Number of entries in timeZones.
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;

// Parses an alphabetic time zone name, optionally wrapped in double quotes,
// and looks it up (case-insensitively) in the timeZones table.
//
// Returns false if no token could be parsed. Otherwise returns true and sets
//  - secsEastOfGMT to the table offset and timeZoneKnown to true if the
//    name is in the table,
//  - secsEastOfGMT to 0 and timeZoneKnown to false if it is not.
// A closing quote is consumed only for a known time zone.
static bool parseAlphaNumericTimeZone( const char* &scursor,
                                       const char * const send,
                                       long int &secsEastOfGMT,
                                       bool &timeZoneKnown )
{
  // never look at *scursor when there is no input left
  if ( scursor == send ) {
    return false;
  }

  // allow the timezone to be wrapped in quotes; bug 260761
  if ( *scursor == '"' ) {
    scursor++;

    if ( scursor == send ) {
      return false;
    }
  }

  QPair<const char*, int> maybeTimeZone( 0, 0 );
  if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
    return false;
  }
  // parseToken() leaves scursor behind the token it has consumed (the other
  // callers in this file rely on that as well), so the cursor must not be
  // advanced by the token length a second time here: doing so moved it past
  // the closing quote and possibly past send.
  for ( int i = 0 ; i < timeZonesLen ; ++i ) {
    const char * const name = timeZones[i].tzName;
    // qstrnicmp() only compares the first maybeTimeZone.second characters,
    // which alone would be a prefix match ("U" would hit "UT", "E" would hit
    // "EDT"). Requiring the table name to end right there makes it an exact
    // match. Reading name[second] is safe: after a successful comparison the
    // name has at least that many characters before its terminator.
    if ( qstrnicmp( name, maybeTimeZone.first, maybeTimeZone.second ) == 0 &&
         name[maybeTimeZone.second] == '\0' ) {
      secsEastOfGMT = timeZones[i].secsEastOfGMT;
      timeZoneKnown = true;

      // eat the closing quote, if there is one
      if ( scursor != send && *scursor == '"' ) {
        scursor++;
      }

      return true;
    }
  }

  // don't choke just because we don't happen to know the time zone
  KMIME_WARN_UNKNOWN( time zone,
                      QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
  secsEastOfGMT = 0;
  timeZoneKnown = false;
  return true;
}

// Parses a run of decimal digits starting at scursor and stores its value in
// result. Returns the number of digits consumed; 0 (with result == 0 and the
// cursor unchanged) if the input does not start with a digit.
//
// NOTE(review): the value is accumulated in an int without an overflow
// check, so very long digit runs are not handled.
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  result = 0;
  int digits = 0;
  // isdigit() must not be given a negative value, which a plain char holding
  // an 8-bit character can be; hence the conversion to unsigned char.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}

// Parses a time of day:
//
//   time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
//
// On success hour, min and sec are all set; sec is 0 if the seconds part is
// absent. The number of digits and the value ranges are not checked here.
static bool parseTimeOfDay( const char* &scursor, const char * const send,
                            int &hour, int &min, int &sec, bool isCRLF=false )
{
  //
  // 2DIGIT representing "hour":
  //
  if ( !parseDigits( scursor, send, hour ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send || *scursor != ':' ) {
    return false;
  }
  scursor++; // eat ':'

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  //
  // 2DIGIT representing "minute":
  //
  if ( !parseDigits( scursor, send, min ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    // seconds are optional. sec has to be set here as well: this path used
    // to return with sec unassigned, handing an indeterminate value to the
    // caller.
    sec = 0;
    return true;
  }

  //
  // let's see if we have a 2DIGIT representing "second":
  //
  if ( *scursor == ':' ) {
    // yepp, there are seconds:
    scursor++; // eat ':'
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }

    if ( !parseDigits( scursor, send, sec ) ) {
      return false;
    }
  } else {
    sec = 0;
  }

  return true;
}

// Parses a time of day followed by an optional time zone:
//
//   time := time-of-day CFWS ( zone / obs-zone )
//
//   obs-zone    := "UT" / "GMT" /
//                  "EST" / "EDT" / ; -0500 / -0400
//                  "CST" / "CDT" / ; -0600 / -0500
//                  "MST" / "MDT" / ; -0700 / -0600
//                  "PST" / "PDT" / ; -0800 / -0700
//                  "A"-"I" / "a"-"i" /
//                  "K"-"Z" / "k"-"z"
//
// A missing time zone is accepted (end of input, or a digit following the
// time, which is taken to be a year): timeZoneKnown is then false and
// secsEastOfGMT is 0. A numerical zone must consist of a sign and exactly
// four digits; "-0000" is reported as an unknown time zone with offset 0.
bool parseTime( const char* &scursor, const char * send,
                int &hour, int &min, int &sec, long int &secsEastOfGMT,
                bool &timeZoneKnown, bool isCRLF )
{
  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  // there might be no timezone but a year following
  // (isdigit() must not be given a negative value, which a plain char
  // holding an 8-bit character can be; hence the conversion.)
  if ( ( scursor == send ) || isdigit( static_cast<unsigned char>( *scursor ) ) ) {
    timeZoneKnown = false;
    secsEastOfGMT = 0;
    return true; // allow missing timezone
  }

  timeZoneKnown = true;
  if ( *scursor == '+' || *scursor == '-' ) {
    // remember and eat '-'/'+':
    const char sign = *scursor++;
    // numerical timezone:
    int maybeTimeZone;
    if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
      return false;
    }
    // the four digits are HHMM; convert to seconds
    secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
    if ( sign == '-' ) {
      secsEastOfGMT *= -1;
      if ( secsEastOfGMT == 0 ) {
        timeZoneKnown = false; // -0000 means indetermined tz
      }
    }
  } else {
    // maybe alphanumeric timezone:
    if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
      return false;
    }
  }
  return true;
}

// Parses a date-time and stores it in result; result is reset to an invalid
// KDateTime first and only replaced once everything has been parsed.
//
// Accepted, in addition to the strict grammar given below:
//  - a day-name that is not followed by ',',
//  - a ',' directly after the day of month (bug 54098),
//  - the ANSI-C asctime() layout "Wed Jun 30 21:49:08 1993", i.e. month
//    before day and year after the time,
//  - a missing time zone.
// Two- and three-digit years are expanded as described in RFC 2822 4.3;
// years before 1900 are rejected.
bool parseDateTime( const char* &scursor, const char * const send,
                    KDateTime &result, bool isCRLF )
{
  // Parsing date-time; strict mode:
  //
  // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
  // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
  //                time
  //
  // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
  // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
  //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"

  result = KDateTime();
  QDateTime maybeDateTime;

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  //
  // let's see if there's a day-of-week:
  //
  if ( parseDayName( scursor, send ) ) {
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }
    // day-name should be followed by ',' but we treat it as optional:
    if ( *scursor == ',' ) {
      scursor++; // eat ','
      eatCFWS( scursor, send, isCRLF );
      // nothing but the day name: bail out here, because the cursor is
      // dereferenced right below.
      if ( scursor == send ) {
        return false;
      }
    }
  }

  int maybeMonth = -1;
  bool asctimeFormat = false;

  // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
  // (isdigit() must not be given a negative value, which a plain char
  // holding an 8-bit character can be; hence the conversion.)
  if ( !isdigit( static_cast<unsigned char>( *scursor ) ) &&
       parseMonthName( scursor, send, maybeMonth ) ) {
    asctimeFormat = true;
    eatCFWS( scursor, send, isCRLF );
  }

  //
  // 1*2DIGIT representing "day" (of month):
  //
  int maybeDay;
  if ( !parseDigits( scursor, send, maybeDay ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  // ignore ","; bug 54098
  if ( *scursor == ',' ) {
    scursor++;
  }

  //
  // month-name:
  //
  if ( !asctimeFormat && !parseMonthName( scursor, send, maybeMonth ) ) {
    return false;
  }
  if ( scursor == send ) {
    return false;
  }
  assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
  ++maybeMonth; // 0-11 -> 1-12

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
  bool timeAfterYear = true;
  if ( ( send - scursor > 3 ) && ( ( scursor[1] == ':' ) || ( scursor[2] == ':' ) ) ) {
    timeAfterYear = false;  // first read time, then year
  }

  //
  // 2*DIGIT representing "year":
  //
  int maybeYear = 0;

  if ( timeAfterYear && !parseDigits( scursor, send, maybeYear ) ) {
    return false;
  }

  eatCFWS( scursor, send, isCRLF );
  if ( scursor == send ) {
    return false;
  }

  //
  // time
  //
  // start from defined values in case parseTime() leaves one of its
  // out-parameters unassigned:
  int maybeHour = 0, maybeMinute = 0, maybeSecond = 0;
  long int secsEastOfGMT = 0;
  bool timeZoneKnown = true;

  if ( !parseTime( scursor, send,
                   maybeHour, maybeMinute, maybeSecond,
                   secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
    return false;
  }

  // in asctime() the year follows the time
  if ( !timeAfterYear ) {
    eatCFWS( scursor, send, isCRLF );
    if ( scursor == send ) {
      return false;
    }

    if ( !parseDigits( scursor, send, maybeYear ) ) {
      return false;
    }
  }

  // RFC 2822 4.3 processing:
  if ( maybeYear < 50 ) {
    maybeYear += 2000;
  } else if ( maybeYear < 1000 ) {
    maybeYear += 1900;
  }
  // else keep as is
  if ( maybeYear < 1900 ) {
    return false; // rfc2822, 3.3
  }

  maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
  maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );

  if ( !maybeDateTime.isValid() ) {
    return false;
  }

  result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
  if ( !result.isValid() ) {
    return false;
  }
  return true;
}

// Splits the first header field off head and returns it as a newly created
// header object. The text in front of the first ':' is the field name; the
// field body runs from behind the ':' (and one optional space) to the end of
// the header line as reported by findHeaderLineEnd(), and is unfolded if
// necessary. The consumed part, plus one more character, is removed from
// head.
//
// If the header factory has no header for the field name, or the name is
// empty, a Headers::Generic is created instead. If head contains no ':' at
// all, head is cleared and 0 is returned.
Headers::Base *extractFirstHeader( QByteArray &head )
{
  const int colonPos = head.indexOf( ':' );
  if ( colonPos < 0 ) {
    // no further header in here
    head.clear();
    return 0;
  }

  // the field body starts behind the ':' and behind one space, if present
  int bodyStart = colonPos + 1;
  if ( head[bodyStart] == ' ' ) {
    ++bodyStart;
  }

  bool folded = false;
  const int bodyEnd = findHeaderLineEnd( head, bodyStart, &folded );

  QByteArray rawType = head.left( colonPos );
  QByteArray rawFieldBody = head.mid( bodyStart, bodyEnd - bodyStart );
  if ( folded ) {
    rawFieldBody = unfoldHeader( rawFieldBody );
  }

  Headers::Base *header = 0;
  // We might get an invalid mail without a field name, don't crash on that.
  if ( !rawType.isEmpty() ) {
    header = HeaderFactory::self()->createHeader( rawType );
  }
  if ( !header ) {
    //kWarning() << "Returning Generic header of type" << rawType;
    header = new Headers::Generic( rawType.constData() );
  }
  header->from7BitString( rawFieldBody );

  head.remove( 0, bodyEnd + 1 );
  return header;
}

// Splits content into header and body at the first empty line ("\n\n").
// header keeps its terminating "\n"; the empty line itself goes to neither
// part. Content starting with "\n" has an empty header, and everything after
// that first character is the body. Without any empty line, all of content
// is header and body stays empty.
void extractHeaderAndBody( const QByteArray &content, QByteArray &header, QByteArray &body )
{
  header.clear();
  body.clear();

  // empty header
  if ( content.startsWith( '\n' ) ) {
    body = content.right( content.length() - 1 );
    return;
  }

  const int separatorPos = content.indexOf( "\n\n", 0 );
  if ( separatorPos < 0 ) {
    // no body at all
    header = content;
    return;
  }

  // the first "\n" of the separator still belongs to the header
  // (header *must* end with "\n" !!), the second one is dropped.
  const int bodyStart = separatorPos + 2;
  header = content.left( separatorPos + 1 );
  body = content.mid( bodyStart, content.length() - bodyStart );
}

// Parses all header fields in head and returns them in order of appearance.
// Works on a copy of head, from which extractFirstHeader() takes one field
// after the other until it returns 0.
Headers::Base::List parseHeaders( const QByteArray &head )
{
  Headers::Base::List headers;
  QByteArray remaining = head;

  for ( ;; ) {
    Headers::Base * const next = extractFirstHeader( remaining );
    if ( !next ) {
      break;
    }
    headers << next;
  }

  return headers;
}

} // namespace HeaderParsing

} // namespace KMime