/* kmime_parsers.cpp KMime, the KDE Internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "kmime_parsers.h" #include #include using namespace KMime::Parser; namespace KMime { namespace Parser { MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary ) { s_rc=src; b_oundary=boundary; } bool MultiPart::parse() { QByteArray b = "--" + b_oundary, part; int pos1=0, pos2=0, blen=b.length(); p_arts.clear(); //find the first valid boundary while ( 1 ) { if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 || s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all break; } pos1 += blen; //boundary found but not valid => skip it; } if ( pos1 > -1 ) { pos1 += blen; if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) { // the only valid boundary is the end-boundary // this message is *really* broken pos1 = -1; //we give up } else if ( ( pos1 - blen ) > 1 ) { //preamble present p_reamble = s_rc.left( pos1 - blen - 1 ); } } while ( pos1 > -1 && pos2 > -1 ) { //skip the rest of the line for the first boundary - the message-part starts here if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //now search the next linebreak //now find the next valid boundary pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary while ( 1 ) { if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 || s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found break; } pos2 += blen; //boundary is invalid => skip it; } if ( pos2 == -1 ) { // no more boundaries found part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string p_arts.append( part ); pos1 = -1; pos2 = -1; //break; } else { part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1) p_arts.append( part ); pos2 += blen; //pos2 points now to the first character after the boundary if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line //everything after the end-boundary is considered as the epilouge e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 ); } pos1 = -1; pos2 = -1; //break } else { pos1 = pos2; //the search continues ... } } } } return !p_arts.isEmpty(); } //============================================================================= NonMimeParser::NonMimeParser( const QByteArray &src ) : s_rc( src ), p_artNr( -1 ), t_otalNr( -1 ) { } /** * try to guess the mimetype from the file-extension */ QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName ) { QByteArray tmp, mimeType; int pos; if ( !fileName.isEmpty() ) { pos = fileName.lastIndexOf( '.' ); if ( pos++ != -1 ) { tmp = fileName.mid( pos, fileName.length() - pos ).toUpper(); if ( tmp == "JPG" || tmp == "JPEG" ) { mimeType = "image/jpeg"; } else if ( tmp == "GIF" ) { mimeType = "image/gif"; } else if ( tmp == "PNG" ) { mimeType = "image/png"; } else if ( tmp == "TIFF" || tmp == "TIF" ) { mimeType = "image/tiff"; } else if ( tmp == "XPM" ) { mimeType = "image/x-xpixmap"; } else if ( tmp == "XBM" ) { mimeType = "image/x-xbitmap"; } else if ( tmp == "BMP" ) { mimeType = "image/bmp"; } else if ( tmp == "TXT" || tmp == "ASC" || tmp == "H" || tmp == "C" || tmp == "CC" || tmp == "CPP" ) { mimeType = "text/plain"; } else if ( tmp == "HTML" || tmp == "HTM" ) { mimeType = "text/html"; } else { mimeType = "application/octet-stream"; } } else { mimeType = "application/octet-stream"; } } else { mimeType = "application/octet-stream"; } return mimeType; } //============================================================================== UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) : NonMimeParser( src ), s_ubject( subject ) {} bool UUEncoded::parse() { int currentPos=0; bool success=true, firstIteration=true; while ( success ) { int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0; bool containsBegin=false, containsEnd=false; QByteArray tmp, fileName; if ( ( beginPos = QString::fromLatin1( s_rc ).indexOf( QRegExp( QLatin1String( "begin [0-9][0-9][0-9]" ) ), currentPos ) ) > -1 && ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) { containsBegin = true; uuStart = s_rc.indexOf( '\n', beginPos ); if ( uuStart == -1 ) {//no more line breaks found, we give up success = false; break; } else { uuStart++; //points now at the beginning of the next line } } else { beginPos=currentPos; } if ( ( endPos = s_rc.indexOf( "\nend", ( uuStart > 0 ) ? uuStart - 1 : 0 ) ) == -1 ) { endPos = s_rc.length(); //no end found } else { containsEnd = true; } if ( ( containsBegin && containsEnd ) || firstIteration ) { //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos); //all lines in a uuencoded text start with 'M' for ( int idx=uuStart; idx 10 || ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) { // harder check for split-articles success = false; break; //too many "non-M-Lines" found, we give up } if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) { // message may be split up => parse subject QRegExp rx( QLatin1String( "[0-9]+/[0-9]+" ) ); pos = rx.indexIn( QLatin1String( s_ubject ), 0 ); len = rx.matchedLength(); if ( pos != -1 ) { tmp = s_ubject.mid( pos, len ); pos = tmp.indexOf( '/' ); p_artNr = tmp.left( pos ).toInt(); t_otalNr = tmp.right( tmp.length() - pos - 1 ).toInt(); } else { success = false; break; //no "part-numbers" found in the subject, we give up } } //everything before "begin" is text if ( beginPos > 0 ) { t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); } if ( containsBegin ) { //everything between "begin ### " and the next LF is considered as the filename fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 ); } else { fileName = ""; } f_ilenames.append( fileName ); //everything beetween "begin" and "end" is uuencoded b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) ); m_imeTypes.append( guessMimeType( fileName ) ); firstIteration = false; int next = s_rc.indexOf( '\n', endPos + 1 ); if ( next == -1 ) { //no more line breaks found, we give up success = false; break; } else { next++; //points now at the beginning of the next line } currentPos = next; } else { success = false; } } // append trailing text part of the article t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); return ( ( b_ins.count() > 0 ) || isPartial() ); } //============================================================================== YENCEncoded::YENCEncoded( const QByteArray &src ) : NonMimeParser( src ) { } bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value ) { bool found = false; QByteArray sought=name + '='; int iPos = src.indexOf( sought ); if ( iPos > -1 ) { int pos1 = src.indexOf( ' ', iPos ); int pos2 = src.indexOf( '\r', iPos ); int pos3 = src.indexOf( '\t', iPos ); int pos4 = src.indexOf( '\n', iPos ); if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) { pos1 = pos2; } if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) { pos1 = pos3; } if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) { pos1 = pos4; } iPos=src.lastIndexOf( '=', pos1 ) + 1; if ( iPos < pos1 ) { char c = src.at( iPos ); if ( c>='0' && c<='9' ) { found = true; *value = src.mid( iPos, pos1 - iPos ).toInt(); } } } return found; } bool YENCEncoded::parse() { int currentPos=0; bool success=true; while ( success ) { int beginPos=currentPos, yencStart=currentPos; bool containsPart=false; QByteArray fileName, mimeType; if ( ( beginPos = s_rc.indexOf( "=ybegin ", currentPos ) ) > -1 && ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) { yencStart = s_rc.indexOf( '\n', beginPos ); if ( yencStart == -1 ) { // no more line breaks found, give up success = false; break; } else { yencStart++; if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) { containsPart = true; yencStart = s_rc.indexOf( '\n', yencStart ); if ( yencStart == -1 ) { success = false; break; } yencStart++; } } // Try to identify yenc meta data // Filenames can contain any embedded chars until end of line QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos ); int namePos = meta.indexOf( "name=" ); if ( namePos == -1 ) { success = false; break; } int eolPos = meta.indexOf( '\r', namePos ); if ( eolPos == -1 ) { eolPos = meta.indexOf( '\n', namePos ); } if ( eolPos == -1 ) { success = false; break; } fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) ); // Other metadata is integer int yencLine; if ( !yencMeta( meta, "line", ¥cLine ) ) { success = false; break; } int yencSize; if ( !yencMeta( meta, "size", ¥cSize ) ) { success = false; break; } int partBegin, partEnd; if ( containsPart ) { if ( !yencMeta( meta, "part", &p_artNr ) ) { success = false; break; } if ( !yencMeta( meta, "begin", &partBegin ) || !yencMeta( meta, "end", &partEnd ) ) { success = false; break; } if ( !yencMeta( meta, "total", &t_otalNr ) ) { t_otalNr = p_artNr + 1; } if ( yencSize == partEnd - partBegin + 1 ) { t_otalNr = 1; } else { yencSize = partEnd - partBegin + 1; } } // We have a valid yenc header; now we extract the binary data int totalSize = 0; int pos = yencStart; int len = s_rc.length(); bool lineStart = true; int lineLength = 0; bool containsEnd = false; QByteArray binary; binary.resize( yencSize ); while ( pos < len ) { int ch = s_rc.at( pos ); if ( ch < 0 ) { ch += 256; } if ( ch == '\r' ) { if ( lineLength != yencLine && totalSize != yencSize ) { break; } pos++; } else if ( ch == '\n' ) { lineStart = true; lineLength = 0; pos++; } else { if ( ch == '=' ) { if ( pos + 1 < len ) { ch = s_rc.at( pos + 1 ); if ( lineStart && ch == 'y' ) { containsEnd = true; break; } pos += 2; ch -= 64+42; if ( ch < 0 ) { ch += 256; } if ( totalSize >= yencSize ) { break; } binary[totalSize++] = ch; lineLength++; } else { break; } } else { ch -= 42; if ( ch < 0 ) { ch += 256; } if ( totalSize >= yencSize ) { break; } binary[totalSize++] = ch; lineLength++; pos++; } lineStart = false; } } if ( !containsEnd ) { success = false; break; } if ( totalSize != yencSize ) { success = false; break; } // pos now points to =yend; get end data eolPos = s_rc.indexOf( '\n', pos ); if ( eolPos == -1 ) { success = false; break; } meta = s_rc.mid( pos, eolPos - pos ); if ( !yencMeta( meta, "size", &totalSize ) ) { success = false; break; } if ( totalSize != yencSize ) { success = false; break; } f_ilenames.append( fileName ); m_imeTypes.append( guessMimeType( fileName ) ); b_ins.append( binary ); //everything before "begin" is text if ( beginPos > 0 ) { t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); } currentPos = eolPos + 1; } else { success = false; } } // append trailing text part of the article t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); return b_ins.count()>0; } } // namespace Parser } // namespace KMime