mirror of
https://bitbucket.org/smil3y/kde-playground.git
synced 2025-02-24 10:52:52 +00:00
505 lines
14 KiB
C++
505 lines
14 KiB
C++
![]() |
/*
|
||
|
kmime_parsers.cpp
|
||
|
|
||
|
KMime, the KDE Internet mail/usenet news message library.
|
||
|
Copyright (c) 2001 the KMime authors.
|
||
|
See file AUTHORS for details
|
||
|
|
||
|
This library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Library General Public
|
||
|
License as published by the Free Software Foundation; either
|
||
|
version 2 of the License, or (at your option) any later version.
|
||
|
|
||
|
This library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Library General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Library General Public License
|
||
|
along with this library; see the file COPYING.LIB. If not, write to
|
||
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||
|
Boston, MA 02110-1301, USA.
|
||
|
*/
|
||
|
#include "kmime_parsers.h"
|
||
|
|
||
|
#include <QtCore/QRegExp>
|
||
|
#include <QtCore/QByteArray>
|
||
|
|
||
|
using namespace KMime::Parser;
|
||
|
|
||
|
namespace KMime {
|
||
|
namespace Parser {
|
||
|
|
||
|
MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary )
|
||
|
{
|
||
|
s_rc=src;
|
||
|
b_oundary=boundary;
|
||
|
}
|
||
|
|
||
|
bool MultiPart::parse()
|
||
|
{
|
||
|
QByteArray b = "--" + b_oundary, part;
|
||
|
int pos1=0, pos2=0, blen=b.length();
|
||
|
|
||
|
p_arts.clear();
|
||
|
|
||
|
//find the first valid boundary
|
||
|
while ( 1 ) {
|
||
|
if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 ||
|
||
|
s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all
|
||
|
break;
|
||
|
}
|
||
|
pos1 += blen; //boundary found but not valid => skip it;
|
||
|
}
|
||
|
|
||
|
if ( pos1 > -1 ) {
|
||
|
pos1 += blen;
|
||
|
if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) {
|
||
|
// the only valid boundary is the end-boundary
|
||
|
// this message is *really* broken
|
||
|
pos1 = -1; //we give up
|
||
|
} else if ( ( pos1 - blen ) > 1 ) { //preamble present
|
||
|
p_reamble = s_rc.left( pos1 - blen - 1 );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
while ( pos1 > -1 && pos2 > -1 ) {
|
||
|
|
||
|
//skip the rest of the line for the first boundary - the message-part starts here
|
||
|
if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) {
|
||
|
//now search the next linebreak
|
||
|
//now find the next valid boundary
|
||
|
pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
|
||
|
while ( 1 ) {
|
||
|
if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 ||
|
||
|
s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found
|
||
|
break;
|
||
|
}
|
||
|
pos2 += blen; //boundary is invalid => skip it;
|
||
|
}
|
||
|
|
||
|
if ( pos2 == -1 ) { // no more boundaries found
|
||
|
part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string
|
||
|
p_arts.append( part );
|
||
|
pos1 = -1;
|
||
|
pos2 = -1; //break;
|
||
|
} else {
|
||
|
part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
|
||
|
p_arts.append( part );
|
||
|
pos2 += blen; //pos2 points now to the first character after the boundary
|
||
|
if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary
|
||
|
pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
|
||
|
|
||
|
if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line
|
||
|
//everything after the end-boundary is considered as the epilouge
|
||
|
e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 );
|
||
|
}
|
||
|
pos1 = -1;
|
||
|
pos2 = -1; //break
|
||
|
} else {
|
||
|
pos1 = pos2; //the search continues ...
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return !p_arts.isEmpty();
|
||
|
}
|
||
|
|
||
|
//=============================================================================
|
||
|
|
||
|
NonMimeParser::NonMimeParser( const QByteArray &src ) :
|
||
|
s_rc( src ), p_artNr( -1 ), t_otalNr( -1 )
|
||
|
{
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* try to guess the mimetype from the file-extension
|
||
|
*/
|
||
|
QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName )
|
||
|
{
|
||
|
QByteArray tmp, mimeType;
|
||
|
int pos;
|
||
|
|
||
|
if ( !fileName.isEmpty() ) {
|
||
|
pos = fileName.lastIndexOf( '.' );
|
||
|
if ( pos++ != -1 ) {
|
||
|
tmp = fileName.mid( pos, fileName.length() - pos ).toUpper();
|
||
|
if ( tmp == "JPG" || tmp == "JPEG" ) {
|
||
|
mimeType = "image/jpeg";
|
||
|
} else if ( tmp == "GIF" ) {
|
||
|
mimeType = "image/gif";
|
||
|
} else if ( tmp == "PNG" ) {
|
||
|
mimeType = "image/png";
|
||
|
} else if ( tmp == "TIFF" || tmp == "TIF" ) {
|
||
|
mimeType = "image/tiff";
|
||
|
} else if ( tmp == "XPM" ) {
|
||
|
mimeType = "image/x-xpixmap";
|
||
|
} else if ( tmp == "XBM" ) {
|
||
|
mimeType = "image/x-xbitmap";
|
||
|
} else if ( tmp == "BMP" ) {
|
||
|
mimeType = "image/bmp";
|
||
|
} else if ( tmp == "TXT" ||
|
||
|
tmp == "ASC" ||
|
||
|
tmp == "H" ||
|
||
|
tmp == "C" ||
|
||
|
tmp == "CC" ||
|
||
|
tmp == "CPP" ) {
|
||
|
mimeType = "text/plain";
|
||
|
} else if ( tmp == "HTML" || tmp == "HTM" ) {
|
||
|
mimeType = "text/html";
|
||
|
} else {
|
||
|
mimeType = "application/octet-stream";
|
||
|
}
|
||
|
} else {
|
||
|
mimeType = "application/octet-stream";
|
||
|
}
|
||
|
} else {
|
||
|
mimeType = "application/octet-stream";
|
||
|
}
|
||
|
|
||
|
return mimeType;
|
||
|
}
|
||
|
|
||
|
//==============================================================================
|
||
|
|
||
|
UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) :
|
||
|
NonMimeParser( src ), s_ubject( subject )
|
||
|
{}
|
||
|
|
||
|
bool UUEncoded::parse()
|
||
|
{
|
||
|
int currentPos=0;
|
||
|
bool success=true, firstIteration=true;
|
||
|
|
||
|
while ( success ) {
|
||
|
int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
|
||
|
bool containsBegin=false, containsEnd=false;
|
||
|
QByteArray tmp, fileName;
|
||
|
|
||
|
if ( ( beginPos = QString::fromLatin1( s_rc ).indexOf( QRegExp( QLatin1String( "begin [0-9][0-9][0-9]" ) ),
|
||
|
currentPos ) ) > -1 &&
|
||
|
( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
|
||
|
containsBegin = true;
|
||
|
uuStart = s_rc.indexOf( '\n', beginPos );
|
||
|
if ( uuStart == -1 ) {//no more line breaks found, we give up
|
||
|
success = false;
|
||
|
break;
|
||
|
} else {
|
||
|
uuStart++; //points now at the beginning of the next line
|
||
|
}
|
||
|
} else {
|
||
|
beginPos=currentPos;
|
||
|
}
|
||
|
|
||
|
if ( ( endPos = s_rc.indexOf( "\nend", ( uuStart > 0 ) ? uuStart - 1 : 0 ) ) == -1 ) {
|
||
|
endPos = s_rc.length(); //no end found
|
||
|
} else {
|
||
|
containsEnd = true;
|
||
|
}
|
||
|
|
||
|
if ( ( containsBegin && containsEnd ) || firstIteration ) {
|
||
|
|
||
|
//printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
|
||
|
//all lines in a uuencoded text start with 'M'
|
||
|
for ( int idx=uuStart; idx<endPos; idx++ ) {
|
||
|
if ( s_rc[idx] == '\n' ) {
|
||
|
lineCount++;
|
||
|
if ( idx + 1 < endPos && s_rc[idx + 1] == 'M' ) {
|
||
|
idx++;
|
||
|
MCount++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
|
||
|
if ( MCount == 0 || ( lineCount - MCount ) > 10 ||
|
||
|
( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) {
|
||
|
// harder check for split-articles
|
||
|
success = false;
|
||
|
break; //too many "non-M-Lines" found, we give up
|
||
|
}
|
||
|
|
||
|
if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) {
|
||
|
// message may be split up => parse subject
|
||
|
QRegExp rx( QLatin1String( "[0-9]+/[0-9]+" ) );
|
||
|
pos = rx.indexIn( QLatin1String( s_ubject ), 0 );
|
||
|
len = rx.matchedLength();
|
||
|
if ( pos != -1 ) {
|
||
|
tmp = s_ubject.mid( pos, len );
|
||
|
pos = tmp.indexOf( '/' );
|
||
|
p_artNr = tmp.left( pos ).toInt();
|
||
|
t_otalNr = tmp.right( tmp.length() - pos - 1 ).toInt();
|
||
|
} else {
|
||
|
success = false;
|
||
|
break; //no "part-numbers" found in the subject, we give up
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//everything before "begin" is text
|
||
|
if ( beginPos > 0 ) {
|
||
|
t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
|
||
|
}
|
||
|
|
||
|
if ( containsBegin ) {
|
||
|
//everything between "begin ### " and the next LF is considered as the filename
|
||
|
fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 );
|
||
|
} else {
|
||
|
fileName = "";
|
||
|
}
|
||
|
f_ilenames.append( fileName );
|
||
|
//everything beetween "begin" and "end" is uuencoded
|
||
|
b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) );
|
||
|
m_imeTypes.append( guessMimeType( fileName ) );
|
||
|
firstIteration = false;
|
||
|
|
||
|
int next = s_rc.indexOf( '\n', endPos + 1 );
|
||
|
if ( next == -1 ) { //no more line breaks found, we give up
|
||
|
success = false;
|
||
|
break;
|
||
|
} else {
|
||
|
next++; //points now at the beginning of the next line
|
||
|
}
|
||
|
currentPos = next;
|
||
|
|
||
|
} else {
|
||
|
success = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// append trailing text part of the article
|
||
|
t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
|
||
|
|
||
|
return ( ( b_ins.count() > 0 ) || isPartial() );
|
||
|
}
|
||
|
|
||
|
//==============================================================================
|
||
|
|
||
|
YENCEncoded::YENCEncoded( const QByteArray &src ) :
|
||
|
NonMimeParser( src )
|
||
|
{
|
||
|
}
|
||
|
|
||
|
bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value )
|
||
|
{
|
||
|
bool found = false;
|
||
|
QByteArray sought=name + '=';
|
||
|
|
||
|
int iPos = src.indexOf( sought );
|
||
|
if ( iPos > -1 ) {
|
||
|
int pos1 = src.indexOf( ' ', iPos );
|
||
|
int pos2 = src.indexOf( '\r', iPos );
|
||
|
int pos3 = src.indexOf( '\t', iPos );
|
||
|
int pos4 = src.indexOf( '\n', iPos );
|
||
|
if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) {
|
||
|
pos1 = pos2;
|
||
|
}
|
||
|
if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) {
|
||
|
pos1 = pos3;
|
||
|
}
|
||
|
if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) {
|
||
|
pos1 = pos4;
|
||
|
}
|
||
|
iPos=src.lastIndexOf( '=', pos1 ) + 1;
|
||
|
if ( iPos < pos1 ) {
|
||
|
char c = src.at( iPos );
|
||
|
if ( c>='0' && c<='9' ) {
|
||
|
found = true;
|
||
|
*value = src.mid( iPos, pos1 - iPos ).toInt();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return found;
|
||
|
}
|
||
|
|
||
|
bool YENCEncoded::parse()
|
||
|
{
|
||
|
int currentPos=0;
|
||
|
bool success=true;
|
||
|
|
||
|
while ( success ) {
|
||
|
int beginPos=currentPos, yencStart=currentPos;
|
||
|
bool containsPart=false;
|
||
|
QByteArray fileName, mimeType;
|
||
|
|
||
|
if ( ( beginPos = s_rc.indexOf( "=ybegin ", currentPos ) ) > -1 &&
|
||
|
( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
|
||
|
yencStart = s_rc.indexOf( '\n', beginPos );
|
||
|
if ( yencStart == -1 ) { // no more line breaks found, give up
|
||
|
success = false;
|
||
|
break;
|
||
|
} else {
|
||
|
yencStart++;
|
||
|
if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) {
|
||
|
containsPart = true;
|
||
|
yencStart = s_rc.indexOf( '\n', yencStart );
|
||
|
if ( yencStart == -1 ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
yencStart++;
|
||
|
}
|
||
|
}
|
||
|
// Try to identify yenc meta data
|
||
|
|
||
|
// Filenames can contain any embedded chars until end of line
|
||
|
QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos );
|
||
|
int namePos = meta.indexOf( "name=" );
|
||
|
if ( namePos == -1 ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
int eolPos = meta.indexOf( '\r', namePos );
|
||
|
if ( eolPos == -1 ) {
|
||
|
eolPos = meta.indexOf( '\n', namePos );
|
||
|
}
|
||
|
if ( eolPos == -1 ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) );
|
||
|
|
||
|
// Other metadata is integer
|
||
|
int yencLine;
|
||
|
if ( !yencMeta( meta, "line", ¥cLine ) ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
int yencSize;
|
||
|
if ( !yencMeta( meta, "size", ¥cSize ) ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
int partBegin, partEnd;
|
||
|
if ( containsPart ) {
|
||
|
if ( !yencMeta( meta, "part", &p_artNr ) ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
if ( !yencMeta( meta, "begin", &partBegin ) ||
|
||
|
!yencMeta( meta, "end", &partEnd ) ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
if ( !yencMeta( meta, "total", &t_otalNr ) ) {
|
||
|
t_otalNr = p_artNr + 1;
|
||
|
}
|
||
|
if ( yencSize == partEnd - partBegin + 1 ) {
|
||
|
t_otalNr = 1;
|
||
|
} else {
|
||
|
yencSize = partEnd - partBegin + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We have a valid yenc header; now we extract the binary data
|
||
|
int totalSize = 0;
|
||
|
int pos = yencStart;
|
||
|
int len = s_rc.length();
|
||
|
bool lineStart = true;
|
||
|
int lineLength = 0;
|
||
|
bool containsEnd = false;
|
||
|
QByteArray binary;
|
||
|
binary.resize( yencSize );
|
||
|
while ( pos < len ) {
|
||
|
int ch = s_rc.at( pos );
|
||
|
if ( ch < 0 ) {
|
||
|
ch += 256;
|
||
|
}
|
||
|
if ( ch == '\r' ) {
|
||
|
if ( lineLength != yencLine && totalSize != yencSize ) {
|
||
|
break;
|
||
|
}
|
||
|
pos++;
|
||
|
}
|
||
|
else if ( ch == '\n' ) {
|
||
|
lineStart = true;
|
||
|
lineLength = 0;
|
||
|
pos++;
|
||
|
} else {
|
||
|
if ( ch == '=' ) {
|
||
|
if ( pos + 1 < len ) {
|
||
|
ch = s_rc.at( pos + 1 );
|
||
|
if ( lineStart && ch == 'y' ) {
|
||
|
containsEnd = true;
|
||
|
break;
|
||
|
}
|
||
|
pos += 2;
|
||
|
ch -= 64+42;
|
||
|
if ( ch < 0 ) {
|
||
|
ch += 256;
|
||
|
}
|
||
|
if ( totalSize >= yencSize ) {
|
||
|
break;
|
||
|
}
|
||
|
binary[totalSize++] = ch;
|
||
|
lineLength++;
|
||
|
} else {
|
||
|
break;
|
||
|
}
|
||
|
} else {
|
||
|
ch -= 42;
|
||
|
if ( ch < 0 ) {
|
||
|
ch += 256;
|
||
|
}
|
||
|
if ( totalSize >= yencSize ) {
|
||
|
break;
|
||
|
}
|
||
|
binary[totalSize++] = ch;
|
||
|
lineLength++;
|
||
|
pos++;
|
||
|
}
|
||
|
lineStart = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ( !containsEnd ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
if ( totalSize != yencSize ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// pos now points to =yend; get end data
|
||
|
eolPos = s_rc.indexOf( '\n', pos );
|
||
|
if ( eolPos == -1 ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
meta = s_rc.mid( pos, eolPos - pos );
|
||
|
if ( !yencMeta( meta, "size", &totalSize ) ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
if ( totalSize != yencSize ) {
|
||
|
success = false;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
f_ilenames.append( fileName );
|
||
|
m_imeTypes.append( guessMimeType( fileName ) );
|
||
|
b_ins.append( binary );
|
||
|
|
||
|
//everything before "begin" is text
|
||
|
if ( beginPos > 0 ) {
|
||
|
t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
|
||
|
}
|
||
|
currentPos = eolPos + 1;
|
||
|
|
||
|
} else {
|
||
|
success = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// append trailing text part of the article
|
||
|
t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
|
||
|
|
||
|
return b_ins.count()>0;
|
||
|
}
|
||
|
|
||
|
} // namespace Parser
|
||
|
|
||
|
} // namespace KMime
|