mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-24 10:52:49 +00:00
598 lines
20 KiB
C++
598 lines
20 KiB
C++
/* This file is part of the KDE libraries
|
|
Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
|
|
Copyright (C) 2010,2011 Rolf Eike Beer <kde@opensource.sf-tec.de>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Library General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Library General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Library General Public License
|
|
along with this library; see the file COPYING.LIB. If not, write to
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <QDir>
|
|
#include <QMap>
|
|
#include <QTextCodec>
|
|
#include <QUrl>
|
|
|
|
#include <kcodecs.h>
|
|
#include <kdebug.h>
|
|
|
|
// Advance *pos beyond spaces / tabs
|
|
static void skipSpace(const char input[], int *pos, int end)
|
|
{
|
|
int idx = *pos;
|
|
while (idx < end && (input[idx] == ' ' || input[idx] == '\t')) {
|
|
idx++;
|
|
}
|
|
*pos = idx;
|
|
return;
|
|
}
|
|
|
|
// Advance *pos to start of next line while being forgiving about line endings.
|
|
// Return false if the end of the header has been reached, true otherwise.
|
|
static bool nextLine(const char input[], int *pos, int end)
|
|
{
|
|
int idx = *pos;
|
|
while (idx < end && input[idx] != '\r' && input[idx] != '\n') {
|
|
idx++;
|
|
}
|
|
int rCount = 0;
|
|
int nCount = 0;
|
|
while (idx < end && qMax(rCount, nCount) < 2 && (input[idx] == '\r' || input[idx] == '\n')) {
|
|
input[idx] == '\r' ? rCount++ : nCount++;
|
|
idx++;
|
|
}
|
|
if (idx < end && qMax(rCount, nCount) == 2 && qMin(rCount, nCount) == 1) {
|
|
// if just one of the others is missing eat it too.
|
|
// this ensures that conforming headers using the proper
|
|
// \r\n sequence (and also \n\r) will be parsed correctly.
|
|
if ((rCount == 1 && input[idx] == '\r') || (nCount == 1 && input[idx] == '\n')) {
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
*pos = idx;
|
|
return idx < end && rCount < 2 && nCount < 2;
|
|
}
|
|
|
|
// QByteArray::fromPercentEncoding() does not notify us about encoding errors so we need
|
|
// to check here if this is valid at all.
|
|
static bool isValidPercentEncoding(const QByteArray &data)
|
|
{
|
|
int i = 0;
|
|
const int last = data.length() - 1;
|
|
const char *d = data.constData();
|
|
|
|
while ( (i = data.indexOf('%', i)) != -1) {
|
|
if ( i >= last - 2 )
|
|
return false;
|
|
if ( ! isxdigit(d[i + 1]) )
|
|
return false;
|
|
if ( ! isxdigit(d[i + 2]) )
|
|
return false;
|
|
i++;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
QByteArray TokenIterator::next()
|
|
{
|
|
QPair<int, int> token = m_tokens[m_currentToken++];
|
|
//fromRawData brings some speed advantage but also the requirement to keep the text buffer
|
|
//around. this together with implicit sharing (you don't know where copies end up)
|
|
//is dangerous!
|
|
//return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
|
|
return QByteArray(&m_buffer[token.first], token.second - token.first);
|
|
}
|
|
|
|
QByteArray TokenIterator::current() const
|
|
{
|
|
QPair<int, int> token = m_tokens[m_currentToken - 1];
|
|
//return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
|
|
return QByteArray(&m_buffer[token.first], token.second - token.first);
|
|
}
|
|
|
|
QList<QByteArray> TokenIterator::all() const
|
|
{
|
|
QList<QByteArray> ret;
|
|
for (int i = 0; i < m_tokens.count(); i++) {
|
|
QPair<int, int> token = m_tokens[i];
|
|
ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
HeaderTokenizer::HeaderTokenizer(char *buffer)
|
|
: m_buffer(buffer)
|
|
{
|
|
// add information about available headers and whether they have one or multiple,
|
|
// comma-separated values.
|
|
|
|
//The following response header fields are from RFC 2616 unless otherwise specified.
|
|
//Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
|
|
//a header field.
|
|
static const HeaderFieldTemplate headerFieldTemplates[] = {
|
|
{"accept-ranges", false},
|
|
{"age", false},
|
|
{"cache-control", true},
|
|
{"connection", true},
|
|
{"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
|
|
{"content-encoding", true},
|
|
{"content-language", true},
|
|
{"content-length", false},
|
|
{"content-location", false},
|
|
{"content-md5", false},
|
|
{"content-type", false},
|
|
{"date", false},
|
|
{"dav", true}, //RFC 2518
|
|
{"etag", false},
|
|
{"expires", false},
|
|
{"keep-alive", true}, //RFC 2068
|
|
{"last-modified", false},
|
|
{"link", false}, //RFC 2068, multi-valued with ";" separator
|
|
{"location", false},
|
|
{"p3p", true}, // http://www.w3.org/TR/P3P/
|
|
{"pragma", true},
|
|
{"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
|
|
//multiple values. we handle this at a higher level.
|
|
{"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
|
|
//when using "connection" when talking to a proxy.
|
|
{"refresh", false}, //not sure, only found some mailing list posts mentioning it
|
|
{"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
|
|
//by sending several instances of this field as opposed to
|
|
//usually comma-separated lists with maybe multiple instances.
|
|
{"transfer-encoding", true},
|
|
{"upgrade", true},
|
|
{"warning", true},
|
|
{"www-authenticate", false} //see proxy-authenticate
|
|
};
|
|
|
|
for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
|
|
const HeaderFieldTemplate &ft = headerFieldTemplates[i];
|
|
insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
|
|
}
|
|
}
|
|
|
|
int HeaderTokenizer::tokenize(int begin, int end)
|
|
{
|
|
char *buf = m_buffer; //keep line length in check :/
|
|
int idx = begin;
|
|
int startIdx = begin; //multi-purpose start of current token
|
|
bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
|
|
QByteArray headerKey;
|
|
do {
|
|
|
|
if (buf[idx] == ' ' || buf [idx] == '\t') {
|
|
// line continuation; preserve startIdx except (see below)
|
|
if (headerKey.isEmpty()) {
|
|
continue;
|
|
}
|
|
// turn CR/LF into spaces for later parsing convenience
|
|
int backIdx = idx - 1;
|
|
while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
|
|
buf[backIdx--] = ' ';
|
|
}
|
|
|
|
// multiple values, comma-separated: add new value or continue previous?
|
|
if (operator[](headerKey).isMultiValued) {
|
|
if (multiValuedEndedWithComma) {
|
|
// start new value; this is almost like no line continuation
|
|
skipSpace(buf, &idx, end);
|
|
startIdx = idx;
|
|
} else {
|
|
// continue previous value; this is tricky. unit tests to the rescue!
|
|
if (operator[](headerKey).beginEnd.last().first == startIdx) {
|
|
// remove entry, it will be re-added because already idx != startIdx
|
|
operator[](headerKey).beginEnd.removeLast();
|
|
} else {
|
|
// no comma, no entry: the prev line was whitespace only - start new value
|
|
skipSpace(buf, &idx, end);
|
|
startIdx = idx;
|
|
}
|
|
}
|
|
}
|
|
|
|
} else {
|
|
// new field
|
|
startIdx = idx;
|
|
// also make sure that there is at least one char after the colon
|
|
while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
|
|
buf[idx] = tolower(buf[idx]);
|
|
idx++;
|
|
}
|
|
if (buf[idx] != ':') {
|
|
//malformed line: no colon
|
|
headerKey.clear();
|
|
continue;
|
|
}
|
|
headerKey = QByteArray(&buf[startIdx], idx - startIdx);
|
|
if (!contains(headerKey)) {
|
|
//we don't recognize this header line
|
|
headerKey.clear();
|
|
continue;
|
|
}
|
|
// skip colon & leading whitespace
|
|
idx++;
|
|
skipSpace(buf, &idx, end);
|
|
startIdx = idx;
|
|
}
|
|
|
|
// we have the name/key of the field, now parse the value
|
|
if (!operator[](headerKey).isMultiValued) {
|
|
|
|
// scan to end of line
|
|
while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
|
|
idx++;
|
|
}
|
|
if (!operator[](headerKey).beginEnd.isEmpty()) {
|
|
// there already is an entry; are we just in a line continuation?
|
|
if (operator[](headerKey).beginEnd.last().first == startIdx) {
|
|
// line continuation: delete previous entry and later insert a new, longer one.
|
|
operator[](headerKey).beginEnd.removeLast();
|
|
}
|
|
}
|
|
operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
|
|
|
|
} else {
|
|
|
|
// comma-separated list
|
|
while (true) {
|
|
//skip one value
|
|
while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
|
|
idx++;
|
|
}
|
|
if (idx != startIdx) {
|
|
operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
|
|
}
|
|
multiValuedEndedWithComma = buf[idx] == ',';
|
|
//skip comma(s) and leading whitespace, if any respectively
|
|
while (idx < end && buf[idx] == ',') {
|
|
idx++;
|
|
}
|
|
skipSpace(buf, &idx, end);
|
|
//next value or end-of-line / end of header?
|
|
if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
|
|
break;
|
|
}
|
|
//next value
|
|
startIdx = idx;
|
|
}
|
|
}
|
|
} while (nextLine(buf, &idx, end));
|
|
return idx;
|
|
}
|
|
|
|
|
|
TokenIterator HeaderTokenizer::iterator(const char *key) const
|
|
{
|
|
QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
|
|
if (contains(keyBa)) {
|
|
return TokenIterator(value(keyBa).beginEnd, m_buffer);
|
|
} else {
|
|
return TokenIterator(m_nullTokens, m_buffer);
|
|
}
|
|
}
|
|
|
|
static void skipLWS(const QString &str, int &pos)
|
|
{
|
|
while (pos < str.length() && (str[pos] == QLatin1Char(' ') || str[pos] == QLatin1Char('\t'))) {
|
|
++pos;
|
|
}
|
|
}
|
|
|
|
// keep the common ending, this allows the compiler to join them
|
|
static const char typeSpecials[] = "{}*'%()<>@,;:\\\"/[]?=";
|
|
static const char attrSpecials[] = "'%()<>@,;:\\\"/[]?=";
|
|
static const char valueSpecials[] = "()<>@,;:\\\"/[]?=";
|
|
|
|
static bool specialChar(const QChar &ch, const char *specials)
|
|
{
|
|
// WORKAROUND: According to RFC 2616, any character other than ascii
|
|
// characters should NOT be allowed in unquoted content-disposition file
|
|
// names. However, since none of the major browsers follow this rule, we do
|
|
// the same thing here and allow all printable unicode characters. See
|
|
// https://bugs.kde.org/show_bug.cgi?id=261223 for the detials.
|
|
if (!ch.isPrint()) {
|
|
return true;
|
|
}
|
|
|
|
for (int i = qstrlen(specials) - 1; i >= 0; i--) {
|
|
if (ch == QLatin1Char(specials[i])) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* read and parse the input until the given terminator
|
|
* @param str input string to parse
|
|
* @param term terminator
|
|
* @param pos position marker in the input string
|
|
* @param specials characters forbidden in this section
|
|
* @return the next section or an empty string if it was invalid
|
|
*
|
|
* Extracts token-like input until terminator char or EOL.
|
|
* Also skips over the terminator.
|
|
*
|
|
* pos is correctly incremented even if this functions returns
|
|
* an empty string so this can be used to skip over invalid
|
|
* parts and continue.
|
|
*/
|
|
static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
|
|
{
|
|
QString out;
|
|
skipLWS(str, pos);
|
|
bool valid = true;
|
|
|
|
while (pos < str.length() && (str[pos] != term)) {
|
|
out += str[pos];
|
|
valid = (valid && !specialChar(str[pos], specials));
|
|
++pos;
|
|
}
|
|
|
|
if (pos < str.length()) { // Stopped due to finding term
|
|
++pos;
|
|
}
|
|
|
|
if (!valid) {
|
|
return QString();
|
|
}
|
|
|
|
// Remove trailing linear whitespace...
|
|
while (out.endsWith(QLatin1Char(' ')) || out.endsWith(QLatin1Char('\t'))) {
|
|
out.chop(1);
|
|
}
|
|
|
|
if (out.contains(QLatin1Char(' '))) {
|
|
out.clear();
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
// As above, but also handles quotes..
|
|
// pos is set to -1 on parse error
|
|
static QString extractMaybeQuotedUntil(const QString &str, int &pos)
|
|
{
|
|
const QChar term = QLatin1Char(';');
|
|
|
|
skipLWS(str, pos);
|
|
|
|
// Are we quoted?
|
|
if (pos < str.length() && str[pos] == QLatin1Char('"')) {
|
|
QString out;
|
|
|
|
// Skip the quote...
|
|
++pos;
|
|
|
|
// when quoted we also need an end-quote
|
|
bool endquote = false;
|
|
|
|
// Parse until trailing quote...
|
|
while (pos < str.length()) {
|
|
if (str[pos] == QLatin1Char('\\') && pos + 1 < str.length()) {
|
|
// quoted-pair = "\" CHAR
|
|
out += str[pos + 1];
|
|
pos += 2; // Skip both...
|
|
} else if (str[pos] == QLatin1Char('"')) {
|
|
++pos;
|
|
endquote = true;
|
|
break;
|
|
} else if (!str[pos].isPrint()) { // Don't allow CTL's RFC 2616 sec 2.2
|
|
break;
|
|
} else {
|
|
out += str[pos];
|
|
++pos;
|
|
}
|
|
}
|
|
|
|
if (!endquote) {
|
|
pos = -1;
|
|
return QString();
|
|
}
|
|
|
|
// Skip until term..
|
|
while (pos < str.length() && (str[pos] != term)) {
|
|
if ((str[pos] != QLatin1Char(' ')) && (str[pos] != QLatin1Char('\t'))) {
|
|
pos = -1;
|
|
return QString();
|
|
}
|
|
++pos;
|
|
}
|
|
|
|
if (pos < str.length()) { // Stopped due to finding term
|
|
++pos;
|
|
}
|
|
|
|
return out;
|
|
} else {
|
|
return extractUntil(str, term, pos, valueSpecials);
|
|
}
|
|
}
|
|
|
|
static QMap<QString, QString> contentDispositionParserInternal(const QString &disposition)
|
|
{
|
|
kDebug(7113) << "disposition: " << disposition;
|
|
int pos = 0;
|
|
const QString strDisposition = extractUntil(disposition, QLatin1Char(';'), pos, typeSpecials).toLower();
|
|
|
|
QMap<QString, QString> parameters;
|
|
QMap<QString, QString> contparams; // all parameters that contain continuations
|
|
QMap<QString, QString> encparams; // all parameters that have character encoding
|
|
|
|
// the type is invalid, the complete header is junk
|
|
if (strDisposition.isEmpty()) {
|
|
return parameters;
|
|
}
|
|
|
|
parameters.insert(QLatin1String("type"), strDisposition);
|
|
|
|
while (pos < disposition.length()) {
|
|
QString key = extractUntil(disposition, QLatin1Char('='), pos, attrSpecials).toLower();
|
|
|
|
if (key.isEmpty()) {
|
|
// parse error in this key: do not parse more, but add up
|
|
// everything we already got
|
|
kDebug(7113) << "parse error in key, abort parsing";
|
|
break;
|
|
}
|
|
|
|
QString val;
|
|
if (key.endsWith(QLatin1Char('*'))) {
|
|
val = extractUntil(disposition, QLatin1Char(';'), pos, valueSpecials);
|
|
} else {
|
|
val = extractMaybeQuotedUntil(disposition, pos);
|
|
}
|
|
|
|
if (val.isEmpty()) {
|
|
if (pos == -1) {
|
|
kDebug(7113) << "parse error in value, abort parsing";
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
const int spos = key.indexOf(QLatin1Char('*'));
|
|
if (spos == key.length() - 1) {
|
|
key.chop(1);
|
|
encparams.insert(key, val);
|
|
} else if (spos >= 0) {
|
|
contparams.insert(key, val);
|
|
} else if (parameters.contains(key)) {
|
|
kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
|
|
parameters.remove(key);
|
|
return parameters;
|
|
} else {
|
|
parameters.insert(key, val);
|
|
}
|
|
}
|
|
|
|
QMap<QString, QString>::iterator i = contparams.begin();
|
|
while (i != contparams.end()) {
|
|
QString key = i.key();
|
|
int spos = key.indexOf(QLatin1Char('*'));
|
|
bool hasencoding = false;
|
|
|
|
if (key.at(spos + 1) != QLatin1Char('0')) {
|
|
++i;
|
|
continue;
|
|
}
|
|
|
|
// no leading zeros allowed, so delete the junk
|
|
int klen = key.length();
|
|
if (klen > spos + 2) {
|
|
// nothing but continuations and encodings may insert * into parameter name
|
|
if ((klen > spos + 3) || ((klen == spos + 3) && (key.at(spos + 2) != QLatin1Char('*')))) {
|
|
kDebug(7113) << "removing invalid key " << key << "with val" << i.value() << key.at(spos + 2);
|
|
i = contparams.erase(i);
|
|
continue;
|
|
}
|
|
hasencoding = true;
|
|
}
|
|
|
|
int seqnum = 1;
|
|
QMap<QString, QString>::iterator partsi;
|
|
// we do not need to care about encoding specifications: only the first
|
|
// part is allowed to have one
|
|
QString val = i.value();
|
|
|
|
key.chop(hasencoding ? 2 : 1);
|
|
|
|
while ((partsi = contparams.find(key + QString::number(seqnum))) != contparams.end()) {
|
|
val += partsi.value();
|
|
contparams.erase(partsi);
|
|
}
|
|
|
|
i = contparams.erase(i);
|
|
|
|
key.chop(1);
|
|
if (hasencoding) {
|
|
encparams.insert(key, val);
|
|
} else {
|
|
if (parameters.contains(key)) {
|
|
kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
|
|
parameters.remove(key);
|
|
return parameters;
|
|
}
|
|
|
|
parameters.insert(key, val);
|
|
}
|
|
}
|
|
|
|
for (QMap<QString, QString>::iterator i = encparams.begin(); i != encparams.end(); ++i) {
|
|
QString val = i.value();
|
|
|
|
// RfC 2231 encoded character set in filename
|
|
int spos = val.indexOf(QLatin1Char('\''));
|
|
if (spos == -1) {
|
|
continue;
|
|
}
|
|
int npos = val.indexOf(QLatin1Char('\''), spos + 1);
|
|
if (npos == -1) {
|
|
continue;
|
|
}
|
|
|
|
const QString charset = val.left(spos);
|
|
const QString lang = val.mid(spos + 1, npos - spos - 1);
|
|
const QByteArray encodedVal = val.mid(npos + 1).toLatin1();
|
|
|
|
if ( ! isValidPercentEncoding(encodedVal) )
|
|
continue;
|
|
|
|
const QByteArray rawval = QByteArray::fromPercentEncoding(encodedVal);
|
|
|
|
if (charset.isEmpty() || (charset == QLatin1String("us-ascii"))) {
|
|
bool valid = true;
|
|
for (int j = rawval.length() - 1; (j >= 0) && valid; j--) {
|
|
valid = (rawval.at(j) >= 32);
|
|
}
|
|
|
|
if (!valid)
|
|
continue;
|
|
val = QString::fromLatin1(rawval.constData());
|
|
} else {
|
|
QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
|
|
if (!codec)
|
|
continue;
|
|
val = codec->toUnicode(rawval);
|
|
}
|
|
|
|
parameters.insert(i.key(), val);
|
|
}
|
|
|
|
return parameters;
|
|
}
|
|
|
|
static QMap<QString, QString> contentDispositionParser(const QString &disposition)
|
|
{
|
|
QMap<QString, QString> parameters = contentDispositionParserInternal(disposition);
|
|
|
|
const QLatin1String fn("filename");
|
|
if (parameters.contains(fn)) {
|
|
// Content-Disposition is not allowed to dictate directory
|
|
// path, thus we extract the filename only.
|
|
const QString val = QDir::toNativeSeparators(parameters[fn]);
|
|
int slpos = val.lastIndexOf(QDir::separator());
|
|
|
|
if (slpos > -1) {
|
|
parameters.insert(fn, val.mid(slpos + 1));
|
|
}
|
|
}
|
|
|
|
return parameters;
|
|
}
|