mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-23 18:32:49 +00:00
kdecore: optimize KMimeType::findByContent()
Read the data once, then test the magic rules against that buffer until one matches or all of them have been tried. That means less disk I/O.
Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
parent
ddbcca439d
commit
969f3e428e
6 changed files with 31 additions and 69 deletions
|
@ -41,12 +41,12 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool testMatches(QIODevice* device, qint64 deviceSize, QByteArray& availableData, const QList<KMimeMagicMatch>& matches, const QString& mimeType)
|
static bool testMatches(const QByteArray &availableData, const QList<KMimeMagicMatch>& matches, const QString& mimeType)
|
||||||
{
|
{
|
||||||
for ( QList<KMimeMagicMatch>::const_iterator it = matches.begin(), end = matches.end() ;
|
for ( QList<KMimeMagicMatch>::const_iterator it = matches.begin(), end = matches.end() ;
|
||||||
it != end ; ++it ) {
|
it != end ; ++it ) {
|
||||||
const KMimeMagicMatch& match = *it;
|
const KMimeMagicMatch& match = *it;
|
||||||
if (match.match(device, deviceSize, availableData, mimeType)) {
|
if (match.match(availableData, mimeType)) {
|
||||||
// One of the hierarchies matched -> mimetype recognized.
|
// One of the hierarchies matched -> mimetype recognized.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -54,53 +54,29 @@ static bool testMatches(QIODevice* device, qint64 deviceSize, QByteArray& availa
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KMimeMagicRule::match(QIODevice* device, qint64 deviceSize, QByteArray& availableData) const
|
bool KMimeMagicRule::match(const QByteArray &availableData) const
|
||||||
{
|
{
|
||||||
return testMatches(device, deviceSize, availableData, m_matches, m_mimetype);
|
return testMatches(availableData, m_matches, m_mimetype);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KMimeMagicMatch::match(QIODevice* device, qint64 deviceSize, QByteArray& availableData, const QString& mimeType) const
|
bool KMimeMagicMatch::match(const QByteArray &availableData, const QString& mimeType) const
|
||||||
{
|
{
|
||||||
// First, check that "this" matches, then we'll dive into subMatches if any.
|
// First, check that "this" matches, then we'll dive into subMatches if any.
|
||||||
|
|
||||||
const qint64 mDataSize = m_data.size();
|
const qint64 mDataSize = m_data.size();
|
||||||
if (m_rangeStart + mDataSize > deviceSize)
|
if (m_rangeStart + mDataSize > availableData.size())
|
||||||
return false; // file is too small
|
return false; // file is too small
|
||||||
|
|
||||||
// Read in one block all the data we'll need
|
// Read in one block all the data we'll need
|
||||||
// Example: m_data="ABC", m_rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
|
// Example: m_data="ABC", m_rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
|
||||||
const int dataNeeded = qMin(mDataSize + m_rangeLength - 1, deviceSize - m_rangeStart);
|
const int dataNeeded = qMin(mDataSize + m_rangeLength - 1, availableData.size() - m_rangeStart);
|
||||||
QByteArray readData;
|
QByteArray readData = QByteArray::fromRawData(
|
||||||
|
availableData.constData() + m_rangeStart,
|
||||||
/*kDebug() << "need " << dataNeeded << " bytes of data starting at " << m_rangeStart
|
dataNeeded
|
||||||
<< " - availableData has " << availableData.size() << " bytes,"
|
);
|
||||||
<< " device has " << deviceSize << " bytes.";*/
|
|
||||||
|
|
||||||
if (m_rangeStart + dataNeeded > availableData.size() && availableData.size() < deviceSize) {
|
|
||||||
// Need to read from device
|
|
||||||
if (!device->seek(m_rangeStart))
|
|
||||||
return false;
|
|
||||||
readData.resize(dataNeeded);
|
|
||||||
const int nread = device->read(readData.data(), dataNeeded);
|
|
||||||
//kDebug() << "readData (from device): reading" << dataNeeded << "bytes.";
|
|
||||||
if (nread < mDataSize)
|
|
||||||
return false; // error (or not enough data but we checked for that already)
|
|
||||||
if (m_rangeStart == 0 && readData.size() > availableData.size()) {
|
|
||||||
availableData = readData; // update cache
|
|
||||||
}
|
|
||||||
if (nread < readData.size()) {
|
|
||||||
// File big enough to contain m_data, but not big enough for the full rangeLength.
|
|
||||||
// Pad with zeros.
|
|
||||||
memset(readData.data() + nread, 0, dataNeeded - nread);
|
|
||||||
}
|
|
||||||
//kDebug() << "readData (from device) at pos " << m_rangeStart << ":" << readData;
|
|
||||||
} else {
|
|
||||||
readData = QByteArray::fromRawData(availableData.constData() + m_rangeStart,
|
|
||||||
dataNeeded);
|
|
||||||
// Warning, readData isn't null-terminated so this kDebug
|
// Warning, readData isn't null-terminated so this kDebug
|
||||||
// gives valgrind warnings (when printing as char* data).
|
// gives valgrind warnings (when printing as char* data).
|
||||||
//kDebug() << "readData (from availableData) at pos " << m_rangeStart << ":" << readData;
|
//kDebug() << "readData (from availableData) at pos " << m_rangeStart << ":" << readData;
|
||||||
}
|
|
||||||
|
|
||||||
// All we need to do now, is to look for m_data in readData (whose size is dataNeeded).
|
// All we need to do now, is to look for m_data in readData (whose size is dataNeeded).
|
||||||
// Either as a simple indexOf search, or applying the mask.
|
// Either as a simple indexOf search, or applying the mask.
|
||||||
|
@ -141,5 +117,5 @@ bool KMimeMagicMatch::match(QIODevice* device, qint64 deviceSize, QByteArray& av
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// Check that one of the submatches matches too
|
// Check that one of the submatches matches too
|
||||||
return testMatches(device, deviceSize, availableData, m_subMatches, mimeType);
|
return testMatches(availableData, m_subMatches, mimeType);
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
*/
|
*/
|
||||||
struct KMimeMagicMatch
|
struct KMimeMagicMatch
|
||||||
{
|
{
|
||||||
bool match(QIODevice* device, qint64 deviceSize, QByteArray& availableData, const QString& mimeType) const;
|
bool match(const QByteArray &availableData, const QString& mimeType) const;
|
||||||
|
|
||||||
qint64 m_rangeStart;
|
qint64 m_rangeStart;
|
||||||
qint64 m_rangeLength;
|
qint64 m_rangeLength;
|
||||||
|
@ -57,7 +57,7 @@ public:
|
||||||
KMimeMagicRule(const QString &mimetype, int priority, const QList<KMimeMagicMatch> &matches)
|
KMimeMagicRule(const QString &mimetype, int priority, const QList<KMimeMagicMatch> &matches)
|
||||||
: m_mimetype(mimetype), m_priority(priority), m_matches(matches) {}
|
: m_mimetype(mimetype), m_priority(priority), m_matches(matches) {}
|
||||||
|
|
||||||
bool match(QIODevice* device, qint64 deviceSize, QByteArray& availableData) const;
|
bool match(const QByteArray &availableData) const;
|
||||||
|
|
||||||
QString mimetype() const { return m_mimetype; }
|
QString mimetype() const { return m_mimetype; }
|
||||||
int priority() const { return m_priority; }
|
int priority() const { return m_priority; }
|
||||||
|
|
|
@ -223,7 +223,8 @@ KMimeType::Ptr KMimeType::findByUrl(const KUrl &url, mode_t mode,
|
||||||
QFile file(localfile);
|
QFile file(localfile);
|
||||||
if (file.open(QIODevice::ReadOnly)) {
|
if (file.open(QIODevice::ReadOnly)) {
|
||||||
int magicAccuracy = 0;
|
int magicAccuracy = 0;
|
||||||
KMimeType::Ptr mime = KMimeTypeRepository::self()->findFromContent(&file, &magicAccuracy);
|
// provide enough data for most rules (there are exceptions which require twice as much tho)
|
||||||
|
KMimeType::Ptr mime = KMimeTypeRepository::self()->findFromContent(file.read(16384), &magicAccuracy);
|
||||||
// mime can't be 0, except in case of install problems.
|
// mime can't be 0, except in case of install problems.
|
||||||
// However we get magicAccuracy==0 for octet-stream, i.e. no magic match found.
|
// However we get magicAccuracy==0 for octet-stream, i.e. no magic match found.
|
||||||
// kDebug() << "findFromContent said" << (mime?mime->name():QString()) << "with accuracy" << magicAccuracy;
|
// kDebug() << "findFromContent said" << (mime?mime->name():QString()) << "with accuracy" << magicAccuracy;
|
||||||
|
@ -288,9 +289,7 @@ KMimeType::Ptr KMimeType::findByName(const QString &fileName, int *accuracy)
|
||||||
KMimeType::Ptr KMimeType::findByContent(const QByteArray &data, int *accuracy)
|
KMimeType::Ptr KMimeType::findByContent(const QByteArray &data, int *accuracy)
|
||||||
{
|
{
|
||||||
KMimeTypeRepository::self()->checkEssentialMimeTypes();
|
KMimeTypeRepository::self()->checkEssentialMimeTypes();
|
||||||
QBuffer buffer(const_cast<QByteArray *>(&data));
|
return KMimeTypeRepository::self()->findFromContent(data, accuracy);
|
||||||
buffer.open(QIODevice::ReadOnly);
|
|
||||||
return KMimeTypeRepository::self()->findFromContent(&buffer, accuracy);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
QString KMimeType::extractKnownExtension(const QString &fileName)
|
QString KMimeType::extractKnownExtension(const QString &fileName)
|
||||||
|
|
|
@ -314,26 +314,18 @@ QStringList KMimeTypeRepository::findFromFileName(const QString &fileName, QStri
|
||||||
return matchingMimeTypes;
|
return matchingMimeTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
KMimeType::Ptr KMimeTypeRepository::findFromContent(QIODevice* device, int* accuracy)
|
KMimeType::Ptr KMimeTypeRepository::findFromContent(const QByteArray &data, int* accuracy)
|
||||||
{
|
{
|
||||||
Q_ASSERT(device->isOpen());
|
if (data.size() == 0) {
|
||||||
const qint64 deviceSize = device->size();
|
|
||||||
if (deviceSize == 0) {
|
|
||||||
if (accuracy) {
|
if (accuracy) {
|
||||||
*accuracy = 100;
|
*accuracy = 100;
|
||||||
}
|
}
|
||||||
return findMimeTypeByName(QLatin1String("application/x-zerosize"), KMimeType::DontResolveAlias);
|
return findMimeTypeByName(QLatin1String("application/x-zerosize"), KMimeType::DontResolveAlias);
|
||||||
}
|
}
|
||||||
// provide enough data for most rules (there are exceptions which require twice as much tho)
|
|
||||||
const qint64 dataNeeded = qMin(deviceSize, (qint64) 16384);
|
|
||||||
QByteArray beginning(dataNeeded, '\0');
|
|
||||||
if (!device->seek(0) || device->read(beginning.data(), dataNeeded) == -1) {
|
|
||||||
return defaultMimeTypePtr(); // don't bother detecting unreadable file
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply magic rules
|
// Apply magic rules
|
||||||
Q_FOREACH ( const KMimeMagicRule& rule, m_magicRules ) {
|
Q_FOREACH ( const KMimeMagicRule& rule, m_magicRules ) {
|
||||||
if (rule.match(device, deviceSize, beginning)) {
|
if (rule.match( data)) {
|
||||||
if (accuracy) {
|
if (accuracy) {
|
||||||
*accuracy = rule.priority();
|
*accuracy = rule.priority();
|
||||||
}
|
}
|
||||||
|
@ -343,7 +335,7 @@ KMimeType::Ptr KMimeTypeRepository::findFromContent(QIODevice* device, int* accu
|
||||||
|
|
||||||
// Do fallback code so that we never return 0
|
// Do fallback code so that we never return 0
|
||||||
// Nothing worked, check if the file contents looks like binary or text
|
// Nothing worked, check if the file contents looks like binary or text
|
||||||
if (!KMimeType::isBufferBinaryData(beginning)) {
|
if (!KMimeType::isBufferBinaryData(data)) {
|
||||||
if (accuracy) {
|
if (accuracy) {
|
||||||
*accuracy = 5;
|
*accuracy = 5;
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,13 +97,13 @@ private: // only for KMimeType and unittests
|
||||||
QStringList findFromFileName(const QString &filename, QString *matchingExtension = nullptr) const;
|
QStringList findFromFileName(const QString &filename, QString *matchingExtension = nullptr) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a mimetype from the content of a file or buffer
|
* Find a mimetype from the content of data chunk
|
||||||
* @param device the file or buffer. Must be open.
|
* @param data chunk of data
|
||||||
* @param accuracy returns the priority of the rule that matched
|
* @param accuracy returns the priority of the rule that matched
|
||||||
*
|
*
|
||||||
* This is internal API, use KMimeType::findByUrl instead.
|
* This is internal API, use KMimeType::findByUrl instead.
|
||||||
*/
|
*/
|
||||||
KMimeType::Ptr findFromContent(QIODevice *device, int *accuracy);
|
KMimeType::Ptr findFromContent(const QByteArray &data, int *accuracy);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if at least one mimetype is present
|
* @return true if at least one mimetype is present
|
||||||
|
|
|
@ -842,20 +842,15 @@ void KMimeTypeTest::testParseMagicFile()
|
||||||
QFETCH(QString, testData);
|
QFETCH(QString, testData);
|
||||||
// kDebug() << QTest::currentDataTag();
|
// kDebug() << QTest::currentDataTag();
|
||||||
QFETCH(QString, expected);
|
QFETCH(QString, expected);
|
||||||
QBuffer testBuffer;
|
QByteArray testDataBytes = testData.toLatin1();
|
||||||
testBuffer.setData(testData.toLatin1());
|
|
||||||
QVERIFY(testBuffer.open(QIODevice::ReadOnly));
|
|
||||||
const qint64 testBufferSize = testBuffer.size();
|
|
||||||
QString found;
|
QString found;
|
||||||
QByteArray beginning;
|
|
||||||
Q_FOREACH(const KMimeMagicRule& rule, m_rules) {
|
Q_FOREACH(const KMimeMagicRule& rule, m_rules) {
|
||||||
if (rule.match(&testBuffer, testBufferSize, beginning)) {
|
if (rule.match(testDataBytes)) {
|
||||||
found = rule.mimetype();
|
found = rule.mimetype();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QCOMPARE(found, expected);
|
QCOMPARE(found, expected);
|
||||||
testBuffer.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void KMimeTypeTest::testHelperProtocols()
|
void KMimeTypeTest::testHelperProtocols()
|
||||||
|
|
Loading…
Add table
Reference in a new issue