mirror of
https://bitbucket.org/smil3y/kdelibs.git
synced 2025-02-23 18:32:49 +00:00
kdecore: MIME glob matching optimization
By doing it with a single iteration over the globs, a lot less processing has to be done. Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
parent
d338e13b8d
commit
1ff6f4d2ee
5 changed files with 67 additions and 156 deletions
|
@ -18,25 +18,23 @@
|
|||
*/
|
||||
|
||||
#include "kmimeglobsfileparser_p.h"
|
||||
#include <kglobal.h>
|
||||
#include <kdeversion.h>
|
||||
#include <kmimetype.h>
|
||||
#include <kstandarddirs.h>
|
||||
#include "kglobal.h"
|
||||
#include "kdeversion.h"
|
||||
#include "kmimetype.h"
|
||||
#include "kstandarddirs.h"
|
||||
#include "kmimetyperepository_p.h"
|
||||
#include <kdebug.h>
|
||||
#include <QtCore/QTextStream>
|
||||
#include <QtCore/QFile>
|
||||
#include "kdebug.h"
|
||||
|
||||
KMimeGlobsFileParser::AllGlobs KMimeGlobsFileParser::parseGlobs()
|
||||
#include <QFile>
|
||||
|
||||
static bool kGlobSort(const KMimeGlobsFileParser::Glob &first, const KMimeGlobsFileParser::Glob &second)
|
||||
{
|
||||
const QStringList globFiles = KGlobal::dirs()->findAllResources("xdgdata-mime", QString::fromLatin1("globs2"));
|
||||
//kDebug() << globFiles;
|
||||
return parseGlobs(globFiles);
|
||||
// Order globs by descending weight. qStableSort() requires a comparator that
// behaves like operator< (strict ordering): '>' makes globs of equal weight
// compare as equivalent, so they keep their relative parse order, whereas
// '>=' would report each of two equal-weight globs as preceding the other.
return (first.weight > second.weight);
|
||||
}
|
||||
|
||||
KMimeGlobsFileParser::AllGlobs KMimeGlobsFileParser::parseGlobs(const QStringList &globFiles)
|
||||
KMimeGlobsFileParser::GlobList KMimeGlobsFileParser::parseGlobs(const QStringList &globFiles)
|
||||
{
|
||||
KMimeGlobsFileParser::AllGlobs allGlobs;
|
||||
KMimeGlobsFileParser::GlobList allGlobs;
|
||||
QListIterator<QString> globIter(globFiles);
|
||||
globIter.toBack();
|
||||
// At each level, we must be able to override (not just add to) the information that we read at higher levels
|
||||
|
@ -47,6 +45,10 @@ KMimeGlobsFileParser::AllGlobs KMimeGlobsFileParser::parseGlobs(const QStringLis
|
|||
//kDebug() << "Now parsing" << fileName;
|
||||
parseGlobFile(&globFile, allGlobs);
|
||||
}
|
||||
// globs2 files are weight-sorted, so sort manually only when more than one file is parsed
|
||||
if (globFiles.size() > 1) {
|
||||
qStableSort(allGlobs.begin(), allGlobs.end(), kGlobSort);
|
||||
}
|
||||
return allGlobs;
|
||||
}
|
||||
|
||||
|
@ -63,7 +65,7 @@ static void filterEmptyFromList(QList<QByteArray>* bytelist)
|
|||
}
|
||||
|
||||
// uses a QIODevice to make unit tests possible
|
||||
bool KMimeGlobsFileParser::parseGlobFile(QIODevice* file, AllGlobs& globs)
|
||||
bool KMimeGlobsFileParser::parseGlobFile(QIODevice* file, GlobList& globs)
|
||||
{
|
||||
Q_ASSERT(file);
|
||||
if (!file->open(QIODevice::ReadOnly)) {
|
||||
|
@ -106,8 +108,6 @@ bool KMimeGlobsFileParser::parseGlobFile(QIODevice* file, AllGlobs& globs)
|
|||
continue;
|
||||
}
|
||||
|
||||
bool caseSensitive = flagList.contains(QByteArray("cs"));
|
||||
|
||||
const QString mimeTypeNameStr = QString::fromLatin1(mimeTypeName.constData(), mimeTypeName.size());
|
||||
if (pattern == "__NOGLOBS__") {
|
||||
// kDebug() << "removing" << mimeTypeName;
|
||||
|
@ -118,62 +118,22 @@ bool KMimeGlobsFileParser::parseGlobFile(QIODevice* file, AllGlobs& globs)
|
|||
// kDebug() << "Adding pattern" << pattern << "to mimetype" << mimeTypeName << "from globs file, with weight" << weight;
|
||||
//if (pattern.toLower() == "*.c")
|
||||
// kDebug() << " Adding pattern" << pattern << "to mimetype" << mimeTypeName << "from globs file, with weight" << weight << "flags" << flags;
|
||||
const QString patternStr = QString::fromLatin1(pattern.constData(), pattern.size());
|
||||
globs.addGlob(Glob(mimeTypeNameStr, weight, patternStr, caseSensitive));
|
||||
const bool caseSensitive = flagList.contains(QByteArray("cs"));
|
||||
const QByteArray patternCs = (caseSensitive ? pattern : pattern.toLower());
|
||||
const QString patternStr = QString::fromLatin1(patternCs.constData(), patternCs.size());
|
||||
if (!globs.hasPattern(mimeTypeNameStr, patternStr)) {
|
||||
globs.append(
|
||||
Glob(
|
||||
mimeTypeNameStr,
|
||||
weight,
|
||||
patternStr,
|
||||
caseSensitive
|
||||
)
|
||||
);
|
||||
}
|
||||
lastMime = mimeTypeName;
|
||||
lastPattern = pattern;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void KMimeGlobsFileParser::AllGlobs::addGlob(const Glob& glob)
|
||||
{
|
||||
// Note that in each case, we check for duplicates to avoid inserting duplicated patterns.
|
||||
// This can happen when installing kde.xml and freedesktop.org.xml
|
||||
// in the same prefix, and they both have text/plain:*.txt
|
||||
|
||||
const QString &pattern = glob.pattern;
|
||||
Q_ASSERT(!pattern.isEmpty());
|
||||
Q_UNUSED(pattern);
|
||||
|
||||
//kDebug() << "pattern" << pattern << "glob.weight=" << glob.weight << glob.flags;
|
||||
|
||||
// Store each patterns into either m_fastPatternDict (*.txt, *.html etc. with default weight 50)
|
||||
// or for the rest, like core.*, *.tar.bz2, *~, into highWeightPatternOffset (>50)
|
||||
// or lowWeightPatternOffset (<=50)
|
||||
|
||||
Glob adjustedGlob(glob);
|
||||
if (!adjustedGlob.casesensitive)
|
||||
adjustedGlob.pattern = adjustedGlob.pattern.toLower();
|
||||
if (adjustedGlob.weight >= 50) {
|
||||
if (!m_highWeightGlobs.hasPattern(adjustedGlob.mimeType, adjustedGlob.pattern))
|
||||
m_highWeightGlobs.append(adjustedGlob);
|
||||
} else {
|
||||
if (!m_lowWeightGlobs.hasPattern(adjustedGlob.mimeType, adjustedGlob.pattern))
|
||||
m_lowWeightGlobs.append(adjustedGlob);
|
||||
}
|
||||
}
|
||||
|
||||
KMimeGlobsFileParser::PatternsMap KMimeGlobsFileParser::AllGlobs::patternsMap() const
|
||||
{
|
||||
PatternsMap patMap;
|
||||
patMap.reserve(m_highWeightGlobs.size() + m_lowWeightGlobs.size());
|
||||
|
||||
// This is just to fill in KMimeType::patterns. This has no real effect
|
||||
// on the actual mimetype matching.
|
||||
|
||||
Q_FOREACH(const Glob& glob, m_highWeightGlobs)
|
||||
patMap[glob.mimeType].append(glob.pattern);
|
||||
|
||||
Q_FOREACH(const Glob& glob, m_lowWeightGlobs)
|
||||
patMap[glob.mimeType].append(glob.pattern);
|
||||
|
||||
return patMap;
|
||||
}
|
||||
|
||||
void KMimeGlobsFileParser::AllGlobs::removeMime(const QString& mime)
|
||||
{
|
||||
m_highWeightGlobs.removeMime(mime);
|
||||
m_lowWeightGlobs.removeMime(mime);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include <QIODevice>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
|
||||
/**
|
||||
* @internal
|
||||
|
@ -32,13 +33,7 @@
|
|||
class KMimeGlobsFileParser
|
||||
{
|
||||
public:
|
||||
class AllGlobs;
|
||||
|
||||
// Read globs (patterns) files
|
||||
static AllGlobs parseGlobs();
|
||||
|
||||
// Separate method, for unit test
|
||||
static AllGlobs parseGlobs(const QStringList &globFiles);
|
||||
typedef QHash<QString, QStringList> PatternsMap; // mimetype -> patterns
|
||||
|
||||
struct Glob {
|
||||
Glob(const QString &mime, int w = 50, const QString &pat = QString(), bool cs = false)
|
||||
|
@ -52,7 +47,8 @@ public:
|
|||
class GlobList : public QList<Glob>
|
||||
{
|
||||
public:
|
||||
bool hasPattern(const QString &mime, const QString &pattern) const {
|
||||
bool hasPattern(const QString &mime, const QString &pattern) const
|
||||
{
|
||||
const_iterator it = begin();
|
||||
const const_iterator myend = end();
|
||||
for (; it != myend; ++it)
|
||||
|
@ -61,37 +57,32 @@ public:
|
|||
return false;
|
||||
}
|
||||
// "noglobs" is very rare occurrence, so it's ok if it's slow
|
||||
void removeMime(const QString& mime) {
|
||||
void removeMime(const QString &mime)
|
||||
{
|
||||
QMutableListIterator<Glob> it(*this);
|
||||
while (it.hasNext()) {
|
||||
if (it.next().mimeType == mime)
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
// for tests
|
||||
PatternsMap patternsMap() const
|
||||
{
|
||||
PatternsMap patMap;
|
||||
patMap.reserve(this->size());
|
||||
const_iterator it = begin();
|
||||
const const_iterator myend = end();
|
||||
for (; it != myend; ++it)
|
||||
patMap[(*it).mimeType].append((*it).pattern);
|
||||
return patMap;
|
||||
}
|
||||
};
|
||||
|
||||
typedef QHash<QString, QStringList> PatternsMap; // mimetype -> patterns
|
||||
// Read globs (patterns) files
|
||||
static GlobList parseGlobs(const QStringList &globFiles);
|
||||
|
||||
/**
|
||||
* Result of the globs parsing, as data structures ready for efficient mimetype matching.
|
||||
* This contains:
|
||||
* 1) a map of fast regular patterns (e.g. *.txt is stored as "txt" in a qhash's key)
|
||||
* 2) a linear list of high-weight globs
|
||||
* 3) a linear list of low-weight globs
|
||||
* The mime-matching algorithms on top of these data structures are in KMimeTypeFactory.
|
||||
*/
|
||||
class AllGlobs
|
||||
{
|
||||
public:
|
||||
void addGlob(const Glob &glob);
|
||||
void removeMime(const QString &mime);
|
||||
PatternsMap patternsMap() const; // for KMimeTypeFactory
|
||||
|
||||
GlobList m_highWeightGlobs; // >= 50 patterns
|
||||
GlobList m_lowWeightGlobs; // < 50 patterns
|
||||
};
|
||||
|
||||
static bool parseGlobFile(QIODevice *file, AllGlobs &globs);
|
||||
static bool parseGlobFile(QIODevice *file, GlobList &globs);
|
||||
};
|
||||
|
||||
#endif /* KMIMEFILEPARSER_H */
|
||||
|
|
|
@ -110,8 +110,8 @@ void KMimeTypeRepository::parseMimeData()
|
|||
{
|
||||
QMutexLocker locker(&m_mutex);
|
||||
|
||||
KMimeGlobsFileParser parser;
|
||||
m_globs = parser.parseGlobs();
|
||||
const QStringList globFiles = KGlobal::dirs()->findAllResources("xdgdata-mime", QString::fromLatin1("globs2"));
|
||||
m_globs = KMimeGlobsFileParser::parseGlobs(globFiles);
|
||||
|
||||
m_aliases.clear();
|
||||
const QStringList aliasFiles = KGlobal::dirs()->findAllResources("xdgdata-mime", QLatin1String("aliases"));
|
||||
|
@ -286,31 +286,19 @@ bool KMimeTypeRepository::matchFileName(const QString &filename, const QString &
|
|||
return rx.exactMatch(filename);
|
||||
}
|
||||
|
||||
// Helper for findFromFileName
|
||||
void KMimeTypeRepository::findFromOtherPatternList(QStringList &matchingMimeTypes,
|
||||
const QString &fileName,
|
||||
QString &foundExt,
|
||||
bool highWeight) const
|
||||
QStringList KMimeTypeRepository::findFromFileName(const QString &fileName, QString *pMatchingExtension) const
|
||||
{
|
||||
const KMimeGlobsFileParser::GlobList patternList = highWeight ? m_globs.m_highWeightGlobs : m_globs.m_lowWeightGlobs;
|
||||
|
||||
QStringList matchingMimeTypes;
|
||||
QString foundExt;
|
||||
int matchingPatternLength = 0;
|
||||
qint32 lastMatchedWeight = 0;
|
||||
if (!highWeight && !matchingMimeTypes.isEmpty()) {
|
||||
// We found matches in the fast pattern dict already:
|
||||
matchingPatternLength = foundExt.length() + 2; // *.foo -> length=5
|
||||
lastMatchedWeight = 50;
|
||||
}
|
||||
|
||||
// "Applications MUST match globs case-insensitively, except when the case-sensitive
|
||||
// attribute is set to true."
|
||||
// KMimeGlobsFileParser takes care of putting case-insensitive patterns in lowercase.
|
||||
const QString lowerCaseFileName = fileName.toLower();
|
||||
|
||||
KMimeGlobsFileParser::GlobList::const_iterator it = patternList.constBegin();
|
||||
const KMimeGlobsFileParser::GlobList::const_iterator end = patternList.constEnd();
|
||||
for ( ; it != end; ++it ) {
|
||||
const KMimeGlobsFileParser::Glob& glob = *it;
|
||||
foreach (const KMimeGlobsFileParser::Glob &glob, m_globs) {
|
||||
if (matchFileName(glob.casesensitive ? fileName : lowerCaseFileName, glob.pattern)) {
|
||||
// Is this a lower-weight pattern than the last match? Stop here then.
|
||||
if (glob.weight < lastMatchedWeight) {
|
||||
|
@ -331,25 +319,13 @@ void KMimeTypeRepository::findFromOtherPatternList(QStringList &matchingMimeType
|
|||
// remember the new "longer" length
|
||||
matchingPatternLength = glob.pattern.length();
|
||||
}
|
||||
matchingMimeTypes.push_back(glob.mimeType);
|
||||
matchingMimeTypes.append(glob.mimeType);
|
||||
lastMatchedWeight = glob.weight;
|
||||
if (glob.pattern.startsWith(QLatin1String("*."))) {
|
||||
foundExt = glob.pattern.mid(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
QStringList KMimeTypeRepository::findFromFileName(const QString &fileName, QString *pMatchingExtension) const
|
||||
{
|
||||
// First try the high weight matches (>=50), if any.
|
||||
QStringList matchingMimeTypes;
|
||||
QString foundExt;
|
||||
findFromOtherPatternList(matchingMimeTypes, fileName, foundExt, true);
|
||||
|
||||
if (matchingMimeTypes.isEmpty() || foundExt.isEmpty()) {
|
||||
// Try the low weight matches (<50)
|
||||
findFromOtherPatternList(matchingMimeTypes, fileName, foundExt, false);
|
||||
}
|
||||
|
||||
if (pMatchingExtension) {
|
||||
*pMatchingExtension = foundExt;
|
||||
|
|
|
@ -128,20 +128,6 @@ private:
|
|||
*/
|
||||
QList<KMimeMagicRule> parseMagicFile(QIODevice *file, const QString &fileName) const;
|
||||
|
||||
/**
|
||||
* Look into either the high-weight patterns or the low-weight patterns.
|
||||
* @param matchingMimeTypes in/out parameter. In: the already found mimetypes;
|
||||
* this is only set when the fast pattern dict found matches (i.e. weight 50)
|
||||
* and we want to check if there are other, longer, weight 50 matches.
|
||||
* @param filename the filename we are trying to match
|
||||
* @param foundExt in/out parameter, the recognized extension of the match
|
||||
* @param highWeight whether to look into >50 or <=50 patterns.
|
||||
*/
|
||||
void findFromOtherPatternList(QStringList &matchingMimeTypes,
|
||||
const QString &filename,
|
||||
QString &foundExt,
|
||||
bool highWeight) const;
|
||||
|
||||
typedef QHash<QString, QString> AliasesMap;
|
||||
AliasesMap m_aliases; // alias -> canonicalName
|
||||
|
||||
|
@ -153,7 +139,7 @@ private:
|
|||
bool m_useFavIconsChecked;
|
||||
int m_sharedMimeInfoVersion;
|
||||
QList<KMimeMagicRule> m_magicRules;
|
||||
KMimeGlobsFileParser::AllGlobs m_globs;
|
||||
KMimeGlobsFileParser::GlobList m_globs;
|
||||
KMimeType::Ptr m_defaultMimeType;
|
||||
QMutex m_mutex;
|
||||
};
|
||||
|
|
|
@ -45,10 +45,9 @@ private Q_SLOTS:
|
|||
"40:text/plain:*.kmimefileparserunittest\n"
|
||||
"20:text/plain:*.kmimefileparserunittest2::futureextension";
|
||||
QBuffer buf(&testFile);
|
||||
KMimeGlobsFileParser::AllGlobs mimeTypeGlobs;
|
||||
QVERIFY(KMimeGlobsFileParser::parseGlobFile(&buf, mimeTypeGlobs));
|
||||
//kDebug() << mimeTypeGlobs.keys();
|
||||
const KMimeGlobsFileParser::GlobList textGlobs = mimeTypeGlobs.m_lowWeightGlobs;
|
||||
KMimeGlobsFileParser::GlobList textGlobs;
|
||||
QVERIFY(KMimeGlobsFileParser::parseGlobFile(&buf, textGlobs));
|
||||
//kDebug() << textGlobs.keys();
|
||||
QCOMPARE(textGlobs.count(), 2);
|
||||
QCOMPARE(textGlobs[0].pattern, ext1);
|
||||
QCOMPARE(textGlobs[0].mimeType, QString("text/plain"));
|
||||
|
@ -74,7 +73,7 @@ private Q_SLOTS:
|
|||
const QString fileName = globTempFile.fileName();
|
||||
globTempFile.close();
|
||||
|
||||
KMimeGlobsFileParser::AllGlobs globs = parser.parseGlobs(QStringList() << fileName);
|
||||
KMimeGlobsFileParser::GlobList globs = parser.parseGlobs(QStringList() << fileName);
|
||||
|
||||
const QStringList textPlainPatterns = globs.patternsMap().value("text/plain");
|
||||
QVERIFY(textPlainPatterns.contains(ext1));
|
||||
|
@ -109,7 +108,7 @@ private Q_SLOTS:
|
|||
const QString fileName2 = globTempFile2.fileName();
|
||||
globTempFile2.close();
|
||||
|
||||
KMimeGlobsFileParser::AllGlobs globs = parser.parseGlobs(QStringList() << fileName1 << fileName2);
|
||||
KMimeGlobsFileParser::GlobList globs = parser.parseGlobs(QStringList() << fileName1 << fileName2);
|
||||
|
||||
const QStringList textPlainPatterns = globs.patternsMap().value("text/plain");
|
||||
kDebug() << textPlainPatterns;
|
||||
|
@ -144,7 +143,7 @@ private Q_SLOTS:
|
|||
const QString fileName2 = globTempFile2.fileName();
|
||||
globTempFile2.close();
|
||||
|
||||
KMimeGlobsFileParser::AllGlobs globs = parser.parseGlobs(QStringList() << fileName1 << fileName2);
|
||||
KMimeGlobsFileParser::GlobList globs = parser.parseGlobs(QStringList() << fileName1 << fileName2);
|
||||
|
||||
const QStringList textPlainPatterns = globs.patternsMap().value("text/plain");
|
||||
kDebug() << textPlainPatterns;
|
||||
|
@ -179,8 +178,7 @@ private Q_SLOTS:
|
|||
const QStringList globFiles = KGlobal::dirs()->findAllResources("xdgdata-mime", "globs2");
|
||||
m_allGlobs = KMimeGlobsFileParser::parseGlobs(globFiles);
|
||||
m_patternsMap = m_allGlobs.patternsMap();
|
||||
const int patCount = m_allGlobs.m_highWeightGlobs.count() + m_allGlobs.m_lowWeightGlobs.count();
|
||||
qDebug() << m_patternsMap.count() << "mimetypes," << patCount << "patterns";
|
||||
qDebug() << m_patternsMap.count() << "mimetypes," << m_allGlobs.count() << "patterns";
|
||||
}
|
||||
|
||||
void testGlobMatchingPerformance()
|
||||
|
@ -244,7 +242,7 @@ private:
|
|||
}
|
||||
|
||||
private:
|
||||
KMimeGlobsFileParser::AllGlobs m_allGlobs;
|
||||
KMimeGlobsFileParser::GlobList m_allGlobs;
|
||||
KMimeGlobsFileParser::PatternsMap m_patternsMap;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue