mirror of
https://bitbucket.org/smil3y/katie.git
synced 2025-02-25 03:12:56 +00:00

the only thing left to fix is the dbus component, which requires the pointer operator backported from 9c5a77f0ef,
but it messes up the erase and insert templates and causes source incompatibilities, so it has to be investigated further. upstream commits: fad1fa65ed
7c1e0fef8e
c4fbe872be
0412ad3513
Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
615 lines
19 KiB
C++
615 lines
19 KiB
C++
/****************************************************************************
|
|
**
|
|
** Copyright (C) 2015 The Qt Company Ltd.
|
|
** Contact: http://www.qt.io/licensing/
|
|
**
|
|
** This file is part of the Qt Assistant of the Qt Toolkit.
|
|
**
|
|
** $QT_BEGIN_LICENSE:LGPL$
|
|
** Commercial License Usage
|
|
** Licensees holding valid commercial Qt licenses may use this file in
|
|
** accordance with the commercial license agreement provided with the
|
|
** Software or, alternatively, in accordance with the terms contained in
|
|
** a written agreement between you and The Qt Company. For licensing terms
|
|
** and conditions see http://www.qt.io/terms-conditions. For further
|
|
** information use the contact form at http://www.qt.io/contact-us.
|
|
**
|
|
** GNU Lesser General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
** General Public License version 2.1 or version 3 as published by the Free
|
|
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
|
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
|
** following information to ensure the GNU Lesser General Public License
|
|
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
|
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
|
**
|
|
** As a special exception, The Qt Company gives you certain additional
|
|
** rights. These rights are described in The Qt Company LGPL Exception
|
|
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
|
**
|
|
** GNU General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU
|
|
** General Public License version 3.0 as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.GPL included in the
|
|
** packaging of this file. Please review the following information to
|
|
** ensure the GNU General Public License version 3.0 requirements will be
|
|
** met: http://www.gnu.org/copyleft/gpl.html.
|
|
**
|
|
** $QT_END_LICENSE$
|
|
**
|
|
****************************************************************************/
|
|
|
|
#include "qhelpenginecore.h"
|
|
#include "qhelpsearchindexreader_default_p.h"
|
|
|
|
#include <QtCore/QDir>
|
|
#include <QtCore/QUrl>
|
|
#include <QtCore/QFile>
|
|
#include <QtCore/QVariant>
|
|
#include <QtCore/QFileInfo>
|
|
#include <QtCore/QDataStream>
|
|
#include <QtCore/QTextStream>
|
|
|
|
QT_BEGIN_NAMESPACE
|
|
|
|
namespace fulltextsearch {
|
|
namespace std {
|
|
|
|
namespace {
|
|
// Breaks a wildcard expression into its literal pieces and "*" markers.
// A leading '*' yields a "*" item, every non-empty literal piece that is
// followed by a '*' yields the piece plus a "*" item, and a non-empty
// remainder after the last '*' is appended as the final item.
QStringList split( const QString &str )
{
    const QLatin1Char star('*');
    QStringList pieces;

    if (str.startsWith(QLatin1String("*")))
        pieces << QLatin1String("*");

    int pieceStart = 0;
    for (int starPos = str.indexOf(star, pieceStart); starPos != -1;
         starPos = str.indexOf(star, pieceStart)) {
        // An empty piece (leading '*' or two adjacent stars) adds nothing.
        if (starPos > pieceStart) {
            pieces << str.mid(pieceStart, starPos - pieceStart);
            pieces << QLatin1String("*");
        }
        pieceStart = starPos + 1;
    }

    // Whatever follows the last '*' (or the whole string if there is none).
    const QString remainder = str.mid(pieceStart, str.length() - pieceStart);
    if (!remainder.isEmpty())
        pieces << remainder;

    return pieces;
}
|
|
}
|
|
|
|
|
|
// Creates a reader with no index path, no selected index/document file and
// empty term and index tables.
Reader::Reader()
    : indexPath()
    , indexFile()
    , documentFile()
{
    searchIndexTable.clear();
    indexTable.clear();
    termList.clear();
}
|
|
|
|
// Releases everything the reader owns: the cached index entries (through
// reset()), the filtered view of them, and the per-document position entries
// that searchForPattern() allocated for the last document it scanned.
Reader::~Reader()
{
    reset();
    searchIndexTable.clear();

    // searchForPattern() only frees these at the start of its next call, so
    // the entries of the last call would otherwise be leaked here.
    for (QHash<QString, PosEntry*>::ConstIterator it = miniIndex.constBegin();
         it != miniIndex.constEnd(); ++it) {
        delete it.value();
    }
    miniIndex.clear();
}
|
|
|
|
bool Reader::readIndex()
|
|
{
|
|
if (indexTable.contains(indexFile))
|
|
return true;
|
|
|
|
QFile idxFile(indexFile);
|
|
if (!idxFile.open(QFile::ReadOnly))
|
|
return false;
|
|
|
|
QString key;
|
|
int numOfDocs;
|
|
EntryTable entryTable;
|
|
QVector<Document> docs;
|
|
QDataStream dictStream(&idxFile);
|
|
while (!dictStream.atEnd()) {
|
|
dictStream >> key;
|
|
dictStream >> numOfDocs;
|
|
docs.resize(numOfDocs);
|
|
dictStream >> docs;
|
|
entryTable.insert(key, new Entry(docs));
|
|
}
|
|
idxFile.close();
|
|
|
|
if (entryTable.isEmpty())
|
|
return false;
|
|
|
|
QFile docFile(documentFile);
|
|
if (!docFile.open(QFile::ReadOnly))
|
|
return false;
|
|
|
|
QString title, url;
|
|
DocumentList documentList;
|
|
QDataStream docStream(&docFile);
|
|
while (!docStream.atEnd()) {
|
|
docStream >> title;
|
|
docStream >> url;
|
|
documentList.append(QStringList(title) << url);
|
|
}
|
|
docFile.close();
|
|
|
|
if (documentList.isEmpty()) {
|
|
cleanupIndex(entryTable);
|
|
return false;
|
|
}
|
|
|
|
indexTable.insert(indexFile, Index(entryTable, documentList));
|
|
return true;
|
|
}
|
|
|
|
bool Reader::initCheck() const
|
|
{
|
|
return !searchIndexTable.isEmpty();
|
|
}
|
|
|
|
void Reader::setIndexPath(const QString &path)
|
|
{
|
|
indexPath = path;
|
|
}
|
|
|
|
void Reader::filterFilesForAttributes(const QStringList &attributes)
|
|
{
|
|
searchIndexTable.clear();
|
|
for(IndexTable::ConstIterator it = indexTable.constBegin(); it != indexTable.constEnd(); ++it) {
|
|
const QString fileName = it.key();
|
|
bool containsAll = true;
|
|
QStringList split = fileName.split(QLatin1String("@"));
|
|
foreach (const QString &attribute, attributes) {
|
|
if (!split.contains(attribute, Qt::CaseInsensitive)) {
|
|
containsAll = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (containsAll)
|
|
searchIndexTable.insert(fileName, it.value());
|
|
}
|
|
}
|
|
|
|
void Reader::setIndexFile(const QString &namespaceName, const QString &attributes)
|
|
{
|
|
QString extension = namespaceName + QLatin1String("@") + attributes;
|
|
indexFile = indexPath + QLatin1String("/indexdb40.") + extension;
|
|
documentFile = indexPath + QLatin1String("/indexdoc40.") + extension;
|
|
}
|
|
|
|
bool Reader::splitSearchTerm(const QString &searchTerm, QStringList *terms,
|
|
QStringList *termSeq, QStringList *seqWords)
|
|
{
|
|
QString term = searchTerm;
|
|
|
|
term = term.simplified();
|
|
term = term.replace(QLatin1String("\'"), QLatin1String("\""));
|
|
term = term.replace(QLatin1String("`"), QLatin1String("\""));
|
|
term = term.replace(QLatin1String("-"), QLatin1String(" "));
|
|
term = term.replace(QRegExp(QLatin1String("\\s[\\S]?\\s")), QLatin1String(" "));
|
|
|
|
*terms = term.split(QLatin1Char(' '));
|
|
QStringList::iterator it = terms->begin();
|
|
for (; it != terms->end(); ++it) {
|
|
(*it) = (*it).simplified();
|
|
(*it) = (*it).toLower();
|
|
(*it) = (*it).replace(QLatin1String("\""), QLatin1String(""));
|
|
}
|
|
|
|
if (term.contains(QLatin1Char('\"'))) {
|
|
if ((term.count(QLatin1Char('\"')))%2 == 0) {
|
|
int beg = 0;
|
|
int end = 0;
|
|
QString s;
|
|
beg = term.indexOf(QLatin1Char('\"'), beg);
|
|
while (beg != -1) {
|
|
beg++;
|
|
end = term.indexOf(QLatin1Char('\"'), beg);
|
|
s = term.mid(beg, end - beg);
|
|
s = s.toLower();
|
|
s = s.simplified();
|
|
if (s.contains(QLatin1Char('*'))) {
|
|
qWarning("Full Text Search, using a wildcard within phrases is not allowed.");
|
|
return false;
|
|
}
|
|
*seqWords += s.split(QLatin1Char(' '));
|
|
*termSeq << s;
|
|
beg = term.indexOf(QLatin1Char('\"'), end + 1);
|
|
}
|
|
} else {
|
|
qWarning("Full Text Search, the closing quotation mark is missing.");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void Reader::searchInIndex(const QStringList &terms)
|
|
{
|
|
foreach (const QString &term, terms) {
|
|
QVector<Document> documents;
|
|
|
|
for(IndexTable::ConstIterator it = searchIndexTable.constBegin();
|
|
it != searchIndexTable.constEnd(); ++it) {
|
|
EntryTable entryTable = it.value().first;
|
|
DocumentList documentList = it.value().second;
|
|
|
|
if (term.contains(QLatin1Char('*')))
|
|
documents = setupDummyTerm(getWildcardTerms(term, entryTable), entryTable);
|
|
else if (entryTable.value(term))
|
|
documents = entryTable.value(term)->documents;
|
|
else
|
|
continue;
|
|
|
|
if (!documents.isEmpty()) {
|
|
DocumentInfo info;
|
|
QString title, url;
|
|
QVector<DocumentInfo> documentsInfo;
|
|
foreach(const Document &doc, documents) {
|
|
info.docNumber = doc.docNumber;
|
|
info.frequency = doc.frequency;
|
|
info.documentUrl = documentList.at(doc.docNumber).at(1);
|
|
info.documentTitle = documentList.at(doc.docNumber).at(0);
|
|
documentsInfo.append(info);
|
|
}
|
|
|
|
bool found = false;
|
|
for(QList<TermInfo>::Iterator tit = termList.begin();
|
|
tit != termList.end(); ++tit) {
|
|
TermInfo *t = &(*tit);
|
|
if(t->term == term) {
|
|
t->documents += documentsInfo;
|
|
t->frequency += documentsInfo.count();
|
|
found = true; break;
|
|
}
|
|
}
|
|
if (!found)
|
|
termList.append(TermInfo(term, documentsInfo.count(), documentsInfo));
|
|
}
|
|
}
|
|
}
|
|
qSort(termList);
|
|
}
|
|
|
|
// Intersects the per-term results collected by searchInIndex().
//
// Starts from the documents of the first term (which is removed from
// termList) and keeps only documents whose number also occurs in every
// remaining term, adding up the frequencies. The result is sorted with qSort.
QVector<DocumentInfo> Reader::hits()
{
    if (termList.isEmpty())
        return QVector<DocumentInfo>();

    QVector<DocumentInfo> result = termList.takeFirst().documents;
    for (int termIdx = 0; termIdx < termList.count(); ++termIdx) {
        const QVector<DocumentInfo> &candidates = termList.at(termIdx).documents;

        int pos = 0;
        while (pos < result.count()) {
            DocumentInfo &current = result[pos];

            // First candidate with the same document number, if any.
            int match = -1;
            for (int c = 0; c < candidates.count(); ++c) {
                if (current.docNumber == candidates.at(c).docNumber) {
                    match = c;
                    break;
                }
            }

            if (match == -1) {
                // Not present for this term: drop it, 'pos' now names the
                // next element.
                result.remove(pos);
                continue;
            }
            current.frequency += candidates.at(match).frequency;
            ++pos;
        }
    }

    qSort(result);
    return result;
}
|
|
|
|
bool Reader::searchForPattern(const QStringList &patterns, const QStringList &words,
|
|
const QByteArray &data)
|
|
{
|
|
if (data.isEmpty())
|
|
return false;
|
|
|
|
for(QHash<QString, PosEntry*>::ConstIterator mit =
|
|
miniIndex.constBegin(); mit != miniIndex.constEnd(); ++mit) {
|
|
delete mit.value();
|
|
}
|
|
miniIndex.clear();
|
|
|
|
wordNum = 3;
|
|
QStringList::ConstIterator cIt = words.begin();
|
|
for ( ; cIt != words.end(); ++cIt )
|
|
miniIndex.insert(*cIt, new PosEntry(0));
|
|
|
|
QTextStream s(data);
|
|
QString text = s.readAll();
|
|
bool valid = true;
|
|
const QChar *buf = text.unicode();
|
|
QChar str[64];
|
|
QChar c = buf[0];
|
|
int j = 0;
|
|
int i = 0;
|
|
while ( j < text.length() ) {
|
|
if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
|
|
valid = false;
|
|
if ( i > 1 )
|
|
buildMiniIndex( QString(str,i) );
|
|
i = 0;
|
|
c = buf[++j];
|
|
continue;
|
|
}
|
|
if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
|
|
valid = true;
|
|
c = buf[++j];
|
|
continue;
|
|
}
|
|
if ( !valid ) {
|
|
c = buf[++j];
|
|
continue;
|
|
}
|
|
if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
|
|
str[i] = c.toLower();
|
|
++i;
|
|
} else {
|
|
if ( i > 1 )
|
|
buildMiniIndex( QString(str,i) );
|
|
i = 0;
|
|
}
|
|
c = buf[++j];
|
|
}
|
|
if ( i > 1 )
|
|
buildMiniIndex( QString(str,i) );
|
|
|
|
QStringList::ConstIterator patIt = patterns.begin();
|
|
QStringList wordLst;
|
|
QList<uint> a, b;
|
|
QList<uint>::iterator aIt;
|
|
for ( ; patIt != patterns.end(); ++patIt ) {
|
|
wordLst = (*patIt).split(QLatin1Char(' '));
|
|
a = miniIndex[ wordLst[0] ]->positions;
|
|
for ( int j = 1; j < (int)wordLst.count(); ++j ) {
|
|
b = miniIndex[ wordLst[j] ]->positions;
|
|
aIt = a.begin();
|
|
while ( aIt != a.end() ) {
|
|
if ( b.contains( *aIt + 1 )) {
|
|
(*aIt)++;
|
|
++aIt;
|
|
} else {
|
|
aIt = a.erase( aIt );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if ( a.count() )
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
// Builds the union of the documents of all given terms that are present in
// entryTable. The terms are ordered with qSort, the document list of the last
// one is taken as the base, and documents of the other terms are appended if
// they are not already contained. Returns an empty vector if no term matches.
QVector<Document> Reader::setupDummyTerm(const QStringList &terms,
                                         const EntryTable &entryTable)
{
    QList<Term> matchedTerms;
    foreach (const QString &word, terms) {
        Entry *entry = entryTable.value(word);
        if (entry)
            matchedTerms.append(Term(word, entry->documents.count(), entry->documents));
    }

    if (matchedTerms.isEmpty())
        return QVector<Document>(0);

    qSort(matchedTerms);

    QVector<Document> merged = matchedTerms.takeLast().documents;
    for (int termIdx = 0; termIdx < matchedTerms.count(); ++termIdx) {
        const QVector<Document> &docs = matchedTerms.at(termIdx).documents;
        for (int docIdx = 0; docIdx < docs.count(); ++docIdx) {
            const Document &doc = docs.at(docIdx);
            if (!merged.contains(doc))
                merged.append(doc);
        }
    }
    return merged;
}
|
|
|
|
// Returns every key of entryTable that matches the wildcard expression 'term'.
//
// The expression is cut into literal pieces and "*" markers with split().
// A key matches if the pieces occur in order, the first item (when it is a
// literal) agrees with the key's first character, and a trailing literal sits
// at the very end of the key.
QStringList Reader::getWildcardTerms(const QString &term,
                                     const EntryTable &entryTable)
{
    QStringList matches;
    QStringList pieces = split(term);

    for (EntryTable::ConstIterator entry = entryTable.begin();
         entry != entryTable.end(); ++entry) {
        QString text( entry.key() );
        bool found = false;
        int index = 0;      // where the search for the next piece starts

        QStringList::Iterator iter = pieces.begin();
        while ( iter != pieces.end() ) {
            if ( *iter == QLatin1String("*") ) {
                found = true;
                ++iter;
                continue;
            }

            // A literal piece: assume a mismatch until it has been located.
            found = false;

            if ( iter == pieces.begin() && (*iter)[0] != text[0] )
                break;

            index = text.indexOf( *iter, index );
            if ( *iter == pieces.last() && index != text.length() - 1 ) {
                // The final literal has to be the tail of the key.
                index = text.lastIndexOf( *iter );
                if ( index != text.length() - (*iter).length() )
                    break;
            }

            if ( index == -1 )
                break;

            found = true;
            index += (*iter).length();
            ++iter;
        }

        if (found)
            matches << text;
    }

    return matches;
}
|
|
|
|
void Reader::buildMiniIndex(const QString &string)
|
|
{
|
|
if (miniIndex[string])
|
|
miniIndex[string]->positions.append(wordNum);
|
|
++wordNum;
|
|
}
|
|
|
|
void Reader::reset()
|
|
{
|
|
for(IndexTable::Iterator it = indexTable.begin();
|
|
it != indexTable.end(); ++it) {
|
|
cleanupIndex(it.value().first);
|
|
it.value().second.clear();
|
|
}
|
|
}
|
|
|
|
// Deletes every Entry stored in entryTable and empties the table.
void Reader::cleanupIndex(EntryTable &entryTable)
{
    EntryTable::ConstIterator entry = entryTable.constBegin();
    const EntryTable::ConstIterator last = entryTable.constEnd();
    while (entry != last) {
        delete entry.value();
        ++entry;
    }

    entryTable.clear();
}
|
|
|
|
|
|
// Constructs the default index reader; no setup beyond the base class
// constructor is performed here.
QHelpSearchIndexReaderDefault::QHelpSearchIndexReaderDefault()
    : QHelpSearchIndexReader()
{
}
|
|
|
|
// Destroys the reader; there is no explicit cleanup in this destructor.
QHelpSearchIndexReaderDefault::~QHelpSearchIndexReaderDefault()
{
}
|
|
|
|
void QHelpSearchIndexReaderDefault::run()
|
|
{
|
|
mutex.lock();
|
|
|
|
if (m_cancel) {
|
|
mutex.unlock();
|
|
return;
|
|
}
|
|
|
|
const QList<QHelpSearchQuery> &queryList = this->m_query;
|
|
const QLatin1String key("DefaultSearchNamespaces");
|
|
const QString collectionFile(this->m_collectionFile);
|
|
const QString indexPath = m_indexFilesFolder;
|
|
|
|
mutex.unlock();
|
|
|
|
QString queryTerm;
|
|
foreach (const QHelpSearchQuery &query, queryList) {
|
|
if (query.fieldName == QHelpSearchQuery::DEFAULT) {
|
|
queryTerm = query.wordList.at(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (queryTerm.isEmpty())
|
|
return;
|
|
|
|
QHelpEngineCore engine(collectionFile, 0);
|
|
if (!engine.setupData())
|
|
return;
|
|
|
|
const QStringList registeredDocs = engine.registeredDocumentations();
|
|
const QStringList indexedNamespaces = engine.customValue(key).toString().
|
|
split(QLatin1String("|"), QString::SkipEmptyParts);
|
|
|
|
emit searchingStarted();
|
|
|
|
// setup the reader
|
|
m_reader.setIndexPath(indexPath);
|
|
foreach(const QString &namespaceName, registeredDocs) {
|
|
mutex.lock();
|
|
if (m_cancel) {
|
|
mutex.unlock();
|
|
searchingFinished(0); // TODO: check this ???
|
|
return;
|
|
}
|
|
mutex.unlock();
|
|
|
|
const QList<QStringList> attributeSets =
|
|
engine.filterAttributeSets(namespaceName);
|
|
|
|
foreach (const QStringList &attributes, attributeSets) {
|
|
// read all index files
|
|
m_reader.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
|
|
if (!m_reader.readIndex()) {
|
|
qWarning("Full Text Search, could not read file for namespace: %s.",
|
|
namespaceName.toUtf8().constData());
|
|
}
|
|
}
|
|
}
|
|
|
|
// get the current filter attributes and minimize the index files table
|
|
m_reader.filterFilesForAttributes(engine.filterAttributes(engine.currentFilter()));
|
|
|
|
hitList.clear();
|
|
QStringList terms, termSeq, seqWords;
|
|
if (m_reader.initCheck() && // check if we could read anything
|
|
m_reader.splitSearchTerm(queryTerm, &terms, &termSeq, &seqWords) ) {
|
|
|
|
// search for term(s)
|
|
m_reader.searchInIndex(terms); // TODO: should this be interruptible as well ???
|
|
|
|
QVector<DocumentInfo> hits = m_reader.hits();
|
|
if (!hits.isEmpty()) {
|
|
if (termSeq.isEmpty()) {
|
|
foreach (const DocumentInfo &docInfo, hits) {
|
|
mutex.lock();
|
|
if (m_cancel) {
|
|
mutex.unlock();
|
|
searchingFinished(0); // TODO: check this, speed issue while locking???
|
|
return;
|
|
}
|
|
mutex.unlock();
|
|
hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl));
|
|
}
|
|
} else {
|
|
foreach (const DocumentInfo &docInfo, hits) {
|
|
mutex.lock();
|
|
if (m_cancel) {
|
|
mutex.unlock();
|
|
searchingFinished(0); // TODO: check this, speed issue while locking???
|
|
return;
|
|
}
|
|
mutex.unlock();
|
|
|
|
if (m_reader.searchForPattern(termSeq, seqWords, engine.fileData(docInfo.documentUrl))) // TODO: should this be interruptible as well ???
|
|
hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
emit searchingFinished(hitList.count());
|
|
}
|
|
|
|
} // namespace std
|
|
} // namespace fulltextsearch
|
|
|
|
QT_END_NAMESPACE
|
|
|
|
#include "moc_qhelpsearchindexreader_default_p.h"
|
|
|