kio: implement PDF meta information extractor via Poppler

Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
Ivailo Monev 2022-04-17 04:05:30 +03:00
parent 187385a291
commit 05b34e20b5
11 changed files with 287 additions and 9 deletions

View file

@ -231,6 +231,14 @@ set_package_properties(EPub PROPERTIES
PURPOSE "eBook metadata extraction"
)
# Optional dependency: Poppler, looked up through the project's FindPoppler
# module and described here for the feature summary.
macro_optional_find_package(Poppler)
set_package_properties(
    Poppler PROPERTIES
    DESCRIPTION "PDF rendering library"
    URL "https://poppler.freedesktop.org/"
    TYPE RECOMMENDED
    PURPOSE "PDF metadata extraction"
)
macro_optional_find_package(OpenSSL)
set_package_properties(OpenSSL PROPERTIES
DESCRIPTION "Robust, commercial-grade, full-featured toolkit for general-purpose cryptography and secure communication"

View file

@ -24,7 +24,7 @@ build_script:
libexiv2-dev libcdio-dev libssl-dev libcurl4-openssl-dev \
libdbusmenu-katie libavcodec-dev libavutil-dev libavformat-dev \
libtag1-dev media-player-info shared-mime-info media-player-info \
libepub-dev xdg-utils ccache
libepub-dev libpoppler-cpp-dev xdg-utils ccache
export PATH="/usr/lib/ccache/:$PATH"

View file

@ -54,6 +54,7 @@ set(cmakeFiles
FindMtp.cmake
FindOpenEXR.cmake
FindPCIUTILS.cmake
FindPoppler.cmake
FindPopplerQt4.cmake
FindQalculate.cmake
FindRAW1394.cmake

View file

@ -0,0 +1,40 @@
# Try to find Poppler library, once done this will define:
#
# POPPLER_FOUND - system has Poppler
# POPPLER_INCLUDE_DIR - the Poppler include directory
# POPPLER_LIBRARIES - the libraries needed to use Poppler
#
# Copyright (c) 2020 Ivailo Monev <xakepa10@gmail.com>
#
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
# Ask pkg-config (when available) about poppler-cpp. Its answers are used only
# as search hints and as the source of the version string: the raw
# PC_POPPLER_LIBRARIES value is a bare library name without its directory, and
# PC_POPPLER_INCLUDE_DIRS typically points inside poppler/cpp, which does not
# match the <poppler/cpp/poppler-document.h> include form searched for below.
if(NOT WIN32)
    find_package(PkgConfig QUIET)
    if(PKG_CONFIG_FOUND)
        pkg_check_modules(PC_POPPLER QUIET poppler-cpp)
    endif()
endif()

# Stays empty when pkg-config is unavailable or does not know poppler-cpp.
set(POPPLER_VERSION ${PC_POPPLER_VERSION})

# Directory that contains the "poppler/cpp/" header tree.
find_path(POPPLER_INCLUDE_DIR
    NAMES poppler/cpp/poppler-document.h
    HINTS
        ${PC_POPPLER_INCLUDEDIR}
        $ENV{POPPLERDIR}/include
)

# Full path to the poppler-cpp library, so linking also works when Poppler is
# installed outside of the default linker search path.
find_library(POPPLER_LIBRARIES
    NAMES poppler-cpp
    HINTS
        ${PC_POPPLER_LIBDIR}
        ${PC_POPPLER_LIBRARY_DIRS}
        $ENV{POPPLERDIR}/lib
)

# Sets POPPLER_FOUND and reports the result (including the version, if known).
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Poppler
    VERSION_VAR POPPLER_VERSION
    REQUIRED_VARS POPPLER_LIBRARIES POPPLER_INCLUDE_DIR
)

mark_as_advanced(POPPLER_INCLUDE_DIR POPPLER_LIBRARIES)

View file

@ -81,3 +81,25 @@ if (EPUB_FOUND)
DESTINATION ${KDE4_SERVICES_INSTALL_DIR}
)
endif()
# PDF metadata extractor plugin; only configured when Poppler was found.
if(POPPLER_FOUND)
    include_directories(${POPPLER_INCLUDE_DIR})

    set(kfilemetadata_poppler_SRCS
        kfilemetadata_poppler.cpp
    )
    kde4_add_plugin(kfilemetadata_poppler ${kfilemetadata_poppler_SRCS})
    target_link_libraries(kfilemetadata_poppler ${KDE4_KIO_LIBS} ${POPPLER_LIBRARIES})

    # The plugin module and its service description are installed separately.
    install(TARGETS kfilemetadata_poppler DESTINATION ${KDE4_PLUGIN_INSTALL_DIR})
    install(FILES kfilemetadata_poppler.desktop DESTINATION ${KDE4_SERVICES_INSTALL_DIR})
endif()

View file

@ -57,11 +57,11 @@ QStringList KFileMetaDataEPubPlugin::keys() const
{
static const QStringList result = QStringList()
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#uniqueFileIdentifier")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#contributor")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#description")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#copyright");

View file

@ -46,19 +46,19 @@ QStringList KFileMetaDataFFmpegPlugin::keys() const
{
static const QStringList result = QStringList()
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#duration")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#artist")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#composer")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#copyright")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#composer")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#encoder")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#encodedBy")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#genre")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#performer")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#trackNumber")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#averageBitrate")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#videoCodec")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#audioCodec")

View file

@ -0,0 +1,163 @@
/* This file is part of the KDE libraries
Copyright (C) 2022 Ivailo Monev <xakepa10@gmail.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2, as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "kfilemetadata_poppler.h"
#include "kpluginfactory.h"
#include "kglobal.h"
#include "klocale.h"
#include "kdatetime.h"
#include "kdebug.h"
#include <QDateTime>
#include <poppler/cpp/poppler-document.h>
// Converts a Poppler string to a QString by going through its UTF-8 encoding.
static QString getString(const poppler::ustring &popplerstring)
{
    const poppler::byte_array utf8 = popplerstring.to_utf8();
    const char *bytes = utf8.data();
    const int length = utf8.size();
    return QString::fromUtf8(bytes, length);
}
// Formats a Poppler timestamp with the global locale (fancy long date format).
// Returns a null QString when Poppler reports that no date is available, so
// that callers can skip the entry with an isEmpty() check.
static QString getTime(const poppler::time_type &popplertime)
{
    // poppler-cpp signals a missing or unparsable date with time_type(-1);
    // formatting that sentinel would produce a bogus date
    if (popplertime == poppler::time_type(-1)) {
        return QString();
    }
    const KDateTime kdatetime(QDateTime::fromTime_t(popplertime));
    return KGlobal::locale()->formatDateTime(kdatetime, KLocale::FancyLongDate);
}
// Plugin constructor: forwards the parent to the base class, the plugin
// arguments are not used.
KFileMetaDataPopplerPlugin::KFileMetaDataPopplerPlugin(QObject* parent, const QVariantList & /* args */)
    : KFileMetaDataPlugin(parent)
{
}

// The plugin holds no state of its own, nothing to clean up.
KFileMetaDataPopplerPlugin::~KFileMetaDataPopplerPlugin()
{
}
// Returns the ontology URIs of the entries this plugin can report. The list is
// built once on first use and shared by all later calls.
QStringList KFileMetaDataPopplerPlugin::keys() const
{
    static const QStringList result = []() {
        QStringList uris;
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#keyword"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#generator"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentLastModified"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator"));
        uris.append(QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#textWriter"));
        return uris;
    }();
    return result;
}
// Returns the MIME types handled by this plugin (the two PDF type names). The
// list is built once on first use and shared by all later calls.
QStringList KFileMetaDataPopplerPlugin::mimeTypes() const
{
    static const QStringList result = []() {
        QStringList types;
        types.append(QString::fromLatin1("application/pdf"));
        types.append(QString::fromLatin1("application/x-pdf"));
        return types;
    }();
    return result;
}
QList<KFileMetaInfoItem> KFileMetaDataPopplerPlugin::metaData(const KUrl &url, const KFileMetaInfo::WhatFlags flags)
{
Q_UNUSED(flags);
QList<KFileMetaInfoItem> result;
const QByteArray urlpath = url.toLocalFile().toLocal8Bit();
poppler::document *popplerdocument = poppler::document::load_from_file(std::string(urlpath.constData(), urlpath.size()));
if (!popplerdocument) {
kWarning() << "Could not open" << urlpath;
return result;
}
const QString popplertitle = getString(popplerdocument->get_title());
if (!popplertitle.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title"),
popplertitle
)
);
}
const QString popplerauthor = getString(popplerdocument->get_author());
if (!popplerauthor.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#textWriter"),
popplerauthor
)
);
}
const QString popplersubject = getString(popplerdocument->get_subject());
if (!popplersubject.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject"),
popplersubject
)
);
}
const QString popplerkeywords = getString(popplerdocument->get_keywords());
if (!popplerkeywords.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#keyword"),
popplerkeywords
)
);
}
const QString popplercreator = getString(popplerdocument->get_creator());
if (!popplercreator.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator"),
popplercreator
)
);
}
const QString popplerproducer = getString(popplerdocument->get_producer());
if (!popplerproducer.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#generator"),
popplerproducer
)
);
}
const QString popplercreationdata = getTime(popplerdocument->get_creation_date());
if (!popplercreationdata.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated"),
popplercreationdata
)
);
}
const QString popplermodificationdate = getTime(popplerdocument->get_modification_date());
if (!popplermodificationdate.isEmpty()) {
result.append(
KFileMetaInfoItem(
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentLastModified"),
popplermodificationdate
)
);
}
delete popplerdocument;
return result;
}
// Declare the plugin factory and register KFileMetaDataPopplerPlugin with it.
K_PLUGIN_FACTORY(KFileMetaDataPopplerPluginFactory, registerPlugin<KFileMetaDataPopplerPlugin>();)
// Export the factory; the name passed here is the same as the plugin target
// name and the X-KDE-Library entry of the accompanying .desktop file.
K_EXPORT_PLUGIN(KFileMetaDataPopplerPluginFactory("kfilemetadata_poppler"))
// NOTE(review): presumably the moc output for the Q_OBJECT class declared in
// kfilemetadata_poppler.h - confirm against the build setup.
#include "moc_kfilemetadata_poppler.cpp"

View file

@ -0,0 +1,7 @@
# Service description of the Poppler based PDF metadata extractor plugin.
[Desktop Entry]
Type=Service
Name=KFileMetaDataPopplerPlugin
GenericName=Poppler metadata extractor
# Same name as the plugin target built from kfilemetadata_poppler.cpp.
X-KDE-Library=kfilemetadata_poppler
X-KDE-ServiceTypes=KFileMetaData/Plugin
# NOTE(review): presumably used to rank this plugin against other plugins
# that handle the same MIME types - confirm against the plugin loader.
InitialPreference=1

View file

@ -0,0 +1,37 @@
/* This file is part of the KDE libraries
Copyright (C) 2022 Ivailo Monev <xakepa10@gmail.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2, as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef KFILEMETADATA_POPPLER_H
#define KFILEMETADATA_POPPLER_H
#include "kfilemetadata.h"
// KFileMetaDataPlugin implementation that extracts document information
// (title, author, subject, keywords, creator, producer and the creation and
// modification dates) from PDF files using the Poppler C++ frontend.
class KFileMetaDataPopplerPlugin : public KFileMetaDataPlugin
{
Q_OBJECT
public:
// Constructed with a parent object and plugin arguments; the implementation
// ignores the arguments.
KFileMetaDataPopplerPlugin(QObject* parent, const QVariantList &args);
~KFileMetaDataPopplerPlugin();
// Ontology URIs of the entries metaData() may report.
QStringList keys() const final;
// MIME types handled by the plugin: application/pdf and application/x-pdf.
QStringList mimeTypes() const final;
// Loads the local file behind `url` and returns one item per non-empty
// document property; returns an empty list if the file cannot be loaded.
// The implementation ignores `flags`.
QList<KFileMetaInfoItem> metaData(const KUrl &url, const KFileMetaInfo::WhatFlags flags) final;
};
#endif // KFILEMETADATA_POPPLER_H

View file

@ -39,12 +39,12 @@ QStringList KFileMetaDataTagLibPlugin::keys() const
{
static const QStringList result = QStringList()
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#artist")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#genre")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#originalReleaseYear")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#trackNumber")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#originalReleaseYear")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#duration")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#averageBitrate")
<< QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#sampleRate")