From 05b34e20b5ea23f66f3bc977b8c69d77d4d2f5e1 Mon Sep 17 00:00:00 2001 From: Ivailo Monev Date: Sun, 17 Apr 2022 04:05:30 +0300 Subject: [PATCH] kio: implement PDF meta information extractor via Poppler Signed-off-by: Ivailo Monev --- CMakeLists.txt | 8 + appveyor.yml | 2 +- cmake/modules/CMakeLists.txt | 1 + cmake/modules/FindPoppler.cmake | 40 +++++ kio/metadata/CMakeLists.txt | 22 +++ kio/metadata/kfilemetadata_epub.cpp | 4 +- kio/metadata/kfilemetadata_ffmpeg.cpp | 8 +- kio/metadata/kfilemetadata_poppler.cpp | 163 +++++++++++++++++++++ kio/metadata/kfilemetadata_poppler.desktop | 7 + kio/metadata/kfilemetadata_poppler.h | 37 +++++ kio/metadata/kfilemetadata_taglib.cpp | 4 +- 11 files changed, 287 insertions(+), 9 deletions(-) create mode 100644 cmake/modules/FindPoppler.cmake create mode 100644 kio/metadata/kfilemetadata_poppler.cpp create mode 100644 kio/metadata/kfilemetadata_poppler.desktop create mode 100644 kio/metadata/kfilemetadata_poppler.h diff --git a/CMakeLists.txt b/CMakeLists.txt index af38ffe6..30fa1616 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,6 +231,14 @@ set_package_properties(EPub PROPERTIES PURPOSE "eBook metadata extraction" ) +macro_optional_find_package(Poppler) +set_package_properties(Poppler PROPERTIES + DESCRIPTION "PDF rendering library" + URL "https://poppler.freedesktop.org/" + TYPE RECOMMENDED + PURPOSE "PDF metadata extraction" +) + macro_optional_find_package(OpenSSL) set_package_properties(OpenSSL PROPERTIES DESCRIPTION "Robust, commercial-grade, full-featured toolkit for general-purpose cryptography and secure communication" diff --git a/appveyor.yml b/appveyor.yml index 7aab983f..296c76a8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,7 +24,7 @@ build_script: libexiv2-dev libcdio-dev libssl-dev libcurl4-openssl-dev \ libdbusmenu-katie libavcodec-dev libavutil-dev libavformat-dev \ libtag1-dev media-player-info shared-mime-info media-player-info \ - libepub-dev xdg-utils ccache + libepub-dev 
libpoppler-cpp-dev xdg-utils ccache export PATH="/usr/lib/ccache/:$PATH" diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index f3eb52ce..6ae85045 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -54,6 +54,7 @@ set(cmakeFiles FindMtp.cmake FindOpenEXR.cmake FindPCIUTILS.cmake + FindPoppler.cmake FindPopplerQt4.cmake FindQalculate.cmake FindRAW1394.cmake diff --git a/cmake/modules/FindPoppler.cmake b/cmake/modules/FindPoppler.cmake new file mode 100644 index 00000000..11c6d989 --- /dev/null +++ b/cmake/modules/FindPoppler.cmake @@ -0,0 +1,40 @@ +# Try to find Poppler library, once done this will define: +# +# POPPLER_FOUND - system has Poppler +# POPPLER_INCLUDE_DIR - the Poppler include directory +# POPPLER_LIBRARIES - the libraries needed to use Poppler +# +# Copyright (c) 2020 Ivailo Monev +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ +if(NOT WIN32) + include(FindPkgConfig) + pkg_check_modules(PC_POPPLER QUIET poppler-cpp) + + set(POPPLER_INCLUDE_DIR ${PC_POPPLER_INCLUDE_DIRS}) + set(POPPLER_LIBRARIES ${PC_POPPLER_LIBRARIES}) +endif() + +set(POPPLER_VERSION ${PC_POPPLER_VERSION}) + +if(NOT POPPLER_INCLUDE_DIR OR NOT POPPLER_LIBRARIES) + find_path(POPPLER_INCLUDE_DIR + NAMES poppler/cpp/poppler-document.h + HINTS $ENV{POPPLERDIR}/include + ) + + find_library(POPPLER_LIBRARIES + NAMES poppler-cpp + HINTS $ENV{POPPLERDIR}/lib + ) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Poppler + VERSION_VAR POPPLER_VERSION + REQUIRED_VARS POPPLER_LIBRARIES POPPLER_INCLUDE_DIR +) + +mark_as_advanced(POPPLER_INCLUDE_DIR POPPLER_LIBRARIES) diff --git a/kio/metadata/CMakeLists.txt b/kio/metadata/CMakeLists.txt index 1a89c9c8..7639eeb7 100644 --- a/kio/metadata/CMakeLists.txt +++ b/kio/metadata/CMakeLists.txt @@ -81,3 +81,25 @@ if (EPUB_FOUND) DESTINATION ${KDE4_SERVICES_INSTALL_DIR} ) endif() + +if (POPPLER_FOUND) + include_directories(${POPPLER_INCLUDE_DIR}) + + set(kfilemetadata_poppler_SRCS kfilemetadata_poppler.cpp) + + kde4_add_plugin(kfilemetadata_poppler ${kfilemetadata_poppler_SRCS}) + target_link_libraries(kfilemetadata_poppler + ${KDE4_KIO_LIBS} + ${POPPLER_LIBRARIES} + ) + + install( + TARGETS kfilemetadata_poppler + DESTINATION ${KDE4_PLUGIN_INSTALL_DIR} + ) + + install( + FILES kfilemetadata_poppler.desktop + DESTINATION ${KDE4_SERVICES_INSTALL_DIR} + ) +endif() diff --git a/kio/metadata/kfilemetadata_epub.cpp b/kio/metadata/kfilemetadata_epub.cpp index cfd9af49..27207b72 100644 --- a/kio/metadata/kfilemetadata_epub.cpp +++ b/kio/metadata/kfilemetadata_epub.cpp @@ -57,11 +57,11 @@ QStringList KFileMetaDataEPubPlugin::keys() const { static const QStringList result = QStringList() << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#uniqueFileIdentifier") - << 
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#contributor") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#description") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#copyright"); diff --git a/kio/metadata/kfilemetadata_ffmpeg.cpp b/kio/metadata/kfilemetadata_ffmpeg.cpp index 9e722d96..f7d3cdd6 100644 --- a/kio/metadata/kfilemetadata_ffmpeg.cpp +++ b/kio/metadata/kfilemetadata_ffmpeg.cpp @@ -46,19 +46,19 @@ QStringList KFileMetaDataFFmpegPlugin::keys() const { static const QStringList result = QStringList() << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#duration") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#artist") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#composer") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#copyright") << 
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#composer") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#encoder") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#encodedBy") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#genre") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#performer") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#trackNumber") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#publisher") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#averageBitrate") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#videoCodec") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#audioCodec") diff --git a/kio/metadata/kfilemetadata_poppler.cpp b/kio/metadata/kfilemetadata_poppler.cpp new file mode 100644 index 00000000..0f196c19 --- /dev/null +++ b/kio/metadata/kfilemetadata_poppler.cpp @@ -0,0 +1,163 @@ +/* This file is part of the KDE libraries + Copyright (C) 2022 Ivailo Monev + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License version 2, as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#include "kfilemetadata_poppler.h" +#include "kpluginfactory.h" +#include "kglobal.h" +#include "klocale.h" +#include "kdatetime.h" +#include "kdebug.h" + +#include <QDateTime> + +#include <poppler/cpp/poppler-document.h> + +static QString getString(const poppler::ustring &popplerstring) +{ + const poppler::byte_array popplerbytes = popplerstring.to_utf8(); + return QString::fromUtf8(popplerbytes.data(), popplerbytes.size()); +} + +static QString getTime(const poppler::time_type &popplertime) +{ + const KDateTime kdatetime(QDateTime::fromTime_t(popplertime)); + return KGlobal::locale()->formatDateTime(kdatetime, KLocale::FancyLongDate); +} + +KFileMetaDataPopplerPlugin::KFileMetaDataPopplerPlugin(QObject* parent, const QVariantList &args) + : KFileMetaDataPlugin(parent) +{ + Q_UNUSED(args); +} + +KFileMetaDataPopplerPlugin::~KFileMetaDataPopplerPlugin() +{ +} + +QStringList KFileMetaDataPopplerPlugin::keys() const +{ + static const QStringList result = QStringList() + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#keyword") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#generator") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentLastModified") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator") + <<
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#textWriter"); + return result; +} + +QStringList KFileMetaDataPopplerPlugin::mimeTypes() const +{ + static const QStringList result = QStringList() + << QString::fromLatin1("application/pdf") + << QString::fromLatin1("application/x-pdf"); + return result; +} + +QList<KFileMetaInfoItem> KFileMetaDataPopplerPlugin::metaData(const KUrl &url, const KFileMetaInfo::WhatFlags flags) +{ + Q_UNUSED(flags); + QList<KFileMetaInfoItem> result; + const QByteArray urlpath = url.toLocalFile().toLocal8Bit(); + poppler::document *popplerdocument = poppler::document::load_from_file(std::string(urlpath.constData(), urlpath.size())); + if (!popplerdocument) { + kWarning() << "Could not open" << urlpath; + return result; + } + const QString popplertitle = getString(popplerdocument->get_title()); + if (!popplertitle.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title"), + popplertitle + ) + ); + } + const QString popplerauthor = getString(popplerdocument->get_author()); + if (!popplerauthor.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#textWriter"), + popplerauthor + ) + ); + } + const QString popplersubject = getString(popplerdocument->get_subject()); + if (!popplersubject.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject"), + popplersubject + ) + ); + } + const QString popplerkeywords = getString(popplerdocument->get_keywords()); + if (!popplerkeywords.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#keyword"), + popplerkeywords + ) + ); + } + const QString popplercreator = getString(popplerdocument->get_creator()); + if (!popplercreator.isEmpty()) { + result.append( + KFileMetaInfoItem( +
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nco#creator"), + popplercreator + ) + ); + } + const QString popplerproducer = getString(popplerdocument->get_producer()); + if (!popplerproducer.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#generator"), + popplerproducer + ) + ); + } + const QString popplercreationdata = getTime(popplerdocument->get_creation_date()); + if (!popplercreationdata.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentCreated"), + popplercreationdata + ) + ); + } + const QString popplermodificationdate = getTime(popplerdocument->get_modification_date()); + if (!popplermodificationdate.isEmpty()) { + result.append( + KFileMetaInfoItem( + QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#contentLastModified"), + popplermodificationdate + ) + ); + } + delete popplerdocument; + return result; +} + +K_PLUGIN_FACTORY(KFileMetaDataPopplerPluginFactory, registerPlugin<KFileMetaDataPopplerPlugin>();) +K_EXPORT_PLUGIN(KFileMetaDataPopplerPluginFactory("kfilemetadata_poppler")) + +#include "moc_kfilemetadata_poppler.cpp" diff --git a/kio/metadata/kfilemetadata_poppler.desktop b/kio/metadata/kfilemetadata_poppler.desktop new file mode 100644 index 00000000..aaa994e7 --- /dev/null +++ b/kio/metadata/kfilemetadata_poppler.desktop @@ -0,0 +1,7 @@ +[Desktop Entry] +Type=Service +Name=KFileMetaDataPopplerPlugin +GenericName=Poppler metadata extractor +X-KDE-Library=kfilemetadata_poppler +X-KDE-ServiceTypes=KFileMetaData/Plugin +InitialPreference=1 diff --git a/kio/metadata/kfilemetadata_poppler.h b/kio/metadata/kfilemetadata_poppler.h new file mode 100644 index 00000000..32389c12 --- /dev/null +++ b/kio/metadata/kfilemetadata_poppler.h @@ -0,0 +1,37 @@ +/* This file is part of the KDE libraries + Copyright (C) 2022 Ivailo Monev + + This library is free software;
you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License version 2, as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#ifndef KFILEMETADATA_POPPLER_H +#define KFILEMETADATA_POPPLER_H + +#include "kfilemetadata.h" + +class KFileMetaDataPopplerPlugin : public KFileMetaDataPlugin +{ + Q_OBJECT +public: + KFileMetaDataPopplerPlugin(QObject* parent, const QVariantList &args); + ~KFileMetaDataPopplerPlugin(); + + QStringList keys() const final; + QStringList mimeTypes() const final; + + QList<KFileMetaInfoItem> metaData(const KUrl &url, const KFileMetaInfo::WhatFlags flags) final; +}; + +#endif // KFILEMETADATA_POPPLER_H diff --git a/kio/metadata/kfilemetadata_taglib.cpp b/kio/metadata/kfilemetadata_taglib.cpp index 9b94d96e..2cad2d1d 100644 --- a/kio/metadata/kfilemetadata_taglib.cpp +++ b/kio/metadata/kfilemetadata_taglib.cpp @@ -39,12 +39,12 @@ QStringList KFileMetaDataTagLibPlugin::keys() const { static const QStringList result = QStringList() << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#artist") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#musicAlbum") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#comment") <<
QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#genre") - << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#originalReleaseYear") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2009/02/19/nmm#trackNumber") + << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/05/10/nid3#originalReleaseYear") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#duration") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#averageBitrate") << QString::fromLatin1("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#sampleRate")