reimplement javascriptcore regexp via std::regex

Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
Ivailo Monev 2022-02-28 05:08:23 +02:00
parent d64f08fdc2
commit 19c7089395
14 changed files with 78 additions and 81 deletions

View file

@ -29,7 +29,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update -qq
sudo apt-get install -qq libpcre3-dev libdeflate-dev libjansson-dev libc6-dev libpng-dev libcups2-dev libfreetype6-dev libfontconfig1-dev libdbus-1-dev libicu-dev xorg-dev
sudo apt-get install -qq libdeflate-dev libjansson-dev libc6-dev libpng-dev libcups2-dev libfreetype6-dev libfontconfig1-dev libdbus-1-dev libicu-dev xorg-dev
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL

View file

@ -186,15 +186,6 @@ set_package_properties(Jansson PROPERTIES
TYPE REQUIRED
)
# v7.7+ required for JavaScript compatibility
find_package(PCRE 7.7)
set_package_properties(PCRE PROPERTIES
PURPOSE "Required for regexp support in script component"
DESCRIPTION "Perl Compatible Regular Expressions"
URL "https://www.pcre.org/"
TYPE REQUIRED
)
find_package(X11)
set_package_properties(X11 PROPERTIES
PURPOSE "Required for X11/X.Org integration support"

4
README
View file

@ -12,7 +12,7 @@ There are several things you should be aware before considering Katie:
- some things have changed:
- QMake build system replaced with CMake
- Jansson, PCRE, Freetype, ICU, X11 and libpng are required for building
- Jansson, Freetype, ICU, X11 and libpng are required for building
- D-Bus and CUPS must be linked to during build
- QtUiTools is built as a shared library by default
- moc, uic, rcc, etc. are linked to components
@ -69,7 +69,7 @@ There are several things you should be aware before considering Katie:
- improved namespaces support and introduced QT_NAMESPACE_COMPAT
- improved cross-compilation support
- improved POSIX, XSI, SUSv2, etc. requirements build checks
- script component relies on PCRE instead of internal copy
- script component uses std::regex instead of internal PCRE copy
- tests and benchmarks can be run without prior installation
Bugs fixed in Katie, some of which in Qt5 too:

View file

@ -12,7 +12,7 @@ build_script:
sudo apt-get update -qq
sudo apt-get install -qq libpcre3-dev libdeflate-dev libc6-dev \
sudo apt-get install -qq libdeflate-dev libc6-dev \
libpng-dev libcups2-dev libfreetype6-dev libfontconfig1-dev \
libdbus-1-dev libicu-dev xorg-dev dbus-x11 libjansson-dev ccache \
fonts-freefont-ttf

View file

@ -1,32 +0,0 @@
# - Try to find the PCRE
# Once done this will define
#
# PCRE_FOUND - system has PCRE
# PCRE_INCLUDES - the PCRE include directory
# PCRE_LIBRARIES - the libraries needed to use PCRE
# PCRE_VERSION - the PCRE version, from pkg-config or parsed from pcre.h
# PCRE::PCRE - imported target carrying the library and include directory
#
# The PCREDIR environment variable, when set, is searched first.
#
# Copyright (C) 2016, Ivailo Monev, <xakepa10@gmail.com>
#
# Redistribution and use is allowed according to the terms of the BSD license.
include(FindPackageHandleStandardArgs)

# pkg-config is optional, it only provides search hints and the version
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
    pkg_check_modules(PC_PCRE QUIET libpcre)
endif()

# only hint at PCREDIR when it is set, an empty value would otherwise expand
# to the bogus "/include" and "/lib" hints
unset(pcre_env_include_hint)
unset(pcre_env_library_hint)
if(NOT "$ENV{PCREDIR}" STREQUAL "")
    set(pcre_env_include_hint "$ENV{PCREDIR}/include")
    set(pcre_env_library_hint "$ENV{PCREDIR}/lib")
endif()

find_path(PCRE_INCLUDES
    NAMES pcre.h
    HINTS ${pcre_env_include_hint} ${PC_PCRE_INCLUDEDIR}
)

find_library(PCRE_LIBRARIES
    NAMES pcre
    HINTS ${pcre_env_library_hint} ${PC_PCRE_LIBDIR}
)

# without pkg-config there is no version and any versioned find_package()
# request would be rejected, fall back to the macros defined in pcre.h
set(PCRE_VERSION "${PC_PCRE_VERSION}")
if(NOT PCRE_VERSION AND PCRE_INCLUDES AND EXISTS "${PCRE_INCLUDES}/pcre.h")
    file(STRINGS "${PCRE_INCLUDES}/pcre.h" pcre_version_defines
        REGEX "^#define[ \t]+PCRE_(MAJOR|MINOR)[ \t]+[0-9]+"
    )
    string(REGEX REPLACE ".*PCRE_MAJOR[ \t]+([0-9]+).*" "\\1"
        pcre_version_major "${pcre_version_defines}"
    )
    string(REGEX REPLACE ".*PCRE_MINOR[ \t]+([0-9]+).*" "\\1"
        pcre_version_minor "${pcre_version_defines}"
    )
    if(pcre_version_major MATCHES "^[0-9]+$" AND pcre_version_minor MATCHES "^[0-9]+$")
        set(PCRE_VERSION "${pcre_version_major}.${pcre_version_minor}")
    endif()
    unset(pcre_version_defines)
    unset(pcre_version_major)
    unset(pcre_version_minor)
endif()

find_package_handle_standard_args(PCRE
    VERSION_VAR PCRE_VERSION
    REQUIRED_VARS PCRE_LIBRARIES PCRE_INCLUDES
)

if(PCRE_FOUND AND NOT TARGET PCRE::PCRE)
    add_library(PCRE::PCRE UNKNOWN IMPORTED)
    set_target_properties(PCRE::PCRE PROPERTIES
        IMPORTED_LOCATION "${PCRE_LIBRARIES}"
        INTERFACE_INCLUDE_DIRECTORIES "${PCRE_INCLUDES}"
    )
endif()

mark_as_advanced(PCRE_INCLUDES PCRE_LIBRARIES)

View file

@ -7,8 +7,8 @@ Vcs-browser: https://github.com/fluxer/katie
Standards-Version: 4.12.0
Build-Depends: debhelper (>= 9~), libdeflate-dev,
libc6-dev, libjansson-dev, libpng-dev, libcups2-dev, libfreetype6-dev,
libfontconfig1-dev, libpcre3-dev, libdbus-1-dev, libicu-dev, cmake,
git, xserver-xorg-dev, libxinerama-dev, libxrandr-dev, libxrender-dev,
libfontconfig1-dev, libdbus-1-dev, libicu-dev, cmake, git,
xserver-xorg-dev, libxinerama-dev, libxrandr-dev, libxrender-dev,
libxcursor-dev, libsm-dev, unifdef | dpkg
Package: katie-runtime

View file

@ -7,7 +7,7 @@ Summary: C++ toolkit derived from the Qt 4.8 framework
License: BSD and LGPLv2+
URL: https://github.com/fluxer/katie
BuildRequires: gcc-c++ cmake libicu-devel libdeflate-devel jansson-devel libpng-devel freetype-devel pcre-devel libX11-devel libXinerama-devel libXrandr-devel libXrender-devel libXfixes-devel libXcursor-devel libSM-devel libICE-devel dbus-devel fontconfig-devel cups-devel unifdef
BuildRequires: gcc-c++ cmake libicu-devel libdeflate-devel jansson-devel libpng-devel freetype-devel libX11-devel libXinerama-devel libXrandr-devel libXrender-devel libXfixes-devel libXcursor-devel libSM-devel libICE-devel dbus-devel fontconfig-devel cups-devel unifdef
Requires: xdg-utils gnu-free-fonts
Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig

View file

@ -20,9 +20,9 @@ RUN_DEPENDS = xdg-open:devel/xdg-utils \
${LOCALBASE}/share/fonts/freefont-ttf/FreeSans.ttf:x11-fonts/freefont-ttf
LIB_DEPENDS = libdeflate.so:archivers/libdeflate libicuuc.so:devel/icu \
libicui18n.so:devel/icu libjansson.so:devel/jansson \
libpcre.so:devel/pcre libpng.so:graphics/png \
libfreetype.so:print/freetype2 libfontconfig.so:x11-fonts/fontconfig \
libdbus-1.so:devel/dbus libcups.so:print/cups
libpng.so:graphics/png libfreetype.so:print/freetype2 \
libfontconfig.so:x11-fonts/fontconfig libdbus-1.so:devel/dbus \
libcups.so:print/cups
CMAKE_ARGS = -DKATIE_TOOLS_SUFFIX="-katie" -Wno-dev
OPTIONS_DEFINE = NLS

View file

@ -28,7 +28,6 @@ BUILD_DEPENDS = unifdef-[0-9]*:../../devel/unifdef
.include "../../sysutils/desktop-file-utils/desktopdb.mk"
.include "../../textproc/jansson/buildlink3.mk"
.include "../../textproc/icu/buildlink3.mk"
.include "../../devel/pcre/buildlink3.mk"
.include "../../x11/libXinerama/buildlink3.mk"
.include "../../x11/libXcursor/buildlink3.mk"
.include "../../x11/libXext/buildlink3.mk"

View file

@ -21,10 +21,10 @@ COMPILER = base-clang ports-gcc
MODULES = devel/cmake
BUILD_DEPENDS = devel/gettext,-tools
RUN_DEPENDS = devel/desktop-file-utils devel/xdg-utils fonts/freefont-ttf
LIB_DEPENDS = textproc/icu4c devel/jansson devel/pcre \
graphics/png x11/dbus print/cups,-libs devel/gettext,-runtime
LIB_DEPENDS = textproc/icu4c devel/jansson graphics/png x11/dbus \
print/cups,-libs devel/gettext,-runtime
WANTLIB = ${COMPILER_LIBCXX} ICE SM X11 Xcursor Xext Xfixes Xinerama Xrandr \
Xrender fontconfig freetype icui18n icuuc pcre png \
Xrender fontconfig freetype icui18n icuuc png \
dbus-1 cups intl z c m
SEPARATE_BUILD = Yes
CONFIGURE_ARGS = -DKATIE_TOOLS_SUFFIX="-katie" -Wno-dev

View file

@ -27,14 +27,11 @@
#include <string.h>
#include <wtf/Assertions.h>
#include <pcre.h>
namespace JSC {
inline RegExp::RegExp(const UString& pattern)
: m_pattern(pattern)
, m_flagBits(0)
, m_constructionError(0)
, m_numSubpatterns(0)
{
compile();
@ -43,7 +40,6 @@ inline RegExp::RegExp(const UString& pattern)
inline RegExp::RegExp(const UString& pattern, const UString& flags)
: m_pattern(pattern)
, m_flagBits(0)
, m_constructionError(0)
, m_numSubpatterns(0)
{
// NOTE: The global flag is handled on a case-by-case basis by functions like
@ -63,7 +59,6 @@ inline RegExp::RegExp(const UString& pattern, const UString& flags)
break;
default:
m_constructionError = flagError;
m_regExp = 0;
return;
}
}
@ -73,7 +68,6 @@ inline RegExp::RegExp(const UString& pattern, const UString& flags)
RegExp::~RegExp()
{
pcre_free(m_regExp);
}
PassRefPtr<RegExp> RegExp::create(const UString& pattern)
@ -88,17 +82,38 @@ PassRefPtr<RegExp> RegExp::create(const UString& pattern, const UString& flags)
// Compiles m_pattern into m_regExp with the ECMAScript grammar, honouring the
// ignoreCase() and multiline() flags, and records the number of capture groups
// in m_numSubpatterns. On failure m_constructionError holds the reason, which
// is what isValid() and errorMessage() report.
void RegExp::compile()
{
    m_constructionError.clear();
    m_regExp = std::regex();
    m_numSubpatterns = 0;

    // The flags must stay in the library's own bitmask type: with a plain int
    // the two-argument std::regex constructor resolves to the
    // (pointer, length) overload and the flags end up being used as the
    // number of pattern characters to read.
    std::regex::flag_type regexOptions = std::regex_constants::ECMAScript;
    if (ignoreCase())
        regexOptions |= std::regex_constants::icase;
#if __cplusplus >= 201703L
    // std::regex_constants::multiline is available since C++17 only, the
    // whole statement is guarded so that the condition never swallows the
    // statement following it
    if (multiline())
        regexOptions |= std::regex_constants::multiline;
#endif

    const std::string regexpattern = m_pattern.ascii();
#ifndef QT_NO_EXCEPTIONS
    try {
        m_regExp.assign(regexpattern, regexOptions);
        // number of marked sub-expressions (capture groups) in the pattern
        m_numSubpatterns = m_regExp.mark_count();
    } catch (const std::regex_error &err) {
        m_constructionError = err.what();
    } catch (...) {
        m_constructionError = "Exception caught during regex compilation";
    }
#else
    // no exceptions, no way to find out if an error occurred
    m_regExp.assign(regexpattern, regexOptions);
    m_numSubpatterns = m_regExp.mark_count();
#endif
}
int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
@ -111,9 +126,9 @@ int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
if (startOffset > s.size() || s.isNull())
return -1;
if (m_regExp) {
if (isValid()) {
// Set up the offset vector for the result.
// First 2/3 used for result, the last third used by PCRE.
// First 2/3 used for result, the last third is unused but kept for compatibility.
int* offsetVector;
int offsetVectorSize;
int fixedSizeOffsetVector[3];
@ -126,18 +141,40 @@ int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
offsetVector = ovector->data();
}
const int numMatches = pcre_exec(m_regExp, nullptr, s.ascii(), s.size(), startOffset, 0, offsetVector, offsetVectorSize);
#ifndef QT_NO_EXCEPTIONS
bool didmatch = false;
try {
didmatch = std::regex_match(s.ascii() + startOffset, m_regExp);
} catch (const std::regex_error &err) {
m_constructionError = err.what();
} catch (...) {
m_constructionError = "Exception caught during regex matching";
}
#else
const bool didmatch = std::regex_match(s.ascii() + startOffset, m_regExp);
#endif
if (numMatches < 0) {
if (!didmatch) {
#ifndef QT_NO_DEBUG
if (numMatches != PCRE_ERROR_NOMATCH)
fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
fprintf(stderr, "jsRegExpExecute failed with result\n");
#endif
if (ovector)
ovector->clear();
return -1;
}
const std::string regexpattern = m_pattern.ascii();
std::sregex_iterator matchbegin = std::sregex_iterator(regexpattern.begin(), regexpattern.end(), m_regExp);
std::sregex_iterator matchend = std::sregex_iterator();
size_t nummatches = 0;
for (std::sregex_iterator iter = matchbegin; iter != matchend; iter++) {
const std::smatch itermatch = *iter;
offsetVector[nummatches] = itermatch.position();
offsetVector[nummatches + 1] = itermatch.length();
offsetVector[nummatches + 2] = 0;
nummatches++;
}
return offsetVector[0];
}

View file

@ -26,7 +26,7 @@
#include <wtf/Forward.h>
#include <wtf/RefCounted.h>
#include <pcre.h>
#include <regex>
namespace JSC {
@ -42,8 +42,8 @@ namespace JSC {
const UString& pattern() const { return m_pattern; }
bool isValid() const { return !m_constructionError; }
const char* errorMessage() const { return m_constructionError; }
bool isValid() const { return m_constructionError.empty(); }
const char* errorMessage() const { return m_constructionError.c_str(); }
int match(const UString&, int startOffset, Vector<int, 32>* ovector = 0);
unsigned numSubpatterns() const { return m_numSubpatterns; }
@ -58,10 +58,10 @@ namespace JSC {
UString m_pattern; // FIXME: Just decompile m_regExp instead of storing this.
int m_flagBits;
const char* m_constructionError;
std::string m_constructionError;
unsigned m_numSubpatterns;
pcre* m_regExp;
std::regex m_regExp;
};
} // namespace JSC

View file

@ -8,7 +8,6 @@ add_definitions(
)
set(EXTRA_SCRIPT_LIBS
KtCore
${PCRE_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
)
@ -47,7 +46,6 @@ include_directories(
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/API
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/bytecode
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/generated
${PCRE_INCLUDES}
)
set(SCRIPT_HEADERS

View file

@ -20,6 +20,7 @@
****************************************************************************/
#include <qtest.h>
#include <QtCore/qdebug.h>
#include <QtCore/qdir.h>
#include <QtCore/qfile.h>
#include <QtCore/qtextstream.h>
@ -95,6 +96,9 @@ void tst_SunSpider::benchmark()
QBENCHMARK {
engine.evaluate(testContents);
}
if (engine.hasUncaughtException()) {
qWarning() << engine.uncaughtException().toString();
}
QVERIFY(!engine.hasUncaughtException());
}