mirror of
https://bitbucket.org/smil3y/katie.git
synced 2025-02-23 18:32:55 +00:00
make use of PCRE in JavaScriptCore
Signed-off-by: Ivailo Monev <xakepa10@laimg.moc>
This commit is contained in:
parent
1ff2efbdbe
commit
ad8de752da
17 changed files with 67 additions and 9049 deletions
|
@ -256,6 +256,14 @@ set_package_properties(OpenSSL PROPERTIES
|
|||
TYPE REQUIRED
|
||||
)
|
||||
|
||||
# Locate PCRE and describe it for the feature summary; it is registered as a
# REQUIRED package type, like the other mandatory dependencies in this file.
find_package(PCRE)
set_package_properties(PCRE PROPERTIES
    DESCRIPTION "Perl Compatible Regular Expressions"
    URL "http://www.pcre.org/"
    PURPOSE "Required for regexp support in script component"
    TYPE REQUIRED
)
|
||||
|
||||
find_package(PythonInterp)
|
||||
set_package_properties(PythonInterp PROPERTIES
|
||||
PURPOSE "UI class maps generator script"
|
||||
|
|
41
cmake/modules/FindPCRE.cmake
Normal file
41
cmake/modules/FindPCRE.cmake
Normal file
|
@ -0,0 +1,41 @@
|
|||
# - Try to find the PCRE
|
||||
# Once done this will define
|
||||
#
|
||||
# PCRE_FOUND - system has PCRE
|
||||
# PCRE_INCLUDES - the PCRE include directory
|
||||
# PCRE_LIBRARIES - The libraries needed to use PCRE
|
||||
#
|
||||
# Copyright (c) 2016, Ivailo Monev, <xakepa10@gmail.com>
|
||||
#
|
||||
# Redistribution and use is allowed according to the terms of the BSD license.
|
||||
|
||||
# Stay quiet on re-runs when both results are already known (e.g. cached).
if(PCRE_INCLUDES AND PCRE_LIBRARIES)
    set(PCRE_FIND_QUIETLY TRUE)
endif()

# Ask pkg-config for search hints on non-Windows hosts. The module is loaded
# through find_package() so that a missing pkg-config is handled cleanly, and
# pkg_check_modules() is only called when the tool was actually found. The
# PC_PCRE_* results are used purely as hints for the searches below.
if(NOT WIN32)
    find_package(PkgConfig QUIET)
    if(PKG_CONFIG_FOUND)
        pkg_check_modules(PC_PCRE QUIET libpcre)
    endif()
endif()

# Directory containing pcre.h.
find_path(PCRE_INCLUDES
    NAMES
    pcre.h
    HINTS
    $ENV{PCREDIR}/include
    ${PC_PCRE_INCLUDEDIR}
    ${INCLUDE_INSTALL_DIR}
)

# The pcre library itself.
find_library(PCRE_LIBRARIES
    NAMES
    pcre
    HINTS
    $ENV{PCREDIR}/lib
    ${PC_PCRE_LIBDIR}
    ${LIB_INSTALL_DIR}
)

# Sets PCRE_FOUND once both the header directory and the library were located,
# and reports the result honouring QUIET/REQUIRED.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE DEFAULT_MSG PCRE_INCLUDES PCRE_LIBRARIES)

mark_as_advanced(PCRE_INCLUDES PCRE_LIBRARIES)
|
12
src/3rdparty/javascriptcore/pcre/AUTHORS
vendored
12
src/3rdparty/javascriptcore/pcre/AUTHORS
vendored
|
@ -1,12 +0,0 @@
|
|||
Originally written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
||||
|
||||
Adapted for JavaScriptCore and WebKit by Apple Inc.
|
||||
|
||||
Copyright (c) 2005, 2006, 2007 Apple Inc. All rights reserved.
|
35
src/3rdparty/javascriptcore/pcre/COPYING
vendored
35
src/3rdparty/javascriptcore/pcre/COPYING
vendored
|
@ -1,35 +0,0 @@
|
|||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Apple
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
273
src/3rdparty/javascriptcore/pcre/dftables
vendored
273
src/3rdparty/javascriptcore/pcre/dftables
vendored
|
@ -1,273 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
# started out as a copy of PCRE, many of the features of PCRE have been
|
||||
# removed. This library now supports only the regular expression features
|
||||
# required by the JavaScript language specification, and has only the functions
|
||||
# needed by JavaScriptCore and the rest of WebKit.
|
||||
#
|
||||
# Originally written by Philip Hazel
|
||||
# Copyright (c) 1997-2006 University of Cambridge
|
||||
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the University of Cambridge nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This is a freestanding support program to generate a file containing
|
||||
# character tables. The tables are built according to the default C
|
||||
# locale.
|
||||
|
||||
use strict;
|
||||
|
||||
use File::Basename;
|
||||
use File::Spec;
|
||||
use File::Temp qw(tempfile);
|
||||
use Getopt::Long;
|
||||
|
||||
sub readHeaderValues();
|
||||
|
||||
my %pcre_internal;
|
||||
|
||||
if (scalar(@ARGV) < 1) {
|
||||
print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
|
||||
exit 1;
|
||||
}
|
||||
|
||||
my $outputFile;
|
||||
my $preprocessor;
|
||||
GetOptions('preprocessor=s' => \$preprocessor);
|
||||
if (not $preprocessor) {
|
||||
$preprocessor = "cpp";
|
||||
}
|
||||
|
||||
$outputFile = $ARGV[0];
|
||||
die('Must specify output file.') unless defined($outputFile);
|
||||
|
||||
readHeaderValues();
|
||||
|
||||
open(OUT, ">", $outputFile) or die "$!";
|
||||
binmode(OUT);
|
||||
|
||||
printf(OUT
|
||||
"/*************************************************\n" .
|
||||
"* Perl-Compatible Regular Expressions *\n" .
|
||||
"*************************************************/\n\n" .
|
||||
"/* This file is automatically written by the dftables auxiliary \n" .
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n" .
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
printf(OUT
|
||||
"This file contains the default tables for characters with codes less than\n" .
|
||||
"128 (ASCII characters). These tables are used when no external tables are\n" .
|
||||
"passed to PCRE. */\n\n" .
|
||||
"const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
|
||||
"/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
|
||||
|
||||
if ($pcre_internal{lcc_offset} != 0) {
|
||||
die "lcc_offset != 0";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
if (($i & 7) == 0 && $i != 0) {
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
printf(OUT "0x%02X", ord(lc(chr($i))));
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
printf(OUT "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
if ($pcre_internal{fcc_offset} != 128) {
|
||||
die "fcc_offset != 128";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
if (($i & 7) == 0 && $i != 0) {
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
my $c = chr($i);
|
||||
printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
printf(OUT
|
||||
"/* This table contains bit maps for various character classes.\n" .
|
||||
"Each map is 32 bytes long and the bits run from the least\n" .
|
||||
"significant end of each byte. The classes are: space, digit, word. */\n\n");
|
||||
|
||||
if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
|
||||
die "cbits_offset != fcc_offset + 128";
|
||||
}
|
||||
|
||||
my @cbit_table = (0) x $pcre_internal{cbit_length};
|
||||
for (my $i = ord('0'); $i <= ord('9'); $i++) {
|
||||
$cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
$cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
my $c = chr($i);
|
||||
if ($c =~ /[[:alnum:]]/) {
|
||||
$cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
if ($c =~ /[[:space:]]/) {
|
||||
$cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
|
||||
}
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
|
||||
if (($i & 7) == 0 && $i != 0) {
|
||||
if (($i & 31) == 0) {
|
||||
printf(OUT "\n");
|
||||
}
|
||||
printf(OUT "\n ");
|
||||
}
|
||||
printf(OUT "0x%02X", $cbit_table[$i]);
|
||||
if ($i != $pcre_internal{cbit_length} - 1) {
|
||||
printf(OUT ", ");
|
||||
}
|
||||
}
|
||||
printf(OUT ",\n\n");
|
||||
|
||||
printf(OUT
|
||||
"/* This table identifies various classes of character by individual bits:\n" .
|
||||
" 0x%02x white space character\n" .
|
||||
" 0x%02x hexadecimal digit\n" .
|
||||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
||||
$pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
|
||||
|
||||
if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
|
||||
die "ctypes_offset != cbits_offset + cbit_length";
|
||||
}
|
||||
|
||||
printf(OUT " ");
|
||||
for (my $i = 0; $i < 128; $i++) {
|
||||
my $x = 0;
|
||||
my $c = chr($i);
|
||||
if ($c =~ /[[:space:]]/) {
|
||||
$x += $pcre_internal{ctype_space};
|
||||
}
|
||||
if ($c =~ /[[:xdigit:]]/) {
|
||||
$x += $pcre_internal{ctype_xdigit};
|
||||
}
|
||||
if ($c =~ /[[:alnum:]_]/) {
|
||||
$x += $pcre_internal{ctype_word};
|
||||
}
|
||||
printf(OUT "0x%02X", $x);
|
||||
if ($i != 127) {
|
||||
printf(OUT ", ");
|
||||
} else {
|
||||
printf(OUT "};");
|
||||
}
|
||||
if (($i & 7) == 7) {
|
||||
printf(OUT " /* ");
|
||||
my $d = chr($i - 7);
|
||||
if ($d =~ /[[:print:]]/) {
|
||||
printf(OUT " %c -", $i - 7);
|
||||
} else {
|
||||
printf(OUT "%3d-", $i - 7);
|
||||
}
|
||||
if ($c =~ m/[[:print:]]/) {
|
||||
printf(OUT " %c ", $i);
|
||||
} else {
|
||||
printf(OUT "%3d", $i);
|
||||
}
|
||||
printf(OUT " */\n");
|
||||
if ($i != 127) {
|
||||
printf(OUT " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
|
||||
die "tables_length != ctypes_offset + 128";
|
||||
}
|
||||
|
||||
printf(OUT "\n\n/* End of chartables.c */\n");
|
||||
|
||||
close(OUT);
|
||||
|
||||
exit 0;
|
||||
|
||||
# Fill %pcre_internal with the table constants #defined in pcre_internal.h,
# which is looked up in the same directory as this script.
#
# The header is copied into a temporary file after a "#define DFTABLES" line,
# followed by one Perl assignment per constant name. Running the preprocessor
# over that file replaces each constant name with its value; the preprocessor
# output is then eval'ed to perform the assignments.
#
# Dies when the header cannot be opened, when the preprocessor cannot be
# started or exits unsuccessfully, or when the eval reports an error. The
# temporary file is removed on every path, including the failing ones.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: every <...> read below returns the whole stream at once.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # UNLINK => 1 lets File::Temp delete the file at program exit, so it does
    # not leak when one of the die() calls below fires.
    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 1,
    );

    print $fh "#define DFTABLES\n\n";

    open(my $header, "<", $headerPath) or die "$!";
    print $fh <$header>;
    close($header);

    print $fh "\n\n";

    for my $v (@variables) {
        print $fh "\$pcre_internal{\"$v\"} = $v;\n";
    }

    close($fh);

    # $preprocessor may carry its own arguments, so it is run as a command line.
    open(my $cpp, "-|", "$preprocessor \"$tempFile\"") or die "$!";
    my $content = <$cpp>;
    # close() on a pipe returns false when the command exited with a non-zero
    # status; without this check a failed preprocessor would go unnoticed.
    close($cpp) or die "preprocessor '$preprocessor' failed" . ($! ? ": $!" : " (exit status " . ($? >> 8) . ")") . "\n";

    eval $content;
    die "$@" if $@;
    unlink $tempFile;
}
|
68
src/3rdparty/javascriptcore/pcre/pcre.h
vendored
68
src/3rdparty/javascriptcore/pcre/pcre.h
vendored
|
@ -1,68 +0,0 @@
|
|||
/* This is the public header file for JavaScriptCore's variant of the PCRE
|
||||
library. While this library started out as a copy of PCRE, many of the
|
||||
features of PCRE have been removed. This library now supports only the
|
||||
regular expression features required by the JavaScript language
|
||||
specification, and has only the functions needed by JavaScriptCore and the
|
||||
rest of WebKit.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
// FIXME: This file needs to be renamed to JSRegExp.h; it's no longer PCRE.
|
||||
|
||||
#ifndef JSRegExp_h
|
||||
#define JSRegExp_h
|
||||
|
||||
#include <wtf/unicode/Unicode.h>
|
||||
|
||||
struct JSRegExp;
|
||||
|
||||
enum JSRegExpIgnoreCaseOption { JSRegExpDoNotIgnoreCase, JSRegExpIgnoreCase };
|
||||
enum JSRegExpMultilineOption { JSRegExpSingleLine, JSRegExpMultiline };
|
||||
|
||||
/* jsRegExpExecute error codes */
|
||||
const int JSRegExpErrorNoMatch = -1;
|
||||
const int JSRegExpErrorHitLimit = -2;
|
||||
const int JSRegExpErrorNoMemory = -3;
|
||||
const int JSRegExpErrorInternal = -4;
|
||||
|
||||
JSRegExp* jsRegExpCompile(const UChar* pattern, int patternLength,
|
||||
JSRegExpIgnoreCaseOption, JSRegExpMultilineOption,
|
||||
unsigned* numSubpatterns, const char** errorMessage);
|
||||
|
||||
int jsRegExpExecute(const JSRegExp*,
|
||||
const UChar* subject, int subjectLength, int startOffset,
|
||||
int* offsetsVector, int offsetsVectorLength);
|
||||
|
||||
void jsRegExpFree(JSRegExp*);
|
||||
|
||||
#endif
|
2703
src/3rdparty/javascriptcore/pcre/pcre_compile.cpp
vendored
2703
src/3rdparty/javascriptcore/pcre/pcre_compile.cpp
vendored
File diff suppressed because it is too large
Load diff
2105
src/3rdparty/javascriptcore/pcre/pcre_exec.cpp
vendored
2105
src/3rdparty/javascriptcore/pcre/pcre_exec.cpp
vendored
File diff suppressed because it is too large
Load diff
455
src/3rdparty/javascriptcore/pcre/pcre_internal.h
vendored
455
src/3rdparty/javascriptcore/pcre/pcre_internal.h
vendored
|
@ -1,455 +0,0 @@
|
|||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This header contains definitions that are shared between the different
|
||||
modules, but which are not relevant to the exported API. This includes some
|
||||
functions whose names all begin with "_pcre_". */
|
||||
|
||||
#ifndef PCRE_INTERNAL_H
|
||||
#define PCRE_INTERNAL_H
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphameric or '_' */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
of bits for a class map. Some classes are built by combining these tables. */
|
||||
|
||||
#define cbit_space 0 /* \s */
|
||||
#define cbit_digit 32 /* \d */
|
||||
#define cbit_word 64 /* \w */
|
||||
#define cbit_length 96 /* Length of the cbits table */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length. */
|
||||
|
||||
#define lcc_offset 0
|
||||
#define fcc_offset 128
|
||||
#define cbits_offset 256
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 128)
|
||||
|
||||
#ifndef DFTABLES
|
||||
|
||||
// Change the following to 1 to dump used regular expressions at process exit time.
|
||||
#define REGEXP_HISTOGRAM 0
|
||||
|
||||
#include "Assertions.h"
|
||||
|
||||
#if COMPILER(MSVC)
|
||||
#pragma warning(disable: 4232)
|
||||
#pragma warning(disable: 4244)
|
||||
#endif
|
||||
|
||||
#include "pcre.h"
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links as
|
||||
offsets within the compiled regex. The default is 2, which allows for compiled
|
||||
patterns up to 64K long. */
|
||||
|
||||
#define LINK_SIZE 3
|
||||
|
||||
/* Define DEBUG to get debugging output on stdout. */
|
||||
|
||||
#if 0
|
||||
#define DEBUG
|
||||
#endif
|
||||
|
||||
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
|
||||
inline, and there are *still* stupid compilers about that don't like indented
|
||||
pre-processor statements, or at least there were when I first wrote this. After
|
||||
all, it had only been about 10 years then... */
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DPRINTF(p) printf p
|
||||
#else
|
||||
#define DPRINTF(p) /*nothing*/
|
||||
#endif
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
|
||||
in big-endian order) by default. These are used, for example, to link from the
|
||||
start of a subpattern to its alternatives and its end. The use of 2 bytes per
|
||||
offset limits the size of the compiled regex to around 64K, which is big enough
|
||||
for almost everybody. However, I received a request for an even bigger limit.
|
||||
For this reason, and also to make the code easier to maintain, the storing and
|
||||
loading of offsets from the byte string is now handled by the functions that are
|
||||
defined here. */
|
||||
|
||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
||||
offsets changes. These are used for repeat counts and for other things such as
|
||||
capturing parenthesis numbers in back references. */
|
||||
|
||||
static inline void put2ByteValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
Q_ASSERT(value >= 0 && value <= 0xFFFF);
|
||||
opcodePtr[0] = value >> 8;
|
||||
opcodePtr[1] = value;
|
||||
}
|
||||
|
||||
static inline void put3ByteValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
Q_ASSERT(value >= 0 && value <= 0xFFFFFF);
|
||||
opcodePtr[0] = value >> 16;
|
||||
opcodePtr[1] = value >> 8;
|
||||
opcodePtr[2] = value;
|
||||
}
|
||||
|
||||
/* Reads the two bytes at opcodePtr[0..1] as one value, most significant byte
   first. */
static inline int get2ByteValue(const unsigned char* opcodePtr)
{
    const int highPart = opcodePtr[0] << 8;
    const int lowPart = opcodePtr[1];
    return highPart | lowPart;
}
|
||||
|
||||
/* Reads the three bytes at opcodePtr[0..2] as one value, most significant byte
   first. */
static inline int get3ByteValue(const unsigned char* opcodePtr)
{
    const int highPart = opcodePtr[0] << 16;
    const int middlePart = opcodePtr[1] << 8;
    const int lowPart = opcodePtr[2];
    return highPart | middlePart | lowPart;
}
|
||||
|
||||
static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
put2ByteValue(opcodePtr, value);
|
||||
opcodePtr += 2;
|
||||
}
|
||||
|
||||
static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
put3ByteValue(opcodePtr, value);
|
||||
opcodePtr += 3;
|
||||
}
|
||||
|
||||
/* Stores a link value using the width selected at compile time by LINK_SIZE
   (2 or 3 bytes). Unlike putLinkValue(), no non-zero assertion is made here. */
static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
{
#if LINK_SIZE == 2
    put2ByteValue(opcodePtr, value);
#elif LINK_SIZE == 3
    put3ByteValue(opcodePtr, value);
#else
#   error LINK_SIZE not supported.
#endif
}
|
||||
|
||||
/* Loads a link value using the width selected at compile time by LINK_SIZE
   (2 or 3 bytes). Unlike getLinkValue(), no non-zero assertion is made here. */
static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
{
#if LINK_SIZE == 2
    return get2ByteValue(opcodePtr);
#elif LINK_SIZE == 3
    return get3ByteValue(opcodePtr);
#else
#   error LINK_SIZE not supported.
#endif
}
|
||||
|
||||
#define MAX_PATTERN_SIZE 1024 * 1024 // Derived by empirical testing of compile time in PCRE.
|
||||
COMPILE_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)), pcre_max_pattern_fits_in_bytecode);
|
||||
|
||||
static inline void putLinkValue(unsigned char* opcodePtr, int value)
|
||||
{
|
||||
Q_ASSERT(value);
|
||||
putLinkValueAllowZero(opcodePtr, value);
|
||||
}
|
||||
|
||||
static inline int getLinkValue(const unsigned char* opcodePtr)
|
||||
{
|
||||
int value = getLinkValueAllowZero(opcodePtr);
|
||||
Q_ASSERT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
putLinkValue(opcodePtr, value);
|
||||
opcodePtr += LINK_SIZE;
|
||||
}
|
||||
|
||||
static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
|
||||
{
|
||||
putLinkValueAllowZero(opcodePtr, value);
|
||||
opcodePtr += LINK_SIZE;
|
||||
}
|
||||
|
||||
// FIXME: These are really more of a "compiled regexp state" than "regexp options"
|
||||
enum RegExpOptions {
|
||||
UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */
|
||||
UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */
|
||||
UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */
|
||||
IsAnchoredOption = 0x02000000, /* can't use partial with this regex */
|
||||
IgnoreCaseOption = 0x00000001,
|
||||
MatchAcrossMultipleLinesOption = 0x00000002
|
||||
};
|
||||
|
||||
/* Flags added to firstByte or reqByte; a "non-literal" item is either a
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
|
||||
#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */
|
||||
#define REQ_VARY 0x0200 /* reqByte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_w. The final one must be
|
||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||
tests in the code for an escape > ESC_b and <= ESC_w to
|
||||
detect the types that may be repeated. These are the types that consume
|
||||
characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF };
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
Note that whenever this list is updated, the two macro definitions that follow
|
||||
must also be updated to match. */
|
||||
|
||||
#define FOR_EACH_OPCODE(macro) \
|
||||
macro(END) \
|
||||
\
|
||||
macro(NOT_WORD_BOUNDARY) \
|
||||
macro(WORD_BOUNDARY) \
|
||||
macro(NOT_DIGIT) \
|
||||
macro(DIGIT) \
|
||||
macro(NOT_WHITESPACE) \
|
||||
macro(WHITESPACE) \
|
||||
macro(NOT_WORDCHAR) \
|
||||
macro(WORDCHAR) \
|
||||
\
|
||||
macro(NOT_NEWLINE) \
|
||||
\
|
||||
macro(CIRC) \
|
||||
macro(DOLL) \
|
||||
macro(BOL) \
|
||||
macro(EOL) \
|
||||
macro(CHAR) \
|
||||
macro(CHAR_IGNORING_CASE) \
|
||||
macro(ASCII_CHAR) \
|
||||
macro(ASCII_LETTER_IGNORING_CASE) \
|
||||
macro(NOT) \
|
||||
\
|
||||
macro(STAR) \
|
||||
macro(MINSTAR) \
|
||||
macro(PLUS) \
|
||||
macro(MINPLUS) \
|
||||
macro(QUERY) \
|
||||
macro(MINQUERY) \
|
||||
macro(UPTO) \
|
||||
macro(MINUPTO) \
|
||||
macro(EXACT) \
|
||||
\
|
||||
macro(NOTSTAR) \
|
||||
macro(NOTMINSTAR) \
|
||||
macro(NOTPLUS) \
|
||||
macro(NOTMINPLUS) \
|
||||
macro(NOTQUERY) \
|
||||
macro(NOTMINQUERY) \
|
||||
macro(NOTUPTO) \
|
||||
macro(NOTMINUPTO) \
|
||||
macro(NOTEXACT) \
|
||||
\
|
||||
macro(TYPESTAR) \
|
||||
macro(TYPEMINSTAR) \
|
||||
macro(TYPEPLUS) \
|
||||
macro(TYPEMINPLUS) \
|
||||
macro(TYPEQUERY) \
|
||||
macro(TYPEMINQUERY) \
|
||||
macro(TYPEUPTO) \
|
||||
macro(TYPEMINUPTO) \
|
||||
macro(TYPEEXACT) \
|
||||
\
|
||||
macro(CRSTAR) \
|
||||
macro(CRMINSTAR) \
|
||||
macro(CRPLUS) \
|
||||
macro(CRMINPLUS) \
|
||||
macro(CRQUERY) \
|
||||
macro(CRMINQUERY) \
|
||||
macro(CRRANGE) \
|
||||
macro(CRMINRANGE) \
|
||||
\
|
||||
macro(CLASS) \
|
||||
macro(NCLASS) \
|
||||
macro(XCLASS) \
|
||||
\
|
||||
macro(REF) \
|
||||
\
|
||||
macro(ALT) \
|
||||
macro(KET) \
|
||||
macro(KETRMAX) \
|
||||
macro(KETRMIN) \
|
||||
\
|
||||
macro(ASSERT) \
|
||||
macro(ASSERT_NOT) \
|
||||
\
|
||||
macro(BRAZERO) \
|
||||
macro(BRAMINZERO) \
|
||||
macro(BRANUMBER) \
|
||||
macro(BRA)
|
||||
|
||||
#define OPCODE_ENUM_VALUE(opcode) OP_##opcode,
|
||||
enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };
|
||||
|
||||
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
|
||||
study.c that all opcodes are less than 128 in value. This makes handling UTF-8
|
||||
character sequences easier. */
|
||||
|
||||
/* The highest extraction number before we have to start using additional
|
||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
||||
opcodes. */
|
||||
|
||||
/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above
|
||||
are in conflict! */
|
||||
|
||||
#define EXTRACT_BASIC_MAX 100
|
||||
|
||||
/* The code vector runs on as long as necessary after the end. */
|
||||
|
||||
struct JSRegExp {
|
||||
unsigned options;
|
||||
|
||||
unsigned short topBracket;
|
||||
unsigned short topBackref;
|
||||
|
||||
unsigned short firstByte;
|
||||
unsigned short reqByte;
|
||||
|
||||
#if REGEXP_HISTOGRAM
|
||||
size_t stringOffset;
|
||||
size_t stringLength;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Internal shared data tables. These are tables that are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE public API. The data for these tables is in the
|
||||
pcre_tables.c module. */
|
||||
|
||||
#define jsc_pcre_utf8_table1_size 6
|
||||
|
||||
extern const int jsc_pcre_utf8_table1[6];
|
||||
extern const int jsc_pcre_utf8_table2[6];
|
||||
extern const int jsc_pcre_utf8_table3[6];
|
||||
extern const unsigned char jsc_pcre_utf8_table4[0x40];
|
||||
|
||||
extern const unsigned char jsc_pcre_default_tables[tables_length];
|
||||
|
||||
static inline unsigned char toLowerCase(unsigned char c)
|
||||
{
|
||||
static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset;
|
||||
return lowerCaseChars[c];
|
||||
}
|
||||
|
||||
static inline unsigned char flipCase(unsigned char c)
|
||||
{
|
||||
static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset;
|
||||
return flippedCaseChars[c];
|
||||
}
|
||||
|
||||
static inline unsigned char classBitmapForChar(unsigned char c)
|
||||
{
|
||||
static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset;
|
||||
return charClassBitmaps[c];
|
||||
}
|
||||
|
||||
static inline unsigned char charTypeForChar(unsigned char c)
|
||||
{
|
||||
const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset;
|
||||
return charTypeMap[c];
|
||||
}
|
||||
|
||||
static inline bool isWordChar(UChar c)
|
||||
{
|
||||
return c < 128 && (charTypeForChar(c) & ctype_word);
|
||||
}
|
||||
|
||||
static inline bool isSpaceChar(UChar c)
|
||||
{
|
||||
return (c < 128 && (charTypeForChar(c) & ctype_space)) || c == 0x00A0;
|
||||
}
|
||||
|
||||
static inline bool isNewline(UChar nl)
|
||||
{
|
||||
return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029);
|
||||
}
|
||||
|
||||
static inline bool isBracketStartOpcode(unsigned char opcode)
|
||||
{
|
||||
if (opcode >= OP_BRA)
|
||||
return true;
|
||||
switch (opcode) {
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
|
||||
{
|
||||
Q_ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
|
||||
do
|
||||
opcodePtr += getLinkValue(opcodePtr + 1);
|
||||
while (*opcodePtr == OP_ALT);
|
||||
}
|
||||
|
||||
/* Internal shared functions. These are functions that are used in more
|
||||
than one of the source files. They have to have external linkage, but
|
||||
are not part of the public API and so not exported from the library. */
|
||||
|
||||
extern int jsc_pcre_ucp_othercase(unsigned);
|
||||
extern bool jsc_pcre_xclass(int, const unsigned char*);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* End of pcre_internal.h */
|
72
src/3rdparty/javascriptcore/pcre/pcre_tables.cpp
vendored
72
src/3rdparty/javascriptcore/pcre/pcre_tables.cpp
vendored
|
@ -1,72 +0,0 @@
|
|||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. */
|
||||
|
||||
#include "Platform.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int jsc_pcre_utf8_table1[6] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
/* These are the indicator bits and the mask for the data bits to set in the
|
||||
first byte of a character, indexed by the number of additional bytes. */
|
||||
|
||||
const int jsc_pcre_utf8_table2[6] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int jsc_pcre_utf8_table3[6] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra characters, indexed by the first character
|
||||
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
||||
0x3d. */
|
||||
|
||||
const unsigned char jsc_pcre_utf8_table4[0x40] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#include "chartables.c"
|
|
@ -1,99 +0,0 @@
|
|||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#include "Platform.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
#include "ucptable.cpp" /* The table itself */
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return -1.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or -1 if none
|
||||
*/
|
||||
|
||||
int jsc_pcre_ucp_othercase(unsigned c)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table) / sizeof(cnode);
|
||||
int mid;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;) {
|
||||
if (top <= bot)
|
||||
return -1;
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask))
|
||||
break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask))
|
||||
top = mid;
|
||||
else {
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) && (c <= (ucp_table[mid].f0 & f0_charmask) + (ucp_table[mid].f1 & f1_rangemask)))
|
||||
break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Return -1 for a range entry. Otherwise return
|
||||
the other case if there is one, else -1. */
|
||||
|
||||
if (ucp_table[mid].f0 & f0_rangeflag)
|
||||
return -1;
|
||||
|
||||
int offset = ucp_table[mid].f1 & f1_casemask;
|
||||
if (offset & f1_caseneg)
|
||||
offset |= f1_caseneg;
|
||||
return !offset ? -1 : c + offset;
|
||||
}
|
115
src/3rdparty/javascriptcore/pcre/pcre_xclass.cpp
vendored
115
src/3rdparty/javascriptcore/pcre/pcre_xclass.cpp
vendored
|
@ -1,115 +0,0 @@
|
|||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). */
|
||||
|
||||
#include "Platform.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: true if character matches, else false
|
||||
*/
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr)
|
||||
{
|
||||
c = *subjectPtr++;
|
||||
if ((c & 0xc0) == 0xc0) {
|
||||
int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int gcss = 6 * gcaa;
|
||||
c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
|
||||
while (gcaa-- > 0) {
|
||||
gcss -= 6;
|
||||
c |= (*subjectPtr++ & 0x3f) << gcss;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool jsc_pcre_xclass(int c, const unsigned char* data)
|
||||
{
|
||||
bool negated = (*data & XCL_NOT);
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256) {
|
||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0)
|
||||
data += 32;
|
||||
|
||||
int t;
|
||||
while ((t = *data++) != XCL_END) {
|
||||
if (t == XCL_SINGLE) {
|
||||
int x;
|
||||
getUTF8CharAndAdvancePointer(x, data);
|
||||
if (c == x)
|
||||
return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE) {
|
||||
int x, y;
|
||||
getUTF8CharAndAdvancePointer(x, data);
|
||||
getUTF8CharAndAdvancePointer(y, data);
|
||||
if (c >= x && c <= y)
|
||||
return !negated;
|
||||
}
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
126
src/3rdparty/javascriptcore/pcre/ucpinternal.h
vendored
126
src/3rdparty/javascriptcore/pcre/ucpinternal.h
vendored
|
@ -1,126 +0,0 @@
|
|||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
||||
started out as a copy of PCRE, many of the features of PCRE have been
|
||||
removed. This library now supports only the regular expression features
|
||||
required by the JavaScript language specification, and has only the functions
|
||||
needed by JavaScriptCore and the rest of WebKit.
|
||||
|
||||
Originally written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
unsigned f0;
|
||||
unsigned f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field */
|
||||
|
||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||
#define f0_scriptshift 24 /* Shift for script value */
|
||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field */
|
||||
|
||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
||||
#define f1_typeshift 26 /* Shift for the type field */
|
||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
||||
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character
|
||||
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
| | | | |
|
||||
| | |-> spare | |-> spare
|
||||
| | |
|
||||
| |-> spare |-> spare
|
||||
|
|
||||
|-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Undefined").
|
||||
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
/* End of ucpinternal.h */
|
2968
src/3rdparty/javascriptcore/pcre/ucptable.cpp
vendored
2968
src/3rdparty/javascriptcore/pcre/ucptable.cpp
vendored
File diff suppressed because it is too large
Load diff
22
src/3rdparty/javascriptcore/runtime/RegExp.cpp
vendored
22
src/3rdparty/javascriptcore/runtime/RegExp.cpp
vendored
|
@ -27,7 +27,7 @@
|
|||
#include <string.h>
|
||||
#include <wtf/Assertions.h>
|
||||
|
||||
#include <pcre/pcre.h>
|
||||
#include <pcre.h>
|
||||
|
||||
namespace JSC {
|
||||
|
||||
|
@ -73,7 +73,7 @@ inline RegExp::RegExp(const UString& pattern, const UString& flags)
|
|||
|
||||
RegExp::~RegExp()
|
||||
{
|
||||
jsRegExpFree(m_regExp);
|
||||
pcre_free(m_regExp);
|
||||
}
|
||||
|
||||
PassRefPtr<RegExp> RegExp::create(const UString& pattern)
|
||||
|
@ -90,9 +90,15 @@ void RegExp::compile()
|
|||
{
|
||||
m_regExp = 0;
|
||||
|
||||
JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
|
||||
JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
|
||||
m_regExp = jsRegExpCompile(m_pattern.data(), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
|
||||
int regexOptions = PCRE_JAVASCRIPT_COMPAT;
|
||||
if (ignoreCase())
|
||||
regexOptions |= PCRE_CASELESS;
|
||||
if (multiline())
|
||||
regexOptions |= PCRE_MULTILINE;
|
||||
int errorOffset;
|
||||
m_regExp = pcre_compile(m_pattern.UTF8String(), regexOptions, &m_constructionError, &errorOffset, Q_NULLPTR);
|
||||
|
||||
pcre_fullinfo(m_regExp, Q_NULLPTR, PCRE_INFO_CAPTURECOUNT, &m_numSubpatterns);
|
||||
}
|
||||
|
||||
int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
|
||||
|
@ -120,11 +126,11 @@ int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
|
|||
offsetVector = ovector->data();
|
||||
}
|
||||
|
||||
int numMatches = jsRegExpExecute(m_regExp, s.data(), s.size(), startOffset, offsetVector, offsetVectorSize);
|
||||
|
||||
const int numMatches = pcre_exec(m_regExp, Q_NULLPTR, s.UTF8String(), s.size(), startOffset, 0, offsetVector, offsetVectorSize);
|
||||
|
||||
if (numMatches < 0) {
|
||||
#ifndef NDEBUG
|
||||
if (numMatches != JSRegExpErrorNoMatch)
|
||||
if (numMatches != Q_NULLPTR)
|
||||
fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
|
||||
#endif
|
||||
if (ovector)
|
||||
|
|
4
src/3rdparty/javascriptcore/runtime/RegExp.h
vendored
4
src/3rdparty/javascriptcore/runtime/RegExp.h
vendored
|
@ -26,7 +26,7 @@
|
|||
#include <wtf/Forward.h>
|
||||
#include <wtf/RefCounted.h>
|
||||
|
||||
struct JSRegExp;
|
||||
#include <pcre.h>
|
||||
|
||||
namespace JSC {
|
||||
|
||||
|
@ -61,7 +61,7 @@ namespace JSC {
|
|||
const char* m_constructionError;
|
||||
unsigned m_numSubpatterns;
|
||||
|
||||
JSRegExp* m_regExp;
|
||||
pcre* m_regExp;
|
||||
};
|
||||
|
||||
} // namespace JSC
|
||||
|
|
|
@ -9,7 +9,7 @@ add_definitions(
|
|||
-DQLALR_NO_QSCRIPTGRAMMAR_DEBUG_INFO
|
||||
-DBUILDING_QT__
|
||||
)
|
||||
set(EXTRA_SCRIPT_LIBS KtCore)
|
||||
set(EXTRA_SCRIPT_LIBS KtCore ${PCRE_LIBRARIES})
|
||||
|
||||
# TODO: fix std::auto_ptr warnings
|
||||
if(KATIE_COMPILER MATCHES "(gcc|clang)")
|
||||
|
@ -160,11 +160,6 @@ set(SCRIPT_SOURCES
|
|||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/wtf/TypeTraits.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/wtf/unicode/CollatorDefault.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/wtf/unicode/UTF8.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre/pcre_compile.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre/pcre_exec.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre/pcre_tables.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre/pcre_ucp_searchfuncs.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre/pcre_xclass.cpp
|
||||
)
|
||||
|
||||
include_directories(
|
||||
|
@ -196,8 +191,7 @@ include_directories(
|
|||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/API
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/bytecode
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/generated
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/pcre
|
||||
${CMAKE_SOURCE_DIR}/src/3rdparty/javascriptcore/tmp
|
||||
${PCRE_INCLUDES}
|
||||
)
|
||||
|
||||
katie_generate_misc("${SCRIPT_HEADERS}" QtScript)
|
||||
|
|
Loading…
Add table
Reference in a new issue