// Mirror of https://bitbucket.org/smil3y/kdelibs.git
// (synced 2025-02-24 10:52:49 +00:00; 952 lines, 23 KiB, C++)
// -*- c-basic-offset: 2 -*-
/*
 *  This file is part of the KDE libraries
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006 Apple Computer, Inc.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */
|
|
|
|
#include "lexer.h"
|
|
#include <config-kjs.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
|
|
#include "dtoa.h"
|
|
#include "function.h"
|
|
#include "interpreter.h"
|
|
#include "nodes.h"
|
|
#include "commonunicode.h"
|
|
#include "wtf/ASCIICType.h"
|
|
#include "wtf/DisallowCType.h"
|
|
#include <wtf/unicode/libc/UnicodeLibC.h>
|
|
|
|
using namespace WTF;
|
|
using namespace Unicode;
|
|
|
|
// GCC cstring uses these automatically, but not all implementations do.
|
|
using std::strlen;
|
|
using std::strcpy;
|
|
using std::strncpy;
|
|
using std::memset;
|
|
using std::memcpy;
|
|
|
|
// we can't specify the namespace in yacc's C output, so do it here
|
|
using namespace KJS;
|
|
|
|
#include "grammar.h"
|
|
#include "lookup.h"
|
|
#include "lexer.lut.h"
|
|
|
|
extern YYLTYPE kjsyylloc; // global bison variable holding token info
|
|
|
|
// a bridge for yacc from the C world to C++
|
|
int kjsyylex()
|
|
{
|
|
return lexer().lex();
|
|
}
|
|
|
|
namespace KJS {
|
|
|
|
// Forward declaration: the definition lives further down in this file, but
// Lexer::lex() already needs it.
static bool isDecimalDigit(int c);
|
|
|
|
// Capacity reserved up front for the 8-bit and 16-bit token read buffers
// (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;
|
|
// Capacity reserved up front for the tables that own the lexed string
// literals and identifiers (m_strings / m_identifiers).
static const size_t initialStringTableCapacity = 64;
|
|
|
|
// Returns the process-wide Lexer, creating it on first use.
Lexer& lexer()
{
  // ASSERT(JSLock::currentThreadIsHoldingLock());

  // FIXME: We'd like to avoid calling new here, but we don't currently
  // support tearing down the Lexer at app quit time, since that would involve
  // tearing down its UString data members without holding the JSLock.
  // The instance is therefore allocated once and intentionally never deleted.
  static Lexer* const sharedInstance = new Lexer;
  return *sharedInstance;
}
|
|
|
|
// Constructs a lexer with no source attached. Call setCode() before lex().
Lexer::Lexer()
  : yylineno(0)
  , restrKeyword(false)
  , eatNextIdentifier(false)
  , stackToken(-1)
  , lastToken(-1)
  , pos(0)
  , code(0)
  , length(0)
#ifndef KJS_PURE_ECMA
  , bol(true)
#endif
  , current(0)
  , next1(0)
  , next2(0)
  , next3(0)
{
  // These members used to stay uninitialised until setCode()/lex() ran, so
  // reading them on a freshly constructed lexer was undefined behaviour.
  // They are assigned here rather than in the initialiser list because the
  // declaration order in lexer.h is not visible from this file.
  state = Start;
  done = false;
  terminator = false;
  delimited = false;
  skipLF = false;
  skipCR = false;
  error = false;

  // Pre-size the scratch buffers and the ownership tables.
  m_buffer8.reserveCapacity(initialReadBufferCapacity);
  m_buffer16.reserveCapacity(initialReadBufferCapacity);
  m_strings.reserveCapacity(initialStringTableCapacity);
  m_identifiers.reserveCapacity(initialStringTableCapacity);
}
|
|
|
|
// Attaches a new piece of source text and resets all scanning state.
//   sourceURL          - stored in m_sourceURL
//   startingLineNumber - initial value of the line counter
//   c, len             - the character buffer to scan and its length; only the
//                        pointer is stored, the characters are not copied
void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
{
  // where the input comes from
  m_sourceURL = sourceURL;
  yylineno = startingLineNumber;
  code = c;
  length = len;
  pos = 0;

  // forget everything left over from a previous run
  stackToken = -1;
  lastToken = -1;
  restrKeyword = false;
  delimited = false;
  eatNextIdentifier = false;
  skipLF = false;
  skipCR = false;
  error = false;
#ifndef KJS_PURE_ECMA
  bol = true;
#endif

  // prime the four-character lookahead window; -1 marks "past the end"
  current = (length > 0) ? code[0].uc : -1;
  next1 = (length > 1) ? code[1].uc : -1;
  next2 = (length > 2) ? code[2].uc : -1;
  next3 = (length > 3) ? code[3].uc : -1;
}
|
|
|
|
void Lexer::shift(unsigned int p)
|
|
{
|
|
// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
|
|
// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
|
|
while (p--) {
|
|
current = next1;
|
|
next1 = next2;
|
|
next2 = next3;
|
|
pos++;
|
|
next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
|
|
}
|
|
}
|
|
|
|
// called on each new line
|
|
void Lexer::nextLine()
|
|
{
|
|
yylineno++;
|
|
#ifndef KJS_PURE_ECMA
|
|
bol = true;
|
|
#endif
|
|
}
|
|
|
|
// Ends the scanning loop of lex() and records s as the final state.
void Lexer::setDone(State s)
{
  done = true;
  state = s;
}
|
|
|
|
int Lexer::lex()
|
|
{
|
|
int token = 0;
|
|
state = Start;
|
|
unsigned short stringType = 0; // either single or double quotes
|
|
m_buffer8.clear();
|
|
m_buffer16.clear();
|
|
done = false;
|
|
terminator = false;
|
|
skipLF = false;
|
|
skipCR = false;
|
|
|
|
// did we push a token on the stack previously ?
|
|
// (after an automatic semicolon insertion)
|
|
if (stackToken >= 0) {
|
|
setDone(Other);
|
|
token = stackToken;
|
|
stackToken = 0;
|
|
}
|
|
|
|
while (!done) {
|
|
if (skipLF && current != '\n') // found \r but not \n afterwards
|
|
skipLF = false;
|
|
if (skipCR && current != '\r') // found \n but not \r afterwards
|
|
skipCR = false;
|
|
if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
|
|
{
|
|
skipLF = false;
|
|
skipCR = false;
|
|
shift(1);
|
|
}
|
|
switch (state) {
|
|
case Start:
|
|
if (isWhiteSpace()) {
|
|
// do nothing
|
|
} else if (current == '/' && next1 == '/') {
|
|
shift(1);
|
|
state = InSingleLineComment;
|
|
} else if (current == '/' && next1 == '*') {
|
|
shift(1);
|
|
state = InMultiLineComment;
|
|
} else if (current == -1) {
|
|
if (!terminator && !delimited) {
|
|
// automatic semicolon insertion if program incomplete
|
|
token = ';';
|
|
stackToken = 0;
|
|
setDone(Other);
|
|
} else
|
|
setDone(Eof);
|
|
} else if (isLineTerminator()) {
|
|
nextLine();
|
|
terminator = true;
|
|
if (restrKeyword) {
|
|
token = ';';
|
|
setDone(Other);
|
|
}
|
|
} else if (current == '"' || current == '\'') {
|
|
state = InString;
|
|
stringType = static_cast<unsigned short>(current);
|
|
} else if (isIdentStart(current)) {
|
|
record16(current);
|
|
state = InIdentifierOrKeyword;
|
|
} else if (current == '\\') {
|
|
state = InIdentifierStartUnicodeEscapeStart;
|
|
} else if (current == '0') {
|
|
record8(current);
|
|
state = InNum0;
|
|
} else if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
state = InNum;
|
|
} else if (current == '.' && isDecimalDigit(next1)) {
|
|
record8(current);
|
|
state = InDecimal;
|
|
#ifndef KJS_PURE_ECMA
|
|
// <!-- marks the beginning of a line comment (for www usage)
|
|
} else if (current == '<' && next1 == '!' &&
|
|
next2 == '-' && next3 == '-') {
|
|
shift(3);
|
|
state = InSingleLineComment;
|
|
// same for -->
|
|
} else if (bol && current == '-' && next1 == '-' && next2 == '>') {
|
|
shift(2);
|
|
state = InSingleLineComment;
|
|
#endif
|
|
} else {
|
|
token = matchPunctuator(current, next1, next2, next3);
|
|
if (token != -1) {
|
|
setDone(Other);
|
|
} else {
|
|
// cerr << "encountered unknown character" << endl;
|
|
setDone(Bad);
|
|
}
|
|
}
|
|
break;
|
|
case InString:
|
|
switch (current) {
|
|
case '\'':
|
|
case '"':
|
|
if (current == stringType) {
|
|
shift(1);
|
|
setDone(String);
|
|
} else {
|
|
record16(current);
|
|
}
|
|
break;
|
|
case '\\':
|
|
state = InEscapeSequence;
|
|
break;
|
|
case '\n':
|
|
case '\r':
|
|
case 0x2028:
|
|
case 0x2029:
|
|
case -1:
|
|
// encountered newline or eof
|
|
setDone(Bad);
|
|
break;
|
|
default:
|
|
record16(current);
|
|
break;
|
|
}
|
|
break;
|
|
// Escape Sequences inside of strings
|
|
case InEscapeSequence:
|
|
if (isOctalDigit(current)) {
|
|
if (current >= '0' && current <= '3' &&
|
|
isOctalDigit(next1) && isOctalDigit(next2)) {
|
|
record16(convertOctal(current, next1, next2));
|
|
shift(2);
|
|
state = InString;
|
|
} else if (isOctalDigit(current) && isOctalDigit(next1)) {
|
|
record16(convertOctal('0', current, next1));
|
|
shift(1);
|
|
state = InString;
|
|
} else if (isOctalDigit(current)) {
|
|
record16(convertOctal('0', '0', current));
|
|
state = InString;
|
|
} else {
|
|
setDone(Bad);
|
|
}
|
|
} else if (current == 'x')
|
|
state = InHexEscape;
|
|
else if (current == 'u')
|
|
state = InUnicodeEscape;
|
|
else if (isLineTerminator()) {
|
|
nextLine();
|
|
state = InString;
|
|
} else {
|
|
record16(singleEscape(static_cast<unsigned short>(current)));
|
|
state = InString;
|
|
}
|
|
break;
|
|
case InHexEscape:
|
|
if (isHexDigit(current) && isHexDigit(next1)) {
|
|
state = InString;
|
|
record16(convertHex(current, next1));
|
|
shift(1);
|
|
} else {
|
|
setDone(Bad);
|
|
}
|
|
break;
|
|
case InUnicodeEscape:
|
|
if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
|
|
record16(convertUnicode(current, next1, next2, next3));
|
|
shift(3);
|
|
state = InString;
|
|
} else if (current == stringType) {
|
|
record16('u');
|
|
shift(1);
|
|
setDone(String);
|
|
} else {
|
|
setDone(Bad);
|
|
}
|
|
break;
|
|
case InSingleLineComment:
|
|
if (isLineTerminator()) {
|
|
nextLine();
|
|
terminator = true;
|
|
if (restrKeyword) {
|
|
token = ';';
|
|
setDone(Other);
|
|
} else
|
|
state = Start;
|
|
} else if (current == -1) {
|
|
setDone(Eof);
|
|
}
|
|
break;
|
|
case InMultiLineComment:
|
|
if (current == -1) {
|
|
setDone(Bad);
|
|
} else if (isLineTerminator()) {
|
|
nextLine();
|
|
} else if (current == '*' && next1 == '/') {
|
|
state = Start;
|
|
shift(1);
|
|
}
|
|
break;
|
|
case InIdentifierOrKeyword:
|
|
case InIdentifier:
|
|
if (isIdentPart(current))
|
|
record16(current);
|
|
else if (current == '\\')
|
|
state = InIdentifierPartUnicodeEscapeStart;
|
|
else
|
|
setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
|
|
break;
|
|
case InNum0:
|
|
if (current == 'x' || current == 'X') {
|
|
record8(current);
|
|
state = InHex;
|
|
} else if (current == '.') {
|
|
record8(current);
|
|
state = InDecimal;
|
|
} else if (current == 'e' || current == 'E') {
|
|
record8(current);
|
|
state = InExponentIndicator;
|
|
} else if (isOctalDigit(current)) {
|
|
record8(current);
|
|
state = InOctal;
|
|
} else if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
state = InDecimal;
|
|
} else {
|
|
setDone(Number);
|
|
}
|
|
break;
|
|
case InHex:
|
|
if (isHexDigit(current)) {
|
|
record8(current);
|
|
} else {
|
|
setDone(Hex);
|
|
}
|
|
break;
|
|
case InOctal:
|
|
if (isOctalDigit(current)) {
|
|
record8(current);
|
|
}
|
|
else if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
state = InDecimal;
|
|
} else
|
|
setDone(Octal);
|
|
break;
|
|
case InNum:
|
|
if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
} else if (current == '.') {
|
|
record8(current);
|
|
state = InDecimal;
|
|
} else if (current == 'e' || current == 'E') {
|
|
record8(current);
|
|
state = InExponentIndicator;
|
|
} else
|
|
setDone(Number);
|
|
break;
|
|
case InDecimal:
|
|
if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
} else if (current == 'e' || current == 'E') {
|
|
record8(current);
|
|
state = InExponentIndicator;
|
|
} else
|
|
setDone(Number);
|
|
break;
|
|
case InExponentIndicator:
|
|
if (current == '+' || current == '-') {
|
|
record8(current);
|
|
} else if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
state = InExponent;
|
|
} else
|
|
setDone(Bad);
|
|
break;
|
|
case InExponent:
|
|
if (isDecimalDigit(current)) {
|
|
record8(current);
|
|
} else
|
|
setDone(Number);
|
|
break;
|
|
case InIdentifierStartUnicodeEscapeStart:
|
|
if (current == 'u')
|
|
state = InIdentifierStartUnicodeEscape;
|
|
else
|
|
setDone(Bad);
|
|
break;
|
|
case InIdentifierPartUnicodeEscapeStart:
|
|
if (current == 'u')
|
|
state = InIdentifierPartUnicodeEscape;
|
|
else
|
|
setDone(Bad);
|
|
break;
|
|
case InIdentifierStartUnicodeEscape:
|
|
if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
|
|
setDone(Bad);
|
|
break;
|
|
}
|
|
token = convertUnicode(current, next1, next2, next3).uc;
|
|
shift(3);
|
|
if (!isIdentStart(token)) {
|
|
setDone(Bad);
|
|
break;
|
|
}
|
|
record16(token);
|
|
state = InIdentifier;
|
|
break;
|
|
case InIdentifierPartUnicodeEscape:
|
|
if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
|
|
setDone(Bad);
|
|
break;
|
|
}
|
|
token = convertUnicode(current, next1, next2, next3).uc;
|
|
shift(3);
|
|
if (!isIdentPart(token)) {
|
|
setDone(Bad);
|
|
break;
|
|
}
|
|
record16(token);
|
|
state = InIdentifier;
|
|
break;
|
|
default:
|
|
assert(!"Unhandled state in switch statement");
|
|
}
|
|
|
|
// move on to the next character
|
|
if (!done)
|
|
shift(1);
|
|
#ifndef KJS_PURE_ECMA
|
|
if (state != Start && state != InMultiLineComment)
|
|
bol = false;
|
|
#endif
|
|
}
|
|
|
|
// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
|
|
if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
|
|
state = Bad;
|
|
|
|
// terminate string
|
|
m_buffer8.append('\0');
|
|
|
|
#ifdef KJS_DEBUG_LEX
|
|
fprintf(stderr, "line: %d ", lineNo());
|
|
fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
|
|
fprintf(stderr, "%s ", m_buffer8.data());
|
|
#endif
|
|
|
|
double dval = 0;
|
|
if (state == Number) {
|
|
dval = kjs_strtod(m_buffer8.data(), 0L);
|
|
} else if (state == Hex) { // scan hex numbers
|
|
const char *p = m_buffer8.data() + 2;
|
|
while (char c = *p++) {
|
|
dval *= 16;
|
|
dval += convertHex(c);
|
|
}
|
|
|
|
if (dval >= mantissaOverflowLowerBound)
|
|
dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
|
|
|
|
state = Number;
|
|
} else if (state == Octal) { // scan octal number
|
|
const char *p = m_buffer8.data() + 1;
|
|
while (char c = *p++) {
|
|
dval *= 8;
|
|
dval += c - '0';
|
|
}
|
|
|
|
if (dval >= mantissaOverflowLowerBound)
|
|
dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
|
|
|
|
state = Number;
|
|
}
|
|
|
|
#ifdef KJS_DEBUG_LEX
|
|
switch (state) {
|
|
case Eof:
|
|
printf("(EOF)\n");
|
|
break;
|
|
case Other:
|
|
printf("(Other)\n");
|
|
break;
|
|
case Identifier:
|
|
printf("(Identifier)/(Keyword)\n");
|
|
break;
|
|
case String:
|
|
printf("(String)\n");
|
|
break;
|
|
case Number:
|
|
printf("(Number)\n");
|
|
break;
|
|
default:
|
|
printf("(unknown)");
|
|
}
|
|
#endif
|
|
|
|
if (state != Identifier && eatNextIdentifier)
|
|
eatNextIdentifier = false;
|
|
|
|
restrKeyword = false;
|
|
delimited = false;
|
|
kjsyylloc.first_line = yylineno; // ???
|
|
kjsyylloc.last_line = yylineno;
|
|
|
|
switch (state) {
|
|
case Eof:
|
|
token = 0;
|
|
break;
|
|
case Other:
|
|
if(token == '}' || token == ';') {
|
|
delimited = true;
|
|
}
|
|
break;
|
|
case IdentifierOrKeyword:
|
|
if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
|
|
case Identifier:
|
|
// Lookup for keyword failed, means this is an identifier
|
|
// Apply anonymous-function hack below (eat the identifier)
|
|
if (eatNextIdentifier) {
|
|
eatNextIdentifier = false;
|
|
token = lex();
|
|
break;
|
|
}
|
|
kjsyylval.ident = makeIdentifier(m_buffer16);
|
|
token = IDENT;
|
|
break;
|
|
}
|
|
|
|
eatNextIdentifier = false;
|
|
// Hack for "f = function somename() { ... }", too hard to get into the grammar
|
|
if (token == FUNCTION && lastToken == '=' )
|
|
eatNextIdentifier = true;
|
|
|
|
if (token == CONTINUE || token == BREAK ||
|
|
token == RETURN || token == THROW)
|
|
restrKeyword = true;
|
|
break;
|
|
case String:
|
|
kjsyylval.ustr = makeUString(m_buffer16);
|
|
token = STRING;
|
|
break;
|
|
case Number:
|
|
kjsyylval.dval = dval;
|
|
token = NUMBER;
|
|
break;
|
|
case Bad:
|
|
#ifdef KJS_DEBUG_LEX
|
|
fprintf(stderr, "KJS: yylex: ERROR.\n");
|
|
#endif
|
|
error = true;
|
|
return -1;
|
|
default:
|
|
assert(!"unhandled numeration value in switch");
|
|
error = true;
|
|
return -1;
|
|
}
|
|
lastToken = token;
|
|
return token;
|
|
}
|
|
|
|
bool Lexer::isWhiteSpace() const
|
|
{
|
|
return CommonUnicode::isWhiteSpace(current);
|
|
}
|
|
|
|
bool Lexer::isLineTerminator()
|
|
{
|
|
bool cr = (current == '\r');
|
|
bool lf = (current == '\n');
|
|
if (cr)
|
|
skipLF = true;
|
|
else if (lf)
|
|
skipCR = true;
|
|
return cr || lf || current == 0x2028 || current == 0x2029;
|
|
}
|
|
|
|
// Function type of the pluggable predicates that classify a character as
// identifier start / identifier part.
typedef bool (CharacterCheck)(int c);
|
|
|
|
static bool isIdentStartLibC(int c)
|
|
{
|
|
return (category(c) & (Letter_Uppercase | Letter_Lowercase |
|
|
Letter_Titlecase | Letter_Modifier | Letter_Other))
|
|
|| c == '$' || c == '_';
|
|
}
|
|
|
|
static bool isIdentPartLibC(int c)
|
|
{
|
|
return (category(c) & (Letter_Uppercase | Letter_Lowercase |
|
|
Letter_Titlecase | Letter_Modifier | Letter_Other |
|
|
Mark_NonSpacing | Mark_SpacingCombining |
|
|
Number_DecimalDigit | Punctuation_Connector))
|
|
|| c == '$' || c == '_';
|
|
}
|
|
|
|
// Predicate currently used by Lexer::isIdentStart(); starts out as the
// libc-based default and can be replaced via Lexer::setIdentStartChecker().
static CharacterCheck *identStart = ::isIdentStartLibC;
|
|
// Predicate currently used by Lexer::isIdentPart(); starts out as the
// libc-based default and can be replaced via Lexer::setIdentPartChecker().
static CharacterCheck *identPart = ::isIdentPartLibC;
|
|
|
|
void Lexer::setIdentStartChecker(bool (*f)(int c))
|
|
{
|
|
identStart = f;
|
|
}
|
|
|
|
void Lexer::setIdentPartChecker(bool (*f)(int c))
|
|
{
|
|
identPart = f;
|
|
}
|
|
|
|
bool Lexer::isIdentStart(int c)
|
|
{
|
|
return (*identStart)(c);
|
|
}
|
|
|
|
bool Lexer::isIdentPart(int c)
|
|
{
|
|
return (*identPart)(c);
|
|
}
|
|
|
|
// True for the ASCII digits '0' through '9' only.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
|
|
|
|
// True for the ASCII hexadecimal digits 0-9, a-f and A-F.
bool Lexer::isHexDigit(int c)
{
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'f')
    return true;
  return c >= 'A' && c <= 'F';
}
|
|
|
|
// True for the ASCII octal digits '0' through '7'.
bool Lexer::isOctalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '7';
}
|
|
|
|
int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
|
|
{
|
|
if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
|
|
shift(4);
|
|
return URSHIFTEQUAL;
|
|
} else if (c1 == '=' && c2 == '=' && c3 == '=') {
|
|
shift(3);
|
|
return STREQ;
|
|
} else if (c1 == '!' && c2 == '=' && c3 == '=') {
|
|
shift(3);
|
|
return STRNEQ;
|
|
} else if (c1 == '>' && c2 == '>' && c3 == '>') {
|
|
shift(3);
|
|
return URSHIFT;
|
|
} else if (c1 == '<' && c2 == '<' && c3 == '=') {
|
|
shift(3);
|
|
return LSHIFTEQUAL;
|
|
} else if (c1 == '>' && c2 == '>' && c3 == '=') {
|
|
shift(3);
|
|
return RSHIFTEQUAL;
|
|
} else if (c1 == '<' && c2 == '=') {
|
|
shift(2);
|
|
return LE;
|
|
} else if (c1 == '>' && c2 == '=') {
|
|
shift(2);
|
|
return GE;
|
|
} else if (c1 == '!' && c2 == '=') {
|
|
shift(2);
|
|
return NE;
|
|
} else if (c1 == '+' && c2 == '+') {
|
|
shift(2);
|
|
if (terminator)
|
|
return AUTOPLUSPLUS;
|
|
else
|
|
return PLUSPLUS;
|
|
} else if (c1 == '-' && c2 == '-') {
|
|
shift(2);
|
|
if (terminator)
|
|
return AUTOMINUSMINUS;
|
|
else
|
|
return MINUSMINUS;
|
|
} else if (c1 == '=' && c2 == '=') {
|
|
shift(2);
|
|
return EQEQ;
|
|
} else if (c1 == '+' && c2 == '=') {
|
|
shift(2);
|
|
return PLUSEQUAL;
|
|
} else if (c1 == '-' && c2 == '=') {
|
|
shift(2);
|
|
return MINUSEQUAL;
|
|
} else if (c1 == '*' && c2 == '=') {
|
|
shift(2);
|
|
return MULTEQUAL;
|
|
} else if (c1 == '/' && c2 == '=') {
|
|
shift(2);
|
|
return DIVEQUAL;
|
|
} else if (c1 == '&' && c2 == '=') {
|
|
shift(2);
|
|
return ANDEQUAL;
|
|
} else if (c1 == '^' && c2 == '=') {
|
|
shift(2);
|
|
return XOREQUAL;
|
|
} else if (c1 == '%' && c2 == '=') {
|
|
shift(2);
|
|
return MODEQUAL;
|
|
} else if (c1 == '|' && c2 == '=') {
|
|
shift(2);
|
|
return OREQUAL;
|
|
} else if (c1 == '<' && c2 == '<') {
|
|
shift(2);
|
|
return LSHIFT;
|
|
} else if (c1 == '>' && c2 == '>') {
|
|
shift(2);
|
|
return RSHIFT;
|
|
} else if (c1 == '&' && c2 == '&') {
|
|
shift(2);
|
|
return AND;
|
|
} else if (c1 == '|' && c2 == '|') {
|
|
shift(2);
|
|
return OR;
|
|
}
|
|
|
|
switch(c1) {
|
|
case '=':
|
|
case '>':
|
|
case '<':
|
|
case ',':
|
|
case '!':
|
|
case '~':
|
|
case '?':
|
|
case ':':
|
|
case '.':
|
|
case '+':
|
|
case '-':
|
|
case '*':
|
|
case '/':
|
|
case '&':
|
|
case '|':
|
|
case '^':
|
|
case '%':
|
|
case '(':
|
|
case ')':
|
|
case '{':
|
|
case '}':
|
|
case '[':
|
|
case ']':
|
|
case ';':
|
|
shift(1);
|
|
return static_cast<int>(c1);
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// Maps the character following a backslash in a string literal to the
// character it denotes. Characters without a special meaning (including
// '"', '\'' and '\\') stand for themselves.
unsigned short Lexer::singleEscape(unsigned short c)
{
  switch (c) {
  case 'b':
    return '\b'; // 0x08
  case 't':
    return '\t'; // 0x09
  case 'n':
    return '\n'; // 0x0A
  case 'v':
    return '\v'; // 0x0B
  case 'f':
    return '\f'; // 0x0C
  case 'r':
    return '\r'; // 0x0D
  default:
    return c;
  }
}
|
|
|
|
// Combines three octal digit characters (most significant first) into the
// value they denote. The arguments are not validated here.
unsigned short Lexer::convertOctal(int c1, int c2, int c3)
{
  const int high = c1 - '0';
  const int middle = c2 - '0';
  const int low = c3 - '0';
  return static_cast<unsigned short>(high * 64 + middle * 8 + low);
}
|
|
|
|
// Returns the numeric value of a single hexadecimal digit character.
// Anything that is neither 0-9 nor a-f is treated like an upper-case digit;
// the argument is not validated here.
unsigned char Lexer::convertHex(int c)
{
  int value;
  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'a' && c <= 'f')
    value = c - 'a' + 10;
  else
    value = c - 'A' + 10;
  return static_cast<unsigned char>(value);
}
|
|
|
|
unsigned char Lexer::convertHex(int c1, int c2)
|
|
{
|
|
return ((convertHex(c1) << 4) + convertHex(c2));
|
|
}
|
|
|
|
// Builds the UChar denoted by the four hexadecimal digit characters of a
// \uXXXX escape: c1/c2 form the high byte, c3/c4 the low byte.
KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
{
  const unsigned char highByte = convertHex(c1, c2);
  const unsigned char lowByte = convertHex(c3, c4);
  return KJS::UChar(highByte, lowByte);
}
|
|
|
|
void Lexer::record8(int c)
|
|
{
|
|
ASSERT(c >= 0);
|
|
ASSERT(c <= 0xff);
|
|
m_buffer8.append(c);
|
|
}
|
|
|
|
void Lexer::record16(int c)
|
|
{
|
|
ASSERT(c >= 0);
|
|
ASSERT(c <= USHRT_MAX);
|
|
record16(UChar(static_cast<unsigned short>(c)));
|
|
}
|
|
|
|
// Appends the character c to the 16-bit buffer used for identifiers and
// strings.
void Lexer::record16(KJS::UChar c)
{
  m_buffer16.append(c);
}
|
|
|
|
bool Lexer::scanRegExp()
|
|
{
|
|
m_buffer16.clear();
|
|
bool lastWasEscape = false;
|
|
bool inBrackets = false;
|
|
|
|
while (1) {
|
|
if (isLineTerminator() || current == -1)
|
|
return false;
|
|
else if (current != '/' || lastWasEscape == true || inBrackets == true)
|
|
{
|
|
// keep track of '[' and ']'
|
|
if (!lastWasEscape) {
|
|
if ( current == '[' && !inBrackets )
|
|
inBrackets = true;
|
|
if ( current == ']' && inBrackets )
|
|
inBrackets = false;
|
|
}
|
|
record16(current);
|
|
lastWasEscape =
|
|
!lastWasEscape && (current == '\\');
|
|
} else { // end of regexp
|
|
m_pattern = UString(m_buffer16);
|
|
m_buffer16.clear();
|
|
shift(1);
|
|
break;
|
|
}
|
|
shift(1);
|
|
}
|
|
|
|
while (isIdentPart(current)) {
|
|
record16(current);
|
|
shift(1);
|
|
}
|
|
m_flags = UString(m_buffer16);
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
void Lexer::clear()
|
|
{
|
|
deleteAllValues(m_strings);
|
|
Vector<UString*> newStrings;
|
|
newStrings.reserveCapacity(initialStringTableCapacity);
|
|
m_strings.swap(newStrings);
|
|
deleteAllValues(m_identifiers);
|
|
Vector<KJS::Identifier*> newIdentifiers;
|
|
newIdentifiers.reserveCapacity(initialStringTableCapacity);
|
|
m_identifiers.swap(newIdentifiers);
|
|
|
|
Vector<char> newBuffer8;
|
|
newBuffer8.reserveCapacity(initialReadBufferCapacity);
|
|
m_buffer8.swap(newBuffer8);
|
|
|
|
Vector<UChar> newBuffer16;
|
|
newBuffer16.reserveCapacity(initialReadBufferCapacity);
|
|
m_buffer16.swap(newBuffer16);
|
|
|
|
m_pattern = 0;
|
|
m_flags = 0;
|
|
m_sourceURL = 0;
|
|
}
|
|
|
|
// Creates an Identifier from the characters in buffer. The lexer keeps
// ownership: the object is registered in m_identifiers and deleted by clear().
Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
{
  m_identifiers.append(new KJS::Identifier(buffer.data(), buffer.size()));
  return m_identifiers.last();
}
|
|
|
|
// Creates a UString from the characters in buffer. The lexer keeps
// ownership: the object is registered in m_strings and deleted by clear().
UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
{
  m_strings.append(new UString(buffer));
  return m_strings.last();
}
|
|
|
|
} // namespace KJS
|