kde-extraapps/kdevelop/languages/cpp/parser/lexer.cpp
2015-07-26 14:23:17 +03:00

1124 lines
26 KiB
C++

/* This file is part of KDevelop
Copyright 2002-2005 Roberto Raggi <roberto@kdevelop.org>
Copyright 2007-2008 David Nolden <david.nolden.kdevelop@art-master.de>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "lexer.h"
#include "tokens.h"
#include "control.h"
#include "parsesession.h"
#include "rpp/pp-scanner.h"
#include <cctype>
#include <util/kdevvarlengtharray.h>
#include <kdebug.h>
#include <klocalizedstring.h>
void TokenStream::splitRightShift(uint index)
{
Q_ASSERT(kind(index) == Token_rightshift);
// change kind of current token and adapt size
Token &current_token = (*this)[index];
Q_ASSERT(current_token.size == 2);
current_token.size = 1;
current_token.kind = '>';
// copy to next token (i.e. the new one) and adapt position
Token next_token = current_token;
next_token.position += 1;
insert(index+1, next_token);
}
KDevelop::IndexedString TokenStream::symbol(const Token& t) const
{
if(t.size == 1)
return KDevelop::IndexedString::fromIndex(session->contents()[t.position]);
else
return KDevelop::IndexedString();
}
uint TokenStream::symbolIndex(const Token& t) const
{
return session->contents()[t.position];
}
QByteArray TokenStream::symbolByteArray(const Token& t) const
{
if (t.size == 0) // esp. for EOF
return QByteArray();
return stringFromContents(session->contentsVector(), t.position, t.size);
}
QString TokenStream::symbolString(const Token& t) const
{
return QString::fromUtf8(symbolByteArray(t));
}
uint TokenStream::symbolLength(const Token& t) const
{
uint ret = 0;
for(uint a = t.position; a < t.position+t.size; ++a) {
ret += KDevelop::IndexedString::lengthFromIndex(session->contents()[a]);
}
return ret;
}
QString Lexer::SpecialCursor::toString() const
{
return KDevelop::IndexedString::fromIndex(*current).str();
}
/**
* Returns the character BEHIND the found comment
* */
void Lexer::skipComment()
{
///A nearly exact copy of rpp::pp_skip_comment_or_divop::operator()
enum {
MAYBE_BEGIN,
BEGIN,
MAYBE_END,
END,
IN_COMMENT,
IN_CXX_COMMENT
} state (MAYBE_BEGIN);
while (cursor < endCursor && *cursor) {
switch (state) {
case MAYBE_BEGIN:
if (*cursor != '/')
return;
state = BEGIN;
break;
case BEGIN:
if (*cursor == '*')
state = IN_COMMENT;
else if (*cursor == '/')
state = IN_CXX_COMMENT;
else
return;
break;
case IN_COMMENT:
if( *cursor == '\n' ) {
scan_newline();
continue;
}
if (*cursor == '*')
state = MAYBE_END;
break;
case IN_CXX_COMMENT:
if (*cursor == '\n')
return;
break;
case MAYBE_END:
if (*cursor == '/')
state = END;
else if (*cursor != '*')
state = IN_COMMENT;
if( *cursor == '\n' ) {
scan_newline();
continue;
}
break;
case END:
return;
}
++cursor;
}
return;
}
const uint index_size = 200;
KDevVarLengthArray<KDevVarLengthArray<QPair<uint, TOKEN_KIND>, 10 >, index_size > createIndicesForTokens() {
KDevVarLengthArray<KDevVarLengthArray<QPair<uint, TOKEN_KIND>, 10 >, index_size > ret;
ret.resize(index_size);
#define ADD_TOKEN(string) ret[KDevelop::IndexedString(#string).index() % index_size].append(qMakePair(KDevelop::IndexedString(#string).index(), Token_ ## string));
#define ADD_TOKEN2(string, tok) ret[KDevelop::IndexedString(#string).index() % index_size].append(qMakePair(KDevelop::IndexedString(#string).index(), Token_ ## tok));
ADD_TOKEN(K_DCOP);
ADD_TOKEN(Q_OBJECT);
ADD_TOKEN(__typeof);
ADD_TOKEN2(__typeof__, __typeof);
ADD_TOKEN2(typeof, __typeof);
ADD_TOKEN(and);
ADD_TOKEN(and_eq);
ADD_TOKEN(asm);
ADD_TOKEN2(__asm, asm);
ADD_TOKEN2(__asm__, asm)
ADD_TOKEN(auto);
ADD_TOKEN(bitand);
ADD_TOKEN(bitor);
ADD_TOKEN(bool);
ADD_TOKEN(break);
ADD_TOKEN(case);
ADD_TOKEN(catch);
ADD_TOKEN(char);
ADD_TOKEN(char16_t);
ADD_TOKEN(char32_t);
ADD_TOKEN(class);
ADD_TOKEN(compl);
ADD_TOKEN(const);
ADD_TOKEN(constexpr);
ADD_TOKEN(const_cast);
ADD_TOKEN(continue);
ADD_TOKEN(decltype);
ADD_TOKEN2(__decltype, decltype);
ADD_TOKEN(default);
ADD_TOKEN(delete);
ADD_TOKEN(do);
ADD_TOKEN(double);
ADD_TOKEN(dynamic_cast);
ADD_TOKEN(else);
//ADD_TOKEN(emit);
ADD_TOKEN(enum);
ADD_TOKEN(explicit);
ADD_TOKEN(export);
ADD_TOKEN(extern);
ADD_TOKEN(false);
ADD_TOKEN(float);
ADD_TOKEN(final);
ADD_TOKEN(for);
ADD_TOKEN(friend);
ADD_TOKEN(goto);
ADD_TOKEN(if);
ADD_TOKEN(inline);
ADD_TOKEN2(__inline__, inline);
ADD_TOKEN2(__inline, inline);
ADD_TOKEN(int);
ADD_TOKEN(k_dcop);
ADD_TOKEN(k_dcop_signals);
ADD_TOKEN(long);
ADD_TOKEN(mutable);
ADD_TOKEN(namespace);
ADD_TOKEN(new);
ADD_TOKEN(noexcept);
ADD_TOKEN(not);
ADD_TOKEN(not_eq);
ADD_TOKEN(nullptr);
ADD_TOKEN(operator);
ADD_TOKEN(or);
ADD_TOKEN(or_eq);
ADD_TOKEN(override);
ADD_TOKEN(private);
ADD_TOKEN(protected);
ADD_TOKEN(public);
ADD_TOKEN(register);
ADD_TOKEN(reinterpret_cast);
ADD_TOKEN(return);
ADD_TOKEN(short);
ADD_TOKEN(__qt_signals__);
ADD_TOKEN(signed);
ADD_TOKEN2(__signed__, signed);
ADD_TOKEN(sizeof);
ADD_TOKEN(__qt_slots__);
ADD_TOKEN(static);
ADD_TOKEN(static_assert);
ADD_TOKEN(static_cast);
ADD_TOKEN(struct);
ADD_TOKEN(switch);
ADD_TOKEN(template);
ADD_TOKEN(this);
ADD_TOKEN(thread_local);
ADD_TOKEN2(__thread, thread_local);
ADD_TOKEN2(__thread__, thread_local);
ADD_TOKEN(throw);
ADD_TOKEN(true);
ADD_TOKEN(try);
ADD_TOKEN(typedef);
ADD_TOKEN(typeid);
ADD_TOKEN(typename);
ADD_TOKEN(union);
ADD_TOKEN(unsigned);
ADD_TOKEN2(__unsigned__, unsigned);
ADD_TOKEN(using);
ADD_TOKEN(virtual);
ADD_TOKEN(void);
ADD_TOKEN(volatile);
ADD_TOKEN2(__volatile__, volatile);
ADD_TOKEN(wchar_t);
ADD_TOKEN(while);
ADD_TOKEN(xor);
ADD_TOKEN(xor_eq);
ADD_TOKEN(__qt_signal__);
ADD_TOKEN(__qt_slot__);
ADD_TOKEN(__qt_property__);
return ret;
}
scan_fun_ptr Lexer::s_scan_table[256];
bool Lexer::s_initialized = false;
Lexer::Lexer(Control *c)
: session(0),
control(c),
m_leaveSize(false)
{
}
void Lexer::tokenize(ParseSession* _session)
{
session = _session;
TokenStream* stream = session->token_stream;
Q_ASSERT(stream->isEmpty());
if (!s_initialized)
initialize_scan_table();
m_canMergeComment = false;
m_firstInLine = true;
m_leaveSize = false;
{
Token eof;
eof.kind = Token_EOF;
eof.position = 0;
eof.size = 0;
stream->append(eof);
index = 1;
}
cursor.current = session->contents();
endCursor = session->contents() + session->contentsVector().size();
while(endCursor-1 >= session->contents() && (*(endCursor-1)) == 0)
--endCursor;
while (cursor < endCursor) {
Q_ASSERT(static_cast<uint>(stream->size()) == index);
size_t previousIndex = index;
{
Token token{cursor.offsetIn(session->contents()), 0, Token_EOF};
stream->append(token);
}
Token* current_token = &(session->token_stream->last());
if(cursor.isChar()) {
(this->*s_scan_table[((uchar)*cursor)])();
}else{
//check for utf8 strings
static const uint u8Index = KDevelop::IndexedString("u8").index();
//check for raw strings
static const uint u8RIndex = KDevelop::IndexedString("u8R").index();
static const uint uRIndex = KDevelop::IndexedString("uR").index();
static const uint URIndex = KDevelop::IndexedString("UR").index();
static const uint LRIndex = KDevelop::IndexedString("LR").index();
if (*cursor.current == u8Index) {
// check for utf8 string
// not calling scan_identifier_or_literal as u8
// is only supported for strings, not characters
if (*(cursor+1) == '"') {
++cursor;
scan_string_constant();
} else {
scan_identifier_or_keyword();
}
} else if (*cursor.current == u8RIndex || *cursor.current == uRIndex
|| *cursor.current == URIndex || *cursor.current == LRIndex)
{
// probably raw string
scan_raw_string_or_identifier();
} else {
//The cursor represents an identifier
scan_identifier_or_keyword();
}
}
if(!m_leaveSize)
current_token->size = cursor.offsetIn( session->contents() ) - current_token->position;
Q_ASSERT(m_leaveSize || (cursor.current == session->contents() + current_token->position + current_token->size));
Q_ASSERT(current_token->position + current_token->size <= (uint)session->contentsVector().size());
Q_ASSERT(previousIndex == index-1 || previousIndex == index); //Never parse more than 1 token, because that won't be initialized correctly
m_leaveSize = false;
if(previousIndex != index)
m_firstInLine = false;
else // skipped index, remove last appended token again
stream->pop_back();
}
{
Token eof;
eof.kind = Token_EOF;
eof.position = cursor.offsetIn( session->contents() );
eof.size = 0;
stream->append(eof);
}
stream->squeeze();
}
void Lexer::initialize_scan_table()
{
s_initialized = true;
for (int i=0; i<256; ++i)
{
if (isspace(i))
s_scan_table[i] = &Lexer::scan_white_spaces;
else if (isalpha(i) || i == '_')
s_scan_table[i] = &Lexer::scan_identifier_or_keyword;
else if (isdigit(i))
s_scan_table[i] = &Lexer::scan_int_constant;
else
s_scan_table[i] = &Lexer::scan_invalid_input;
}
s_scan_table[int('L')] = &Lexer::scan_identifier_or_literal;
s_scan_table[int('u')] = &Lexer::scan_identifier_or_literal;
s_scan_table[int('U')] = &Lexer::scan_identifier_or_literal;
s_scan_table[int('R')] = &Lexer::scan_raw_string_or_identifier;
s_scan_table[int('\n')] = &Lexer::scan_newline;
s_scan_table[int('#')] = &Lexer::scan_preprocessor;
s_scan_table[int('\'')] = &Lexer::scan_char_constant;
s_scan_table[int('"')] = &Lexer::scan_string_constant;
s_scan_table[int('.')] = &Lexer::scan_int_constant;
s_scan_table[int('!')] = &Lexer::scan_not;
s_scan_table[int('%')] = &Lexer::scan_remainder;
s_scan_table[int('&')] = &Lexer::scan_and;
s_scan_table[int('(')] = &Lexer::scan_left_paren;
s_scan_table[int(')')] = &Lexer::scan_right_paren;
s_scan_table[int('*')] = &Lexer::scan_star;
s_scan_table[int('+')] = &Lexer::scan_plus;
s_scan_table[int(',')] = &Lexer::scan_comma;
s_scan_table[int('-')] = &Lexer::scan_minus;
s_scan_table[int('/')] = &Lexer::scan_divide;
s_scan_table[int(':')] = &Lexer::scan_colon;
s_scan_table[int(';')] = &Lexer::scan_semicolon;
s_scan_table[int('<')] = &Lexer::scan_less;
s_scan_table[int('=')] = &Lexer::scan_equal;
s_scan_table[int('>')] = &Lexer::scan_greater;
s_scan_table[int('?')] = &Lexer::scan_question;
s_scan_table[int('[')] = &Lexer::scan_left_bracket;
s_scan_table[int(']')] = &Lexer::scan_right_bracket;
s_scan_table[int('^')] = &Lexer::scan_xor;
s_scan_table[int('{')] = &Lexer::scan_left_brace;
s_scan_table[int('|')] = &Lexer::scan_or;
s_scan_table[int('}')] = &Lexer::scan_right_brace;
s_scan_table[int('~')] = &Lexer::scan_tilde;
s_scan_table[0] = &Lexer::scan_EOF;
}
void Lexer::scan_preprocessor()
{
while (cursor < endCursor && *cursor && *cursor != '\n')
++cursor;
if (*cursor != '\n')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("expected end of line");
control->reportProblem(p);
}
}
void Lexer::scan_char_constant()
{
//const char *begin = cursor;
++cursor;
while (cursor < endCursor && *cursor && *cursor != '\'')
{
if (*cursor == '\n')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("unexpected new line");
control->reportProblem(p);
break;
}
if (*cursor == '\\' && (cursor.current + 1) < endCursor)
++cursor;
++cursor;
}
if (*cursor != '\'')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("expected '");
control->reportProblem(p);
}
else
{
++cursor;
}
(*session->token_stream)[index++].kind = Token_char_literal;
}
void Lexer::scan_string_constant()
{
//const char *begin = cursor;
++cursor;
while (cursor < endCursor && *cursor && *cursor != '"')
{
if (*cursor == '\n')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("unexpected new line");
control->reportProblem(p);
break;
}
if (*cursor == '\\' && (cursor.current + 1) < endCursor)
++cursor;
++cursor;
}
if (*cursor != '"')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("expected \"");
control->reportProblem(p);
}
else
{
++cursor;
}
(*session->token_stream)[index++].kind = Token_string_literal;
}
void Lexer::scan_raw_string_constant()
{
Q_ASSERT(*cursor == '"');
++cursor;
(*session->token_stream)[index++].kind = Token_string_literal;
// find delimiter
KDevVarLengthArray<uint, 16> delim;
// NOTE: actually the spec says the delim should not be longer
// than 16 *chars* but due to string concatenation
// we don't really care about that and only look for
// max. 16 - chars or strings - until we find a proper '(' char
int length = 0;
while (cursor < endCursor && *cursor && *cursor != '(' && length < 16)
{
delim.append(*cursor.current);
++cursor;
++length;
}
if (*cursor != '(')
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("expected R\"delim(");
control->reportProblem(p);
return;
}
++cursor;
// parse raw string
bool delimFound = false;
while (cursor < endCursor && *cursor)
{
if (*cursor == ')') {
++cursor;
// check for end delimiter
int i = 0;
while(i < delim.size() && cursor < endCursor && *cursor && *cursor.current == delim[i]) {
++cursor;
++i;
}
if (i == delim.size() && cursor < endCursor && *cursor == '"') {
++cursor;
delimFound = true;
break;
}
} else {
++cursor;
}
}
if (!delimFound)
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription("expected )delim\"");
control->reportProblem(p);
}
}
void Lexer::scan_newline()
{
++cursor;
m_firstInLine = true;
}
void Lexer::scan_white_spaces()
{
while (cursor < endCursor && isspace(*cursor))
{
if (*cursor == '\n')
scan_newline();
else
++cursor;
}
}
void Lexer::scan_identifier_or_literal()
{
switch (*(cursor + 1))
{
case '\'':
++cursor;
scan_char_constant();
break;
case '\"':
++cursor;
scan_string_constant();
break;
default:
scan_identifier_or_keyword();
break;
}
}
void Lexer::scan_raw_string_or_identifier()
{
if (*(cursor+1) == '"')
{
++cursor;
scan_raw_string_constant();
}
else
{
scan_identifier_or_keyword();
}
}
void Lexer::scan_identifier_or_keyword()
{
if(!(cursor < endCursor))
return;
//We have to merge symbols tokenized separately, they may have been contracted using ##
SpecialCursor nextCursor(cursor);
++nextCursor;
while(nextCursor < endCursor && (!isCharacter(*(nextCursor.current)) || isLetterOrNumber(*nextCursor.current) || characterFromIndex(*nextCursor.current) == '_')) {
//Fortunately this shouldn't happen too often, only when ## is used within the preprocessor
KDevelop::IndexedString mergedSymbol(KDevelop::IndexedString::fromIndex(*(cursor.current)).byteArray() + KDevelop::IndexedString::fromIndex(*(nextCursor.current)).byteArray());
(*cursor.current) = mergedSymbol.index();
(*nextCursor.current) = 0;
++nextCursor;
}
uint bucket = (*cursor.current) % index_size;
//A very simple lookup table: First level contains all pairs grouped by with (index % index_size), then there is a simple list
static const KDevVarLengthArray<KDevVarLengthArray<QPair<uint, TOKEN_KIND>, 10 >, index_size > indicesForTokens = createIndicesForTokens();
for(int a = 0; a < indicesForTokens[bucket].size(); ++a) {
if(indicesForTokens[bucket][a].first == *cursor.current) {
(*session->token_stream)[index++].kind = indicesForTokens[bucket][a].second;
++cursor;
return;
}
}
if(*cursor.current != 0) // If the index is zero, then the string is empty. Never create empty identifier tokens.
{
m_leaveSize = true; //Since we may have skipped input tokens while mergin, we have to make sure that the size stays 1(the merged tokens will be empty)
(*session->token_stream)[index].size = 1;
(*session->token_stream)[index++].kind = Token_identifier;
}
cursor = nextCursor;
}
void Lexer::scan_int_constant()
{
if (*cursor == '.' && !std::isdigit(*(cursor + 1)))
{
scan_dot();
return;
}
//const char *begin = cursor;
while (cursor < endCursor && (isalnum(*cursor) || *cursor == '.'))
++cursor;
(*session->token_stream)[index++].kind = Token_number_literal;
}
void Lexer::scan_not()
{
/*
'!' ::= not
'!=' ::= not_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_not_eq;
}
else
{
(*session->token_stream)[index++].kind = '!';
}
}
void Lexer::scan_remainder()
{
/*
'%' ::= remainder
'%=' ::= remainder_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_remainder_eq;
}
else
{
(*session->token_stream)[index++].kind = '%';
}
}
void Lexer::scan_and()
{
/*
'&&' ::= and_and
'&' ::= and
'&=' ::= and_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_and_eq;
}
else if (*cursor == '&')
{
++cursor;
(*session->token_stream)[index++].kind = Token_and;
}
else
{
(*session->token_stream)[index++].kind = '&';
}
}
void Lexer::scan_left_paren()
{
++cursor;
(*session->token_stream)[index++].kind = '(';
}
void Lexer::scan_right_paren()
{
++cursor;
(*session->token_stream)[index++].kind = ')';
}
void Lexer::scan_star()
{
/*
'*' ::= star
'*=' ::= star_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_star_eq;
}
else
{
(*session->token_stream)[index++].kind = '*';
}
}
void Lexer::scan_plus()
{
/*
'+' ::= plus
'++' ::= incr
'+=' ::= plus_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_plus_eq;
}
else if (*cursor == '+')
{
++cursor;
(*session->token_stream)[index++].kind = Token_incr;
}
else
{
(*session->token_stream)[index++].kind = '+';
}
}
void Lexer::scan_comma()
{
++cursor;
(*session->token_stream)[index++].kind = ',';
}
void Lexer::scan_minus()
{
/*
'-' ::= minus
'--' ::= decr
'-=' ::= minus_equal
'->' ::= left_arrow
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_minus_eq;
}
else if (*cursor == '-')
{
++cursor;
(*session->token_stream)[index++].kind = Token_decr;
}
else if (*cursor == '>')
{
++cursor;
(*session->token_stream)[index++].kind = Token_arrow;
}
else
{
(*session->token_stream)[index++].kind = '-';
}
}
void Lexer::scan_dot()
{
/*
'.' ::= dot
'...' ::= ellipsis
*/
++cursor;
if (*cursor == '.' && *(cursor + 1) == '.')
{
cursor += 2;
(*session->token_stream)[index++].kind = Token_ellipsis;
}
else if (*cursor == '.' && *(cursor + 1) == '*')
{
cursor += 2;
(*session->token_stream)[index++].kind = Token_ptrmem;
}
else
(*session->token_stream)[index++].kind = '.';
}
void Lexer::scan_divide()
{
/*
'/' ::= divide
'/=' ::= divide_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_div_eq;
}
else if( *cursor == '*' || *cursor == '/' )
{
///It is a comment
--cursor; //Move back to the '/'
SpecialCursor commentBegin = cursor;
skipComment();
if( cursor != commentBegin ) {
///Store the comment
if(!m_canMergeComment || (*session->token_stream)[index-1].kind != Token_comment) {
//Only allow appending to comments that are behind a newline, because else they may belong to the item on their left side.
//If index is 1, this comment is the first token, which should be the translation-unit comment. So do not merge following comments.
if(m_firstInLine && index != 1)
m_canMergeComment = true;
else
m_canMergeComment = false;
(*session->token_stream)[index++].kind = Token_comment;
(*session->token_stream)[index-1].size = (size_t)(cursor - commentBegin);
(*session->token_stream)[index-1].position = commentBegin.offsetIn( session->contents() );
}else{
//Merge with previous comment
(*session->token_stream)[index-1].size = cursor.offsetIn(session->contents()) - (*session->token_stream)[index-1].position;
}
}
}
else
{
(*session->token_stream)[index++].kind = '/';
}
}
void Lexer::scan_colon()
{
++cursor;
if (*cursor == ':')
{
++cursor;
(*session->token_stream)[index++].kind = Token_scope;
}
else
{
(*session->token_stream)[index++].kind = ':';
}
}
void Lexer::scan_semicolon()
{
++cursor;
(*session->token_stream)[index++].kind = ';';
}
void Lexer::scan_less()
{
/*
'<' ::= less
'<<' ::= left_shift
'<<=' ::= left_shift_equal
'<=' ::= less_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_leq;
}
else if (*cursor == '<')
{
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_leftshift_eq;
}
else
{
(*session->token_stream)[index++].kind = Token_leftshift;
}
}
else
{
(*session->token_stream)[index++].kind = '<';
}
}
void Lexer::scan_equal()
{
/*
'=' ::= equal
'==' ::= equal_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_eq;
}
else
{
(*session->token_stream)[index++].kind = '=';
}
}
void Lexer::scan_greater()
{
/*
'>' ::= greater
'>=' ::= greater_equal
'>>' ::= right_shift
'>>=' ::= right_shift_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_geq;
}
else if (*cursor == '>')
{
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_rightshift_eq;
}
else
{
(*session->token_stream)[index++].kind = Token_rightshift;
}
}
else
{
(*session->token_stream)[index++].kind = '>';
}
}
void Lexer::scan_question()
{
++cursor;
(*session->token_stream)[index++].kind = '?';
}
void Lexer::scan_left_bracket()
{
++cursor;
(*session->token_stream)[index++].kind = '[';
}
void Lexer::scan_right_bracket()
{
++cursor;
(*session->token_stream)[index++].kind = ']';
}
void Lexer::scan_xor()
{
/*
'^' ::= xor
'^=' ::= xor_equal
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_xor_eq;
}
else
{
(*session->token_stream)[index++].kind = '^';
}
}
void Lexer::scan_left_brace()
{
++cursor;
(*session->token_stream)[index++].kind = '{';
}
void Lexer::scan_or()
{
/*
'|' ::= or
'|=' ::= or_equal
'||' ::= or_or
*/
++cursor;
if (*cursor == '=')
{
++cursor;
(*session->token_stream)[index++].kind = Token_or_eq;
}
else if (*cursor == '|')
{
++cursor;
(*session->token_stream)[index++].kind = Token_or;
}
else
{
(*session->token_stream)[index++].kind = '|';
}
}
void Lexer::scan_right_brace()
{
++cursor;
(*session->token_stream)[index++].kind = '}';
}
void Lexer::scan_tilde()
{
++cursor;
(*session->token_stream)[index++].kind = '~';
}
void Lexer::scan_EOF()
{
++cursor;
(*session->token_stream)[index++].kind = Token_EOF;
}
void Lexer::scan_invalid_input()
{
KDevelop::ProblemPointer p = createProblem();
p->setDescription(i18n("invalid input: %1", KDevelop::IndexedString::fromIndex(*cursor.current).str()));
control->reportProblem(p);
++cursor;
}
KDevelop::ProblemPointer Lexer::createProblem() const
{
Q_ASSERT(index > 0);
KDevelop::ProblemPointer p(new KDevelop::Problem);
KDevelop::CursorInRevision position = session->positionAt(cursor.offsetIn(session->contents()), true);
p->setSource(KDevelop::ProblemData::Lexer);
p->setFinalLocation(KDevelop::DocumentRange(session->url(), KDevelop::SimpleRange(position.castToSimpleCursor(), 1)));
return p;
}