2010-10-14 08:27:31 +02:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2012-11-15 17:28:16 +00:00
|
|
|
/*
|
|
|
|
* This file is part of the LibreOffice project.
|
|
|
|
*
|
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
*
|
|
|
|
* This file incorporates work covered by the following license notice:
|
|
|
|
*
|
|
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
|
|
* with this work for additional information regarding copyright
|
|
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy of
|
|
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
|
|
*/
|
2008-11-28 11:39:37 +00:00
|
|
|
|
|
|
|
|
2013-02-12 15:39:09 +01:00
|
|
|
#include <unicode/uchar.h>
|
2013-02-15 13:02:10 +01:00
|
|
|
#include <comphelper/syntaxhighlight.hxx>
|
2011-08-29 00:36:22 +01:00
|
|
|
#include <comphelper/string.hxx>
|
2008-11-28 11:39:37 +00:00
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
// Character-class flags kept per character in Tokenizer::aCharTypeTab.
// Every flag occupies its own bit, so one character can belong to several classes.
#define CHAR_START_IDENTIFIER   (1 << 0)    // 0x0001: letters, '_' and '$'
#define CHAR_IN_IDENTIFIER      (1 << 1)    // 0x0002: the above plus digits
#define CHAR_START_NUMBER       (1 << 2)    // 0x0004: digits, '.' and '&'
#define CHAR_IN_NUMBER          (1 << 3)    // 0x0008: digits, '.', 'e' and 'E'
#define CHAR_IN_HEX_NUMBER      (1 << 4)    // 0x0010: 0-9, a-f, A-F
#define CHAR_IN_OCT_NUMBER      (1 << 5)    // 0x0020: 0-7
#define CHAR_START_STRING       (1 << 6)    // 0x0040: ' " [ and `
#define CHAR_OPERATOR           (1 << 7)    // 0x0080: operator and punctuation characters
#define CHAR_SPACE              (1 << 8)    // 0x0100: blank and tab
#define CHAR_EOL                (1 << 9)    // 0x0200: '\r' and '\n'
|
|
|
|
|
2008-11-28 11:39:37 +00:00
|
|
|
// ##########################################################################
// ATTENTION: all these words need to be in lower case and must stay sorted
// in ascending strcmp() order, because they are looked up with bsearch()
// ##########################################################################
|
|
|
|
// BASIC keywords, grouped by initial letter. The entries are searched with
// bsearch(), so the ascending strcmp() order has to be preserved when editing.
static const char* strListBasicKeyWords[] = {
    // a
    "access", "alias", "and", "any", "append", "as", "attribute",
    // b
    "base", "binary", "boolean", "byref", "byte", "byval",
    // c
    "call", "case", "cdecl", "classmodule", "close", "compare", "compatible",
    "const", "currency",
    // d
    "date", "declare", "defbool", "defcur", "defdate", "defdbl", "deferr",
    "defint", "deflng", "defobj", "defsng", "defstr", "defvar", "dim", "do",
    "double",
    // e
    "each", "else", "elseif", "end", "end enum", "end function", "end if",
    "end property", "end select", "end sub", "end type", "endif", "enum",
    "eqv", "erase", "error", "exit", "explicit",
    // f
    "for", "function",
    // g
    "get", "global", "gosub", "goto",
    // i
    "if", "imp", "implements", "in", "input", "integer", "is",
    // l
    "let", "lib", "like", "line", "line input", "local", "lock", "long",
    "loop", "lprint", "lset",
    // m
    "mod",
    // n
    "name", "new", "next", "not",
    // o
    "object", "on", "open", "option", "optional", "or", "output",
    // p
    "paramarray", "preserve", "print", "private", "property", "public",
    // r
    "random", "read", "redim", "rem", "resume", "return", "rset",
    // s
    "select", "set", "shared", "single", "static", "step", "stop", "string",
    "sub", "system",
    // t
    "text", "then", "to", "type", "typeof",
    // u
    "until",
    // v
    "variant", "vbasupport",
    // w
    "wend", "while", "with", "withevents", "write",
    // x
    "xor"
};
|
|
|
|
|
|
|
|
|
|
|
|
// SQL keywords, grouped by initial letter. The entries are searched with
// bsearch(), so the ascending strcmp() order has to be preserved when editing.
static const char* strListSqlKeyWords[] = {
    // a
    "all", "and", "any", "as", "asc", "avg",
    // b
    "between", "by",
    // c
    "cast", "corresponding", "count", "create", "cross",
    // d
    "delete", "desc", "distinct", "drop",
    // e
    "escape", "except", "exists",
    // f
    "false", "from", "full",
    // g
    "global", "group",
    // h
    "having",
    // i
    "in", "inner", "insert", "intersect", "into", "is",
    // j
    "join",
    // l
    "left", "like", "limit", "local",
    // m
    "match", "max", "min",
    // n
    "natural", "not", "null",
    // o
    "on", "or", "order", "outer",
    // r
    "right",
    // s
    "select", "set", "some", "sum",
    // t
    "table", "temporary", "true",
    // u
    "union", "unique", "unknown", "update", "using",
    // v
    "values",
    // w
    "where"
};
|
|
|
|
|
|
|
|
|
2013-10-14 12:04:36 +03:00
|
|
|
// Comparison callback for bsearch() over a keyword table.
// arg1 is the search key itself (a NUL-terminated string); arg2 points at one
// table slot, i.e. at a pointer to a NUL-terminated string.
// Returns the strcmp() ordering of key versus table entry.
extern "C" int compare_strings( const void *arg1, const void *arg2 )
{
    const char* pKey = static_cast<const char*>(arg1);
    const char* pEntry = *static_cast<const char* const*>(arg2);
    return strcmp( pKey, pEntry );
}
|
|
|
|
|
|
|
|
|
2012-05-16 11:19:17 +01:00
|
|
|
namespace
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2013-10-28 12:09:28 +01:00
|
|
|
bool isAlpha(sal_Unicode c)
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2012-05-16 11:19:17 +01:00
|
|
|
if (comphelper::string::isalphaAscii(c))
|
|
|
|
return true;
|
2013-02-12 15:39:09 +01:00
|
|
|
return u_isalpha(c);
|
2008-11-28 11:39:37 +00:00
|
|
|
}
|
2012-05-16 11:19:17 +01:00
|
|
|
}
|
2008-11-28 11:39:37 +00:00
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
class SyntaxHighlighter::Tokenizer
|
|
|
|
{
|
|
|
|
HighlighterLanguage aLanguage;
|
|
|
|
// Character information tables
|
|
|
|
sal_uInt16 aCharTypeTab[256];
|
|
|
|
|
|
|
|
const sal_Unicode* mpStringBegin;
|
|
|
|
const sal_Unicode* mpActualPos;
|
|
|
|
|
|
|
|
sal_Unicode peekChar( void ) { return *mpActualPos; }
|
|
|
|
sal_Unicode getChar( void ) { return *mpActualPos++; }
|
|
|
|
|
|
|
|
// Auxiliary function: testing of the character flags
|
|
|
|
sal_Bool testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags );
|
|
|
|
|
|
|
|
// Get new token, EmptyString == nothing more over there
|
|
|
|
sal_Bool getNextToken( /*out*/TokenTypes& reType,
|
|
|
|
/*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos );
|
|
|
|
|
|
|
|
const char** ppListKeyWords;
|
|
|
|
sal_uInt16 nKeyWordCount;
|
|
|
|
|
|
|
|
public:
|
|
|
|
Tokenizer( HighlighterLanguage aLang = HIGHLIGHT_BASIC );
|
|
|
|
~Tokenizer( void );
|
|
|
|
|
|
|
|
sal_uInt16 parseLine( const OUString* aSource );
|
|
|
|
void getHighlightPortions( const OUString& rLine,
|
|
|
|
/*out*/std::vector<HighlightPortion>& portions );
|
|
|
|
void setKeyWords( const char** ppKeyWords, sal_uInt16 nCount );
|
|
|
|
};
|
|
|
|
|
2013-02-12 15:39:09 +01:00
|
|
|
// Helper function: test character flag
|
2013-10-28 12:09:28 +01:00
|
|
|
sal_Bool SyntaxHighlighter::Tokenizer::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
|
|
|
bool bRet = false;
|
|
|
|
if( c != 0 && c <= 255 )
|
|
|
|
{
|
|
|
|
bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
|
|
|
|
}
|
|
|
|
else if( c > 255 )
|
|
|
|
{
|
|
|
|
bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
|
2012-05-16 11:19:17 +01:00
|
|
|
? isAlpha(c) : false;
|
2008-11-28 11:39:37 +00:00
|
|
|
}
|
|
|
|
return bRet;
|
|
|
|
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
void SyntaxHighlighter::Tokenizer::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
|
|
|
ppListKeyWords = ppKeyWords;
|
|
|
|
nKeyWordCount = nCount;
|
|
|
|
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
// Scans one token starting at the current read position (mpActualPos).
//
// reType     receives the token class; stays TT_UNKNOWN when no rule matches
// rpStartPos receives the position of the token's first character
// rpEndPos   receives the position one past the token's last character; it is
//            NOT written when the function returns sal_False
//
// Returns sal_False when the read position is on the terminating 0 character
// (nothing is consumed in that case), sal_True otherwise.
sal_Bool SyntaxHighlighter::Tokenizer::getNextToken( /*out*/TokenTypes& reType,
    /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
{
    reType = TT_UNKNOWN;

    rpStartPos = mpActualPos;

    sal_Unicode c = peekChar();
    if( c == 0 )
        return sal_False;

    getChar();

    //*** Go through all possibilities ***
    // Space?
    if ( testCharFlags( c, CHAR_SPACE ) )
    {
        while( testCharFlags( peekChar(), CHAR_SPACE ) )
            getChar();

        reType = TT_WHITESPACE;
    }

    // Identifier?
    else if ( testCharFlags( c, CHAR_START_IDENTIFIER ) )
    {
        bool bIdentifierChar;
        do
        {
            // Fetch the next character
            c = peekChar();
            bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
            if( bIdentifierChar )
                getChar();
        }
        while( bIdentifierChar );

        reType = TT_IDENTIFIER;

        // Keyword table
        if (ppListKeyWords != NULL)
        {
            int nCount = mpActualPos - rpStartPos;

            // No keyword if string contains char > 255
            bool bCanBeKeyword = true;
            for( int i = 0 ; i < nCount ; i++ )
            {
                if( rpStartPos[i] > 255 )
                {
                    bCanBeKeyword = false;
                    break;
                }
            }

            if( bCanBeKeyword )
            {
                // The table is lower case, so compare the lower-cased identifier
                OUString aKWString(rpStartPos, nCount);
                OString aByteStr = OUStringToOString(aKWString,
                    RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
                if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
                              compare_strings ) )
                {
                    reType = TT_KEYWORDS;

                    if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")))
                    {
                        // Remove all characters until end of line or EOF
                        sal_Unicode cPeek = peekChar();
                        while( cPeek != 0 && !testCharFlags( cPeek, CHAR_EOL ) )
                        {
                            getChar();
                            cPeek = peekChar();
                        }

                        reType = TT_COMMENT;
                    }
                }
            }
        }
    }

    // Operator?
    // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
    else if ( testCharFlags( c, CHAR_OPERATOR ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
    {
        // parameters for SQL view
        if ( (c==':') || (c=='?'))
        {
            if (c!='?')
            {
                // ':' may be followed by a name made of letters
                bool bIdentifierChar;
                do
                {
                    // Get next character
                    c = peekChar();
                    bIdentifierChar = isAlpha(c);
                    if( bIdentifierChar )
                        getChar();
                }
                while( bIdentifierChar );
            }
            reType = TT_PARAMETER;
        }
        else if (c=='-')
        {
            sal_Unicode cPeekNext = peekChar();
            if (cPeekNext=='-')
            {
                // "--" comment: remove all characters until end of line or EOF
                while( cPeekNext != 0 && !testCharFlags( cPeekNext, CHAR_EOL ) )
                {
                    getChar();
                    cPeekNext = peekChar();
                }
                reType = TT_COMMENT;
            }
            else
            {
                // A single '-' is an ordinary operator
                reType = TT_OPERATOR;
            }
        }
        else if (c=='/')
        {
            sal_Unicode cPeekNext = peekChar();
            if (cPeekNext=='/')
            {
                // "//" comment: remove all characters until end of line or EOF
                while( cPeekNext != 0 && !testCharFlags( cPeekNext, CHAR_EOL ) )
                {
                    getChar();
                    cPeekNext = peekChar();
                }
                reType = TT_COMMENT;
            }
            else
            {
                // A single '/' is an ordinary operator
                reType = TT_OPERATOR;
            }
        }
        else
        {
            // Comment?
            if ( c == '\'' )
            {
                // Skip all characters until end of input or end of line:
                for (;;) {
                    c = peekChar();
                    if (c == 0 || testCharFlags(c, CHAR_EOL)) {
                        break;
                    }
                    getChar();
                }

                reType = TT_COMMENT;
            }

            // The real operator; can be easily used since not the actual
            // operator (e.g. +=) is concerned, but the fact that it is one
            if( reType != TT_COMMENT )
            {
                reType = TT_OPERATOR;
            }

        }
    }

    // Object separator? Must be handled before Number
    else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
    {
        reType = TT_OPERATOR;
    }

    // Number?
    else if( testCharFlags( c, CHAR_START_NUMBER ) )
    {
        reType = TT_NUMBER;

        // Number system, 10 = normal, it is changed for Oct/Hex
        int nRadix = 10;

        // Is it an Oct or a Hex number?
        if( c == '&' )
        {
            // Octal?
            if( peekChar() == 'o' || peekChar() == 'O' )
            {
                // remove o
                getChar();
                nRadix = 8;     // Octal base

                // Read all numbers
                while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
                    getChar();
            }
            // Hexadecimal?
            else if( peekChar() == 'h' || peekChar() == 'H' )
            {
                // remove h
                getChar();
                nRadix = 16;     // Hexadecimal base

                // Read all numbers
                while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
                    getChar();
            }
            else
            {
                // A '&' without radix prefix is just an operator
                reType = TT_OPERATOR;
            }
        }

        // When it is not Oct or Hex, then it is double
        if( reType == TT_NUMBER && nRadix == 10 )
        {
            // Flag if the last character is an exponent
            bool bAfterExpChar = false;

            // Read all numbers; directly after an exponent +/- are OK, too
            while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
                    (bAfterExpChar && peekChar() == '+' ) ||
                    (bAfterExpChar && peekChar() == '-' ) )
            {
                c = getChar();
                bAfterExpChar = ( c == 'e' || c == 'E' );
            }
        }
    }

    // String?
    else if( testCharFlags( c, CHAR_START_STRING ) )
    {
        // Remember which character has opened the string
        sal_Unicode cEndString = c;
        if( c == '[' )
            cEndString = ']';

        // Read all characters
        while( peekChar() != cEndString )
        {
            // Detect EOF before getChar(), so we do not lose EOF
            if( peekChar() == 0 )
            {
                // ERROR: unterminated string literal
                reType = TT_ERROR;
                break;
            }
            c = getChar();
            if( testCharFlags( c, CHAR_EOL ) )
            {
                // ERROR: unterminated string literal
                reType = TT_ERROR;
                break;
            }
        }

        if( reType != TT_ERROR )
        {
            // Consume the closing delimiter; [...] denotes an identifier
            getChar();
            if( cEndString == ']' )
                reType = TT_IDENTIFIER;
            else
                reType = TT_STRING;
        }
    }

    // End of line?
    else if( testCharFlags( c, CHAR_EOL ) )
    {
        // If another, different EOL character comes, read it
        sal_Unicode cNext = peekChar();
        if( cNext != c && testCharFlags( cNext, CHAR_EOL ) )
            getChar();

        reType = TT_EOL;
    }

    // All other will remain TT_UNKNOWN

    // Save end position
    rpEndPos = mpActualPos;
    return sal_True;
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
// Builds the character classification table for the given language.
// No keyword table is installed yet (see setKeyWords()) and no line is
// attached; every member starts out with a defined value.
SyntaxHighlighter::Tokenizer::Tokenizer( HighlighterLanguage aLang )
    : aLanguage(aLang)
    , mpStringBegin(NULL)
    , mpActualPos(NULL)
    , ppListKeyWords(NULL)
    , nKeyWordCount(0)
{
    memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );

    // Fill character table
    sal_uInt16 i;

    // Allowed characters for identifiers
    sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
    for( i = 'a' ; i <= 'z' ; i++ )
        aCharTypeTab[i] |= nHelpMask;
    for( i = 'A' ; i <= 'Z' ; i++ )
        aCharTypeTab[i] |= nHelpMask;
    aCharTypeTab[(int)'_'] |= nHelpMask;
    aCharTypeTab[(int)'$'] |= nHelpMask;

    // Digit (can be identifier and number)
    nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
                         CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
    for( i = '0' ; i <= '9' ; i++ )
        aCharTypeTab[i] |= nHelpMask;

    // Add e, E, . and & here manually
    aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
    aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
    aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
    aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;

    // Hexadecimal digit
    for( i = 'a' ; i <= 'f' ; i++ )
        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
    for( i = 'A' ; i <= 'F' ; i++ )
        aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;

    // Octal digit
    for( i = '0' ; i <= '7' ; i++ )
        aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;

    // String literal start/end characters
    aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
    aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
    aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
    aCharTypeTab[(int)'`']  |= CHAR_START_STRING;

    // Operator characters.
    // '&' is deliberately absent (removed because of #i14140) and so is
    // '[' (removed because of #i17826).
    static const char aOperatorChars[] = "!%()*+,-/:<=>?^|~{}];";
    for( const char* pOp = aOperatorChars ; *pOp != 0 ; ++pOp )
        aCharTypeTab[static_cast<unsigned char>(*pOp)] |= CHAR_OPERATOR;

    // Space
    aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
    aCharTypeTab[(int)'\t'] |= CHAR_SPACE;

    // End of line characters
    aCharTypeTab[(int)'\r'] |= CHAR_EOL;
    aCharTypeTab[(int)'\n'] |= CHAR_EOL;
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
// Nothing to release: the keyword table and the scanned line are only
// referenced through pointers, never allocated by the tokenizer.
SyntaxHighlighter::Tokenizer::~Tokenizer()
{
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
// Tokenizes the whole string *aSource and returns how many tokens it contains.
// The tokens themselves are discarded.
sal_uInt16 SyntaxHighlighter::Tokenizer::parseLine( const OUString* aSource )
{
    // Start reading at the beginning of the source string
    mpActualPos = aSource->getStr();
    mpStringBegin = mpActualPos;

    // Receivers for getNextToken()'s out parameters
    TokenTypes eTokenType;
    const sal_Unicode* pTokenBegin;
    const sal_Unicode* pTokenEnd;

    // Count tokens until the scanner reports the end of the string
    sal_uInt16 nTokens = 0;
    for (;;)
    {
        if( !getNextToken( eTokenType, pTokenBegin, pTokenEnd ) )
            break;
        nTokens++;
    }

    return nTokens;
}
|
|
|
|
|
2013-10-28 12:09:28 +01:00
|
|
|
void SyntaxHighlighter::Tokenizer::getHighlightPortions( const OUString& rLine,
|
2013-10-22 16:48:15 +02:00
|
|
|
/*out*/std::vector<HighlightPortion>& portions )
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2013-02-12 15:39:09 +01:00
|
|
|
// Set the position to the beginning of the source string
|
2013-02-12 22:29:33 +01:00
|
|
|
mpStringBegin = mpActualPos = rLine.getStr();
|
2008-11-28 11:39:37 +00:00
|
|
|
|
2013-02-12 15:39:09 +01:00
|
|
|
// Variables for the out parameter
|
2008-11-28 11:39:37 +00:00
|
|
|
TokenTypes eType;
|
|
|
|
const sal_Unicode* pStartPos;
|
|
|
|
const sal_Unicode* pEndPos;
|
|
|
|
|
2013-02-12 15:39:09 +01:00
|
|
|
// Loop over all the tokens
|
2013-10-28 08:47:22 +01:00
|
|
|
while( getNextToken( eType, pStartPos, pEndPos ) )
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2013-10-22 18:15:48 +02:00
|
|
|
portions.push_back(
|
|
|
|
HighlightPortion(
|
|
|
|
pStartPos - mpStringBegin, pEndPos - mpStringBegin, eType));
|
2008-11-28 11:39:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Creates a highlighter without keywords; initialize() must be called before
// it can tokenize anything, because that is where the tokenizer is created.
SyntaxHighlighter::SyntaxHighlighter()
{
    m_nKeyWordCount = 0;
    m_pKeyWords = NULL;
}
|
|
|
|
|
|
|
|
// Frees m_pKeyWords.
// NOTE(review): nothing in this file ever assigns m_pKeyWords a non-NULL
// value, so this looks like a no-op here - confirm against the header.
SyntaxHighlighter::~SyntaxHighlighter()
{
    delete m_pKeyWords;
}
|
|
|
|
|
|
|
|
// Selects the language: creates a fresh tokenizer for it and installs the
// matching keyword table (none for languages other than BASIC and SQL).
void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
{
    eLanguage = eLanguage_;
    m_tokenizer.reset(new SyntaxHighlighter::Tokenizer(eLanguage));

    // Pick the keyword table that belongs to the language
    const char** ppTable = NULL;
    sal_uInt16 nEntries = 0;
    switch (eLanguage)
    {
        case HIGHLIGHT_BASIC:
            ppTable = strListBasicKeyWords;
            nEntries = static_cast<sal_uInt16>(
                sizeof( strListBasicKeyWords ) / sizeof( char* ));
            break;
        case HIGHLIGHT_SQL:
            ppTable = strListSqlKeyWords;
            nEntries = static_cast<sal_uInt16>(
                sizeof( strListSqlKeyWords ) / sizeof( char* ));
            break;
        default:
            break;
    }

    m_tokenizer->setKeyWords( ppTable, nEntries );
}
|
|
|
|
|
2013-10-28 11:38:36 +01:00
|
|
|
void SyntaxHighlighter::notifyChange(
|
|
|
|
const OUString* pChangedLines, sal_uInt32 nArrayLength)
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2010-11-05 10:31:15 +08:00
|
|
|
for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
|
2013-10-28 12:09:28 +01:00
|
|
|
m_tokenizer->parseLine(&pChangedLines[i]);
|
2008-11-28 11:39:37 +00:00
|
|
|
}
|
|
|
|
|
2013-10-28 11:38:36 +01:00
|
|
|
void SyntaxHighlighter::getHighlightPortions( const OUString& rLine,
|
2013-10-22 16:48:15 +02:00
|
|
|
/*out*/std::vector<HighlightPortion>& portions )
|
2008-11-28 11:39:37 +00:00
|
|
|
{
|
2013-10-28 12:09:28 +01:00
|
|
|
m_tokenizer->getHighlightPortions( rLine, portions );
|
2008-11-28 11:39:37 +00:00
|
|
|
}
|
2010-10-14 08:27:31 +02:00
|
|
|
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|