Files
libreoffice/starmath/source/parse.cxx
Eike Rathke 2caa9d7ddc Resolves: tdf#127873 accept ',' comma group separator in number entities again
Existing documents unwittingly may have used that as decimal
separator in such locales (though it never was as this is always
the en-US locale).

"Regression" from

commit 9336286a7ea5385541344f444e6f8702c85bdacb
CommitDate: Fri Nov 30 22:15:22 2018 +0100

    [API CHANGE] Resolves: tdf#42518 new KParseTokens::GROUP_SEPARATOR_IN_NUMBER

Change-Id: I0ffc1b8ec7c1820fccd8277036c64093dddf82fe
Reviewed-on: https://gerrit.libreoffice.org/80023
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
2019-10-02 17:21:04 +02:00

2461 lines
83 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <memory>
#include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/KParseTokens.hpp>
#include <com/sun/star/i18n/KParseType.hpp>
#include <i18nlangtag/lang.h>
#include <tools/lineend.hxx>
#include <unotools/configmgr.hxx>
#include <unotools/syslocale.hxx>
#include <sal/log.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <node.hxx>
#include <parse.hxx>
#include <strings.hrc>
#include <smmod.hxx>
#include "cfgitem.hxx"
#include <cassert>
#include <stack>
using namespace ::com::sun::star::i18n;
SmToken::SmToken()
: eType(TUNKNOWN)
, cMathChar('\0')
, nGroup(TG::NONE)
, nLevel(0)
, nRow(0)
, nCol(0)
{
}
SmToken::SmToken(SmTokenType eTokenType,
sal_Unicode cMath,
const sal_Char* pText,
TG nTokenGroup,
sal_uInt16 nTokenLevel)
: aText(OUString::createFromAscii(pText))
, eType(eTokenType)
, cMathChar(cMath)
, nGroup(nTokenGroup)
, nLevel(nTokenLevel)
, nRow(0)
, nCol(0)
{
}
static const SmTokenTableEntry aTokenTable[] =
{
{ "abs", TABS, '\0', TG::UnOper, 13 },
{ "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
{ "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
{ "alignb", TALIGNC, '\0', TG::Align, 0},
{ "alignc", TALIGNC, '\0', TG::Align, 0},
{ "alignl", TALIGNL, '\0', TG::Align, 0},
{ "alignm", TALIGNC, '\0', TG::Align, 0},
{ "alignr", TALIGNR, '\0', TG::Align, 0},
{ "alignt", TALIGNC, '\0', TG::Align, 0},
{ "and", TAND, MS_AND, TG::Product, 0},
{ "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
{ "aqua", TAQUA, '\0', TG::Color, 0},
{ "arccos", TACOS, '\0', TG::Function, 5},
{ "arccot", TACOT, '\0', TG::Function, 5},
{ "arcosh", TACOSH, '\0', TG::Function, 5 },
{ "arcoth", TACOTH, '\0', TG::Function, 5 },
{ "arcsin", TASIN, '\0', TG::Function, 5},
{ "arctan", TATAN, '\0', TG::Function, 5},
{ "arsinh", TASINH, '\0', TG::Function, 5},
{ "artanh", TATANH, '\0', TG::Function, 5},
{ "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
{ "bar", TBAR, MS_BAR, TG::Attribute, 5},
{ "binom", TBINOM, '\0', TG::NONE, 5 },
{ "black", TBLACK, '\0', TG::Color, 0},
{ "blue", TBLUE, '\0', TG::Color, 0},
{ "bold", TBOLD, '\0', TG::FontAttr, 5},
{ "boper", TBOPER, '\0', TG::Product, 0},
{ "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
{ "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
{ "cdot", TCDOT, MS_CDOT, TG::Product, 0},
{ "check", TCHECK, MS_CHECK, TG::Attribute, 5},
{ "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
{ "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
{ "color", TCOLOR, '\0', TG::FontAttr, 5},
{ "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
{ "cos", TCOS, '\0', TG::Function, 5},
{ "cosh", TCOSH, '\0', TG::Function, 5},
{ "cot", TCOT, '\0', TG::Function, 5},
{ "coth", TCOTH, '\0', TG::Function, 5},
{ "csub", TCSUB, '\0', TG::Power, 0},
{ "csup", TCSUP, '\0', TG::Power, 0},
{ "cyan", TCYAN, '\0', TG::Color, 0},
{ "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
{ "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
{ "def", TDEF, MS_DEF, TG::Relation, 0},
{ "div", TDIV, MS_DIV, TG::Product, 0},
{ "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
{ "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
{ "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
{ "dot", TDOT, MS_DOT, TG::Attribute, 5},
{ "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
{ "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
{ "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
{ "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
{ "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
{ "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
{ "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
{ "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
{ "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
{ "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
{ "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
{ "exp", TEXP, '\0', TG::Function, 5},
{ "fact", TFACT, MS_FACT, TG::UnOper, 5},
{ "fixed", TFIXED, '\0', TG::Font, 0},
{ "font", TFONT, '\0', TG::FontAttr, 5},
{ "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
{ "from", TFROM, '\0', TG::Limit, 0},
{ "fuchsia", TFUCHSIA, '\0', TG::Color, 0},
{ "func", TFUNC, '\0', TG::Function, 5},
{ "ge", TGE, MS_GE, TG::Relation, 0},
{ "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
{ "gg", TGG, MS_GG, TG::Relation, 0},
{ "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
{ "gray", TGRAY, '\0', TG::Color, 0},
{ "green", TGREEN, '\0', TG::Color, 0},
{ "gt", TGT, MS_GT, TG::Relation, 0},
{ "harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5},
{ "hat", THAT, MS_HAT, TG::Attribute, 5},
{ "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
{ "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
{ "iint", TIINT, MS_IINT, TG::Oper, 5},
{ "im" , TIM, MS_IM, TG::Standalone, 5 },
{ "in", TIN, MS_IN, TG::Relation, 0},
{ "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
{ "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
{ "int", TINT, MS_INT, TG::Oper, 5},
{ "intd", TINTD, MS_INT, TG::Oper, 5},
{ "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
{ "ital", TITALIC, '\0', TG::FontAttr, 5},
{ "italic", TITALIC, '\0', TG::FontAttr, 5},
{ "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
{ "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
{ "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
{ "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
{ "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
{ "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
{ "le", TLE, MS_LE, TG::Relation, 0},
{ "left", TLEFT, '\0', TG::NONE, 5},
{ "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
{ "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
{ "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
{ "lim", TLIM, '\0', TG::Oper, 5},
{ "lime", TLIME, '\0', TG::Color, 0},
{ "liminf", TLIMINF, '\0', TG::Oper, 5},
{ "limsup", TLIMSUP, '\0', TG::Oper, 5},
{ "lint", TLINT, MS_LINT, TG::Oper, 5},
{ "ll", TLL, MS_LL, TG::Relation, 0},
{ "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
{ "llint", TLLINT, MS_LLINT, TG::Oper, 5},
{ "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
{ "ln", TLN, '\0', TG::Function, 5},
{ "log", TLOG, '\0', TG::Function, 5},
{ "lsub", TLSUB, '\0', TG::Power, 0},
{ "lsup", TLSUP, '\0', TG::Power, 0},
{ "lt", TLT, MS_LT, TG::Relation, 0},
{ "magenta", TMAGENTA, '\0', TG::Color, 0},
{ "maroon", TMAROON, '\0', TG::Color, 0},
{ "matrix", TMATRIX, '\0', TG::NONE, 5},
{ "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
{ "mline", TMLINE, MS_VERTLINE, TG::NONE, 0}, //! not in TG::RBrace, Level 0
{ "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
{ "navy", TNAVY, '\0', TG::Color, 0},
{ "nbold", TNBOLD, '\0', TG::FontAttr, 5},
{ "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
{ "neg", TNEG, MS_NEG, TG::UnOper, 5 },
{ "neq", TNEQ, MS_NEQ, TG::Relation, 0},
{ "newline", TNEWLINE, '\0', TG::NONE, 0},
{ "ni", TNI, MS_NI, TG::Relation, 0},
{ "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
{ "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
{ "nospace", TNOSPACE, '\0', TG::Standalone, 5},
{ "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
{ "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
{ "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
{ "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
{ "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
{ "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
{ "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
{ "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
{ "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
{ "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
{ "odot", TODOT, MS_ODOT, TG::Product, 0},
{ "olive", TOLIVE, '\0', TG::Color, 0},
{ "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
{ "oper", TOPER, '\0', TG::Oper, 5},
{ "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
{ "or", TOR, MS_OR, TG::Sum, 0},
{ "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
{ "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
{ "over", TOVER, '\0', TG::Product, 0},
{ "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
{ "overline", TOVERLINE, '\0', TG::Attribute, 5},
{ "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
{ "owns", TNI, MS_NI, TG::Relation, 0},
{ "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
{ "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
{ "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
{ "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
{ "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
{ "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
{ "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
{ "prod", TPROD, MS_PROD, TG::Oper, 5},
{ "prop", TPROP, MS_PROP, TG::Relation, 0},
{ "purple", TPURPLE, '\0', TG::Color, 0},
{ "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0}, //! 0 to terminate expression
{ "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
{ "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
{ "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
{ "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
{ "re" , TRE, MS_RE, TG::Standalone, 5 },
{ "red", TRED, '\0', TG::Color, 0},
{ "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0}, //! 0 to terminate expression
{ "right", TRIGHT, '\0', TG::NONE, 0},
{ "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
{ "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0}, //! 0 to terminate expression
{ "rsub", TRSUB, '\0', TG::Power, 0},
{ "rsup", TRSUP, '\0', TG::Power, 0},
{ "sans", TSANS, '\0', TG::Font, 0},
{ "serif", TSERIF, '\0', TG::Font, 0},
{ "setC" , TSETC, MS_SETC, TG::Standalone, 5},
{ "setminus", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
{ "setN" , TSETN, MS_SETN, TG::Standalone, 5},
{ "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
{ "setR" , TSETR, MS_SETR, TG::Standalone, 5},
{ "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
{ "silver", TSILVER, '\0', TG::Color, 0},
{ "sim", TSIM, MS_SIM, TG::Relation, 0},
{ "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
{ "sin", TSIN, '\0', TG::Function, 5},
{ "sinh", TSINH, '\0', TG::Function, 5},
{ "size", TSIZE, '\0', TG::FontAttr, 5},
{ "slash", TSLASH, MS_SLASH, TG::Product, 0 },
{ "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
{ "stack", TSTACK, '\0', TG::NONE, 5},
{ "sub", TRSUB, '\0', TG::Power, 0},
{ "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
{ "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
{ "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
{ "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
{ "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
{ "sum", TSUM, MS_SUM, TG::Oper, 5},
{ "sup", TRSUP, '\0', TG::Power, 0},
{ "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
{ "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
{ "tan", TTAN, '\0', TG::Function, 5},
{ "tanh", TTANH, '\0', TG::Function, 5},
{ "teal", TTEAL, '\0', TG::Color, 0},
{ "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
{ "times", TTIMES, MS_TIMES, TG::Product, 0},
{ "to", TTO, '\0', TG::Limit, 0},
{ "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
{ "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
{ "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
{ "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
{ "underline", TUNDERLINE, '\0', TG::Attribute, 5},
{ "union", TUNION, MS_UNION, TG::Sum, 0},
{ "uoper", TUOPER, '\0', TG::UnOper, 5},
{ "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
{ "vec", TVEC, MS_VEC, TG::Attribute, 5},
{ "white", TWHITE, '\0', TG::Color, 0},
{ "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
{ "wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5},
{ "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
{ "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
{ "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
{ "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
{ "wp" , TWP, MS_WP, TG::Standalone, 5},
{ "yellow", TYELLOW, '\0', TG::Color, 0}
};
#if !defined NDEBUG
static bool sortCompare(const SmTokenTableEntry & lhs, const SmTokenTableEntry & rhs)
{
return OUString::createFromAscii(lhs.pIdent).compareToIgnoreAsciiCase(OUString::createFromAscii(rhs.pIdent)) < 0;
}
#endif
static bool findCompare(const SmTokenTableEntry & lhs, const OUString & s)
{
return s.compareToIgnoreAsciiCaseAscii(lhs.pIdent) > 0;
}
const SmTokenTableEntry * SmParser::GetTokenTableEntry( const OUString &rName )
{
static bool bSortKeyWords = false;
if( !bSortKeyWords )
{
assert( std::is_sorted( std::begin(aTokenTable), std::end(aTokenTable), sortCompare ) );
bSortKeyWords = true;
}
if (rName.isEmpty())
return nullptr;
auto findIter = std::lower_bound( std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare );
if ( findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCaseAscii( findIter->pIdent ))
return &*findIter;
return nullptr;
}
namespace {
bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
// returns 'true' iff cChar is '\0' or a delimiter
{
assert(nPos <= rTxt.getLength()); //index out of range
if (nPos == rTxt.getLength())
return true;
sal_Unicode cChar = rTxt[nPos];
// check if 'cChar' is in the delimiter table
static const sal_Unicode aDelimiterTable[] =
{
' ', '\t', '\n', '\r', '+', '-', '*', '/', '=', '#',
'%', '\\', '"', '~', '`', '>', '<', '&', '|', '(',
')', '{', '}', '[', ']', '^', '_'
};
for (auto const &cDelimiter : aDelimiterTable)
{
if (cDelimiter == cChar)
return true;
}
sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
nTypJp == css::i18n::UnicodeType::CONTROL);
}
}
void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
{
assert( nPos + nLen <= m_aBufferString.getLength() );
m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText );
sal_Int32 nChg = rText.getLength() - nLen;
m_nBufferIndex = m_nBufferIndex + nChg;
m_nTokenIndex = m_nTokenIndex + nChg;
}
void SmParser::NextToken()
{
// First character may be any alphabetic
static const sal_Int32 coStartFlags =
KParseTokens::ANY_LETTER |
KParseTokens::IGNORE_LEADING_WS;
// Continuing characters may be any alphabetic
static const sal_Int32 coContFlags =
(coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
| KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
// user-defined char continuing characters may be any alphanumeric or dot.
static const sal_Int32 coUserDefinedCharContFlags =
KParseTokens::ANY_LETTER_OR_NUMBER |
KParseTokens::ASC_DOT |
KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
// First character for numbers, may be any numeric or dot
static const sal_Int32 coNumStartFlags =
KParseTokens::ASC_DIGIT |
KParseTokens::ASC_DOT |
KParseTokens::IGNORE_LEADING_WS;
// Continuing characters for numbers, may be any numeric or dot.
// tdf#127873: additionally accept ',' comma group separator as too many
// existing documents unwittingly may have used that as decimal separator
// in such locales (though it never was as this is always the en-US locale
// and the group separator is only parsed away).
static const sal_Int32 coNumContFlags =
(coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS) |
KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
sal_Int32 nBufLen = m_aBufferString.getLength();
ParseResult aRes;
sal_Int32 nRealStart;
bool bCont;
do
{
// skip white spaces
while (UnicodeType::SPACE_SEPARATOR ==
m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
++m_nBufferIndex;
// Try to parse a number in a locale-independent manner using
// '.' as decimal separator.
// See https://bz.apache.org/ooo/show_bug.cgi?id=45779
aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
m_aBufferString, m_nBufferIndex,
coNumStartFlags, "",
coNumContFlags, "");
if (aRes.TokenType == 0)
{
// Try again with the default token parsing.
aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
coStartFlags, "",
coContFlags, "");
}
nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
m_nBufferIndex = nRealStart;
bCont = false;
if ( aRes.TokenType == 0 &&
nRealStart < nBufLen &&
'\n' == m_aBufferString[ nRealStart ] )
{
// keep data needed for tokens row and col entry up to date
++m_nRow;
m_nBufferIndex = m_nColOff = nRealStart + 1;
bCont = true;
}
else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
{
if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
{
//SkipComment
m_nBufferIndex = nRealStart + 2;
while (m_nBufferIndex < nBufLen &&
'\n' != m_aBufferString[ m_nBufferIndex ])
++m_nBufferIndex;
bCont = true;
}
}
} while (bCont);
// set index of current token
m_nTokenIndex = m_nBufferIndex;
m_aCurToken.nRow = m_nRow;
m_aCurToken.nCol = nRealStart - m_nColOff + 1;
bool bHandled = true;
if (nRealStart >= nBufLen)
{
m_aCurToken.eType = TEND;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 0;
m_aCurToken.aText.clear();
}
else if (aRes.TokenType & KParseType::ANY_NUMBER)
{
assert(aRes.EndPos > 0);
if ( m_aBufferString[aRes.EndPos-1] == ',' &&
aRes.EndPos < nBufLen &&
m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
{
// Comma followed by a non-space char is unlikely for decimal/thousands separator.
--aRes.EndPos;
}
sal_Int32 n = aRes.EndPos - nRealStart;
assert(n >= 0);
m_aCurToken.eType = TNUMBER;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = m_aBufferString.copy( nRealStart, n );
SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
}
else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
{
m_aCurToken.eType = TTEXT;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = aRes.DequotedNameOrString;
m_aCurToken.nRow = m_nRow;
m_aCurToken.nCol = nRealStart - m_nColOff + 2;
}
else if (aRes.TokenType & KParseType::IDENTNAME)
{
sal_Int32 n = aRes.EndPos - nRealStart;
assert(n >= 0);
OUString aName( m_aBufferString.copy( nRealStart, n ) );
const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
if (pEntry)
{
m_aCurToken.eType = pEntry->eType;
m_aCurToken.cMathChar = pEntry->cMathChar;
m_aCurToken.nGroup = pEntry->nGroup;
m_aCurToken.nLevel = pEntry->nLevel;
m_aCurToken.aText = OUString::createFromAscii( pEntry->pIdent );
}
else
{
m_aCurToken.eType = TIDENT;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = aName;
SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
}
}
else if (aRes.TokenType == 0 && '_' == m_aBufferString[ nRealStart ])
{
m_aCurToken.eType = TRSUB;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::Power;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "_";
aRes.EndPos = nRealStart + 1;
}
else if (aRes.TokenType & KParseType::BOOLEAN)
{
sal_Int32 &rnEndPos = aRes.EndPos;
if (rnEndPos - nRealStart <= 2)
{
sal_Unicode ch = m_aBufferString[ nRealStart ];
switch (ch)
{
case '<':
{
if (m_aBufferString.match("<<", nRealStart))
{
m_aCurToken.eType = TLL;
m_aCurToken.cMathChar = MS_LL;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "<<";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match("<=", nRealStart))
{
m_aCurToken.eType = TLE;
m_aCurToken.cMathChar = MS_LE;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "<=";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match("<-", nRealStart))
{
m_aCurToken.eType = TLEFTARROW;
m_aCurToken.cMathChar = MS_LEFTARROW;
m_aCurToken.nGroup = TG::Standalone;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "<-";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match("<>", nRealStart))
{
m_aCurToken.eType = TNEQ;
m_aCurToken.cMathChar = MS_NEQ;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "<>";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match("<?>", nRealStart))
{
m_aCurToken.eType = TPLACE;
m_aCurToken.cMathChar = MS_PLACE;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "<?>";
rnEndPos = nRealStart + 3;
}
else
{
m_aCurToken.eType = TLT;
m_aCurToken.cMathChar = MS_LT;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "<";
}
}
break;
case '>':
{
if (m_aBufferString.match(">=", nRealStart))
{
m_aCurToken.eType = TGE;
m_aCurToken.cMathChar = MS_GE;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = ">=";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match(">>", nRealStart))
{
m_aCurToken.eType = TGG;
m_aCurToken.cMathChar = MS_GG;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = ">>";
rnEndPos = nRealStart + 2;
}
else
{
m_aCurToken.eType = TGT;
m_aCurToken.cMathChar = MS_GT;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = ">";
}
}
break;
default:
bHandled = false;
}
}
}
else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
{
sal_Int32 &rnEndPos = aRes.EndPos;
if (rnEndPos - nRealStart == 1)
{
sal_Unicode ch = m_aBufferString[ nRealStart ];
switch (ch)
{
case '%':
{
//! modifies aRes.EndPos
OSL_ENSURE( rnEndPos >= nBufLen ||
'%' != m_aBufferString[ rnEndPos ],
"unexpected comment start" );
// get identifier of user-defined character
ParseResult aTmpRes = m_pSysCC->parseAnyToken(
m_aBufferString, rnEndPos,
KParseTokens::ANY_LETTER,
"",
coUserDefinedCharContFlags,
"" );
sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
// default setting for the case that no identifier
// i.e. a valid symbol-name is following the '%'
// character
m_aCurToken.eType = TTEXT;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText ="%";
m_aCurToken.nRow = m_nRow;
m_aCurToken.nCol = nTmpStart - m_nColOff;
if (aTmpRes.TokenType & KParseType::IDENTNAME)
{
sal_Int32 n = aTmpRes.EndPos - nTmpStart;
m_aCurToken.eType = TSPECIAL;
m_aCurToken.aText = m_aBufferString.copy( nTmpStart-1, n+1 );
OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
"empty identifier" );
if (aTmpRes.EndPos > rnEndPos)
rnEndPos = aTmpRes.EndPos;
else
++rnEndPos;
}
// if no symbol-name was found we start-over with
// finding the next token right after the '%' sign.
// I.e. we leave rnEndPos unmodified.
}
break;
case '[':
{
m_aCurToken.eType = TLBRACKET;
m_aCurToken.cMathChar = MS_LBRACKET;
m_aCurToken.nGroup = TG::LBrace;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "[";
}
break;
case '\\':
{
m_aCurToken.eType = TESCAPE;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "\\";
}
break;
case ']':
{
m_aCurToken.eType = TRBRACKET;
m_aCurToken.cMathChar = MS_RBRACKET;
m_aCurToken.nGroup = TG::RBrace;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "]";
}
break;
case '^':
{
m_aCurToken.eType = TRSUP;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::Power;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "^";
}
break;
case '`':
{
m_aCurToken.eType = TSBLANK;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::Blank;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "`";
}
break;
case '{':
{
m_aCurToken.eType = TLGROUP;
m_aCurToken.cMathChar = MS_LBRACE;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "{";
}
break;
case '|':
{
m_aCurToken.eType = TOR;
m_aCurToken.cMathChar = MS_OR;
m_aCurToken.nGroup = TG::Sum;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "|";
}
break;
case '}':
{
m_aCurToken.eType = TRGROUP;
m_aCurToken.cMathChar = MS_RBRACE;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "}";
}
break;
case '~':
{
m_aCurToken.eType = TBLANK;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::Blank;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "~";
}
break;
case '#':
{
if (m_aBufferString.match("##", nRealStart))
{
m_aCurToken.eType = TDPOUND;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "##";
rnEndPos = nRealStart + 2;
}
else
{
m_aCurToken.eType = TPOUND;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "#";
}
}
break;
case '&':
{
m_aCurToken.eType = TAND;
m_aCurToken.cMathChar = MS_AND;
m_aCurToken.nGroup = TG::Product;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "&";
}
break;
case '(':
{
m_aCurToken.eType = TLPARENT;
m_aCurToken.cMathChar = MS_LPARENT;
m_aCurToken.nGroup = TG::LBrace;
m_aCurToken.nLevel = 5; //! 0 to continue expression
m_aCurToken.aText = "(";
}
break;
case ')':
{
m_aCurToken.eType = TRPARENT;
m_aCurToken.cMathChar = MS_RPARENT;
m_aCurToken.nGroup = TG::RBrace;
m_aCurToken.nLevel = 0; //! 0 to terminate expression
m_aCurToken.aText = ")";
}
break;
case '*':
{
m_aCurToken.eType = TMULTIPLY;
m_aCurToken.cMathChar = MS_MULTIPLY;
m_aCurToken.nGroup = TG::Product;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "*";
}
break;
case '+':
{
if (m_aBufferString.match("+-", nRealStart))
{
m_aCurToken.eType = TPLUSMINUS;
m_aCurToken.cMathChar = MS_PLUSMINUS;
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "+-";
rnEndPos = nRealStart + 2;
}
else
{
m_aCurToken.eType = TPLUS;
m_aCurToken.cMathChar = MS_PLUS;
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "+";
}
}
break;
case '-':
{
if (m_aBufferString.match("-+", nRealStart))
{
m_aCurToken.eType = TMINUSPLUS;
m_aCurToken.cMathChar = MS_MINUSPLUS;
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "-+";
rnEndPos = nRealStart + 2;
}
else if (m_aBufferString.match("->", nRealStart))
{
m_aCurToken.eType = TRIGHTARROW;
m_aCurToken.cMathChar = MS_RIGHTARROW;
m_aCurToken.nGroup = TG::Standalone;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "->";
rnEndPos = nRealStart + 2;
}
else
{
m_aCurToken.eType = TMINUS;
m_aCurToken.cMathChar = MS_MINUS;
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "-";
}
}
break;
case '.':
{
// Only one character? Then it can't be a number.
if (m_nBufferIndex < m_aBufferString.getLength() - 1)
{
// for compatibility with SO5.2
// texts like .34 ...56 ... h ...78..90
// will be treated as numbers
m_aCurToken.eType = TNUMBER;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
sal_Int32 nTxtStart = m_nBufferIndex;
sal_Unicode cChar;
// if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
do
{
cChar = m_aBufferString[ ++m_nBufferIndex ];
}
while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
aRes.EndPos = m_nBufferIndex;
}
else
bHandled = false;
}
break;
case '/':
{
m_aCurToken.eType = TDIVIDEBY;
m_aCurToken.cMathChar = MS_SLASH;
m_aCurToken.nGroup = TG::Product;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "/";
}
break;
case '=':
{
m_aCurToken.eType = TASSIGN;
m_aCurToken.cMathChar = MS_ASSIGN;
m_aCurToken.nGroup = TG::Relation;
m_aCurToken.nLevel = 0;
m_aCurToken.aText = "=";
}
break;
default:
bHandled = false;
}
}
}
else
bHandled = false;
if (!bHandled)
{
m_aCurToken.eType = TCHARACTER;
m_aCurToken.cMathChar = '\0';
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = m_aBufferString.copy( nRealStart, 1 );
aRes.EndPos = nRealStart + 1;
}
if (TEND != m_aCurToken.eType)
m_nBufferIndex = aRes.EndPos;
}
namespace
{
SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
{
SmNodeArray aSubArray(rSubNodes.size());
for (size_t i = 0; i < rSubNodes.size(); ++i)
aSubArray[i] = rSubNodes[i].release();
return aSubArray;
}
}
// grammar
std::unique_ptr<SmTableNode> SmParser::DoTable()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::vector<std::unique_ptr<SmNode>> aLineArray;
aLineArray.push_back(DoLine());
while (m_aCurToken.eType == TNEWLINE)
{
NextToken();
aLineArray.push_back(DoLine());
}
assert(m_aCurToken.eType == TEND);
std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
xSNode->SetSubNodes(buildNodeArray(aLineArray));
return xSNode;
}
std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
// parse alignment info (if any), then go on with rest of expression
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::unique_ptr<SmStructureNode> xSNode;
if (TokenInGroup(TG::Align))
{
xSNode.reset(new SmAlignNode(m_aCurToken));
NextToken();
// allow for just one align statement in 5.0
if (TokenInGroup(TG::Align))
return DoError(SmParseError::DoubleAlign);
}
auto pNode = DoExpression(bUseExtraSpaces);
if (xSNode)
{
xSNode->SetSubNode(0, pNode.release());
return xSNode;
}
return pNode;
}
// Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
std::unique_ptr<SmNode> SmParser::DoLine()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::vector<std::unique_ptr<SmNode>> ExpressionArray;
// start with single expression that may have an alignment statement
// (and go on with expressions that must not have alignment
// statements in 'while' loop below. See also 'Expression()'.)
if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoAlign());
while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoExpression());
//If there's no expression, add an empty one.
//this is to avoid a formula tree without any caret
//positions, in visual formula editor.
if(ExpressionArray.empty())
{
SmToken aTok;
aTok.eType = TNEWLINE;
ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
}
auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
return xSNode;
}
std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::vector<std::unique_ptr<SmNode>> RelationArray;
RelationArray.push_back(DoRelation());
while (m_aCurToken.nLevel >= 4)
RelationArray.push_back(DoRelation());
if (RelationArray.size() > 1)
{
std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
xSNode->SetSubNodes(buildNodeArray(RelationArray));
xSNode->SetUseExtraSpaces(bUseExtraSpaces);
return xSNode;
}
else
{
// This expression has only one node so just push this node.
return std::move(RelationArray[0]);
}
}
std::unique_ptr<SmNode> SmParser::DoRelation()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto xFirst = DoSum();
while (TokenInGroup(TG::Relation))
{
std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
auto xSecond = DoOpSubSup();
auto xThird = DoSum();
xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
xFirst = std::move(xSNode);
}
return xFirst;
}
std::unique_ptr<SmNode> SmParser::DoSum()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto xFirst = DoProduct();
while (TokenInGroup(TG::Sum))
{
std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
auto xSecond = DoOpSubSup();
auto xThird = DoProduct();
xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
xFirst = std::move(xSNode);
}
return xFirst;
}
std::unique_ptr<SmNode> SmParser::DoProduct()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto xFirst = DoPower();
int nDepthLimit = 0;
while (TokenInGroup(TG::Product))
{
//this linear loop builds a recursive structure, if it gets
//too deep then later processing, e.g. releasing the tree,
//can exhaust stack
if (nDepthLimit > DEPTH_LIMIT)
throw std::range_error("parser depth limit");
std::unique_ptr<SmStructureNode> xSNode;
std::unique_ptr<SmNode> xOper;
bool bSwitchArgs = false;
SmTokenType eType = m_aCurToken.eType;
switch (eType)
{
case TOVER:
xSNode.reset(new SmBinVerNode(m_aCurToken));
xOper.reset(new SmRectangleNode(m_aCurToken));
NextToken();
break;
case TBOPER:
xSNode.reset(new SmBinHorNode(m_aCurToken));
NextToken();
//Let the glyph node know it's a binary operation
m_aCurToken.eType = TBOPER;
m_aCurToken.nGroup = TG::Product;
xOper = DoGlyphSpecial();
break;
case TOVERBRACE :
case TUNDERBRACE :
xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
xOper.reset(new SmMathSymbolNode(m_aCurToken));
NextToken();
break;
case TWIDEBACKSLASH:
case TWIDESLASH:
{
SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
pSTmp->SetAscending(eType == TWIDESLASH);
xSNode.reset(pSTmp);
xOper.reset(new SmPolyLineNode(m_aCurToken));
NextToken();
bSwitchArgs = true;
break;
}
default:
xSNode.reset(new SmBinHorNode(m_aCurToken));
xOper = DoOpSubSup();
}
auto xArg = DoPower();
if (bSwitchArgs)
{
//! vgl siehe SmBinDiagonalNode::Arrange
xSNode->SetSubNodes(std::move(xFirst), std::move(xArg), std::move(xOper));
}
else
{
xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xArg));
}
xFirst = std::move(xSNode);
++nDepthLimit;
}
return xFirst;
}
std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
{
std::unique_ptr<SmNode> xGivenNode(pGivenNode);
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
assert(m_aCurToken.nGroup == nActiveGroup);
std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
//! Of course 'm_aCurToken' is just the first sub-/supscript token.
//! It should be of no further interest. The positions of the
//! sub-/supscripts will be identified by the corresponding subnodes
//! index in the 'aSubNodes' array (enum value from 'SmSubSup').
pNode->SetUseLimits(nActiveGroup == TG::Limit);
// initialize subnodes array
std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
aSubNodes[0] = std::move(xGivenNode);
// process all sub-/supscripts
int nIndex = 0;
while (TokenInGroup(nActiveGroup))
{
SmTokenType eType (m_aCurToken.eType);
switch (eType)
{
case TRSUB : nIndex = static_cast<int>(RSUB); break;
case TRSUP : nIndex = static_cast<int>(RSUP); break;
case TFROM :
case TCSUB : nIndex = static_cast<int>(CSUB); break;
case TTO :
case TCSUP : nIndex = static_cast<int>(CSUP); break;
case TLSUB : nIndex = static_cast<int>(LSUB); break;
case TLSUP : nIndex = static_cast<int>(LSUP); break;
default :
SAL_WARN( "starmath", "unknown case");
}
nIndex++;
assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
std::unique_ptr<SmNode> xENode;
if (aSubNodes[nIndex]) // if already occupied at earlier iteration
{
// forget the earlier one, remember an error instead
aSubNodes[nIndex].reset();
xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
}
else
{
// skip sub-/supscript token
NextToken();
}
// get sub-/supscript node
// (even when we saw a double-sub/supscript error in the above
// in order to minimize mess and continue parsing.)
std::unique_ptr<SmNode> xSNode;
if (eType == TFROM || eType == TTO)
{
// parse limits in old 4.0 and 5.0 style
xSNode = DoRelation();
}
else
xSNode = DoTerm(true);
aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
}
pNode->SetSubNodes(buildNodeArray(aSubNodes));
return pNode;
}
std::unique_ptr<SmNode> SmParser::DoOpSubSup()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
// get operator symbol
auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
// skip operator token
NextToken();
// get sub- supscripts if any
if (m_aCurToken.nGroup == TG::Power)
return DoSubSup(TG::Power, pNode.release());
return pNode;
}
std::unique_ptr<SmNode> SmParser::DoPower()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
// get body for sub- supscripts on top of stack
std::unique_ptr<SmNode> xNode(DoTerm(false));
if (m_aCurToken.nGroup == TG::Power)
return DoSubSup(TG::Power, xNode.release());
return xNode;
}
std::unique_ptr<SmBlankNode> SmParser::DoBlank()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(TokenInGroup(TG::Blank));
std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
do
{
pBlankNode->IncreaseBy(m_aCurToken);
NextToken();
}
while (TokenInGroup(TG::Blank));
// Ignore trailing spaces, if corresponding option is set
if ( m_aCurToken.eType == TNEWLINE ||
(m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
{
pBlankNode->Clear();
}
return pBlankNode;
}
std::unique_ptr<SmNode> SmParser::DoTerm(bool bGroupNumberIdent)
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
switch (m_aCurToken.eType)
{
case TESCAPE :
return DoEscape();
case TNOSPACE :
case TLGROUP :
{
bool bNoSpace = m_aCurToken.eType == TNOSPACE;
if (bNoSpace)
NextToken();
if (m_aCurToken.eType != TLGROUP)
return DoTerm(false); // nospace is no longer concerned
NextToken();
// allow for empty group
if (m_aCurToken.eType == TRGROUP)
{
std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
xSNode->SetSubNodes(nullptr, nullptr);
NextToken();
return std::unique_ptr<SmNode>(xSNode.release());
}
auto pNode = DoAlign(!bNoSpace);
if (m_aCurToken.eType == TRGROUP) {
NextToken();
return pNode;
}
auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
xSNode->SetSubNodes(std::move(pNode), std::move(xError));
return std::unique_ptr<SmNode>(xSNode.release());
}
case TLEFT :
return DoBrace();
case TBLANK :
case TSBLANK :
return DoBlank();
case TTEXT :
{
auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
case TCHARACTER :
{
auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
case TIDENT :
case TNUMBER :
{
auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken,
m_aCurToken.eType == TNUMBER ?
FNT_NUMBER :
FNT_VARIABLE);
if (!bGroupNumberIdent)
{
NextToken();
return std::unique_ptr<SmNode>(pTextNode.release());
}
std::vector<std::unique_ptr<SmNode>> aNodes;
// Some people want to be able to write "x_2n" for "x_{2n}"
// although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
// The tokenizer skips whitespaces so we need some additional
// work to distinguish from "x_2 n".
// See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
// https://bugs.libreoffice.org/show_bug.cgi?id=55853
sal_Int32 nBufLen = m_aBufferString.getLength();
// We need to be careful to call NextToken() only after having
// tested for a whitespace separator (otherwise it will be
// skipped!)
bool moveToNextToken = true;
while (m_nBufferIndex < nBufLen &&
m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
UnicodeType::SPACE_SEPARATOR)
{
NextToken();
if (m_aCurToken.eType != TNUMBER &&
m_aCurToken.eType != TIDENT)
{
// Neither a number nor an identifier. We just moved to
// the next token, so no need to do that again.
moveToNextToken = false;
break;
}
aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
m_aCurToken.eType ==
TNUMBER ?
FNT_NUMBER :
FNT_VARIABLE)));
}
if (moveToNextToken)
NextToken();
if (aNodes.empty())
return std::unique_ptr<SmNode>(pTextNode.release());
// We have several concatenated identifiers and numbers.
// Let's group them into one SmExpressionNode.
aNodes.insert(aNodes.begin(), std::move(pTextNode));
std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
xNode->SetSubNodes(buildNodeArray(aNodes));
return std::unique_ptr<SmNode>(xNode.release());
}
case TLEFTARROW :
case TRIGHTARROW :
case TUPARROW :
case TDOWNARROW :
case TCIRC :
case TDRARROW :
case TDLARROW :
case TDLRARROW :
case TEXISTS :
case TNOTEXISTS :
case TFORALL :
case TPARTIAL :
case TNABLA :
case TTOWARD :
case TDOTSAXIS :
case TDOTSDIAG :
case TDOTSDOWN :
case TDOTSLOW :
case TDOTSUP :
case TDOTSVERT :
{
auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
case TSETN :
case TSETZ :
case TSETQ :
case TSETR :
case TSETC :
case THBAR :
case TLAMBDABAR :
case TBACKEPSILON :
case TALEPH :
case TIM :
case TRE :
case TWP :
case TEMPTYSET :
case TINFINITY :
{
auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
case TPLACE:
{
auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
case TSPECIAL:
return DoSpecial();
case TBINOM:
return DoBinom();
case TSTACK:
return DoStack();
case TMATRIX:
return DoMatrix();
default:
if (TokenInGroup(TG::LBrace))
return DoBrace();
if (TokenInGroup(TG::Oper))
return DoOperator();
if (TokenInGroup(TG::UnOper))
return DoUnOper();
if ( TokenInGroup(TG::Attribute) ||
TokenInGroup(TG::FontAttr) )
{
std::stack<std::unique_ptr<SmStructureNode>> aStack;
bool bIsAttr;
while ( (bIsAttr = TokenInGroup(TG::Attribute))
|| TokenInGroup(TG::FontAttr))
aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
auto xFirstNode = DoPower();
while (!aStack.empty())
{
std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
aStack.pop();
xNode->SetSubNodes(nullptr, std::move(xFirstNode));
xFirstNode = std::move(xNode);
}
return xFirstNode;
}
if (TokenInGroup(TG::Function))
return DoFunction();
return DoError(SmParseError::UnexpectedChar);
}
}
std::unique_ptr<SmNode> SmParser::DoEscape()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
NextToken();
switch (m_aCurToken.eType)
{
case TLPARENT :
case TRPARENT :
case TLBRACKET :
case TRBRACKET :
case TLDBRACKET :
case TRDBRACKET :
case TLBRACE :
case TLGROUP :
case TRBRACE :
case TRGROUP :
case TLANGLE :
case TRANGLE :
case TLCEIL :
case TRCEIL :
case TLFLOOR :
case TRFLOOR :
case TLLINE :
case TRLINE :
case TLDLINE :
case TRDLINE :
{
auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
NextToken();
return std::unique_ptr<SmNode>(pNode.release());
}
default:
return DoError(SmParseError::UnexpectedToken);
}
}
std::unique_ptr<SmOperNode> SmParser::DoOperator()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(TokenInGroup(TG::Oper));
auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
// get operator
auto xOperator = DoOper();
if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
// get argument
auto xArg = DoPower();
xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
return xSNode;
}
std::unique_ptr<SmNode> SmParser::DoOper()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
SmTokenType eType (m_aCurToken.eType);
std::unique_ptr<SmNode> pNode;
switch (eType)
{
case TSUM :
case TPROD :
case TCOPROD :
case TINT :
case TINTD :
case TIINT :
case TIIINT :
case TLINT :
case TLLINT :
case TLLLINT :
pNode.reset(new SmMathSymbolNode(m_aCurToken));
break;
case TLIM :
case TLIMSUP :
case TLIMINF :
{
const sal_Char* pLim = nullptr;
switch (eType)
{
case TLIM : pLim = "lim"; break;
case TLIMSUP : pLim = "lim sup"; break;
case TLIMINF : pLim = "lim inf"; break;
default:
break;
}
if( pLim )
m_aCurToken.aText = OUString::createFromAscii(pLim);
pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
}
break;
case TOPER :
NextToken();
OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
break;
default :
assert(false && "unknown case");
}
NextToken();
return pNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoUnOper()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(TokenInGroup(TG::UnOper));
SmToken aNodeToken = m_aCurToken;
SmTokenType eType = m_aCurToken.eType;
bool bIsPostfix = eType == TFACT;
std::unique_ptr<SmStructureNode> xSNode;
std::unique_ptr<SmNode> xOper;
std::unique_ptr<SmNode> xExtra;
std::unique_ptr<SmNode> xArg;
switch (eType)
{
case TABS :
case TSQRT :
NextToken();
break;
case TNROOT :
NextToken();
xExtra = DoPower();
break;
case TUOPER :
NextToken();
//Let the glyph know what it is...
m_aCurToken.eType = TUOPER;
m_aCurToken.nGroup = TG::UnOper;
xOper = DoGlyphSpecial();
break;
case TPLUS :
case TMINUS :
case TPLUSMINUS :
case TMINUSPLUS :
case TNEG :
case TFACT :
xOper = DoOpSubSup();
break;
default :
assert(false);
}
// get argument
xArg = DoPower();
if (eType == TABS)
{
xSNode.reset(new SmBraceNode(aNodeToken));
xSNode->SetScaleMode(SmScaleMode::Height);
// build nodes for left & right lines
// (text, group, level of the used token are of no interest here)
// we'll use row & column of the keyword for abs
aNodeToken.eType = TABS;
aNodeToken.cMathChar = MS_VERTLINE;
std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
}
else if (eType == TSQRT || eType == TNROOT)
{
xSNode.reset(new SmRootNode(aNodeToken));
xOper.reset(new SmRootSymbolNode(aNodeToken));
xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
}
else
{
xSNode.reset(new SmUnHorNode(aNodeToken));
if (bIsPostfix)
xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
else
{
// prefix operator
xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
}
}
return xSNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(TokenInGroup(TG::Attribute));
auto xSNode = std::make_unique<SmAttributNode>(m_aCurToken);
std::unique_ptr<SmNode> xAttr;
SmScaleMode eScaleMode = SmScaleMode::None;
// get appropriate node for the attribute itself
switch (m_aCurToken.eType)
{ case TUNDERLINE :
case TOVERLINE :
case TOVERSTRIKE :
xAttr.reset(new SmRectangleNode(m_aCurToken));
eScaleMode = SmScaleMode::Width;
break;
case TWIDEVEC :
case TWIDEHARPOON :
case TWIDEHAT :
case TWIDETILDE :
xAttr.reset(new SmMathSymbolNode(m_aCurToken));
eScaleMode = SmScaleMode::Width;
break;
default :
xAttr.reset(new SmMathSymbolNode(m_aCurToken));
}
NextToken();
xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
xSNode->SetScaleMode(eScaleMode);
return xSNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(TokenInGroup(TG::FontAttr));
switch (m_aCurToken.eType)
{
case TITALIC :
case TNITALIC :
case TBOLD :
case TNBOLD :
case TPHANTOM :
{
auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
NextToken();
return pNode;
}
case TSIZE :
return DoFontSize();
case TFONT :
return DoFont();
case TCOLOR :
return DoColor();
default :
assert(false);
return {};
}
}
std::unique_ptr<SmStructureNode> SmParser::DoColor()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(m_aCurToken.eType == TCOLOR);
std::unique_ptr<SmStructureNode> xNode;
// last color rules, get that one
SmToken aToken;
do
{ NextToken();
if (TokenInGroup(TG::Color))
{ aToken = m_aCurToken;
NextToken();
}
else
{
return DoError(SmParseError::ColorExpected);
}
} while (m_aCurToken.eType == TCOLOR);
xNode.reset(new SmFontNode(aToken));
return xNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoFont()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(m_aCurToken.eType == TFONT);
std::unique_ptr<SmStructureNode> xNode;
// last font rules, get that one
SmToken aToken;
do
{ NextToken();
if (TokenInGroup(TG::Font))
{ aToken = m_aCurToken;
NextToken();
}
else
{
return DoError(SmParseError::FontExpected);
}
} while (m_aCurToken.eType == TFONT);
xNode.reset(new SmFontNode(aToken));
return xNode;
}
// gets number used as arguments in Math formulas (e.g. 'size' command)
// Format: no negative numbers, must start with a digit, no exponent notation, ...
static bool lcl_IsNumber(const OUString& rText)
{
bool bPoint = false;
const sal_Unicode* pBuffer = rText.getStr();
for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
{
const sal_Unicode cChar = *pBuffer;
if(cChar == '.')
{
if(bPoint)
return false;
else
bPoint = true;
}
else if ( !rtl::isAsciiDigit( cChar ) )
return false;
}
return true;
}
std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(m_aCurToken.eType == TSIZE);
FontSizeType Type;
std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
NextToken();
switch (m_aCurToken.eType)
{
case TNUMBER: Type = FontSizeType::ABSOLUT; break;
case TPLUS: Type = FontSizeType::PLUS; break;
case TMINUS: Type = FontSizeType::MINUS; break;
case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
case TDIVIDEBY: Type = FontSizeType::DIVIDE; break;
default:
return DoError(SmParseError::SizeExpected);
}
if (Type != FontSizeType::ABSOLUT)
{
NextToken();
if (m_aCurToken.eType != TNUMBER)
return DoError(SmParseError::SizeExpected);
}
// get number argument
Fraction aValue( 1 );
if (lcl_IsNumber( m_aCurToken.aText ))
{
double fTmp = m_aCurToken.aText.toDouble();
if (fTmp != 0.0)
{
aValue = fTmp;
//!! keep the numerator and denominator from being too large
//!! otherwise ongoing multiplications may result in overflows
//!! (for example in SmNode::SetFontSize the font size calculated
//!! may become 0 because of this!!! Happens e.g. for ftmp = 2.9 with Linux
//!! or ftmp = 1.11111111111111111... (11/9) on every platform.)
if (aValue.GetDenominator() > 1000)
{
long nNum = aValue.GetNumerator();
long nDenom = aValue.GetDenominator();
while (nDenom > 1000)
{
nNum /= 10;
nDenom /= 10;
}
aValue = Fraction( nNum, nDenom );
}
}
}
NextToken();
pFontNode->SetSizeParameter(aValue, Type);
return pFontNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoBrace()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));
std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
std::unique_ptr<SmNode> pBody, pLeft, pRight;
SmScaleMode eScaleMode = SmScaleMode::None;
SmParseError eError = SmParseError::None;
if (m_aCurToken.eType == TLEFT)
{ NextToken();
eScaleMode = SmScaleMode::Height;
// check for left bracket
if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
{
pLeft.reset(new SmMathSymbolNode(m_aCurToken));
NextToken();
pBody = DoBracebody(true);
if (m_aCurToken.eType == TRIGHT)
{ NextToken();
// check for right bracket
if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
{
pRight.reset(new SmMathSymbolNode(m_aCurToken));
NextToken();
}
else
eError = SmParseError::RbraceExpected;
}
else
eError = SmParseError::RightExpected;
}
else
eError = SmParseError::LbraceExpected;
}
else
{
assert(TokenInGroup(TG::LBrace));
pLeft.reset(new SmMathSymbolNode(m_aCurToken));
NextToken();
pBody = DoBracebody(false);
SmTokenType eExpectedType = TUNKNOWN;
switch (pLeft->GetToken().eType)
{ case TLPARENT : eExpectedType = TRPARENT; break;
case TLBRACKET : eExpectedType = TRBRACKET; break;
case TLBRACE : eExpectedType = TRBRACE; break;
case TLDBRACKET : eExpectedType = TRDBRACKET; break;
case TLLINE : eExpectedType = TRLINE; break;
case TLDLINE : eExpectedType = TRDLINE; break;
case TLANGLE : eExpectedType = TRANGLE; break;
case TLFLOOR : eExpectedType = TRFLOOR; break;
case TLCEIL : eExpectedType = TRCEIL; break;
default :
SAL_WARN("starmath", "unknown case");
}
if (m_aCurToken.eType == eExpectedType)
{
pRight.reset(new SmMathSymbolNode(m_aCurToken));
NextToken();
}
else
eError = SmParseError::ParentMismatch;
}
if (eError == SmParseError::None)
{
assert(pLeft);
assert(pRight);
xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
xSNode->SetScaleMode(eScaleMode);
return xSNode;
}
return DoError(eError);
}
std::unique_ptr<SmBracebodyNode> SmParser::DoBracebody(bool bIsLeftRight)
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
std::vector<std::unique_ptr<SmNode>> aNodes;
// get body if any
if (bIsLeftRight)
{
do
{
if (m_aCurToken.eType == TMLINE)
{
aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
NextToken();
}
else if (m_aCurToken.eType != TRIGHT)
{
aNodes.push_back(DoAlign());
if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
aNodes.emplace_back(DoError(SmParseError::RightExpected));
}
} while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
}
else
{
do
{
if (m_aCurToken.eType == TMLINE)
{
aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
NextToken();
}
else if (!TokenInGroup(TG::RBrace))
{
aNodes.push_back(DoAlign());
if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
}
} while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
}
pBody->SetSubNodes(buildNodeArray(aNodes));
pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
return pBody;
}
std::unique_ptr<SmTextNode> SmParser::DoFunction()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
switch (m_aCurToken.eType)
{
case TFUNC:
NextToken(); // skip "FUNC"-statement
[[fallthrough]];
case TSIN :
case TCOS :
case TTAN :
case TCOT :
case TASIN :
case TACOS :
case TATAN :
case TACOT :
case TSINH :
case TCOSH :
case TTANH :
case TCOTH :
case TASINH :
case TACOSH :
case TATANH :
case TACOTH :
case TLN :
case TLOG :
case TEXP :
{
auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
NextToken();
return pNode;
}
default:
assert(false);
return nullptr;
}
}
std::unique_ptr<SmTableNode> SmParser::DoBinom()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
NextToken();
auto xFirst = DoSum();
auto xSecond = DoSum();
xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
return xSNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoStack()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
NextToken();
if (m_aCurToken.eType != TLGROUP)
return DoError(SmParseError::LgroupExpected);
std::vector<std::unique_ptr<SmNode>> aExprArr;
do
{
NextToken();
aExprArr.push_back(DoAlign());
}
while (m_aCurToken.eType == TPOUND);
if (m_aCurToken.eType == TRGROUP)
NextToken();
else
aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
xSNode->SetSubNodes(buildNodeArray(aExprArr));
return xSNode;
}
std::unique_ptr<SmStructureNode> SmParser::DoMatrix()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
NextToken();
if (m_aCurToken.eType != TLGROUP)
return DoError(SmParseError::LgroupExpected);
std::vector<std::unique_ptr<SmNode>> aExprArr;
do
{
NextToken();
aExprArr.push_back(DoAlign());
}
while (m_aCurToken.eType == TPOUND);
size_t nCol = aExprArr.size();
size_t nRow = 1;
while (m_aCurToken.eType == TDPOUND)
{
NextToken();
for (size_t i = 0; i < nCol; i++)
{
auto xNode = DoAlign();
if (i < (nCol - 1))
{
if (m_aCurToken.eType == TPOUND)
NextToken();
else
xNode = DoError(SmParseError::PoundExpected);
}
aExprArr.emplace_back(std::move(xNode));
}
++nRow;
}
if (m_aCurToken.eType == TRGROUP)
NextToken();
else
{
std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
if (aExprArr.empty())
nRow = nCol = 1;
else
aExprArr.pop_back();
aExprArr.emplace_back(std::move(xENode));
}
xMNode->SetSubNodes(buildNodeArray(aExprArr));
xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
static_cast<sal_uInt16>(nCol));
return std::unique_ptr<SmStructureNode>(xMNode.release());
}
std::unique_ptr<SmSpecialNode> SmParser::DoSpecial()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
bool bReplace = false;
OUString &rName = m_aCurToken.aText;
OUString aNewName;
// conversion of symbol names for 6.0 (XML) file format
// (name change on import / export.
// UI uses localized names XML file format does not.)
if( rName.startsWith("%") )
{
if (IsImportSymbolNames())
{
aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
bReplace = true;
}
else if (IsExportSymbolNames())
{
aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
bReplace = true;
}
}
if (!aNewName.isEmpty())
aNewName = "%" + aNewName;
if (bReplace && !aNewName.isEmpty() && rName != aNewName)
{
Replace(GetTokenIndex(), rName.getLength(), aNewName);
rName = aNewName;
}
// add symbol name to list of used symbols
const OUString aSymbolName(m_aCurToken.aText.copy(1));
if (!aSymbolName.isEmpty())
m_aUsedSymbols.insert( aSymbolName );
auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
NextToken();
return pNode;
}
std::unique_ptr<SmGlyphSpecialNode> SmParser::DoGlyphSpecial()
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
NextToken();
return pNode;
}
std::unique_ptr<SmExpressionNode> SmParser::DoError(SmParseError eError)
{
DepthProtect aDepthGuard(m_nParseDepth);
if (aDepthGuard.TooDeep())
throw std::range_error("parser depth limit");
auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
std::unique_ptr<SmErrorNode> pErr(new SmErrorNode(m_aCurToken));
xSNode->SetSubNodes(std::move(pErr), nullptr);
AddError(eError, xSNode.get());
NextToken();
return xSNode;
}
// end grammar
SmParser::SmParser()
: m_nCurError( 0 )
, m_nBufferIndex( 0 )
, m_nTokenIndex( 0 )
, m_nRow( 0 )
, m_nColOff( 0 )
, m_bImportSymNames( false )
, m_bExportSymNames( false )
, m_nParseDepth(0)
, m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
, m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
{
}
std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
{
m_aUsedSymbols.clear();
m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
m_nBufferIndex = 0;
m_nTokenIndex = 0;
m_nRow = 1;
m_nColOff = 0;
m_nCurError = -1;
m_aErrDescList.clear();
NextToken();
return DoTable();
}
std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
{
m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
m_nBufferIndex = 0;
m_nTokenIndex = 0;
m_nRow = 1;
m_nColOff = 0;
m_nCurError = -1;
m_aErrDescList.clear();
NextToken();
return DoExpression();
}
void SmParser::AddError(SmParseError Type, SmNode *pNode)
{
std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
pErrDesc->m_eType = Type;
pErrDesc->m_pNode = pNode;
pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
const char* pRID;
switch (Type)
{
case SmParseError::UnexpectedChar: pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
case SmParseError::UnexpectedToken: pRID = RID_ERR_UNEXPECTEDTOKEN; break;
case SmParseError::PoundExpected: pRID = RID_ERR_POUNDEXPECTED; break;
case SmParseError::ColorExpected: pRID = RID_ERR_COLOREXPECTED; break;
case SmParseError::LgroupExpected: pRID = RID_ERR_LGROUPEXPECTED; break;
case SmParseError::RgroupExpected: pRID = RID_ERR_RGROUPEXPECTED; break;
case SmParseError::LbraceExpected: pRID = RID_ERR_LBRACEEXPECTED; break;
case SmParseError::RbraceExpected: pRID = RID_ERR_RBRACEEXPECTED; break;
case SmParseError::ParentMismatch: pRID = RID_ERR_PARENTMISMATCH; break;
case SmParseError::RightExpected: pRID = RID_ERR_RIGHTEXPECTED; break;
case SmParseError::FontExpected: pRID = RID_ERR_FONTEXPECTED; break;
case SmParseError::SizeExpected: pRID = RID_ERR_SIZEEXPECTED; break;
case SmParseError::DoubleAlign: pRID = RID_ERR_DOUBLEALIGN; break;
case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT; break;
default:
assert(false);
return;
}
pErrDesc->m_aText += SmResId(pRID);
m_aErrDescList.push_back(std::move(pErrDesc));
}
const SmErrorDesc *SmParser::NextError()
{
if ( !m_aErrDescList.empty() )
if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
else
{
m_nCurError = 0;
return m_aErrDescList[ m_nCurError ].get();
}
else return nullptr;
}
const SmErrorDesc *SmParser::PrevError()
{
if ( !m_aErrDescList.empty() )
if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
else
{
m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
return m_aErrDescList[ m_nCurError ].get();
}
else return nullptr;
}
const SmErrorDesc *SmParser::GetError()
{
if ( !m_aErrDescList.empty() )
return m_aErrDescList.front().get();
return nullptr;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */