Files
libreoffice/sc/source/core/tool/stringutil.cxx
Jochen Nitschke 86639b757b sc: remove CHAR_CR define
Change-Id: Ief0ef8916166d479e868fc56887aa788aae63260
Reviewed-on: https://gerrit.libreoffice.org/27327
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Noel Grandin <noelgrandin@gmail.com>
2016-07-20 06:44:52 +00:00

526 lines
15 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include "stringutil.hxx"
#include "global.hxx"
#include <svl/zforlist.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/strbuf.hxx>
#include <rtl/math.hxx>
ScSetStringParam::ScSetStringParam() :
mpNumFormatter(nullptr),
mbDetectNumberFormat(true),
meSetTextNumFormat(Never),
mbHandleApostrophe(true),
meStartListening(sc::SingleCellListening)
{
}
void ScSetStringParam::setTextInput()
{
mbDetectNumberFormat = false;
mbHandleApostrophe = false;
meSetTextNumFormat = Always;
}
void ScSetStringParam::setNumericInput()
{
mbDetectNumberFormat = true;
mbHandleApostrophe = true;
meSetTextNumFormat = Never;
}
bool ScStringUtil::parseSimpleNumber(
const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
{
// Actually almost the entire pre-check is unnecessary and we could call
// rtl::math::stringToDouble() just after having exchanged ascii space with
// non-breaking space, if it wasn't for check of grouped digits. The NaN
// and Inf cases that are accepted by stringToDouble() could be detected
// using rtl::math::isFinite() on the result.
/* TODO: The grouped digits check isn't even valid for locales that do not
* group in thousands ... e.g. Indian locales. But that's something also
* the number scanner doesn't implement yet, only the formatter. */
OUStringBuffer aBuf;
sal_Int32 i = 0;
sal_Int32 n = rStr.getLength();
const sal_Unicode* p = rStr.getStr();
const sal_Unicode* pLast = p + (n-1);
sal_Int32 nPosDSep = -1, nPosGSep = -1;
sal_uInt32 nDigitCount = 0;
bool haveSeenDigit = false;
sal_Int32 nPosExponent = -1;
// Skip preceding spaces.
for (i = 0; i < n; ++i, ++p)
{
sal_Unicode c = *p;
if (c != 0x0020 && c != 0x00A0)
// first non-space character. Exit.
break;
}
if (i == n)
// the whole string is space. Fail.
return false;
n -= i; // Subtract the length of the preceding spaces.
// Determine the last non-space character.
for (; p != pLast; --pLast, --n)
{
sal_Unicode c = *pLast;
if (c != 0x0020 && c != 0x00A0)
// Non space character. Exit.
break;
}
for (i = 0; i < n; ++i, ++p)
{
sal_Unicode c = *p;
if (c == 0x0020 && gsep == 0x00A0)
// ascii space to unicode space if that is group separator
c = 0x00A0;
if ('0' <= c && c <= '9')
{
// this is a digit.
aBuf.append(c);
haveSeenDigit = true;
++nDigitCount;
}
else if (c == dsep)
{
// this is a decimal separator.
if (nPosDSep >= 0)
// a second decimal separator -> not a valid number.
return false;
if (nPosGSep >= 0 && i - nPosGSep != 4)
// the number has a group separator and the decimal sep is not
// positioned correctly.
return false;
nPosDSep = i;
nPosGSep = -1;
aBuf.append(c);
nDigitCount = 0;
}
else if (c == gsep)
{
// this is a group (thousand) separator.
if (!haveSeenDigit)
// not allowed before digits.
return false;
if (nPosDSep >= 0)
// not allowed after the decimal separator.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
if (nPosExponent >= 0)
// not allowed in exponent.
return false;
nPosGSep = i;
nDigitCount = 0;
}
else if (c == '-' || c == '+')
{
// A sign must be the first character if it's given, or immediately
// follow the exponent character if present.
if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
aBuf.append(c);
else
return false;
}
else if (c == 'E' || c == 'e')
{
// this is an exponent designator.
if (nPosExponent >= 0)
// Only one exponent allowed.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
aBuf.append(c);
nPosExponent = i;
nPosDSep = -1;
nPosGSep = -1;
nDigitCount = 0;
}
else
return false;
}
// finished parsing the number.
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
sal_Int32 nParseEnd = 0;
OUString aString( aBuf.makeStringAndClear());
rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
// Not a valid number or not entire string consumed.
return false;
return true;
}
bool ScStringUtil::parseSimpleNumber(
const char* p, size_t n, char dsep, char gsep, double& rVal)
{
// Actually almost the entire pre-check is unnecessary and we could call
// rtl::math::stringToDouble() just after having exchanged ascii space with
// non-breaking space, if it wasn't for check of grouped digits. The NaN
// and Inf cases that are accepted by stringToDouble() could be detected
// using rtl::math::isFinite() on the result.
/* TODO: The grouped digits check isn't even valid for locales that do not
* group in thousands ... e.g. Indian locales. But that's something also
* the number scanner doesn't implement yet, only the formatter. */
OStringBuffer aBuf;
size_t i = 0;
const char* pLast = p + (n-1);
sal_Int32 nPosDSep = -1, nPosGSep = -1;
sal_uInt32 nDigitCount = 0;
bool haveSeenDigit = false;
sal_Int32 nPosExponent = -1;
// Skip preceding spaces.
for (i = 0; i < n; ++i, ++p)
{
char c = *p;
if (c != ' ')
// first non-space character. Exit.
break;
}
if (i == n)
// the whole string is space. Fail.
return false;
n -= i; // Subtract the length of the preceding spaces.
// Determine the last non-space character.
for (; p != pLast; --pLast, --n)
{
char c = *pLast;
if (c != ' ')
// Non space character. Exit.
break;
}
for (i = 0; i < n; ++i, ++p)
{
char c = *p;
if ('0' <= c && c <= '9')
{
// this is a digit.
aBuf.append(c);
haveSeenDigit = true;
++nDigitCount;
}
else if (c == dsep)
{
// this is a decimal separator.
if (nPosDSep >= 0)
// a second decimal separator -> not a valid number.
return false;
if (nPosGSep >= 0 && i - nPosGSep != 4)
// the number has a group separator and the decimal sep is not
// positioned correctly.
return false;
nPosDSep = i;
nPosGSep = -1;
aBuf.append(c);
nDigitCount = 0;
}
else if (c == gsep)
{
// this is a group (thousand) separator.
if (!haveSeenDigit)
// not allowed before digits.
return false;
if (nPosDSep >= 0)
// not allowed after the decimal separator.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
if (nPosExponent >= 0)
// not allowed in exponent.
return false;
nPosGSep = i;
nDigitCount = 0;
}
else if (c == '-' || c == '+')
{
// A sign must be the first character if it's given, or immediately
// follow the exponent character if present.
if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
aBuf.append(c);
else
return false;
}
else if (c == 'E' || c == 'e')
{
// this is an exponent designator.
if (nPosExponent >= 0)
// Only one exponent allowed.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
aBuf.append(c);
nPosExponent = i;
nPosDSep = -1;
nPosGSep = -1;
nDigitCount = 0;
}
else
return false;
}
// finished parsing the number.
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
sal_Int32 nParseEnd = 0;
OString aString( aBuf.makeStringAndClear());
rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
// Not a valid number or not entire string consumed.
return false;
return true;
}
sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
{
assert( !(rQuotedPairs.getLength()%2) );
assert( rQuotedPairs.indexOf(cTok) );
// empty string: TokenCount is 0 per definition
if ( rIn.isEmpty() )
return 0;
sal_Int32 nTokCount = 1;
sal_Int32 nLen = rIn.getLength();
sal_Int32 nQuotedLen = rQuotedPairs.getLength();
sal_Unicode cQuotedEndChar = 0;
const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
const sal_Unicode* pStr = rIn.getStr();
sal_Int32 nIndex = 0;
while ( nIndex < nLen )
{
sal_Unicode c = *pStr;
if ( cQuotedEndChar )
{
// reached end of the quote ?
if ( c == cQuotedEndChar )
cQuotedEndChar = 0;
}
else
{
// Is the char a quote-beginn char ?
sal_Int32 nQuoteIndex = 0;
while ( nQuoteIndex < nQuotedLen )
{
if ( pQuotedStr[nQuoteIndex] == c )
{
cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
break;
}
else
nQuoteIndex += 2;
}
// If the token-char matches then increase TokCount
if ( c == cTok )
++nTokCount;
}
++pStr;
++nIndex;
}
return nTokCount;
}
OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
sal_Unicode cTok, sal_Int32& rIndex )
{
assert( !(rQuotedPairs.getLength()%2) );
assert( rQuotedPairs.indexOf(cTok) == -1 );
const sal_Unicode* pStr = rIn.getStr();
const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
sal_Unicode cQuotedEndChar = 0;
sal_Int32 nQuotedLen = rQuotedPairs.getLength();
sal_Int32 nLen = rIn.getLength();
sal_Int32 nTok = 0;
sal_Int32 nFirstChar = rIndex;
sal_Int32 i = nFirstChar;
// detect token position and length
pStr += i;
while ( i < nLen )
{
sal_Unicode c = *pStr;
if ( cQuotedEndChar )
{
// end of the quote reached ?
if ( c == cQuotedEndChar )
cQuotedEndChar = 0;
}
else
{
// Is the char a quote-begin char ?
sal_Int32 nQuoteIndex = 0;
while ( nQuoteIndex < nQuotedLen )
{
if ( pQuotedStr[nQuoteIndex] == c )
{
cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
break;
}
else
nQuoteIndex += 2;
}
// If the token-char matches then increase TokCount
if ( c == cTok )
{
++nTok;
if ( nTok == nToken )
nFirstChar = i+1;
else
{
if ( nTok > nToken )
break;
}
}
}
++pStr;
++i;
}
if ( nTok >= nToken )
{
if ( i < nLen )
rIndex = i+1;
else
rIndex = -1;
return rIn.copy( nFirstChar, i-nFirstChar );
}
else
{
rIndex = -1;
return OUString();
}
}
bool ScStringUtil::isMultiline( const OUString& rStr )
{
if (rStr.indexOf('\n') != -1)
return true;
if (rStr.indexOf('\r') != -1)
return true;
return false;
}
ScInputStringType ScStringUtil::parseInputString(
SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
{
ScInputStringType aRet;
aRet.mnFormatType = 0;
aRet.meType = ScInputStringType::Unknown;
aRet.maText = rStr;
aRet.mfValue = 0.0;
if (rStr.getLength() > 1 && rStr[0] == '=')
{
aRet.meType = ScInputStringType::Formula;
}
else if (rStr.getLength() > 1 && rStr[0] == '\'')
{
// for bEnglish, "'" at the beginning is always interpreted as text
// marker and stripped
aRet.maText = rStr.copy(1);
aRet.meType = ScInputStringType::Text;
}
else // (nur) auf englisches Zahlformat testen
{
sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
{
aRet.meType = ScInputStringType::Number;
aRet.mnFormatType = rFormatter.GetType(nNumFormat);
}
else if (!rStr.isEmpty())
aRet.meType = ScInputStringType::Text;
// the (English) number format is not set
//TODO: find and replace with matching local format???
}
return aRet;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */