use OUStringBuffer for a string that's modified often (tdf#145862)

SvParser::aToken was an OUString even though it serves as the buffer in which
the parsed result is collected, so it is now an OUStringBuffer.

Change-Id: Id24c842738ea0f6f1836f77d855069963ac5ae55
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/130763
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
This commit is contained in:
Luboš Luňák 2022-03-01 12:07:47 +01:00
parent 2960d41287
commit b871d057c4
16 changed files with 80 additions and 68 deletions

View File

@ -179,7 +179,7 @@ void EditHTMLParser::NextToken( HtmlTokenId nToken )
if ( !bInPara )
StartPara( false );
OUString aText = aToken;
OUString aText = aToken.toString();
if ( aText.startsWith(" ") && ThrowAwayBlank() && !IsReadPRE() )
aText = aText.copy( 1 );

View File

@ -166,7 +166,7 @@ void SvxRTFParser::NextToken( int nToken )
case RTF_LDBLQUOTE: cCh = 0x201C; goto INSINGLECHAR;
case RTF_RDBLQUOTE: cCh = 0x201D; goto INSINGLECHAR;
INSINGLECHAR:
aToken = OUString(cCh);
aToken = OUStringChar(cCh);
[[fallthrough]]; // aToken is set as Text
case RTF_TEXTTOKEN:
{
@ -324,7 +324,8 @@ void SvxRTFParser::ReadStyleTable()
case RTF_TEXTTOKEN:
if (bHasStyleNo)
{
xStyle->sName = DelCharAtEnd( aToken, ';' );
DelCharAtEnd( aToken, ';' );
xStyle->sName = aToken.toString();
if (!m_StyleTable.empty())
{
@ -581,15 +582,11 @@ void SvxRTFParser::ClearAttrStack()
aAttrStack.clear();
}
OUString& SvxRTFParser::DelCharAtEnd( OUString& rStr, const sal_Unicode cDel )
void SvxRTFParser::DelCharAtEnd( OUStringBuffer& rStr, const sal_Unicode cDel )
{
if( !rStr.isEmpty() && ' ' == rStr[ 0 ])
rStr = comphelper::string::stripStart(rStr, ' ');
if( !rStr.isEmpty() && ' ' == rStr[ rStr.getLength()-1 ])
rStr = comphelper::string::stripEnd(rStr, ' ');
rStr.strip(' ');
if( !rStr.isEmpty() && cDel == rStr[ rStr.getLength()-1 ])
rStr = rStr.copy( 0, rStr.getLength()-1 );
return rStr;
rStr.setLength( rStr.getLength()-1 );
}

View File

@ -148,7 +148,7 @@ class EDITENG_DLLPUBLIC SvxRTFParser : public SvRTFParser
protected:
virtual void InsertPara() = 0;
static OUString& DelCharAtEnd( OUString& rStr, const sal_Unicode cDel );
static void DelCharAtEnd( OUStringBuffer& rStr, const sal_Unicode cDel );
// is called for each token that is recognized in CallParser
virtual void NextToken( int nToken ) override;

View File

@ -31,6 +31,12 @@ inline bool equalsIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2
== 0;
};
// Like OUString::compareToIgnoreAsciiCase, but for two std::u16string_view.
// Forwards the data pointer and length of each view to
// rtl_ustr_compareIgnoreAsciiCase_WithLength and returns that result unchanged.
// Returns 0 when the views compare equal ignoring ASCII case, a value greater
// than 0 when s1 sorts after s2, and a value less than 0 when s1 sorts before s2.
inline int compareToIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
{
    return rtl_ustr_compareIgnoreAsciiCase_WithLength(s1.data(), s1.size(), s2.data(), s2.size());
}
// Similar to OString::getToken, returning the first token of a std::string_view, starting at a
// given position (and if needed, it can be turned into a template to also cover std::u16string_view
// etc., or extended to return the n'th token instead of just the first, or support an initial

View File

@ -22,6 +22,7 @@
#include <svtools/svtdllapi.h>
#include <sal/types.h>
#include <stdexcept>
#include <string_view>
namespace rtl {
class OUString;
@ -30,13 +31,13 @@ enum class HtmlOptionId;
enum class HtmlTokenId : sal_Int16;
// search the char for the CharName
sal_Unicode GetHTMLCharName( const rtl::OUString& rName );
sal_Unicode GetHTMLCharName( std::u16string_view rName );
// search the TokenID for the token
SVT_DLLPUBLIC HtmlTokenId GetHTMLToken( const rtl::OUString& rName );
SVT_DLLPUBLIC HtmlTokenId GetHTMLToken( std::u16string_view rName );
// search the TokenId for an attribute token
HtmlOptionId GetHTMLOption( const rtl::OUString& rName );
HtmlOptionId GetHTMLOption( std::u16string_view rName );
// search the 24-bit color for a color name (not found = SAL_MAX_UINT32)
SVT_DLLPUBLIC sal_uInt32 GetHTMLColor( const rtl::OUString& rName );

View File

@ -26,7 +26,7 @@ namespace rtl {
};
// search the TokenID for the token
int GetRTFToken( const OUString& rSearch );
int GetRTFToken( std::u16string_view rSearch );
enum RTF_TOKEN_RANGES {
RTF_NOGROUP = 0x0100,

View File

@ -24,6 +24,7 @@
#include <tools/ref.hxx>
#include <tools/long.hxx>
#include <rtl/textenc.h>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
#include <memory>
@ -46,7 +47,7 @@ class SVT_DLLPUBLIC SvParser : public SvRefBase
protected:
SvStream& rInput;
OUString aToken; // scanned token
OUStringBuffer aToken; // scanned token
sal_uInt32 nlLineNr; // current line number
sal_uInt32 nlLinePos; // current column number

View File

@ -592,6 +592,10 @@ private:
using namespace std::string_view_literals;
CPPUNIT_ASSERT(o3tl::equalsIgnoreAsciiCase(u"test"sv, u"test"sv));
CPPUNIT_ASSERT(!o3tl::equalsIgnoreAsciiCase(u"test"sv, u"test2"sv));
CPPUNIT_ASSERT_EQUAL(0, o3tl::compareToIgnoreAsciiCase(u"test"sv, u"test"sv));
CPPUNIT_ASSERT_GREATER(0, o3tl::compareToIgnoreAsciiCase(u"zest"sv, u"test"sv));
CPPUNIT_ASSERT_LESS(0, o3tl::compareToIgnoreAsciiCase(u"test"sv, u"test2"sv));
}
};

View File

@ -21,6 +21,7 @@
#include <algorithm>
#include <string_view>
#include <o3tl/string_view.hxx>
#include <sal/types.h>
#include <rtl/ustring.hxx>
#include <svtools/htmltokn.h>
@ -43,12 +44,12 @@ static bool sortCompare(const TokenEntry<T> & lhs, const TokenEntry<T> & rhs)
return lhs.sToken < rhs.sToken;
}
template<typename T>
static bool findCompare(const TokenEntry<T> & lhs, const OUString & rhs)
static bool findCompare(const TokenEntry<T> & lhs, std::u16string_view rhs)
{
return lhs.sToken < rhs;
}
template<typename T, size_t LEN>
static T search(TokenEntry<T> const (&dataTable)[LEN], const OUString & key, T notFoundValue)
static T search(TokenEntry<T> const (&dataTable)[LEN], std::u16string_view key, T notFoundValue)
{
auto findIt = std::lower_bound( std::begin(dataTable), std::end(dataTable),
key, findCompare<T> );
@ -174,7 +175,7 @@ HTML_TokenEntry const aHTMLTokenTab[] = {
};
HtmlTokenId GetHTMLToken( const OUString& rName )
HtmlTokenId GetHTMLToken( std::u16string_view rName )
{
static bool bSortKeyWords = false;
if( !bSortKeyWords )
@ -183,7 +184,7 @@ HtmlTokenId GetHTMLToken( const OUString& rName )
bSortKeyWords = true;
}
if( rName.startsWith( OOO_STRING_SVTOOLS_HTML_comment ))
if( o3tl::starts_with( rName, u"" OOO_STRING_SVTOOLS_HTML_comment ))
return HtmlTokenId::COMMENT;
return search( aHTMLTokenTab, rName, HtmlTokenId::NONE);
@ -459,7 +460,7 @@ static HTML_CharEntry aHTMLCharNameTab[] = {
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_diams), 9830}
};
sal_Unicode GetHTMLCharName( const OUString& rName )
sal_Unicode GetHTMLCharName( std::u16string_view rName )
{
if( !bSortCharKeyWords )
{
@ -631,7 +632,7 @@ static HTML_OptionEntry aHTMLOptionTab[] = {
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_start), HtmlOptionId::START}, // Netscape 2.0 vs IExplorer 2.0
};
HtmlOptionId GetHTMLOption( const OUString& rName )
HtmlOptionId GetHTMLOption( std::u16string_view rName )
{
if( !bSortOptionKeyWords )
{

View File

@ -19,6 +19,7 @@
#include <comphelper/string.hxx>
#include <o3tl/safeint.hxx>
#include <o3tl/string_view.hxx>
#include <tools/stream.hxx>
#include <tools/debug.hxx>
#include <tools/color.hxx>
@ -542,7 +543,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
else
{
// If not scanning a tag return token
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
sTmpBuffer.setLength(0);
if( !aToken.isEmpty() )
@ -564,7 +565,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
// Hack: _GetNextChar shall not read the
// next character
if( ';' != nNextCh )
aToken += " ";
aToken.append( " " );
if( 1U == cChar )
return HtmlTokenId::NONBREAKSPACE;
else //2U
@ -719,7 +720,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
{
// Have seen s.th. aside from blanks?
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
sTmpBuffer.setLength(0);
return HtmlTokenId::TEXTTOKEN;
}
@ -753,7 +754,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
!IsParserWorking() )
{
if( !sTmpBuffer.isEmpty() )
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
return HtmlTokenId::TEXTTOKEN;
}
} while( rtl::isAsciiAlpha( nNextCh ) || rtl::isAsciiDigit( nNextCh ) );
@ -766,7 +767,7 @@ HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak )
}
if( !sTmpBuffer.isEmpty() )
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
return HtmlTokenId::TEXTTOKEN;
}
@ -801,7 +802,7 @@ HtmlTokenId HTMLParser::GetNextRawToken()
// Maybe we've reached the end.
// Save what we have read previously...
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
sTmpBuffer.setLength(0);
// and remember position in stream.
@ -904,9 +905,9 @@ HtmlTokenId HTMLParser::GetNextRawToken()
else
{
// remember "</" , everything else we find in the buffer
aToken += "<";
aToken.append( "<" );
if( bOffState )
aToken += "/";
aToken.append( "/" );
bNextCh = false;
}
@ -974,7 +975,7 @@ HtmlTokenId HTMLParser::GetNextRawToken()
if( !bContinue && !sTmpBuffer.isEmpty() )
{
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
sTmpBuffer.setLength(0);
}
@ -1001,7 +1002,7 @@ HtmlTokenId HTMLParser::GetNextToken_()
// HtmlTokenId::<TOKEN>_OFF generated for HtmlTokenId::<TOKEN>_ON
nRet = mnPendingOffToken;
mnPendingOffToken = HtmlTokenId::NONE;
aToken.clear();
aToken.setLength( 0 );
return nRet;
}
@ -1058,7 +1059,7 @@ HtmlTokenId HTMLParser::GetNextToken_()
if( !sTmpBuffer.isEmpty() )
{
aToken += sTmpBuffer;
aToken.append( sTmpBuffer );
sTmpBuffer.setLength(0);
}
@ -1075,10 +1076,10 @@ HtmlTokenId HTMLParser::GetNextToken_()
// Search token in table:
sSaveToken = aToken;
aToken = aToken.toAsciiLowerCase();
aToken = aToken.toString().toAsciiLowerCase();
if (!maNamespace.isEmpty() && aToken.startsWith(maNamespace))
aToken = aToken.copy(maNamespace.getLength());
// Drop the namespace prefix in place: remove() already mutates the buffer,
// so assigning its result back to aToken would only be a self-assignment.
if (!maNamespace.isEmpty() && o3tl::starts_with(aToken, maNamespace))
    aToken.remove( 0, maNamespace.getLength());
if( HtmlTokenId::NONE == (nRet = GetHTMLToken( aToken )) )
// Unknown control
@ -1107,7 +1108,7 @@ HtmlTokenId HTMLParser::GetNextToken_()
// and append a blank.
aToken = sSaveToken;
if( '>'!=nNextCh )
aToken += " ";
aToken.append( " " );
sal_uInt64 nCStreamPos = 0;
sal_uInt32 nCLineNr = 0;
sal_uInt32 nCLinePos = 0;
@ -1146,14 +1147,14 @@ HtmlTokenId HTMLParser::GetNextToken_()
SetLineNr( nCLineNr );
SetLinePos( nCLinePos );
ClearTxtConvContext();
aToken = aToken.copy(0, nCStrLen);
aToken.truncate(nCStrLen);
nNextCh = '>';
}
}
else
{
// TokenString not needed anymore
aToken.clear();
aToken.setLength( 0 );
}
// Read until closing '>'
@ -1165,10 +1166,10 @@ HtmlTokenId HTMLParser::GetNextToken_()
// generate pending HtmlTokenId::<TOKEN>_OFF for HtmlTokenId::<TOKEN>_ON
// Do not convert this to a single HtmlTokenId::<TOKEN>_OFF
// which lead to fdo#56772.
if ((nRet >= HtmlTokenId::ONOFF_START) && aToken.endsWith("/"))
if ((nRet >= HtmlTokenId::ONOFF_START) && o3tl::ends_with(aToken, u"/"))
{
mnPendingOffToken = static_cast<HtmlTokenId>(static_cast<int>(nRet) + 1); // HtmlTokenId::<TOKEN>_ON -> HtmlTokenId::<TOKEN>_OFF
aToken = aToken.replaceAt( aToken.getLength()-1, 1, u""); // remove trailing '/'
aToken.setLength( aToken.getLength()-1 ); // remove trailing '/'
}
if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
{
@ -1212,7 +1213,7 @@ HtmlTokenId HTMLParser::GetNextToken_()
}
if( SvParserState::Pending == eState )
bReadNextChar = bReadNextCharSave;
aToken.clear();
aToken.setLength( 0 );
}
else if( '%' == nNextCh )
{
@ -1249,7 +1250,7 @@ HtmlTokenId HTMLParser::GetNextToken_()
if( IsParserWorking() )
{
sSaveToken = aToken;
aToken.clear();
aToken.setLength( 0 );
}
}
else
@ -1387,7 +1388,7 @@ void HTMLParser::UnescapeToken()
bEscape = false;
if( '\\'==aToken[nPos] && !bOldEscape )
{
aToken = aToken.replaceAt( nPos, 1, u"" );
aToken.remove( nPos, 1 );
bEscape = true;
}
else
@ -1426,7 +1427,7 @@ const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
nPos++;
}
OUString sName( aToken.copy( nStt, nPos-nStt ) );
OUString sName( aToken.subView( nStt, nPos-nStt ) );
// PlugIns require original token name. Convert to lower case only for searching.
nToken = GetHTMLOption( sName.toAsciiLowerCase() ); // Name is ready
@ -1477,7 +1478,7 @@ const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
case '\r':
case '\n':
if( bStripCRLF )
aToken = aToken.replaceAt( nPos, 1, u"" );
aToken.remove( nPos, 1 );
else
{
nPos++;
@ -1492,7 +1493,7 @@ const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
}
else
{
aToken = aToken.replaceAt( nPos, 1, u"" );
aToken.remove( nPos, 1 );
bEscape = true;
}
break;
@ -1549,7 +1550,7 @@ const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
}
else
{
aToken = aToken.replaceAt( nPos, 1, u"" );
aToken.remove( nPos, 1 );
bEscape = true;
}
break;
@ -1568,7 +1569,7 @@ const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
}
if( nLen )
aValue = aToken.copy( nStt, nLen );
aValue = aToken.subView( nStt, nLen );
}
}
@ -1795,7 +1796,7 @@ HtmlTokenId HTMLParser::FilterXMP( HtmlTokenId nToken )
}
else
aToken = sSaveToken;
aToken += ">";
aToken.append( ">" );
nToken = HtmlTokenId::TEXTTOKEN;
}
break;

View File

@ -106,7 +106,7 @@ int SvRTFParser::GetNextToken_()
aStrBuffer.appendUtf32(nNextCh);
nNextCh = GetNextChar();
} while( RTF_ISALPHA( nNextCh ) );
aToken += aStrBuffer;
aToken.append( aStrBuffer );
}
// minus before numeric parameters
@ -157,7 +157,7 @@ int SvRTFParser::GetNextToken_()
aParserStates.top().nUCharOverread = nUCharOverread;
}
}
aToken.clear(); // #i47831# erase token to prevent the token from being treated as text
aToken.setLength( 0 ); // #i47831# erase token to prevent the token from being treated as text
// read next token
nRet = 0;
break;
@ -183,7 +183,7 @@ int SvRTFParser::GetNextToken_()
if( !bRTF_InTextRead )
{
nRet = RTF_TEXTTOKEN;
aToken = OUString( static_cast<sal_Unicode>(nTokenValue) );
aToken = OUStringChar( static_cast<sal_Unicode>(nTokenValue) );
// overread the next n "RTF" characters. This
// can be also \{, \}, \'88
@ -502,7 +502,7 @@ void SvRTFParser::ScanText()
if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
{
if (!aStrBuffer.isEmpty())
aToken += aStrBuffer;
aToken.append( aStrBuffer );
return;
}
} while
@ -519,7 +519,7 @@ void SvRTFParser::ScanText()
}
if (!aStrBuffer.isEmpty())
aToken += aStrBuffer;
aToken.append( aStrBuffer );
}

View File

@ -17,6 +17,7 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <o3tl/string_view.hxx>
#include <rtl/ustring.hxx>
#include <svtools/rtfkeywd.hxx>
#include <svtools/rtftoken.h>
@ -1175,7 +1176,7 @@ static RTF_TokenEntry aRTFTokenTab[] = {
};
int GetRTFToken( const OUString& rSearch )
int GetRTFToken( std::u16string_view rSearch )
{
if( !bSortKeyWords )
{
@ -1187,12 +1188,12 @@ int GetRTFToken( const OUString& rSearch )
bSortKeyWords = true;
}
auto findCompare = [](const RTF_TokenEntry & lhs, const OUString & s)
auto findCompare = [](const RTF_TokenEntry & lhs, std::u16string_view s)
{
return s.compareToIgnoreAsciiCase(lhs.sToken) > 0;
return o3tl::compareToIgnoreAsciiCase(s, lhs.sToken) > 0;
};
auto findIt = std::lower_bound( std::begin(aRTFTokenTab), std::end(aRTFTokenTab), rSearch, findCompare);
if (findIt != std::end(aRTFTokenTab) && rSearch.compareToIgnoreAsciiCase(findIt->sToken)==0)
if (findIt != std::end(aRTFTokenTab) && o3tl::compareToIgnoreAsciiCase(rSearch, findIt->sToken)==0)
return findIt->nToken;
return 0;

View File

@ -444,7 +444,7 @@ T SvParser<T>::GetNextToken()
if( !nTokenStackPos )
{
aToken.clear(); // empty token buffer
aToken.setLength( 0 ); // empty token buffer
nTokenValue = -1; // marker for no value read
bTokenHasValue = false;

View File

@ -189,7 +189,7 @@ void SwHTMLParser::AddScriptSource()
if( nPos != -1 )
{
m_aBasicLib =
aToken.copy( nPos + sizeof(OOO_STRING_SVTOOLS_HTML_SB_library) - 1 );
aToken.subView( nPos + sizeof(OOO_STRING_SVTOOLS_HTML_SB_library) - 1 );
m_aBasicLib = comphelper::string::strip(m_aBasicLib, ' ');
}
}
@ -200,7 +200,7 @@ void SwHTMLParser::AddScriptSource()
if( nPos != -1 )
{
m_aBasicModule =
aToken.copy( nPos + sizeof(OOO_STRING_SVTOOLS_HTML_SB_module) - 1 );
aToken.subView( nPos + sizeof(OOO_STRING_SVTOOLS_HTML_SB_module) - 1 );
m_aBasicModule = comphelper::string::strip(m_aBasicModule, ' ');
}
}

View File

@ -2484,7 +2484,7 @@ void SwHTMLParser::InsertSelectText()
{
sal_Int32 nLen = rText.getLength();
if( !nLen || ' '==rText[nLen-1])
aToken = aToken.replaceAt( 0, 1, u"" );
aToken.remove( 0, 1 );
}
if( !aToken.isEmpty() )
rText += aToken;

View File

@ -1270,7 +1270,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
{
if( !m_bDocInitialized )
DocumentDetected();
m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
// if there are temporary paragraph attributes and the
// paragraph isn't empty then the paragraph attributes
@ -1530,10 +1530,10 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
const OUString& rText = pTextNode->GetText();
sal_Unicode cLast = rText[--nPos];
if( ' ' == cLast || '\x0a' == cLast)
aToken = aToken.copy(1);
aToken.remove(0, 1);
}
else
aToken = aToken.copy(1);
aToken.remove(0, 1);
if( aToken.isEmpty() )
{
@ -1569,7 +1569,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
}
}
m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken );
m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
// if there are temporary paragraph attributes and the
// paragraph isn't empty then the paragraph attributes
@ -2058,7 +2058,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
if( ' ' == aToken[ 3 ] &&
' ' == aToken[ aToken.getLength()-3 ] )
{
OUString aComment( aToken.copy( 3, aToken.getLength()-5 ) );
OUString aComment( aToken.subView( 3, aToken.getLength()-5 ) );
InsertComment(comphelper::string::strip(aComment, ' '));
}
else