sw: plain-text ASCII export: filter out all CH_TXT_ATR_*

These control characters are Writer implementation details and should
not be available via public interfaces.

This filter is also used by SwXTextRange::getString().

Change-Id: If656ee3d451dbefe2f7a905e8b63a44cdb787809
This commit is contained in:
Michael Stahl 2017-03-29 16:43:23 +02:00
parent 30102ded91
commit ab4f53eaad
4 changed files with 79 additions and 0 deletions

View File

@ -43,6 +43,7 @@ public:
void testIsdigitAsciiString();
void testReverseString();
void testSplit();
void testRemoveAny();
CPPUNIT_TEST_SUITE(TestString);
CPPUNIT_TEST(testNatural);
@ -55,6 +56,7 @@ public:
CPPUNIT_TEST(testIsdigitAsciiString);
CPPUNIT_TEST(testReverseString);
CPPUNIT_TEST(testSplit);
CPPUNIT_TEST(testRemoveAny);
CPPUNIT_TEST_SUITE_END();
};
@ -382,6 +384,26 @@ void TestString::testSplit()
CPPUNIT_ASSERT_EQUAL(OUString("F1"), aRet[2]);
}
void TestString::testRemoveAny()
{
using namespace ::comphelper::string;
OUString in("abcAAAbbC");
sal_Unicode const test1 [] = { 'a', 0 };
CPPUNIT_ASSERT_EQUAL(OUString("bcAAAbbC"), removeAny(in, test1));
sal_Unicode const test2 [] = { 0 };
CPPUNIT_ASSERT_EQUAL(in, removeAny(in, test2));
sal_Unicode const test3 [] = { 'A', 0 };
CPPUNIT_ASSERT_EQUAL(OUString("abcbbC"), removeAny(in, test3));
sal_Unicode const test4 [] = { 'A', 'a', 0 };
CPPUNIT_ASSERT_EQUAL(OUString("bcbbC"), removeAny(in, test4));
sal_Unicode const test5 [] = { 'C', 0 };
CPPUNIT_ASSERT_EQUAL(OUString("abcAAAbb"), removeAny(in, test5));
sal_Unicode const test6 [] = { 'X', 0 };
CPPUNIT_ASSERT_EQUAL(in, removeAny(in, test6));
sal_Unicode const test7 [] = { 'A', 'B', 'C', 'a', 'b', 'c', 0 };
CPPUNIT_ASSERT_EQUAL(OUString(""), removeAny(in, test7));
}
CPPUNIT_TEST_SUITE_REGISTRATION(TestString);
}

View File

@ -420,6 +420,42 @@ sal_Int32 indexOfAny(OUString const& rIn,
return -1;
}
/** Strip every occurrence of the listed code units from a string.

    @param rIn    string to filter
    @param pChars 0-terminated list of code units to drop; it is
                  dereferenced unconditionally, so it must not be null

    @return rIn itself if it contains none of the listed code units,
            otherwise a new string consisting of the remaining code
            units in their original order
*/
OUString removeAny(OUString const& rIn,
        sal_Unicode const*const pChars)
{
    // true if cUnit occurs in the list before its 0 terminator
    auto const isListed = [pChars](sal_Unicode const cUnit) -> bool
    {
        sal_Unicode const* pCandidate = pChars;
        while (*pCandidate != 0 && *pCandidate != cUnit)
        {
            ++pCandidate;
        }
        return *pCandidate != 0;
    };

    sal_Int32 const nLength = rIn.getLength();

    // step 1: skip over the leading part that needs no modification
    sal_Int32 nPos = 0;
    while (nPos < nLength && !isListed(rIn[nPos]))
    {
        ++nPos;
    }
    if (nPos == nLength)
    {
        // nothing to remove: hand back the input without copying it
        return rIn;
    }

    // step 2: take over the untouched prefix in one go, then copy the
    // rest selectively; rIn[nPos] is the first code unit to be dropped
    OUStringBuffer aResult;
    if (nPos > 0)
    {
        aResult.append(rIn.copy(0, nPos));
    }
    for (++nPos; nPos < nLength; ++nPos)
    {
        sal_Unicode const cUnit = rIn[nPos];
        if (!isListed(cUnit))
        {
            aResult.append(cUnit);
        }
    }
    return aResult.makeStringAndClear();
}
OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok,
const OUString& rNewToken)
{

View File

@ -255,6 +255,15 @@ COMPHELPER_DLLPUBLIC OUString setToken(const OUString& rIn, sal_Int32 nToken, sa
COMPHELPER_DLLPUBLIC sal_Int32 indexOfAny(OUString const& rIn,
sal_Unicode const*const pChars, sal_Int32 const nPos);
/** Remove any of a list of code units in the string.

    Every occurrence of each listed code unit is dropped; the code units
    that remain keep their relative order.

@param rIn OUString to search
@param pChars 0-terminated array of sal_Unicode code units to search for;
       the array is dereferenced unconditionally, so it must not be null,
       and because 0 terminates the list the code unit 0 cannot be listed

@return OUString that has all of the pChars code units removed; if rIn
        contains none of them, rIn is returned as is
*/
COMPHELPER_DLLPUBLIC OUString removeAny(OUString const& rIn,
sal_Unicode const*const pChars);
/** Convert a sequence of strings to a single comma separated string.
Note that no escaping of commas or anything fancy is done.

View File

@ -206,6 +206,18 @@ static Writer& OutASC_SwTextNode( Writer& rWrt, SwContentNode& rNode )
if ( !bExportSoftHyphens )
aOutStr = aOutStr.replaceAll(OUStringLiteral1(CHAR_SOFTHYPHEN), "");
// all INWORD/BREAKWORD should be already removed by OutAttr
// but the field-marks are not attributes so filter those
static sal_Unicode const forbidden [] = {
CH_TXT_ATR_INPUTFIELDSTART,
CH_TXT_ATR_INPUTFIELDEND,
CH_TXT_ATR_FORMELEMENT,
CH_TXT_ATR_FIELDSTART,
CH_TXT_ATR_FIELDEND,
0
};
aOutStr = comphelper::string::removeAny(aOutStr, forbidden);
rWrt.Strm().WriteUnicodeOrByteText( aOutStr );
}
nStrPos = nNextAttr;