tdf#117643 Writer: fix apostrophe search regression
During text search, an ASCII apostrophe ' (U+0027)
in the search term now also matches the typographic
apostrophe ’ (U+2019) in the text.
There was a UX regression in document editing from
commit e6fade1ce1
(tdf#38395 enable smart apostrophe replacement by default),
because the Find and Replace window and the Find toolbar
don't replace the ASCII apostrophe, so the search term
no longer matched the text (which now contains the automatically
replaced typographic apostrophes) as it did before that commit.
Regex search hasn't been modified, i.e. finding U+2019
still requires a search term that contains U+2019.
Typographic apostrophes in a search term match
ASCII apostrophes in the text only if the search term
also contains an ASCII apostrophe.
Note: as a more sophisticated solution, it's possible to
add a new default transliteration option for this later.
Change-Id: I5121edbef5cf34fdd5b5f9ba3c046a06329a756a
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105717
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
This commit is contained in:
@@ -38,11 +38,13 @@ public:
|
|||||||
void testICU();
|
void testICU();
|
||||||
void testSearches();
|
void testSearches();
|
||||||
void testWildcardSearch();
|
void testWildcardSearch();
|
||||||
|
void testApostropheSearch();
|
||||||
|
|
||||||
CPPUNIT_TEST_SUITE(TestTextSearch);
|
CPPUNIT_TEST_SUITE(TestTextSearch);
|
||||||
CPPUNIT_TEST(testICU);
|
CPPUNIT_TEST(testICU);
|
||||||
CPPUNIT_TEST(testSearches);
|
CPPUNIT_TEST(testSearches);
|
||||||
CPPUNIT_TEST(testWildcardSearch);
|
CPPUNIT_TEST(testWildcardSearch);
|
||||||
|
CPPUNIT_TEST(testApostropheSearch);
|
||||||
CPPUNIT_TEST_SUITE_END();
|
CPPUNIT_TEST_SUITE_END();
|
||||||
private:
|
private:
|
||||||
uno::Reference<util::XTextSearch> m_xSearch;
|
uno::Reference<util::XTextSearch> m_xSearch;
|
||||||
@@ -265,6 +267,119 @@ void TestTextSearch::testWildcardSearch()
|
|||||||
CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
|
CPPUNIT_ASSERT((aRes.startOffset[0] == 6) && (aRes.endOffset[0] == 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestTextSearch::testApostropheSearch()
|
||||||
|
{
|
||||||
|
// A) find typographic apostrophes also by using ASCII apostrophe in searchString
|
||||||
|
OUString str( u"It\u2019s an apostrophe." );
|
||||||
|
sal_Int32 startPos = 0, endPos = str.getLength();
|
||||||
|
|
||||||
|
// set options
|
||||||
|
util::SearchOptions aOptions;
|
||||||
|
aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
|
||||||
|
aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
|
||||||
|
aOptions.searchString = "'";
|
||||||
|
m_xSearch->setOptions( aOptions );
|
||||||
|
|
||||||
|
util::SearchResult aRes;
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, endPos );
|
||||||
|
// This was 0.
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, endPos, startPos );
|
||||||
|
// This was 0.
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// check with transliteration
|
||||||
|
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
|
||||||
|
| TransliterationFlags::IGNORE_WIDTH);
|
||||||
|
m_xSearch->setOptions(aOptions);
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, endPos );
|
||||||
|
// This was 0.
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, endPos, startPos );
|
||||||
|
// This was 0.
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// B) search ASCII apostrophe in a text with ASCII apostrophes
|
||||||
|
str = str.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, endPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, endPos, startPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// C) search typographic apostrophe in a text with ASCII apostrophes (no result)
|
||||||
|
aOptions.searchString = OUString(u"\u2019");
|
||||||
|
m_xSearch->setOptions( aOptions );
|
||||||
|
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, endPos );
|
||||||
|
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
|
||||||
|
|
||||||
|
aRes = m_xSearch->searchBackward( str, endPos, startPos );
|
||||||
|
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
|
||||||
|
|
||||||
|
// D) search typographic apostrophe in a text with typographic apostrophes
|
||||||
|
str = str.replace('\'', u'\u2019');
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, endPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, endPos, startPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
|
||||||
|
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
|
||||||
|
|
||||||
|
// E) search mixed apostrophes in a text with mixed apostrophes:
|
||||||
|
aOptions.searchString = OUString(u"'\u2019");
|
||||||
|
m_xSearch->setOptions( aOptions );
|
||||||
|
str = u"test: \u2019'";
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, str.getLength());
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
|
||||||
|
// F) search mixed apostrophes in a text with ASCII apostrophes:
|
||||||
|
str = u"test: ''";
|
||||||
|
|
||||||
|
// search forward
|
||||||
|
aRes = m_xSearch->searchForward( str, startPos, str.getLength());
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
|
||||||
|
// search backwards
|
||||||
|
aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
|
||||||
|
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
|
||||||
|
}
|
||||||
|
|
||||||
void TestTextSearch::setUp()
|
void TestTextSearch::setUp()
|
||||||
{
|
{
|
||||||
BootstrapFixtureBase::setUp();
|
BootstrapFixtureBase::setUp();
|
||||||
|
@@ -127,6 +127,8 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
|
|||||||
maWildcardReversePattern.clear();
|
maWildcardReversePattern.clear();
|
||||||
maWildcardReversePattern2.clear();
|
maWildcardReversePattern2.clear();
|
||||||
TransliterationFlags transliterateFlags = static_cast<TransliterationFlags>(aSrchPara.transliterateFlags);
|
TransliterationFlags transliterateFlags = static_cast<TransliterationFlags>(aSrchPara.transliterateFlags);
|
||||||
|
bSearchApostrophe = false;
|
||||||
|
bool bReplaceApostrophe = false;
|
||||||
if (aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
|
if (aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
|
||||||
{
|
{
|
||||||
// RESrchPrepare will consider aSrchPara.transliterateFlags when
|
// RESrchPrepare will consider aSrchPara.transliterateFlags when
|
||||||
@@ -137,6 +139,11 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
|
|||||||
// match is not case-altered, leave case-(in)sensitive to regex engine.
|
// match is not case-altered, leave case-(in)sensitive to regex engine.
|
||||||
transliterateFlags &= ~TransliterationFlags::IGNORE_CASE;
|
transliterateFlags &= ~TransliterationFlags::IGNORE_CASE;
|
||||||
}
|
}
|
||||||
|
else if ( aSrchPara.searchString.indexOf('\'') > - 1 )
|
||||||
|
{
|
||||||
|
bSearchApostrophe = true;
|
||||||
|
bReplaceApostrophe = aSrchPara.searchString.indexOf(u'\u2019') > -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Create Transliteration class
|
// Create Transliteration class
|
||||||
if( isSimpleTrans( transliterateFlags) )
|
if( isSimpleTrans( transliterateFlags) )
|
||||||
@@ -214,6 +221,9 @@ void TextSearch::setOptions2( const SearchOptions2& rOptions )
|
|||||||
checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
|
checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
|
||||||
sSrchStr.getLength()-1) == ScriptType::COMPLEX));
|
sSrchStr.getLength()-1) == ScriptType::COMPLEX));
|
||||||
|
|
||||||
|
if ( bReplaceApostrophe )
|
||||||
|
sSrchStr = sSrchStr.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
// Take the new SearchOptions2::AlgorithmType2 field and ignore
|
// Take the new SearchOptions2::AlgorithmType2 field and ignore
|
||||||
// SearchOptions::algorithmType
|
// SearchOptions::algorithmType
|
||||||
switch( aSrchPara.AlgorithmType2)
|
switch( aSrchPara.AlgorithmType2)
|
||||||
@@ -312,6 +322,10 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
|
|||||||
|
|
||||||
OUString in_str(searchStr);
|
OUString in_str(searchStr);
|
||||||
|
|
||||||
|
// in non-regex mode, allow searching typographical apostrophe with the ASCII one
|
||||||
|
// to avoid regression after using automatic conversion to U+2019 during typing in Writer
|
||||||
|
bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
|
||||||
|
|
||||||
bUsePrimarySrchStr = true;
|
bUsePrimarySrchStr = true;
|
||||||
|
|
||||||
if ( xTranslit.is() )
|
if ( xTranslit.is() )
|
||||||
@@ -341,6 +355,9 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
|
|||||||
css::uno::Sequence<sal_Int32> offset(nInEndPos - nInStartPos);
|
css::uno::Sequence<sal_Int32> offset(nInEndPos - nInStartPos);
|
||||||
in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
|
in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
|
||||||
|
|
||||||
|
if ( bReplaceApostrophe )
|
||||||
|
in_str = in_str.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
// JP 20.6.2001: also the start and end positions must be corrected!
|
// JP 20.6.2001: also the start and end positions must be corrected!
|
||||||
sal_Int32 newStartPos =
|
sal_Int32 newStartPos =
|
||||||
(startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
|
(startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
|
||||||
@@ -382,6 +399,9 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if ( bReplaceApostrophe )
|
||||||
|
in_str = in_str.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
sres = (this->*fnForward)( in_str, startPos, endPos );
|
sres = (this->*fnForward)( in_str, startPos, endPos );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -437,6 +457,10 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
|
|||||||
|
|
||||||
OUString in_str(searchStr);
|
OUString in_str(searchStr);
|
||||||
|
|
||||||
|
// in non-regex mode, allow searching typographical apostrophe with the ASCII one
|
||||||
|
// to avoid regression after using automatic conversion to U+2019 during typing in Writer
|
||||||
|
bool bReplaceApostrophe = bSearchApostrophe && in_str.indexOf(u'\u2019') > -1;
|
||||||
|
|
||||||
bUsePrimarySrchStr = true;
|
bUsePrimarySrchStr = true;
|
||||||
|
|
||||||
if ( xTranslit.is() )
|
if ( xTranslit.is() )
|
||||||
@@ -445,6 +469,9 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
|
|||||||
css::uno::Sequence<sal_Int32> offset(startPos - endPos);
|
css::uno::Sequence<sal_Int32> offset(startPos - endPos);
|
||||||
in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
|
in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
|
||||||
|
|
||||||
|
if ( bReplaceApostrophe )
|
||||||
|
in_str = in_str.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
// JP 20.6.2001: also the start and end positions must be corrected!
|
// JP 20.6.2001: also the start and end positions must be corrected!
|
||||||
sal_Int32 const newStartPos = (startPos < searchStr.getLength())
|
sal_Int32 const newStartPos = (startPos < searchStr.getLength())
|
||||||
? FindPosInSeq_Impl( offset, startPos )
|
? FindPosInSeq_Impl( offset, startPos )
|
||||||
@@ -490,6 +517,9 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if ( bReplaceApostrophe )
|
||||||
|
in_str = in_str.replace(u'\u2019', '\'');
|
||||||
|
|
||||||
sres = (this->*fnBackward)( in_str, startPos, endPos );
|
sres = (this->*fnBackward)( in_str, startPos, endPos );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -68,6 +68,9 @@ class TextSearch: public cppu::WeakImplHelper
|
|||||||
FnSrch fnForward;
|
FnSrch fnForward;
|
||||||
FnSrch fnBackward;
|
FnSrch fnBackward;
|
||||||
|
|
||||||
|
// to fix UX regression, U+0027 matches also U+2019 in non-regex search
|
||||||
|
bool bSearchApostrophe;
|
||||||
|
|
||||||
// Members and methods for the normal (Boyer-Moore) search
|
// Members and methods for the normal (Boyer-Moore) search
|
||||||
std::unique_ptr<TextSearchJumpTable> pJumpTable;
|
std::unique_ptr<TextSearchJumpTable> pJumpTable;
|
||||||
std::unique_ptr<TextSearchJumpTable> pJumpTable2;
|
std::unique_ptr<TextSearchJumpTable> pJumpTable2;
|
||||||
|
Reference in New Issue
Block a user