tdf#164989: Disallow combining characters when a hex already appeared

We either expect a single (maybe combined) character to convert into "U+"
notation, or a sequence of hexadecimal numbers (possibly with U+) for
conversion to characters. Once we have already seen a hexadecimal ASCII
character, a preceding combining character or surrogate cannot belong to
the hexadecimal sequence, so stop as soon as such an unexpected character
appears.

Change-Id: Ic480fe8f173240eb263d5a77286b149c933049a6
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/181007
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
This commit is contained in:
Mike Kaganski 2025-02-02 14:37:34 +05:00
parent ea047987b5
commit 6d78861299
2 changed files with 15 additions and 2 deletions

View File

@ -1065,7 +1065,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
switch ( unicode::getUnicodeType(uChar) ) switch ( unicode::getUnicodeType(uChar) )
{ {
case css::i18n::UnicodeType::SURROGATE: case css::i18n::UnicodeType::SURROGATE:
if( bPreventNonHex ) if (bPreventNonHex || mbIsHexString)
{ {
mbAllowMoreChars = false; mbAllowMoreChars = false;
return false; return false;
@ -1096,7 +1096,7 @@ bool ToggleUnicodeCodepoint::AllowMoreInput(sal_uInt32 uChar)
case css::i18n::UnicodeType::NON_SPACING_MARK: case css::i18n::UnicodeType::NON_SPACING_MARK:
case css::i18n::UnicodeType::COMBINING_SPACING_MARK: case css::i18n::UnicodeType::COMBINING_SPACING_MARK:
if( bPreventNonHex ) if (bPreventNonHex || mbIsHexString)
{ {
mbAllowMoreChars = false; mbAllowMoreChars = false;
return false; return false;

View File

@ -2411,6 +2411,19 @@ CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testUnicodeNotationToggle)
// - Actual : xyz侮U+e0101 // - Actual : xyz侮U+e0101
// i.e., one codepoint to the left of the combining codepoint was not converted // i.e., one codepoint to the left of the combining codepoint was not converted
CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString); CPPUNIT_ASSERT_EQUAL(sWithCombiningSMPName, sDocString);
pWrtShell->SplitNode();
// Given a combined character "è", consisting of U+0065 and U+0300, followed by a HEX
// without a U+ for the conversion into the next character "n"
pWrtShell->Insert2(u"è006E"_ustr);
dispatchCommand(mxComponent, u".uno:UnicodeNotationToggle"_ustr, aPropertyValues);
sDocString = pWrtShell->GetCursor()->GetPointNode().GetTextNode()->GetText();
// Before tdf#164989 fix, this failed with
// - Expected: èn
// - Actual : è006U+0300
// i.e., it converted the last combined character *before* the HEX code *to HEX*, replacing
// the last character of the HEX; not the expected conversion of the code itself *from HEX*.
CPPUNIT_ASSERT_EQUAL(u"\u0065\u0300n"_ustr, sDocString);
} }
CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957) CPPUNIT_TEST_FIXTURE(SwUiWriterTest7, testTdf34957)