Resolves: fdo#40292 Tamil grapheme cluster rules
This commit is contained in:
@@ -80,10 +80,10 @@ private:
|
||||
uno::Reference<i18n::XBreakIterator> m_xBreak;
|
||||
};
|
||||
|
||||
//See https://bugs.freedesktop.org/show_bug.cgi?id=31271 for motivation
|
||||
//See https://bugs.freedesktop.org/show_bug.cgi?id=31271
|
||||
void TestBreakIterator::testLineBreaking()
|
||||
{
|
||||
::rtl::OUString aTest1(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
|
||||
::rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
|
||||
|
||||
i18n::LineBreakHyphenationOptions aHyphOptions;
|
||||
i18n::LineBreakUserOptions aUserOptions;
|
||||
@@ -94,18 +94,19 @@ void TestBreakIterator::testLineBreaking()
|
||||
|
||||
{
|
||||
//Here we want the line break to leave "text here)" on the next line
|
||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
|
||||
}
|
||||
|
||||
{
|
||||
//Here we want the line break to leave "here)" on the next line
|
||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
|
||||
}
|
||||
}
|
||||
|
||||
//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152 for motivation
|
||||
//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
|
||||
//See https://bugs.freedesktop.org/show_bug.cgi?id=40292
|
||||
void TestBreakIterator::testGraphemeIteration()
|
||||
{
|
||||
lang::Locale aLocale;
|
||||
@@ -114,46 +115,90 @@ void TestBreakIterator::testGraphemeIteration()
|
||||
|
||||
{
|
||||
const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
|
||||
::rtl::OUString aTest1(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
|
||||
::rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
|
||||
|
||||
sal_Int32 nDone=0;
|
||||
sal_Int32 nPos;
|
||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
||||
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
|
||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
|
||||
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||
}
|
||||
|
||||
{
|
||||
const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
|
||||
::rtl::OUString aTest1(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
||||
::rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
||||
|
||||
sal_Int32 nDone=0;
|
||||
sal_Int32 nPos;
|
||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
||||
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
|
||||
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||
}
|
||||
|
||||
{
|
||||
const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
|
||||
::rtl::OUString aTest1(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
||||
::rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
||||
|
||||
sal_Int32 nDone=0;
|
||||
sal_Int32 nPos;
|
||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
||||
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
|
||||
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||
}
|
||||
|
||||
aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
|
||||
aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
|
||||
|
||||
{
|
||||
const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
|
||||
::rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
|
||||
|
||||
sal_Int32 nDone=0;
|
||||
sal_Int32 nPos = 0;
|
||||
|
||||
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
|
||||
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||
}
|
||||
|
||||
{
|
||||
const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
|
||||
{ 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
|
||||
::rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
|
||||
SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
|
||||
|
||||
sal_Int32 nDone=0;
|
||||
sal_Int32 nPos=0;
|
||||
|
||||
for (sal_Int32 i = 0; i < 4; ++i)
|
||||
{
|
||||
sal_Int32 nOldPos = nPos;
|
||||
nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
|
||||
}
|
||||
|
||||
for (sal_Int32 i = 0; i < 4; ++i)
|
||||
{
|
||||
sal_Int32 nOldPos = nPos;
|
||||
nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
|
||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
|
||||
::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
|
||||
|
@@ -40,8 +40,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
|
||||
$OriyaSignVirama = \u0B4D;
|
||||
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
||||
$GurmukhiSignVirama = \u0A4D;
|
||||
$TamilLetter = [\u0B85-\u0BB9];
|
||||
$TamilKa = \u0B95;
|
||||
$TamilSignVirama = \u0BCD;
|
||||
$TamilSsa = \u0BB7;
|
||||
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
||||
$TeluguSignVirama = \u0C4D;
|
||||
|
||||
@@ -70,7 +71,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
|
||||
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
||||
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
||||
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
||||
$TamilLetter ($TamilSignVirama $TamilLetter?)+;
|
||||
$TamilKa $TamilSignVirama $TamilSsa;
|
||||
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
||||
|
||||
$L ($L | $V | $LV | $LVT);
|
||||
@@ -95,7 +96,7 @@ $LF $CR;
|
||||
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
||||
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
||||
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
||||
($TamilLetter? $TamilSignVirama)+ $TamilLetter;
|
||||
$TamilSsa $TamilSignVirama $TamilKa;
|
||||
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
||||
($L | $V | $LV | $LVT) $L;
|
||||
($V | $T) ($LV | $V);
|
||||
|
@@ -32,8 +32,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
|
||||
$OriyaSignVirama = \u0B4D;
|
||||
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
||||
$GurmukhiSignVirama = \u0A4D;
|
||||
$TamilLetter = [\u0B85-\u0BB9];
|
||||
$TamilKa = \u0B95;
|
||||
$TamilSignVirama = \u0BCD;
|
||||
$TamilSsa = \u0BB7;
|
||||
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
||||
$TeluguSignVirama = \u0C4D;
|
||||
|
||||
@@ -62,7 +63,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
|
||||
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
||||
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
||||
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
||||
$TamilLetter ($TamilSignVirama $TamilLetter?)+;
|
||||
$TamilKa $TamilSignVirama $TamilSsa;
|
||||
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
||||
|
||||
$L ($L | $V | $LV | $LVT);
|
||||
@@ -86,7 +87,7 @@ $LF $CR;
|
||||
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
||||
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
||||
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
||||
($TamilLetter? $TamilSignVirama)+ $TamilLetter;
|
||||
$TamilSsa $TamilSignVirama $TamilKa;
|
||||
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
||||
($L | $V | $LV | $LVT) $L;
|
||||
($V | $T) ($LV | $V);
|
||||
|
Reference in New Issue
Block a user