Resolves: fdo#40292 Tamil grapheme cluster rules
This commit is contained in:
@@ -80,10 +80,10 @@ private:
|
|||||||
uno::Reference<i18n::XBreakIterator> m_xBreak;
|
uno::Reference<i18n::XBreakIterator> m_xBreak;
|
||||||
};
|
};
|
||||||
|
|
||||||
//See https://bugs.freedesktop.org/show_bug.cgi?id=31271 for motivation
|
//See https://bugs.freedesktop.org/show_bug.cgi?id=31271
|
||||||
void TestBreakIterator::testLineBreaking()
|
void TestBreakIterator::testLineBreaking()
|
||||||
{
|
{
|
||||||
::rtl::OUString aTest1(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
|
::rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
|
||||||
|
|
||||||
i18n::LineBreakHyphenationOptions aHyphOptions;
|
i18n::LineBreakHyphenationOptions aHyphOptions;
|
||||||
i18n::LineBreakUserOptions aUserOptions;
|
i18n::LineBreakUserOptions aUserOptions;
|
||||||
@@ -94,18 +94,19 @@ void TestBreakIterator::testLineBreaking()
|
|||||||
|
|
||||||
{
|
{
|
||||||
//Here we want the line break to leave "text here)" on the next line
|
//Here we want the line break to leave "text here)" on the next line
|
||||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
|
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
|
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
//Here we want the line break to leave "here)" on the next line
|
//Here we want the line break to leave "here)" on the next line
|
||||||
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
|
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
|
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152 for motivation
|
//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
|
||||||
|
//See https://bugs.freedesktop.org/show_bug.cgi?id=40292
|
||||||
void TestBreakIterator::testGraphemeIteration()
|
void TestBreakIterator::testGraphemeIteration()
|
||||||
{
|
{
|
||||||
lang::Locale aLocale;
|
lang::Locale aLocale;
|
||||||
@@ -114,46 +115,90 @@ void TestBreakIterator::testGraphemeIteration()
|
|||||||
|
|
||||||
{
|
{
|
||||||
const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
|
const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
|
||||||
::rtl::OUString aTest1(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
|
::rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
|
||||||
|
|
||||||
sal_Int32 nDone=0;
|
sal_Int32 nDone=0;
|
||||||
sal_Int32 nPos;
|
sal_Int32 nPos;
|
||||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
|
||||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
|
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
|
const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
|
||||||
::rtl::OUString aTest1(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
::rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
||||||
|
|
||||||
sal_Int32 nDone=0;
|
sal_Int32 nDone=0;
|
||||||
sal_Int32 nPos;
|
sal_Int32 nPos;
|
||||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
|
||||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
|
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
|
const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
|
||||||
::rtl::OUString aTest1(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
::rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
||||||
|
|
||||||
sal_Int32 nDone=0;
|
sal_Int32 nDone=0;
|
||||||
sal_Int32 nPos;
|
sal_Int32 nPos;
|
||||||
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
|
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
|
||||||
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
|
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
|
||||||
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
|
||||||
|
aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
|
||||||
|
|
||||||
|
{
|
||||||
|
const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
|
||||||
|
::rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
|
||||||
|
|
||||||
|
sal_Int32 nDone=0;
|
||||||
|
sal_Int32 nPos = 0;
|
||||||
|
|
||||||
|
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
|
||||||
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
|
||||||
|
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
|
||||||
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
|
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
|
||||||
|
{ 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
|
||||||
|
::rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
|
||||||
|
SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
|
||||||
|
|
||||||
|
sal_Int32 nDone=0;
|
||||||
|
sal_Int32 nPos=0;
|
||||||
|
|
||||||
|
for (sal_Int32 i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
sal_Int32 nOldPos = nPos;
|
||||||
|
nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
|
||||||
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
|
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (sal_Int32 i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
sal_Int32 nOldPos = nPos;
|
||||||
|
nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
|
||||||
|
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
|
||||||
|
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
|
const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
|
||||||
::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
|
::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
|
||||||
|
@@ -40,8 +40,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
|
|||||||
$OriyaSignVirama = \u0B4D;
|
$OriyaSignVirama = \u0B4D;
|
||||||
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
||||||
$GurmukhiSignVirama = \u0A4D;
|
$GurmukhiSignVirama = \u0A4D;
|
||||||
$TamilLetter = [\u0B85-\u0BB9];
|
$TamilKa = \u0B95;
|
||||||
$TamilSignVirama = \u0BCD;
|
$TamilSignVirama = \u0BCD;
|
||||||
|
$TamilSsa = \u0BB7;
|
||||||
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
||||||
$TeluguSignVirama = \u0C4D;
|
$TeluguSignVirama = \u0C4D;
|
||||||
|
|
||||||
@@ -70,7 +71,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
|
|||||||
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
||||||
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
||||||
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
||||||
$TamilLetter ($TamilSignVirama $TamilLetter?)+;
|
$TamilKa $TamilSignVirama $TamilSsa;
|
||||||
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
||||||
|
|
||||||
$L ($L | $V | $LV | $LVT);
|
$L ($L | $V | $LV | $LVT);
|
||||||
@@ -95,7 +96,7 @@ $LF $CR;
|
|||||||
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
||||||
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
||||||
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
||||||
($TamilLetter? $TamilSignVirama)+ $TamilLetter;
|
$TamilSsa $TamilSignVirama $TamilKa;
|
||||||
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
||||||
($L | $V | $LV | $LVT) $L;
|
($L | $V | $LV | $LVT) $L;
|
||||||
($V | $T) ($LV | $V);
|
($V | $T) ($LV | $V);
|
||||||
|
@@ -32,8 +32,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
|
|||||||
$OriyaSignVirama = \u0B4D;
|
$OriyaSignVirama = \u0B4D;
|
||||||
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
|
||||||
$GurmukhiSignVirama = \u0A4D;
|
$GurmukhiSignVirama = \u0A4D;
|
||||||
$TamilLetter = [\u0B85-\u0BB9];
|
$TamilKa = \u0B95;
|
||||||
$TamilSignVirama = \u0BCD;
|
$TamilSignVirama = \u0BCD;
|
||||||
|
$TamilSsa = \u0BB7;
|
||||||
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
|
||||||
$TeluguSignVirama = \u0C4D;
|
$TeluguSignVirama = \u0C4D;
|
||||||
|
|
||||||
@@ -62,7 +63,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
|
|||||||
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
|
||||||
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
|
||||||
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
|
||||||
$TamilLetter ($TamilSignVirama $TamilLetter?)+;
|
$TamilKa $TamilSignVirama $TamilSsa;
|
||||||
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
|
||||||
|
|
||||||
$L ($L | $V | $LV | $LVT);
|
$L ($L | $V | $LV | $LVT);
|
||||||
@@ -86,7 +87,7 @@ $LF $CR;
|
|||||||
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
|
||||||
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
|
||||||
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
|
||||||
($TamilLetter? $TamilSignVirama)+ $TamilLetter;
|
$TamilSsa $TamilSignVirama $TamilKa;
|
||||||
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
|
||||||
($L | $V | $LV | $LVT) $L;
|
($L | $V | $LV | $LVT) $L;
|
||||||
($V | $T) ($LV | $V);
|
($V | $T) ($LV | $V);
|
||||||
|
Reference in New Issue
Block a user