tdf#140767 Implemented Syriac justification

This change extends kashida justification to Syriac, using custom
insertion rules.

Change-Id: I7508d2c32e95abb12a098e989c7153828ba81c87
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173990
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
This commit is contained in:
Jonathan Clark
2024-09-26 02:59:26 -06:00
parent d8f430e4be
commit c3c29d31d7
9 changed files with 206 additions and 26 deletions

View File

@@ -26,6 +26,7 @@ public:
void testFinalYeh();
void testNoZwnjExpansion();
void testExcludeInvalid();
void testSyriac();
CPPUNIT_TEST_SUITE(KashidaTest);
CPPUNIT_TEST(testCharacteristic);
@@ -33,6 +34,7 @@ public:
CPPUNIT_TEST(testFinalYeh);
CPPUNIT_TEST(testNoZwnjExpansion);
CPPUNIT_TEST(testExcludeInvalid);
CPPUNIT_TEST(testSyriac);
CPPUNIT_TEST_SUITE_END();
};
@@ -69,7 +71,9 @@ void KashidaTest::testManualKashida()
// tdf#65344: Do not insert kashida before a final Yeh
void KashidaTest::testFinalYeh()
{
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value());
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"يييي"_ustr).value().nIndex);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"ييي"_ustr).value().nIndex);
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"يي"_ustr).has_value());
// Should always insert kashida after Seen, even before a final Yeh
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex);
@@ -82,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion()
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex);
}
// tdf#163105: Do not insert kashida if the position is invalid
@@ -111,6 +115,43 @@ void KashidaTest::testExcludeInvalid()
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نویسه"_ustr, aValid).has_value());
}
// tdf#140767: Kashida justification for Syriac
void KashidaTest::testSyriac()
{
// - Prefer user-inserted kashida
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"ܥܥـܥܥܥܥ"_ustr).value().nIndex);
std::vector<bool> aValid;
aValid.resize(7, true);
// - Start from end and work toward midpoint, then reverse direction
CPPUNIT_ASSERT_EQUAL(sal_Int32(5),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[5] = false;
CPPUNIT_ASSERT_EQUAL(sal_Int32(4),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[4] = false;
CPPUNIT_ASSERT_EQUAL(sal_Int32(0),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[0] = false;
CPPUNIT_ASSERT_EQUAL(sal_Int32(1),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[1] = false;
CPPUNIT_ASSERT_EQUAL(sal_Int32(2),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[2] = false;
CPPUNIT_ASSERT_EQUAL(sal_Int32(3),
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
aValid[3] = false;
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).has_value());
}
CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
}

View File

@@ -133,10 +133,9 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
return bRet;
}
}
std::optional<i18nutil::KashidaPosition>
i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pValidPositions)
{
sal_Int32 nIdx = 0;
sal_Int32 nPrevIdx = 0;
@@ -148,9 +147,6 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
sal_Int32 nWordLen = rWord.getLength();
SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
"i18n", "Kashida valid position array wrong size");
// ignore trailing vowel chars
while (nWordLen && isTransparentChar(rWord[nWordLen - 1]))
{
@@ -298,8 +294,8 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
}
}
// 8. If valid position data exists, use the last legal position
if (nPriorityLevel >= 7 && nIdx > 0 && !pValidPositions.empty())
// 8. Try any valid position
if (nPriorityLevel >= 7 && nIdx > 0)
{
fnTryInsertBefore(7);
}
@@ -317,10 +313,86 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
if (-1 != nKashidaPos)
{
return KashidaPosition{ nKashidaPos };
return i18nutil::KashidaPosition{ nKashidaPos };
}
return std::nullopt;
}
std::optional<i18nutil::KashidaPosition>
GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pValidPositions)
{
sal_Int32 nWordLen = rWord.getLength();
// Search for a user-inserted kashida
for (sal_Int32 i = nWordLen - 1; i >= 0; --i)
{
if (0x640 == rWord[i])
{
return i18nutil::KashidaPosition{ i };
}
}
// Always insert kashida from the outside-in:
// - First, work from the end of the word toward the midpoint
// - Then, work from the beginning of the word toward the midpoint
sal_Int32 nWordMidpoint = nWordLen / 2;
auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) {
// Exclusions:
// tdf#163105: Do not insert kashida if the position is invalid
if (!pValidPositions.empty() && !pValidPositions[nIdx])
{
return false;
}
return true;
};
// End to midpoint
for (sal_Int32 i = nWordLen - 2; i > nWordMidpoint; --i)
{
if (fnPositionValid(i))
{
return i18nutil::KashidaPosition{ i };
}
}
// Beginning to midpoint
for (sal_Int32 i = 0; i <= nWordMidpoint; ++i)
{
if (fnPositionValid(i))
{
return i18nutil::KashidaPosition{ i };
}
}
return std::nullopt;
}
}
std::optional<i18nutil::KashidaPosition>
i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
{
sal_Int32 nWordLen = rWord.getLength();
SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
"i18n", "Kashida valid position array wrong size");
for (sal_Int32 nIdx = 0; nIdx < nWordLen; ++nIdx)
{
auto cCh = rWord[nIdx];
if ((cCh >= 0x700 && cCh <= 0x74F) || (cCh >= 0x860 && cCh <= 0x86A))
{
// This word contains Syriac characters.
return GetWordKashidaPositionSyriac(rWord, pValidPositions);
}
}
return GetWordKashidaPositionArabic(rWord, pValidPositions);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */

View File

@@ -347,16 +347,17 @@ public:
*/
bool IsKashidaLine(TextFrameIndex nCharIdx) const;
/** Checks if text is Arabic text.
/** Checks if text is in a script that allows kashida justification.
@descr Checks if text is Arabic text.
@descr Checks if text is in a script that allows kashida justification.
@param rText
The text to check
@param nStt
Start index of the text
@return Returns if the language is an Arabic language
@return Returns true if the script is Arabic or Syriac
*/
static bool IsArabicText(const OUString& rText, TextFrameIndex nStt, TextFrameIndex nLen);
static bool IsKashidaScriptText(const OUString& rText, TextFrameIndex nStt,
TextFrameIndex nLen);
/** Performs a thai justification on the kerning array

View File

@@ -168,7 +168,8 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
// Fetch the set of valid positions from VCL, where possible
aValidPositions.clear();
if ( SwScriptInfo::IsArabicText( rInf.GetText(), TextFrameIndex{aScanner.GetBegin()}, TextFrameIndex{aScanner.GetLen()} ) )
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ aScanner.GetBegin() },
TextFrameIndex{ aScanner.GetLen() }))
{
rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, rInf.GetRefDev());
@@ -250,7 +251,7 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
}
sal_Int32 nKashidasDropped = 0;
if ( !SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
if (!SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
{
nKashidasDropped = nKashidasInAttr;
rKashidas -= nKashidasDropped;
@@ -314,7 +315,8 @@ static bool lcl_CheckKashidaWidth ( SwScriptInfo& rSI, SwTextSizeInfo& rInf, SwT
sal_Int32 nKashidasInAttr = rSI.KashidaJustify(nullptr, nullptr, nIdx, nNext - nIdx);
tools::Long nFontMinKashida = rInf.GetRefDev()->GetMinKashida();
if ( nFontMinKashida && nKashidasInAttr > 0 && SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
if (nFontMinKashida && nKashidasInAttr > 0
&& SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
{
sal_Int32 nKashidasDropped = 0;
while ( rKashidas && nGluePortion && nKashidasInAttr > 0 &&

View File

@@ -2194,16 +2194,18 @@ sal_Int32 SwScriptInfo::KashidaJustify( KernArray* pKernArray,
return 0;
}
// Checks if the current text is 'Arabic' text. Note that only the first
// Checks if the text is in Arabic or Syriac. Note that only the first
// character has to be checked because a ctl portion only contains one
// script, see NewTextPortion
bool SwScriptInfo::IsArabicText(const OUString& rText,
bool SwScriptInfo::IsKashidaScriptText(const OUString& rText,
TextFrameIndex const nStt, TextFrameIndex const nLen)
{
using namespace ::com::sun::star::i18n;
static const ScriptTypeList typeList[] = {
{ UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
{ UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, sal_Int16(UnicodeScript_kScriptCount) } // 88
{ UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
{ UnicodeScript_kSyriac, UnicodeScript_kSyriac, sal_Int16(UnicodeScript_kSyriac) }, // 12,
{ UnicodeScript_kScriptCount, UnicodeScript_kScriptCount,
sal_Int16(UnicodeScript_kScriptCount) } // 88
};
// go forward if current position does not hold a regular character:
@@ -2229,7 +2231,7 @@ bool SwScriptInfo::IsArabicText(const OUString& rText,
{
const sal_Unicode cCh = rText[nIdx];
const sal_Int16 type = unicode::getUnicodeScriptType( cCh, typeList, sal_Int16(UnicodeScript_kScriptCount) );
return type == sal_Int16(UnicodeScript_kArabic);
return type == sal_Int16(UnicodeScript_kArabic) || type == sal_Int16(UnicodeScript_kSyriac);
}
return false;
}

View File

@@ -116,7 +116,7 @@ static TextFrameIndex lcl_AddSpace(const SwTextSizeInfo &rInf,
// Kashida Justification: Insert Kashidas
if ( nEnd > nPos && pSI && COMPLEX == nScript )
{
if ( SwScriptInfo::IsArabicText( *pStr, nPos, nEnd - nPos ) && pSI->CountKashida() )
if (SwScriptInfo::IsKashidaScriptText(*pStr, nPos, nEnd - nPos) && pSI->CountKashida())
{
const sal_Int32 nKashRes = pSI->KashidaJustify(nullptr, nullptr, nPos, nEnd - nPos);
// i60591: need to check result of KashidaJustify

View File

@@ -1127,7 +1127,8 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
// Kashida Justification
if ( SwFontScript::CTL == nActual && nSpaceAdd )
{
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(),
rInf.GetLen()))
{
aKashidaArray.resize(aKernArray.size(), false);
if ( pSI && pSI->CountKashida() &&
@@ -1344,7 +1345,7 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
// Kashida Justification
if ( SwFontScript::CTL == nActual && nSpaceAdd )
{
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
{
aKashidaArray.resize(aKernArray.size(), false);
if ( pSI && pSI->CountKashida() &&
@@ -1838,7 +1839,7 @@ TextFrameIndex SwFntObj::GetModelPositionForViewPoint(SwDrawTextInfo &rInf)
// Kashida Justification
if ( SwFontScript::CTL == nActual && rInf.GetSpace() )
{
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
{
if ( pSI && pSI->CountKashida() &&
pSI->KashidaJustify( &aKernArray, nullptr, rInf.GetIdx(), rInf.GetLen(),

Binary file not shown.

View File

@@ -5761,6 +5761,67 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf163105Editeng)
CPPUNIT_ASSERT_LESS(170.0, aRect.at(2).getWidth());
}
// tdf#140767: exports a justified Syriac document to PDF and checks the extracted
// text objects and the width of one of them.
CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf140767SyriacJustification)
{
    saveAsPDF(u"tdf140767.odt");

    auto pPdfDocument = parsePDFExport();
    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());

    auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0);
    CPPUNIT_ASSERT(pPdfPage);
    auto pTextPage = pPdfPage->getTextPage();
    CPPUNIT_ASSERT(pTextPage);

    int nPageObjectCount = pPdfPage->getObjectCount();
    CPPUNIT_ASSERT_EQUAL(11, nPageObjectCount);

    // Collect the text and bounds of every text object on the page, in page order
    std::vector<OUString> aText;
    std::vector<basegfx::B2DRectangle> aRect;
    int nTextObjectCount = 0;
    for (int i = 0; i < nPageObjectCount; ++i)
    {
        auto pPageObject = pPdfPage->getObject(i);
        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
        if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
        {
            aText.push_back(pPageObject->getText(pTextPage));
            aRect.push_back(pPageObject->getBounds());
            ++nTextObjectCount;
        }
    }

    CPPUNIT_ASSERT_EQUAL(11, nTextObjectCount);

    // Diagnostic dump of each extracted string and its UTF-16 code units.
    // std::hex is sticky on the stream, so the formatting flags are saved here and
    // restored afterwards to keep later output on std::cout in its previous base.
    const std::ios_base::fmtflags nSavedFlags = std::cout.flags();
    std::cout << "Strings" << std::endl;
    for (auto const& em : aText)
    {
        std::cout << em << std::endl;
        for (sal_Int32 i = 0; i < em.getLength(); ++i)
        {
            std::cout << std::hex << static_cast<uint32_t>(em[i]) << " ";
        }
        std::cout << std::endl;
    }
    std::cout.flags(nSavedFlags);

    CPPUNIT_ASSERT_EQUAL(u"ܝ"_ustr, aText.at(0).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(1).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܺܛ"_ustr, aText.at(2).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(3).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܚ"_ustr, aText.at(4).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܕ"_ustr, aText.at(5).trim()); // This span is whitespace justified
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(6).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܓ"_ustr, aText.at(7).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܒ"_ustr, aText.at(8).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(9).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܐ"_ustr, aText.at(10).trim());

    // Without kashida justification, this space will be 224.328
    CPPUNIT_ASSERT_LESS(90.0, aRect.at(5).getWidth());
}
} // end anonymous namespace
CPPUNIT_PLUGIN_IMPLEMENT();