tdf#140767 Implemented Syriac justification
This change extends kashida justification to Syriac, using custom insertion rules. Change-Id: I7508d2c32e95abb12a098e989c7153828ba81c87 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173990 Tested-by: Jenkins Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
This commit is contained in:
@@ -26,6 +26,7 @@ public:
|
||||
void testFinalYeh();
|
||||
void testNoZwnjExpansion();
|
||||
void testExcludeInvalid();
|
||||
void testSyriac();
|
||||
|
||||
CPPUNIT_TEST_SUITE(KashidaTest);
|
||||
CPPUNIT_TEST(testCharacteristic);
|
||||
@@ -33,6 +34,7 @@ public:
|
||||
CPPUNIT_TEST(testFinalYeh);
|
||||
CPPUNIT_TEST(testNoZwnjExpansion);
|
||||
CPPUNIT_TEST(testExcludeInvalid);
|
||||
CPPUNIT_TEST(testSyriac);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
};
|
||||
|
||||
@@ -69,7 +71,9 @@ void KashidaTest::testManualKashida()
|
||||
// tdf#65344: Do not insert kashida before a final Yeh
|
||||
void KashidaTest::testFinalYeh()
|
||||
{
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نیمِي"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"يييي"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"ييي"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"يي"_ustr).has_value());
|
||||
|
||||
// Should always insert kashida after Seen, even before a final Yeh
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"كرسي"_ustr).value().nIndex);
|
||||
@@ -82,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion()
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex);
|
||||
}
|
||||
|
||||
// tdf#163105: Do not insert kashida if the position is invalid
|
||||
@@ -111,6 +115,43 @@ void KashidaTest::testExcludeInvalid()
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"نویسه"_ustr, aValid).has_value());
|
||||
}
|
||||
|
||||
// tdf#140767: Kashida justification for Syriac
|
||||
void KashidaTest::testSyriac()
|
||||
{
|
||||
// - Prefer user-inserted kashida
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), GetWordKashidaPosition(u"ܥܥـܥܥܥܥ"_ustr).value().nIndex);
|
||||
|
||||
std::vector<bool> aValid;
|
||||
aValid.resize(7, true);
|
||||
|
||||
// - Start from end and work toward midpoint, then reverse direction
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(5),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[5] = false;
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(4),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[4] = false;
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[0] = false;
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[1] = false;
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(2),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[2] = false;
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3),
|
||||
GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).value().nIndex);
|
||||
aValid[3] = false;
|
||||
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ܥܥܥܥܥܥܥ"_ustr, aValid).has_value());
|
||||
}
|
||||
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
|
||||
}
|
||||
|
||||
|
@@ -133,10 +133,9 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
|
||||
|
||||
return bRet;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<i18nutil::KashidaPosition>
|
||||
i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
|
||||
GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pValidPositions)
|
||||
{
|
||||
sal_Int32 nIdx = 0;
|
||||
sal_Int32 nPrevIdx = 0;
|
||||
@@ -148,9 +147,6 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
|
||||
|
||||
sal_Int32 nWordLen = rWord.getLength();
|
||||
|
||||
SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
|
||||
"i18n", "Kashida valid position array wrong size");
|
||||
|
||||
// ignore trailing vowel chars
|
||||
while (nWordLen && isTransparentChar(rWord[nWordLen - 1]))
|
||||
{
|
||||
@@ -298,8 +294,8 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
|
||||
}
|
||||
}
|
||||
|
||||
// 8. If valid position data exists, use the last legal position
|
||||
if (nPriorityLevel >= 7 && nIdx > 0 && !pValidPositions.empty())
|
||||
// 8. Try any valid position
|
||||
if (nPriorityLevel >= 7 && nIdx > 0)
|
||||
{
|
||||
fnTryInsertBefore(7);
|
||||
}
|
||||
@@ -317,10 +313,86 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
|
||||
|
||||
if (-1 != nKashidaPos)
|
||||
{
|
||||
return KashidaPosition{ nKashidaPos };
|
||||
return i18nutil::KashidaPosition{ nKashidaPos };
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<i18nutil::KashidaPosition>
|
||||
GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pValidPositions)
|
||||
{
|
||||
sal_Int32 nWordLen = rWord.getLength();
|
||||
|
||||
// Search for a user-inserted kashida
|
||||
for (sal_Int32 i = nWordLen - 1; i >= 0; --i)
|
||||
{
|
||||
if (0x640 == rWord[i])
|
||||
{
|
||||
return i18nutil::KashidaPosition{ i };
|
||||
}
|
||||
}
|
||||
|
||||
// Always insert kashida from the outside-in:
|
||||
// - First, work from the end of the word toward the midpoint
|
||||
// - Then, work from the beginning of the word toward the midpoint
|
||||
|
||||
sal_Int32 nWordMidpoint = nWordLen / 2;
|
||||
|
||||
auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) {
|
||||
// Exclusions:
|
||||
|
||||
// tdf#163105: Do not insert kashida if the position is invalid
|
||||
if (!pValidPositions.empty() && !pValidPositions[nIdx])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// End to midpoint
|
||||
for (sal_Int32 i = nWordLen - 2; i > nWordMidpoint; --i)
|
||||
{
|
||||
if (fnPositionValid(i))
|
||||
{
|
||||
return i18nutil::KashidaPosition{ i };
|
||||
}
|
||||
}
|
||||
|
||||
// Beginning to midpoint
|
||||
for (sal_Int32 i = 0; i <= nWordMidpoint; ++i)
|
||||
{
|
||||
if (fnPositionValid(i))
|
||||
{
|
||||
return i18nutil::KashidaPosition{ i };
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<i18nutil::KashidaPosition>
|
||||
i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>& pValidPositions)
|
||||
{
|
||||
sal_Int32 nWordLen = rWord.getLength();
|
||||
|
||||
SAL_WARN_IF(!pValidPositions.empty() && pValidPositions.size() != static_cast<size_t>(nWordLen),
|
||||
"i18n", "Kashida valid position array wrong size");
|
||||
|
||||
for (sal_Int32 nIdx = 0; nIdx < nWordLen; ++nIdx)
|
||||
{
|
||||
auto cCh = rWord[nIdx];
|
||||
|
||||
if ((cCh >= 0x700 && cCh <= 0x74F) || (cCh >= 0x860 && cCh <= 0x86A))
|
||||
{
|
||||
// This word contains Syriac characters.
|
||||
return GetWordKashidaPositionSyriac(rWord, pValidPositions);
|
||||
}
|
||||
}
|
||||
|
||||
return GetWordKashidaPositionArabic(rWord, pValidPositions);
|
||||
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
|
||||
|
@@ -347,16 +347,17 @@ public:
|
||||
*/
|
||||
bool IsKashidaLine(TextFrameIndex nCharIdx) const;
|
||||
|
||||
/** Checks if text is Arabic text.
|
||||
/** Checks if text is in a script that allows kashida justification.
|
||||
|
||||
@descr Checks if text is Arabic text.
|
||||
@descr Checks if text is in a script that allows kashida justification.
|
||||
@param rText
|
||||
The text to check
|
||||
@param nStt
|
||||
Start index of the text
|
||||
@return Returns if the language is an Arabic language
|
||||
@return Returns true if the script is Arabic or Syriac
|
||||
*/
|
||||
static bool IsArabicText(const OUString& rText, TextFrameIndex nStt, TextFrameIndex nLen);
|
||||
static bool IsKashidaScriptText(const OUString& rText, TextFrameIndex nStt,
|
||||
TextFrameIndex nLen);
|
||||
|
||||
/** Performs a thai justification on the kerning array
|
||||
|
||||
|
@@ -168,7 +168,8 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
|
||||
|
||||
// Fetch the set of valid positions from VCL, where possible
|
||||
aValidPositions.clear();
|
||||
if ( SwScriptInfo::IsArabicText( rInf.GetText(), TextFrameIndex{aScanner.GetBegin()}, TextFrameIndex{aScanner.GetLen()} ) )
|
||||
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ aScanner.GetBegin() },
|
||||
TextFrameIndex{ aScanner.GetLen() }))
|
||||
{
|
||||
rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, rInf.GetRefDev());
|
||||
|
||||
@@ -250,7 +251,7 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
|
||||
}
|
||||
|
||||
sal_Int32 nKashidasDropped = 0;
|
||||
if ( !SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
|
||||
if (!SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
|
||||
{
|
||||
nKashidasDropped = nKashidasInAttr;
|
||||
rKashidas -= nKashidasDropped;
|
||||
@@ -314,7 +315,8 @@ static bool lcl_CheckKashidaWidth ( SwScriptInfo& rSI, SwTextSizeInfo& rInf, SwT
|
||||
sal_Int32 nKashidasInAttr = rSI.KashidaJustify(nullptr, nullptr, nIdx, nNext - nIdx);
|
||||
|
||||
tools::Long nFontMinKashida = rInf.GetRefDev()->GetMinKashida();
|
||||
if ( nFontMinKashida && nKashidasInAttr > 0 && SwScriptInfo::IsArabicText( rInf.GetText(), nIdx, nNext - nIdx ) )
|
||||
if (nFontMinKashida && nKashidasInAttr > 0
|
||||
&& SwScriptInfo::IsKashidaScriptText(rInf.GetText(), nIdx, nNext - nIdx))
|
||||
{
|
||||
sal_Int32 nKashidasDropped = 0;
|
||||
while ( rKashidas && nGluePortion && nKashidasInAttr > 0 &&
|
||||
|
@@ -2194,16 +2194,18 @@ sal_Int32 SwScriptInfo::KashidaJustify( KernArray* pKernArray,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Checks if the current text is 'Arabic' text. Note that only the first
|
||||
// Checks if the text is in Arabic or Syriac. Note that only the first
|
||||
// character has to be checked because a ctl portion only contains one
|
||||
// script, see NewTextPortion
|
||||
bool SwScriptInfo::IsArabicText(const OUString& rText,
|
||||
bool SwScriptInfo::IsKashidaScriptText(const OUString& rText,
|
||||
TextFrameIndex const nStt, TextFrameIndex const nLen)
|
||||
{
|
||||
using namespace ::com::sun::star::i18n;
|
||||
static const ScriptTypeList typeList[] = {
|
||||
{ UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
|
||||
{ UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, sal_Int16(UnicodeScript_kScriptCount) } // 88
|
||||
{ UnicodeScript_kArabic, UnicodeScript_kArabic, sal_Int16(UnicodeScript_kArabic) }, // 11,
|
||||
{ UnicodeScript_kSyriac, UnicodeScript_kSyriac, sal_Int16(UnicodeScript_kSyriac) }, // 12,
|
||||
{ UnicodeScript_kScriptCount, UnicodeScript_kScriptCount,
|
||||
sal_Int16(UnicodeScript_kScriptCount) } // 88
|
||||
};
|
||||
|
||||
// go forward if current position does not hold a regular character:
|
||||
@@ -2229,7 +2231,7 @@ bool SwScriptInfo::IsArabicText(const OUString& rText,
|
||||
{
|
||||
const sal_Unicode cCh = rText[nIdx];
|
||||
const sal_Int16 type = unicode::getUnicodeScriptType( cCh, typeList, sal_Int16(UnicodeScript_kScriptCount) );
|
||||
return type == sal_Int16(UnicodeScript_kArabic);
|
||||
return type == sal_Int16(UnicodeScript_kArabic) || type == sal_Int16(UnicodeScript_kSyriac);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@@ -116,7 +116,7 @@ static TextFrameIndex lcl_AddSpace(const SwTextSizeInfo &rInf,
|
||||
// Kashida Justification: Insert Kashidas
|
||||
if ( nEnd > nPos && pSI && COMPLEX == nScript )
|
||||
{
|
||||
if ( SwScriptInfo::IsArabicText( *pStr, nPos, nEnd - nPos ) && pSI->CountKashida() )
|
||||
if (SwScriptInfo::IsKashidaScriptText(*pStr, nPos, nEnd - nPos) && pSI->CountKashida())
|
||||
{
|
||||
const sal_Int32 nKashRes = pSI->KashidaJustify(nullptr, nullptr, nPos, nEnd - nPos);
|
||||
// i60591: need to check result of KashidaJustify
|
||||
|
@@ -1127,7 +1127,8 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
|
||||
// Kashida Justification
|
||||
if ( SwFontScript::CTL == nActual && nSpaceAdd )
|
||||
{
|
||||
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
|
||||
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(),
|
||||
rInf.GetLen()))
|
||||
{
|
||||
aKashidaArray.resize(aKernArray.size(), false);
|
||||
if ( pSI && pSI->CountKashida() &&
|
||||
@@ -1344,7 +1345,7 @@ void SwFntObj::DrawText( SwDrawTextInfo &rInf )
|
||||
// Kashida Justification
|
||||
if ( SwFontScript::CTL == nActual && nSpaceAdd )
|
||||
{
|
||||
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
|
||||
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
|
||||
{
|
||||
aKashidaArray.resize(aKernArray.size(), false);
|
||||
if ( pSI && pSI->CountKashida() &&
|
||||
@@ -1838,7 +1839,7 @@ TextFrameIndex SwFntObj::GetModelPositionForViewPoint(SwDrawTextInfo &rInf)
|
||||
// Kashida Justification
|
||||
if ( SwFontScript::CTL == nActual && rInf.GetSpace() )
|
||||
{
|
||||
if ( SwScriptInfo::IsArabicText( rInf.GetText(), rInf.GetIdx(), rInf.GetLen() ) )
|
||||
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), rInf.GetIdx(), rInf.GetLen()))
|
||||
{
|
||||
if ( pSI && pSI->CountKashida() &&
|
||||
pSI->KashidaJustify( &aKernArray, nullptr, rInf.GetIdx(), rInf.GetLen(),
|
||||
|
BIN
vcl/qa/cppunit/pdfexport/data/tdf140767.odt
Normal file
BIN
vcl/qa/cppunit/pdfexport/data/tdf140767.odt
Normal file
Binary file not shown.
@@ -5761,6 +5761,67 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf163105Editeng)
|
||||
CPPUNIT_ASSERT_LESS(170.0, aRect.at(2).getWidth());
|
||||
}
|
||||
|
||||
// tdf#140767: exports a Syriac document to PDF and checks the text spans and
// the width of the whitespace-justified span.
CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf140767SyriacJustification)
{
    saveAsPDF(u"tdf140767.odt");

    auto pPdfDocument = parsePDFExport();
    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());

    auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0);
    CPPUNIT_ASSERT(pPdfPage);
    auto pTextPage = pPdfPage->getTextPage();
    CPPUNIT_ASSERT(pTextPage);

    int nPageObjectCount = pPdfPage->getObjectCount();

    CPPUNIT_ASSERT_EQUAL(11, nPageObjectCount);

    std::vector<OUString> aText;
    std::vector<basegfx::B2DRectangle> aRect;

    // Collect the text and bounds of every text object on the page
    int nTextObjectCount = 0;
    for (int i = 0; i < nPageObjectCount; ++i)
    {
        auto pPageObject = pPdfPage->getObject(i);
        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
        if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
        {
            aText.push_back(pPageObject->getText(pTextPage));
            aRect.push_back(pPageObject->getBounds());
            ++nTextObjectCount;
        }
    }

    CPPUNIT_ASSERT_EQUAL(11, nTextObjectCount);

    // Diagnostic dump of the extracted strings and their code units.
    // std::hex is sticky, so the stream's format flags are saved here and
    // restored afterwards; otherwise every later integer written to
    // std::cout by this process would be printed in hexadecimal.
    const std::ios_base::fmtflags nSavedFlags = std::cout.flags();
    std::cout << "Strings" << '\n';
    for (auto const& em : aText)
    {
        std::cout << em << '\n';
        for (sal_Int32 i = 0; i < em.getLength(); ++i)
        {
            std::cout << std::hex << static_cast<uint32_t>(em[i]) << " ";
        }
        std::cout << '\n';
    }
    std::cout.flags(nSavedFlags);
    std::cout.flush();

    CPPUNIT_ASSERT_EQUAL(u"ܝ"_ustr, aText.at(0).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(1).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܺܛ"_ustr, aText.at(2).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(3).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܚ"_ustr, aText.at(4).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܕ"_ustr, aText.at(5).trim()); // This span is whitespace justified
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(6).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܓ"_ustr, aText.at(7).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܒ"_ustr, aText.at(8).trim());
    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(9).trim());
    CPPUNIT_ASSERT_EQUAL(u"ܰܐ"_ustr, aText.at(10).trim());

    // Without kashida justification, this space will be 224.328
    CPPUNIT_ASSERT_LESS(90.0, aRect.at(5).getWidth());
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
CPPUNIT_PLUGIN_IMPLEMENT();
|
||||
|
Reference in New Issue
Block a user