fdo#44314 non-standard hyphenation at soft hyphens + with pers. dic.

Change-Id: I25e7c13036c6ce1948cc33d45901ef69a258fb03
2014-01-30 14:56:30 +01:00 · 2014-01-30 14:56:30 +01:00 · e63923b033
commit e63923b033
parent a1aa702861
5 changed files with 118 additions and 32 deletions
--- a/cui/source/options/optdict.cxx
+++ b/cui/source/options/optdict.cxx
@ -54,6 +54,23 @@ static long nStaticTabs[]=
 // Returns the normalised form of a dictionary entry used for "similar entry"
 // comparison: trailing '.' characters are stripped, every bracketed
 // non-standard hyphenation pattern ("[...]", brackets included) is dropped,
 // and all '=' hyphenation marks are removed.
 static OUString getNormDicEntry_Impl(const OUString &rText)
 {
    const OUString aStripped(comphelper::string::stripEnd(rText, '.'));

    // common case: no non-standard hyphenation pattern present
    if (aStripped.indexOf('[') < 0)
        return comphelper::string::remove(aStripped, '=');

    const sal_Int32 nLen = aStripped.getLength();
    OUStringBuffer aKept( nLen );
    bool bInPattern = false;
    for (sal_Int32 nPos = 0; nPos < nLen; ++nPos)
    {
        const sal_Unicode cCur = aStripped[nPos];
        if (bInPattern)
        {
            // inside "[...]": drop everything, the closing ']' ends the pattern
            if (cCur == ']')
                bInPattern = false;
        }
        else if (cCur == '[')
            bInPattern = true;      // the opening bracket itself is dropped too
        else
            aKept.append( cCur );
    }
    return comphelper::string::remove(aKept.makeStringAndClear(), '=');
 }
@ -68,7 +85,7 @@ static CDE_RESULT cmpDicEntry_Impl( const OUString &rText1, const OUString &rTex
        eRes = CDE_EQUAL;
    else
    {   // similar = equal up to trailing '.' and hyphenation positions
-        // marked with '='
+        // marked with '=' and '[' + alternative spelling pattern + ']'
        if (getNormDicEntry_Impl( rText1 ) == getNormDicEntry_Impl( rText2 ))
            eRes = CDE_SIMILAR;
    }
--- a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
+++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
@ -508,15 +508,21 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo
 // Looks for a hyphenation of aWord that uses an alternative spelling and
 // whose hyphenation position is exactly nIndex.
 //
 // The lookup is delegated to hyphenate(), called with nIndex + 1 + extrachar
 // as its position argument; aLocale and aProperties are passed through
 // unchanged. A result is returned only if it is valid, reports
 // isAlternativeSpelling() and has getHyphenationPos() == nIndex.
 // Otherwise NULL is returned.
 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
-        const OUString& /*aWord*/,
+        const OUString& aWord,
-        const ::com::sun::star::lang::Locale& /*aLocale*/,
+        const ::com::sun::star::lang::Locale& aLocale,
-        sal_Int16 /*nIndex*/,
+        sal_Int16 nIndex,
-        const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
+        const ::com::sun::star::beans::PropertyValues& aProperties )
        throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
 {
-  /* alternative spelling isn't supported by tex dictionaries */
+    // FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported,
-  /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
+    // because Writer has got a layout problem here.
-  /* TASK: implement queryAlternativeSpelling() */
+    // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
    // The loop body currently runs exactly once, with extrachar == 1.
    for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily extrachar < 2 instead of extrachar <= 2
    {
        Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
        // accept only an alternative-spelling result that breaks exactly at nIndex
        if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
            return xRes;
    }
    // no alternative spelling was found for this position
    return NULL;
 }
@ -658,7 +664,7 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
        for ( i = 0; i < encWord.getLength(); i++)
        {
-            if (hyphens[i]&1 && (!rep || !rep[i]))
+            if (hyphens[i]&1)
                nHyphCount++;
        }
@ -670,8 +676,8 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
        for (i = 0; i < nWord.getLength(); i++)
        {
            hyphenatedWordBuffer.append(aWord[i]);
-            // hyphenation position (not alternative)
+            // hyphenation position
-            if (hyphens[i]&1 && (!rep || !rep[i]))
+            if (hyphens[i]&1)
            {
                pPos[nHyphCount] = i;
                hyphenatedWordBuffer.append('=');
--- a/linguistic/source/dicimp.cxx
+++ b/linguistic/source/dicimp.cxx
@ -550,24 +550,37 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
    }
    const sal_Unicode cIgnChar = '=';
    const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. Schif[f]fahrt, Zuc[1k]ker
    const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde)
    sal_Int32       nIdx1 = 0,
                  nIdx2 = 0,
                  nNumIgnChar1 = 0,
                  nNumIgnChar2 = 0;
    bool IgnState;
    sal_Int32 nDiff = 0;
    sal_Unicode cChar1 = '\0';
    sal_Unicode cChar2 = '\0';
    do
    {
        // skip chars to be ignored
-        while (nIdx1 < nLen1  &&  (cChar1 = aWord1[ nIdx1 ]) == cIgnChar)
+        IgnState = false;
        while (nIdx1 < nLen1  &&  ((cChar1 = aWord1[ nIdx1 ]) == cIgnChar || cChar1 == cIgnBeg || IgnState ))
        {
            if ( cChar1 == cIgnBeg )
                IgnState = true;
            else if (cChar1 == cIgnEnd)
                IgnState = false;
            nIdx1++;
            nNumIgnChar1++;
        }
-        while (nIdx2 < nLen2  &&  (cChar2 = aWord2[ nIdx2 ]) == cIgnChar)
+        IgnState = false;
        while (nIdx2 < nLen2  &&  ((cChar2 = aWord2[ nIdx2 ]) == cIgnChar || cChar2 == cIgnBeg || IgnState ))
        {
            if ( cChar2 == cIgnBeg )
                IgnState = true;
            else if (cChar2 == cIgnEnd)
                IgnState = false;
            nIdx2++;
            nNumIgnChar2++;
        }
@ -590,15 +603,25 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
        // shorter one
        // count remaining IgnChars
        // Count the ignorable characters left in the unconsumed tail of each
        // word: every cIgnChar, plus everything from cIgnBeg up to and
        // including the matching cIgnEnd.
        //
        // Each loop inspects the current character only and advances its index
        // exactly once per iteration, after all tests. The index must not be
        // incremented inside the '||' expression: once IgnState is true the
        // right-hand side is not evaluated, so the index would never advance
        // and the loop would never terminate.
        IgnState = false;
        while (nIdx1 < nLen1 )
        {
            if (aWord1[ nIdx1 ] == cIgnBeg)
                IgnState = true;
            if (IgnState || aWord1[ nIdx1 ] == cIgnChar)
                nNumIgnChar1++;
            // the closing bracket was counted above; the pattern ends here
            if (aWord1[ nIdx1 ] == cIgnEnd)
                IgnState = false;
            nIdx1++;
        }
        // same counting for the second word, using its own string and index
        IgnState = false;
        while (nIdx2 < nLen2 )
        {
            if (aWord2[ nIdx2 ] == cIgnBeg)
                IgnState = true;
            if (IgnState || aWord2[ nIdx2 ] == cIgnChar)
                nNumIgnChar2++;
            if (aWord2[ nIdx2 ] == cIgnEnd)
                IgnState = false;
            nIdx2++;
        }
        nRes = ((sal_Int32) nLen1 - nNumIgnChar1) - ((sal_Int32) nLen2 - nNumIgnChar2);
--- a/linguistic/source/hyphdsp.cxx
+++ b/linguistic/source/hyphdsp.cxx
@ -79,18 +79,23 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
        sal_Int32 nTextLen = aText.getLength();
        // trailing '=' means "hyphenation should not be possible"
-        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=')
+        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
        {
            sal_Int16 nHyphenationPos = -1;
            sal_Int32 nHyphenPos = -1;
            sal_Int16 nOrigHyphPos = -1;
            OUStringBuffer aTmp( nTextLen );
            sal_Bool  bSkip = sal_False;
            sal_Bool  bSkip2 = sal_False;
            sal_Int32 nHyphIdx = -1;
            sal_Int32 nLeading = 0;
            for (sal_Int32 i = 0;  i < nTextLen;  i++)
            {
                sal_Unicode cTmp = aText[i];
-                if (cTmp != '=')
+                if (cTmp == '[' || cTmp == ']')
                    bSkip2 = !bSkip2;
                if (cTmp != '=' && !bSkip2 && cTmp != ']')
                {
                    aTmp.append( cTmp );
                    nLeading++;
@ -101,8 +106,10 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
                {
                    if (!bSkip  &&  nHyphIdx >= 0)
                    {
-                        if (nLeading <= nMaxLeading)
+                        if (nLeading <= nMaxLeading) {
                            nHyphenationPos = (sal_Int16) nHyphIdx;
                            nOrigHyphPos = i;
                        }
                    }
                    bSkip = sal_True;   //! multiple '=' should count as one only
                }
@ -110,24 +117,23 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
            if (nHyphenationPos > 0)
            {
                aText = aTmp.makeStringAndClear();
 #if OSL_DEBUG_LEVEL > 1
                {
-                    if (aText != rOrigWord)
+                    if (aTmp.toString() != rOrigWord)
                    {
                        // both words should only differ by a having a trailing '.'
                        // character or not...
                        OUString aShorter, aLonger;
-                        if (aText.getLength() <= rOrigWord.getLength())
+                        if (aTmp.getLength() <= rOrigWord.getLength())
                        {
-                            aShorter = aText;
+                            aShorter = aTmp.toString();
                            aLonger  = rOrigWord;
                        }
                        else
                        {
                            aShorter = rOrigWord;
-                            aLonger  = aText;
+                            aLonger  = aTmp.toString();
                        }
                        sal_Int32 nS = aShorter.getLength();
                        sal_Int32 nL = aLonger.getLength();
@ -139,12 +145,33 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
                    }
                }
 #endif
                if (aText[ nOrigHyphPos ] == '[')  // alternative hyphenation
                {
                    sal_Int16 split = 0;
                    sal_Unicode c = aText [ nOrigHyphPos + 1 ];
                    sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos );
                    if ('0' <= c && c <= '9')
                    {
                        split = c - '0';
                        nOrigHyphPos++;
                    }
                    if (endhyphpat > -1)
                    {
                        OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) );
                        aTmp2.append( aText.copy( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) );
                        nHyphenPos = aTmp2.getLength();
                        aTmp2.append( aTmp.copy( nHyphenationPos + 1 ) );
                        //! take care of #i22591#
                        if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.')
                            aTmp2.append( '.' );
                        aText = aTmp2.makeStringAndClear();
                    }
                }
                if (nHyphenPos == -1)
                    aText = rOrigWord;
-                DBG_ASSERT( aText == rOrigWord, "failed to " );
+                xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos,
-                xRes = new HyphenatedWord( aText, nLang, nHyphenationPos,
+                                aText, (nHyphenPos > -1) ? nHyphenPos - 1 : nHyphenationPos);
                                aText, nHyphenationPos );
            }
        }
    }
@ -167,7 +194,7 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
        sal_Int32 nTextLen = aText.getLength();
        // trailing '=' means "hyphenation should not be possible"
-        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=')
+        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
        {
            // sequence to hold hyphenation positions
            Sequence< sal_Int16 > aHyphPos( nTextLen );
@ -176,11 +203,14 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
            OUStringBuffer aTmp( nTextLen );
            sal_Bool  bSkip = sal_False;
            sal_Bool  bSkip2 = sal_False;
            sal_Int32 nHyphIdx = -1;
            for (sal_Int32 i = 0;  i < nTextLen;  i++)
            {
                sal_Unicode cTmp = aText[i];
-                if (cTmp != '=')
+                if (cTmp == '[' || cTmp == ']')
                    bSkip2 = !bSkip2;
                if (cTmp != '=' && !bSkip2 && cTmp != ']')
                {
                    aTmp.append( cTmp );
                    bSkip = sal_False;
@ -426,7 +456,15 @@ Reference< XHyphenatedWord > SAL_CALL
        if (xEntry.is())
        {
-            //! alternative spellings not yet supported by dictionaries
+            // FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported,
            // because Writer has got a layout problem here.
            // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
            for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily i < 2 instead of i <= 2
            {
                xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1 + extrachar);
                if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
                    return xRes;
            }
        }
        else
        {
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@ -262,9 +262,11 @@ static sal_Bool lcl_HasHyphInfo( const uno::Reference<XDictionaryEntry> &xEntry
    sal_Bool bRes = sal_False;
    if (xEntry.is())
    {
-        // there has to be (at least one) '=' denoting a hyphenation position
+        // there has to be (at least one) '=' or '[' denoting a hyphenation position
        // and it must not be before any character of the word
        sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' );
        if (nIdx == -1)
            nIdx = xEntry->getDictionaryWord().indexOf( '[' );
        bRes = nIdx != -1  &&  nIdx != 0;
    }
    return bRes;