fdo#44314 non-standard hyphenation at soft hyphens + with pers. dic.

Change-Id: I25e7c13036c6ce1948cc33d45901ef69a258fb03
This commit is contained in:
László Németh 2014-01-30 14:56:30 +01:00
parent a1aa702861
commit e63923b033
5 changed files with 118 additions and 32 deletions

View File

@ -54,6 +54,23 @@ static long nStaticTabs[]=
// Builds the normalized form of a dictionary entry that is used when two
// entries are compared for similarity.
//
// The text is first passed through comphelper::string::stripEnd with '.',
// then every bracketed section "[...]" (non-standard hyphenation pattern),
// brackets included, is dropped, and finally all '=' characters are removed.
// A '[' without a matching ']' drops everything up to the end of the text;
// a ']' outside a bracketed section is kept as an ordinary character.
static OUString getNormDicEntry_Impl(const OUString &rText)
{
    OUString aNorm(comphelper::string::stripEnd(rText, '.'));

    // only entries carrying a non-standard hyphenation pattern need the copy
    if (aNorm.indexOf('[') != -1)
    {
        OUStringBuffer aBuf ( aNorm.getLength() );
        sal_Bool bInPattern = sal_False;
        for (sal_Int32 nPos = 0; nPos < aNorm.getLength(); nPos++)
        {
            sal_Unicode c = aNorm[nPos];
            if (c == '[')
            {
                // start of a pattern: skip the bracket itself
                bInPattern = sal_True;
                continue;
            }
            if (bInPattern)
            {
                // inside a pattern nothing is copied; ']' ends the pattern
                if (c == ']')
                    bInPattern = sal_False;
                continue;
            }
            aBuf.append( c );
        }
        aNorm = aBuf.makeStringAndClear();
    }

    return comphelper::string::remove(aNorm, '=');
}
@ -68,7 +85,7 @@ static CDE_RESULT cmpDicEntry_Impl( const OUString &rText1, const OUString &rTex
eRes = CDE_EQUAL;
else
{ // similar = equal up to trailing '.' and hyphenation positions
// marked with '='
// marked with '=' and '[' + alternative spelling pattern + ']'
if (getNormDicEntry_Impl( rText1 ) == getNormDicEntry_Impl( rText2 ))
eRes = CDE_SIMILAR;
}

View File

@ -508,15 +508,21 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo
// Looks for an alternative-spelling (non-standard) hyphenation of aWord whose
// hyphenation position is exactly nIndex.
//
// hyphenate() is called with nIndex + 1 + extrachar as its third argument; the
// result is returned only if it is non-empty, reports isAlternativeSpelling()
// and has getHyphenationPos() == nIndex. Otherwise NULL is returned.
//
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped, so the old parameter list (names commented out) and the old
// "not supported" comments appear next to their replacements below --
// confirm against the real file.
Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
const OUString& /*aWord*/,
const ::com::sun::star::lang::Locale& /*aLocale*/,
sal_Int16 /*nIndex*/,
const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
const OUString& aWord,
const ::com::sun::star::lang::Locale& aLocale,
sal_Int16 nIndex,
const ::com::sun::star::beans::PropertyValues& aProperties )
throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
{
/* alternative spelling isn't supported by tex dictionaries */
/* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
/* TASK: implement queryAlternativeSpelling() */
// FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported,
// because Writer has got a layout problem here.
// For now only one extra character before the hyphen is allowed, so that the right break point is not missed:
for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily extrachar < 2 instead of extrachar <= 2
{
Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
// accept the result only if it is an alternative spelling that breaks exactly at nIndex
if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
return xRes;
}
return NULL;
}
@ -658,7 +664,7 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
for ( i = 0; i < encWord.getLength(); i++)
{
if (hyphens[i]&1 && (!rep || !rep[i]))
if (hyphens[i]&1)
nHyphCount++;
}
@ -670,8 +676,8 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
for (i = 0; i < nWord.getLength(); i++)
{
hyphenatedWordBuffer.append(aWord[i]);
// hyphenation position (not alternative)
if (hyphens[i]&1 && (!rep || !rep[i]))
// hyphenation position
if (hyphens[i]&1)
{
pPos[nHyphCount] = i;
hyphenatedWordBuffer.append('=');

View File

@ -550,24 +550,37 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
}
const sal_Unicode cIgnChar = '=';
const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. Schif[f]fahrt, Zuc[1k]ker
const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde)
sal_Int32 nIdx1 = 0,
nIdx2 = 0,
nNumIgnChar1 = 0,
nNumIgnChar2 = 0;
bool IgnState;
sal_Int32 nDiff = 0;
sal_Unicode cChar1 = '\0';
sal_Unicode cChar2 = '\0';
do
{
// skip chars to be ignored
while (nIdx1 < nLen1 && (cChar1 = aWord1[ nIdx1 ]) == cIgnChar)
IgnState = false;
while (nIdx1 < nLen1 && ((cChar1 = aWord1[ nIdx1 ]) == cIgnChar || cChar1 == cIgnBeg || IgnState ))
{
if ( cChar1 == cIgnBeg )
IgnState = true;
else if (cChar1 == cIgnEnd)
IgnState = false;
nIdx1++;
nNumIgnChar1++;
}
while (nIdx2 < nLen2 && (cChar2 = aWord2[ nIdx2 ]) == cIgnChar)
IgnState = false;
while (nIdx2 < nLen2 && ((cChar2 = aWord2[ nIdx2 ]) == cIgnChar || cChar2 == cIgnBeg || IgnState ))
{
if ( cChar2 == cIgnBeg )
IgnState = true;
else if (cChar2 == cIgnEnd)
IgnState = false;
nIdx2++;
nNumIgnChar2++;
}
@ -590,15 +603,25 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
// shorter one
// count remaining IgnChars
IgnState = false;
while (nIdx1 < nLen1 )
{
if (aWord1[ nIdx1++ ] == cIgnChar)
if (aWord1[ nIdx1 ] == cIgnBeg)
IgnState = true;
if (IgnState || aWord1[ nIdx1++ ] == cIgnChar)
nNumIgnChar1++;
if (aWord1[ nIdx1] == cIgnEnd)
IgnState = false;
}
IgnState = false;
while (nIdx2 < nLen2 )
{
if (aWord1[ nIdx2 ] == cIgnBeg)
IgnState = true;
if (aWord2[ nIdx2++ ] == cIgnChar)
nNumIgnChar2++;
if (aWord1[ nIdx1] == cIgnEnd)
IgnState = false;
}
nRes = ((sal_Int32) nLen1 - nNumIgnChar1) - ((sal_Int32) nLen2 - nNumIgnChar2);

View File

@ -79,18 +79,23 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord(
sal_Int32 nTextLen = aText.getLength();
// trailing '=' means "hyphenation should not be possible"
if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=')
if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
{
sal_Int16 nHyphenationPos = -1;
sal_Int32 nHyphenPos = -1;
sal_Int16 nOrigHyphPos = -1;
OUStringBuffer aTmp( nTextLen );
sal_Bool bSkip = sal_False;
sal_Bool bSkip2 = sal_False;
sal_Int32 nHyphIdx = -1;
sal_Int32 nLeading = 0;
for (sal_Int32 i = 0; i < nTextLen; i++)
{
sal_Unicode cTmp = aText[i];
if (cTmp != '=')
if (cTmp == '[' || cTmp == ']')
bSkip2 = !bSkip2;
if (cTmp != '=' && !bSkip2 && cTmp != ']')
{
aTmp.append( cTmp );
nLeading++;
@ -101,8 +106,10 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord(
{
if (!bSkip && nHyphIdx >= 0)
{
if (nLeading <= nMaxLeading)
if (nLeading <= nMaxLeading) {
nHyphenationPos = (sal_Int16) nHyphIdx;
nOrigHyphPos = i;
}
}
bSkip = sal_True; //! multiple '=' should count as one only
}
@ -110,24 +117,23 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord(
if (nHyphenationPos > 0)
{
aText = aTmp.makeStringAndClear();
#if OSL_DEBUG_LEVEL > 1
{
if (aText != rOrigWord)
if (aTmp.toString() != rOrigWord)
{
// both words should only differ by a having a trailing '.'
// character or not...
OUString aShorter, aLonger;
if (aText.getLength() <= rOrigWord.getLength())
if (aTmp.getLength() <= rOrigWord.getLength())
{
aShorter = aText;
aShorter = aTmp.toString();
aLonger = rOrigWord;
}
else
{
aShorter = rOrigWord;
aLonger = aText;
aLonger = aTmp.toString();
}
sal_Int32 nS = aShorter.getLength();
sal_Int32 nL = aLonger.getLength();
@ -139,12 +145,33 @@ Reference<XHyphenatedWord> HyphenatorDispatcher::buildHyphWord(
}
}
#endif
if (aText[ nOrigHyphPos ] == '[') // alternative hyphenation
{
sal_Int16 split = 0;
sal_Unicode c = aText [ nOrigHyphPos + 1 ];
sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos );
if ('0' <= c && c <= '9')
{
split = c - '0';
nOrigHyphPos++;
}
if (endhyphpat > -1)
{
OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) );
aTmp2.append( aText.copy( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) );
nHyphenPos = aTmp2.getLength();
aTmp2.append( aTmp.copy( nHyphenationPos + 1 ) );
//! take care of #i22591#
if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.')
aTmp2.append( '.' );
aText = aTmp2.makeStringAndClear();
}
}
if (nHyphenPos == -1)
aText = rOrigWord;
DBG_ASSERT( aText == rOrigWord, "failed to " );
xRes = new HyphenatedWord( aText, nLang, nHyphenationPos,
aText, nHyphenationPos );
xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos,
aText, (nHyphenPos > -1) ? nHyphenPos - 1 : nHyphenationPos);
}
}
}
@ -167,7 +194,7 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
sal_Int32 nTextLen = aText.getLength();
// trailing '=' means "hyphenation should not be possible"
if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=')
if (nTextLen > 0 && aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
{
// sequence to hold hyphenation positions
Sequence< sal_Int16 > aHyphPos( nTextLen );
@ -176,11 +203,14 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
OUStringBuffer aTmp( nTextLen );
sal_Bool bSkip = sal_False;
sal_Bool bSkip2 = sal_False;
sal_Int32 nHyphIdx = -1;
for (sal_Int32 i = 0; i < nTextLen; i++)
{
sal_Unicode cTmp = aText[i];
if (cTmp != '=')
if (cTmp == '[' || cTmp == ']')
bSkip2 = !bSkip2;
if (cTmp != '=' && !bSkip2 && cTmp != ']')
{
aTmp.append( cTmp );
bSkip = sal_False;
@ -426,7 +456,15 @@ Reference< XHyphenatedWord > SAL_CALL
if (xEntry.is())
{
//! alternative spellings not yet supported by dictionaries
// FIXME: multiple character change, eg. briddzsel -> bridzs-dzsel is not supported,
// because Writer has got a layout problem here.
// For now only one extra character before the hyphen is allowed, so that the right break point is not missed:
for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily i < 2 instead of i <= 2
{
xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1 + extrachar);
if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
return xRes;
}
}
else
{

View File

@ -262,9 +262,11 @@ static sal_Bool lcl_HasHyphInfo( const uno::Reference<XDictionaryEntry> &xEntry
sal_Bool bRes = sal_False;
if (xEntry.is())
{
// there has to be (at least one) '=' denoting a hyphenation position
// there has to be (at least one) '=' or '[' denoting a hyphenation position
// and it must not be before any character of the word
sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' );
if (nIdx == -1)
nIdx = xEntry->getDictionaryWord().indexOf( '[' );
bRes = nIdx != -1 && nIdx != 0;
}
return bRes;