tdf#154499 sw spell checking: add 2-word phrase checking
Hunspell dictionaries can contain phrases, i.e. space-separated word sequences, which were previously used only to reject compounds and to give better suggestions. Now 2-word phrases are recognized in the text, so there is no need to break a phrase into single words, e.g. "et" and "cetera", which resulted in the acceptance of typos (e.g. "et" without "cetera") and also in bad suggestions (e.g. for "et" and "cetera" independently of the context).

Another example:

== old .dic file ==
...
et cetera
von Neumann
veni vidi vici
...

List the 2-word phrases, and break phrases of 3 or more words into 2-word phrases:

== new .dic file ==
...
et cetera
von Neumann
veni vidi
vidi vici
...

Note: the words of a phrase are separated by a space in the dictionary, but the phrase is also recognized with punctuation between the words, e.g. in the previous example, "Veni, vidi, vici."

Note: during typing, the second word of a phrase is accepted only when the paragraph is ended, i.e. by pressing Enter.

Change-Id: I4a1487abc0e4ab31d09750ee8c817353e6325ca3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151487
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
This commit is contained in:
@@ -33,6 +33,7 @@ class SwScanner
|
|||||||
{
|
{
|
||||||
std::function<LanguageType (sal_Int32, sal_Int32, bool)> m_pGetLangOfChar;
|
std::function<LanguageType (sal_Int32, sal_Int32, bool)> m_pGetLangOfChar;
|
||||||
OUString m_aWord;
|
OUString m_aWord;
|
||||||
|
OUString m_aPrevWord;
|
||||||
const OUString m_aPreDashReplacementText;
|
const OUString m_aPreDashReplacementText;
|
||||||
OUString m_aText;
|
OUString m_aText;
|
||||||
const LanguageType* m_pLanguage;
|
const LanguageType* m_pLanguage;
|
||||||
@@ -62,6 +63,7 @@ public:
|
|||||||
bool NextWord();
|
bool NextWord();
|
||||||
|
|
||||||
const OUString& GetWord() const { return m_aWord; }
|
const OUString& GetWord() const { return m_aWord; }
|
||||||
|
const OUString& GetPrevWord() const { return m_aPrevWord; }
|
||||||
|
|
||||||
sal_Int32 GetBegin() const { return m_nBegin; }
|
sal_Int32 GetBegin() const { return m_nBegin; }
|
||||||
sal_Int32 GetEnd() const { return m_nBegin + m_nLength; }
|
sal_Int32 GetEnd() const { return m_nBegin + m_nLength; }
|
||||||
|
@@ -961,6 +961,7 @@ bool SwScanner::NextWord()
|
|||||||
if ( m_nWordType == i18n::WordType::WORD_COUNT )
|
if ( m_nWordType == i18n::WordType::WORD_COUNT )
|
||||||
m_nLength = forceEachAsianCodePointToWord(m_aText, m_nBegin, m_nLength);
|
m_nLength = forceEachAsianCodePointToWord(m_aText, m_nBegin, m_nLength);
|
||||||
|
|
||||||
|
m_aPrevWord = m_aWord;
|
||||||
m_aWord = m_aPreDashReplacementText.copy( m_nBegin, m_nLength );
|
m_aWord = m_aPreDashReplacementText.copy( m_nBegin, m_nLength );
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -1035,8 +1036,11 @@ bool SwTextNode::Spell(SwSpellArgs* pArgs)
|
|||||||
SwScanner aScanner( *this, m_Text, nullptr, ModelToViewHelper(),
|
SwScanner aScanner( *this, m_Text, nullptr, ModelToViewHelper(),
|
||||||
WordType::DICTIONARY_WORD,
|
WordType::DICTIONARY_WORD,
|
||||||
nBegin, nEnd );
|
nBegin, nEnd );
|
||||||
while( !pArgs->xSpellAlt.is() && aScanner.NextWord() )
|
bool bNextWord = aScanner.NextWord();
|
||||||
|
while( !pArgs->xSpellAlt.is() && bNextWord )
|
||||||
{
|
{
|
||||||
|
bool bCalledNextWord = false;
|
||||||
|
|
||||||
const OUString& rWord = aScanner.GetWord();
|
const OUString& rWord = aScanner.GetWord();
|
||||||
|
|
||||||
// get next language for next word, consider language attributes
|
// get next language for next word, consider language attributes
|
||||||
@@ -1065,27 +1069,47 @@ bool SwTextNode::Spell(SwSpellArgs* pArgs)
|
|||||||
pArgs->xSpellAlt = nullptr;
|
pArgs->xSpellAlt = nullptr;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
OUString sPrevWord = aScanner.GetPrevWord();
|
||||||
|
auto nWordBegin = aScanner.GetBegin();
|
||||||
|
auto nWordEnd = aScanner.GetEnd();
|
||||||
|
bNextWord = aScanner.NextWord();
|
||||||
|
const OUString& rActualWord = aScanner.GetPrevWord();
|
||||||
|
bCalledNextWord = true;
|
||||||
|
// check space separated word pairs in the dictionary, e.g. "vice versa"
|
||||||
|
if ( !((bNextWord && pArgs->xSpeller->isValid( rActualWord + " " + aScanner.GetWord(),
|
||||||
|
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() )) ||
|
||||||
|
( !sPrevWord.isEmpty() && pArgs->xSpeller->isValid( sPrevWord + " " + rActualWord,
|
||||||
|
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() ))) )
|
||||||
{
|
{
|
||||||
// make sure the selection build later from the data
|
// make sure the selection build later from the data
|
||||||
// below does not include "in word" character to the
|
// below does not include "in word" character to the
|
||||||
// left and right in order to preserve those. Therefore
|
// left and right in order to preserve those. Therefore
|
||||||
// count those "in words" in order to modify the
|
// count those "in words" in order to modify the
|
||||||
// selection accordingly.
|
// selection accordingly.
|
||||||
const sal_Unicode* pChar = rWord.getStr();
|
const sal_Unicode* pChar = aScanner.GetPrevWord().getStr();
|
||||||
sal_Int32 nLeft = 0;
|
sal_Int32 nLeft = 0;
|
||||||
while (*pChar++ == CH_TXTATR_INWORD)
|
while (*pChar++ == CH_TXTATR_INWORD)
|
||||||
++nLeft;
|
++nLeft;
|
||||||
pChar = rWord.getLength() ? rWord.getStr() + rWord.getLength() - 1 : nullptr;
|
pChar = rActualWord.getLength() ? rActualWord.getStr() + rActualWord.getLength() - 1 : nullptr;
|
||||||
sal_Int32 nRight = 0;
|
sal_Int32 nRight = 0;
|
||||||
while (pChar && *pChar-- == CH_TXTATR_INWORD)
|
while (pChar && *pChar-- == CH_TXTATR_INWORD)
|
||||||
++nRight;
|
++nRight;
|
||||||
|
|
||||||
pArgs->pStartPos->Assign(*this, aScanner.GetEnd() - nRight );
|
pArgs->pStartPos->Assign(*this, nWordEnd - nRight );
|
||||||
pArgs->pEndPos->Assign(*this, aScanner.GetBegin() + nLeft );
|
pArgs->pEndPos->Assign(*this, nWordBegin + nLeft );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pArgs->xSpellAlt = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !bCalledNextWord )
|
||||||
|
bNextWord = aScanner.NextWord();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset original text
|
// reset original text
|
||||||
@@ -1342,11 +1366,13 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, sal_Int32 nActPos)
|
|||||||
SwScanner aScanner( *pNode, pNode->GetText(), nullptr, ModelToViewHelper(),
|
SwScanner aScanner( *pNode, pNode->GetText(), nullptr, ModelToViewHelper(),
|
||||||
WordType::DICTIONARY_WORD, nBegin, nEnd);
|
WordType::DICTIONARY_WORD, nBegin, nEnd);
|
||||||
|
|
||||||
while( aScanner.NextWord() )
|
bool bNextWord = aScanner.NextWord();
|
||||||
|
while( bNextWord )
|
||||||
{
|
{
|
||||||
const OUString& rWord = aScanner.GetWord();
|
const OUString& rWord = aScanner.GetWord();
|
||||||
nBegin = aScanner.GetBegin();
|
nBegin = aScanner.GetBegin();
|
||||||
sal_Int32 nLen = aScanner.GetLen();
|
sal_Int32 nLen = aScanner.GetLen();
|
||||||
|
bool bCalledNextWord = false;
|
||||||
|
|
||||||
// get next language for next word, consider language attributes
|
// get next language for next word, consider language attributes
|
||||||
// within the word
|
// within the word
|
||||||
@@ -1364,6 +1390,15 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, sal_Int32 nActPos)
|
|||||||
// to avoid false alarm
|
// to avoid false alarm
|
||||||
((!bRestoreString && !bContainsComments) || !xSpell->isValid( rWord.replaceAll(OUStringChar(CH_TXTATR_INWORD), ""),
|
((!bRestoreString && !bContainsComments) || !xSpell->isValid( rWord.replaceAll(OUStringChar(CH_TXTATR_INWORD), ""),
|
||||||
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() ) ) )
|
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() ) ) )
|
||||||
|
{
|
||||||
|
OUString sPrevWord = aScanner.GetPrevWord();
|
||||||
|
bNextWord = aScanner.NextWord();
|
||||||
|
bCalledNextWord = true;
|
||||||
|
// check space separated word pairs in the dictionary, e.g. "vice versa"
|
||||||
|
if ( !((bNextWord && xSpell->isValid( aScanner.GetPrevWord() + " " + aScanner.GetWord(),
|
||||||
|
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() )) ||
|
||||||
|
(!sPrevWord.isEmpty() && xSpell->isValid( sPrevWord + " " + aScanner.GetPrevWord(),
|
||||||
|
static_cast<sal_uInt16>(eActLang), Sequence< PropertyValue >() ))) )
|
||||||
{
|
{
|
||||||
sal_Int32 nSmartTagStt = nBegin;
|
sal_Int32 nSmartTagStt = nBegin;
|
||||||
sal_Int32 nDummy = 1;
|
sal_Int32 nDummy = 1;
|
||||||
@@ -1391,6 +1426,15 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, sal_Int32 nActPos)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if( bAddAutoCmpl && rACW.GetMinWordLen() <= aScanner.GetPrevWord().getLength() )
|
||||||
|
{
|
||||||
|
// tdf#119695 only add the word if the cursor position is outside the word
|
||||||
|
// so that the incomplete words are not added as autocomplete candidates
|
||||||
|
bool bCursorOutsideWord = nActPos > nBegin + nLen || nActPos < nBegin;
|
||||||
|
if (bCursorOutsideWord)
|
||||||
|
rACW.InsertWord(aScanner.GetPrevWord(), rDoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if( bAddAutoCmpl && rACW.GetMinWordLen() <= rWord.getLength() )
|
else if( bAddAutoCmpl && rACW.GetMinWordLen() <= rWord.getLength() )
|
||||||
{
|
{
|
||||||
// tdf#119695 only add the word if the cursor position is outside the word
|
// tdf#119695 only add the word if the cursor position is outside the word
|
||||||
@@ -1400,6 +1444,9 @@ SwRect SwTextFrame::AutoSpell_(SwTextNode & rNode, sal_Int32 nActPos)
|
|||||||
rACW.InsertWord(rWord, rDoc);
|
rACW.InsertWord(rWord, rDoc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !bCalledNextWord )
|
||||||
|
bNextWord = aScanner.NextWord();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user