// File: libreoffice/lingucomponent/source/lingutil/lingutil.cxx (269 lines, 9.0 KiB, C++)

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
*
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_lingucomponent.hxx"
#if defined(WNT)
#include <tools/prewin.h>
#endif
#if defined(WNT)
#include <Windows.h>
#endif
#if defined(WNT)
#include <tools/postwin.h>
#endif
#include <osl/thread.h>
#include <osl/file.hxx>
#include <tools/debug.hxx>
#include <tools/urlobj.hxx>
#include <i18npool/mslangid.hxx>
#include <unotools/lingucfg.hxx>
#include <unotools/pathoptions.hxx>
#include <rtl/ustring.hxx>
#include <rtl/string.hxx>
#include <linguistic/misc.hxx>
#include <set>
#include <vector>
#include <string.h>
#include <lingutil.hxx>
#include <dictmgr.hxx>
#include <sal/macros.h>
using ::com::sun::star::lang::Locale;
using namespace ::com::sun::star;
#if 0
//////////////////////////////////////////////////////////////////////
// Derives a directory URL from the given file URL by parsing it as a
// file-protocol URL and removing one path segment from it.
// Currently compiled out (enclosed in '#if 0').
String GetDirectoryPathFromFileURL( const String &rFileURL )
{
    // interpret the input as a file URL
    INetURLObject aParsedURL;
    aParsedURL.SetSmartProtocol( INET_PROT_FILE );
    aParsedURL.SetSmartURL( rFileURL );

    // strip a segment (removeSegment is called with its default arguments)
    aParsedURL.removeSegment();
    DBG_ASSERT( !aParsedURL.HasError(), "invalid URL" );

    return aParsedURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
}
#endif
#if defined(WNT)
// Returns the Windows short (8.3) form of rLongPathName, converted to the
// thread's text encoding. An empty string is returned if the short path
// could not be obtained or does not fit into the internal buffer.
rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
{
    rtl::OString aRes;

    sal_Unicode aShortBuffer[1024] = {0};
    const sal_Int32 nShortBufSize = SAL_N_ELEMENTS( aShortBuffer );

    // use the version of 'GetShortPathName' that can deal with Unicode...
    const sal_Int32 nShortLen = GetShortPathNameW(
            reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
            reinterpret_cast<LPWSTR>( aShortBuffer ),
            nShortBufSize );

    if (nShortLen == 0)
    {
        // a return value of 0 signals that the API call itself failed
        // (e.g. the path does not exist); do not treat that as success
        DBG_ERROR( "Win_GetShortPathName: GetShortPathNameW failed" );
    }
    else if (nShortLen < nShortBufSize) // conversion successful?
    {
        aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
    }
    else
    {
        // the API reported a required size larger than our buffer
        DBG_ERROR( "Win_GetShortPathName: buffer to short" );
    }

    return aRes;
}
#endif //defined(WNT)
//////////////////////////////////////////////////////////////////////
// Build the list of old style dictionaries (i.e. dictionaries that are not
// installed as extensions) to use for the given dictionary type.
//
// pDicType selects the kind of dictionary: "DICT" (spelling), "HYPH"
// (hyphenation) or "THES" (thesaurus). A null pointer or any other value
// yields an empty result.
//
// If SYSTEM_DICTS is defined, the matching system directory is scanned for
// files named <prefix><lang>[_<region>]<suffix>. For each language only the
// first dictionary encountered is added to the result. Without SYSTEM_DICTS
// the result is always empty.
//
// NOTE(review): the previous comment stated that user installed dictionaries
// take precedence over system installed ones; the code visible here only
// scans the system directory - confirm whether user paths are handled
// elsewhere.
std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
{
    std::vector< SvtLinguConfigDictionaryEntry > aRes;

    if (!pDicType)
        return aRes;

    rtl::OUString aFormatName;
    String aDicExtension;
#ifdef SYSTEM_DICTS
    rtl::OUString aSystemDir;
    rtl::OUString aSystemPrefix;
    rtl::OUString aSystemSuffix;
#endif
    if (strcmp( pDicType, "DICT" ) == 0)
    {
        aFormatName = A2OU("DICT_SPELL");
        aDicExtension = String::CreateFromAscii( ".dic" );
#ifdef SYSTEM_DICTS
        aSystemDir = A2OU( DICT_SYSTEM_DIR );
        aSystemSuffix = aDicExtension;
#endif
    }
    else if (strcmp( pDicType, "HYPH" ) == 0)
    {
        aFormatName = A2OU("DICT_HYPH");
        aDicExtension = String::CreateFromAscii( ".dic" );
#ifdef SYSTEM_DICTS
        aSystemDir = A2OU( HYPH_SYSTEM_DIR );
        aSystemPrefix = A2OU( "hyph_" );
        aSystemSuffix = aDicExtension;
#endif
    }
    else if (strcmp( pDicType, "THES" ) == 0)
    {
        aFormatName = A2OU("DICT_THES");
        aDicExtension = String::CreateFromAscii( ".dat" );
#ifdef SYSTEM_DICTS
        aSystemDir = A2OU( THES_SYSTEM_DIR );
        aSystemPrefix = A2OU( "th_" );
        aSystemSuffix = A2OU( "_v2.dat" );
#endif
    }

    // unknown dictionary type requested
    if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
        return aRes;

    // set of languages to remember the language where it is already
    // decided to make use of the dictionary.
    std::set< LanguageType > aDicLangInUse;

#ifdef SYSTEM_DICTS
    osl::Directory aSystemDicts(aSystemDir);
    if (aSystemDicts.open() == osl::FileBase::E_None)
    {
        osl::DirectoryItem aItem;
        osl::FileStatus aFileStatus(FileStatusMask_FileURL);
        while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
        {
            // without a valid status aFileStatus does not describe the
            // current entry, so there is nothing sensible to evaluate
            if (aItem.getFileStatus(aFileStatus) != osl::FileBase::E_None)
                continue;

            const rtl::OUString sPath = aFileStatus.getFileURL();

            // the URL has to end with the expected suffix. The explicit
            // range check keeps a "not found" result (-1) of lastIndexOf
            // from being mistaken for a match when the URL happens to be
            // one character shorter than the suffix.
            const sal_Int32 nSuffixPos = sPath.getLength() - aSystemSuffix.getLength();
            if (nSuffixPos < 0 || sPath.lastIndexOf(aSystemSuffix) != nSuffixPos)
                continue;

            // the file name (text after the last '/') has to start with
            // the expected prefix
            const sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
            if (!sPath.match(aSystemPrefix, nStartIndex))
                continue;

            // sChunk is the URL without the suffix; the language (and the
            // optional region) follow directly after the prefix
            const rtl::OUString sChunk = sPath.copy(0, nSuffixPos);
            sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();

            // prefix and suffix overlap (e.g. "th_v2.dat"): there is no
            // room left for a language name
            if (nIndex > sChunk.getLength())
                continue;

            // getToken advances nIndex behind the '_' that ended the token,
            // or sets it to -1 if there is no further token
            const rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
            if (!sLang.getLength())
                continue;

            rtl::OUString sRegion;
            if (nIndex != -1)
                sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );

            // Thus we first get the language of the dictionary
            const LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
                    sLang, sRegion );

            // remember the new language in use; insert() reports via
            // '.second' whether the language was not present before
            if (aDicLangInUse.insert( nLang ).second)
            {
                // add the dictionary to the resulting vector
                SvtLinguConfigDictionaryEntry aDicEntry;
                aDicEntry.aLocations.realloc(1);
                aDicEntry.aLocaleNames.realloc(1);
                const rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
                aDicEntry.aLocations[0] = sPath;
                aDicEntry.aFormatName = aFormatName;
                aDicEntry.aLocaleNames[0] = aLocaleName;
                aRes.push_back( aDicEntry );
            }
        }
    }
#endif

    return aRes;
}
void MergeNewStyleDicsAndOldStyleDics(
std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
{
// get list of languages supported by new style dictionaries
std::set< LanguageType > aNewStyleLanguages;
std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt)
{
const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
sal_Int32 nLocaleNames = aLocaleNames.getLength();
for (sal_Int32 k = 0; k < nLocaleNames; ++k)
{
LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
aNewStyleLanguages.insert( nLang );
}
}
// now check all old style dictionaries if they will add a not yet
// added language. If so add them to the resulting vector
std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2)
{
sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
// old style dics should only have one language listed...
DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
if (nOldStyleDics > 0)
{
LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
{
DBG_ERROR( "old style dictionary with invalid language found!" );
continue;
}
// language not yet added?
if (aNewStyleLanguages.count( nLang ) == 0)
rNewStyleDics.push_back( *aIt2 );
}
else
{
DBG_ERROR( "old style dictionary with no language found!" );
}
}
}
//////////////////////////////////////////////////////////////////////
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */