Put the dict_ja and _zh data in files instead of code for iOS
Map the file(s) into memory on demand. The executable file of an app needs to be as small as possible. Including additional data files in an app bundle is fine. Change-Id: Ife9bfe99a2cf0473d459f38f50dfa3304b39e282
This commit is contained in:
@@ -11,6 +11,20 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/breakiterator))
|
||||
|
||||
i18npool_BIDIR := $(call gb_CustomTarget_get_workdir,i18npool/breakiterator)
|
||||
|
||||
ifeq ($(OS),IOS)
|
||||
|
||||
$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
|
||||
$(i18npool_BIDIR)/dict_ja.data $(i18npool_BIDIR)/dict_zh.data $(i18npool_BIDIR)/OpenOffice_dat.c
|
||||
|
||||
# Generate the binary dictionary file dict_<lang>.data from <lang>.dic with
# the gendict build tool. gendict is invoked as: <input .dic> <output> <lang>.
# The language argument is derived from the target name, so the pattern must
# use this rule's own ".data" suffix; with the ".cxx" suffix of the sibling
# rule the patsubst would not match and the full target path would be passed.
$(i18npool_BIDIR)/dict_%.data : \
		$(SRCDIR)/i18npool/source/breakiterator/data/%.dic \
		$(call gb_Executable_get_runtime_dependencies,gendict) \
		| $(i18npool_BIDIR)/.dir
	$(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),DIC,1)
	$(call gb_Helper_abbreviate_dirs,\
		$(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.data,%,$@))
|
||||
else
|
||||
|
||||
$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
|
||||
$(i18npool_BIDIR)/dict_ja.cxx $(i18npool_BIDIR)/dict_zh.cxx $(i18npool_BIDIR)/OpenOffice_dat.c
|
||||
|
||||
@@ -22,6 +36,8 @@ $(i18npool_BIDIR)/dict_%.cxx : \
|
||||
$(call gb_Helper_abbreviate_dirs,\
|
||||
$(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.cxx,%,$@))
|
||||
|
||||
endif
|
||||
|
||||
i18npool_BRKTXTS := \
|
||||
char_in.brk \
|
||||
char.brk \
|
||||
|
@@ -14,6 +14,14 @@ $(eval $(call gb_Executable_use_libraries,gendict,\
|
||||
$(gb_UWINAPI) \
|
||||
))
|
||||
|
||||
# For the build-side gendict only: if config_host.mk contains "OS=IOS",
# compile gendict with DICT_JA_ZH_IN_DATAFILE defined.
# NOTE(review): presumably grep is used because $(OS) on the build side does
# not name the host (target) platform - confirm.
ifeq ($(gb_Side),build)
|
||||
ifneq ($(shell grep OS=IOS $(BUILDDIR)/config_host.mk),)
|
||||
$(eval $(call gb_Executable_add_cxxflags,gendict,\
|
||||
-DDICT_JA_ZH_IN_DATAFILE \
|
||||
))
|
||||
endif
|
||||
endif
|
||||
|
||||
$(eval $(call gb_Executable_add_exception_objects,gendict,\
|
||||
i18npool/source/breakiterator/gendict \
|
||||
))
|
||||
|
@@ -35,6 +35,12 @@ $(eval $(call gb_Library_use_externals,i18npool,\
|
||||
icu_headers \
|
||||
))
|
||||
|
||||
# Define DICT_JA_ZH_IN_DATAFILE for the i18npool library when the target OS
# is iOS.
ifeq ($(OS),IOS)
|
||||
$(eval $(call gb_Library_add_cxxflags,i18npool,\
|
||||
-DDICT_JA_ZH_IN_DATAFILE \
|
||||
))
|
||||
endif
|
||||
|
||||
$(eval $(call gb_Library_add_exception_objects,i18npool,\
|
||||
i18npool/source/breakiterator/breakiterator_cjk \
|
||||
i18npool/source/breakiterator/breakiterator_ctl \
|
||||
|
@@ -1,6 +1,19 @@
|
||||
Internationalisation (i18npool) framework ensures that the suite is adaptable to the requirements of different
|
||||
native languages, their local settings and customs, etc without source code modification.
|
||||
native languages, their local settings and customs, etc without source code modification. (Wow, that is such marketing-speak...)
|
||||
|
||||
Specifically for locale data documentation please see i18npool/source/localedata/data/locale.dtd
|
||||
|
||||
See also [http://wiki.documentfoundation.org/Category:I18n]
|
||||
|
||||
On iOS we put the largest data generated here, the dict_ja and dict_zh
|
||||
stuff, into separate files and not into code to keep the size of an
|
||||
app binary down. Temporary test code:
|
||||
|
||||
static bool beenhere = false;
|
||||
if (!beenhere) {
|
||||
beenhere = true;
|
||||
uno::Reference< uno::XComponentContext > xComponentContext(::cppu::defaultBootstrap_InitialComponentContext());
|
||||
uno::Reference< lang::XMultiComponentFactory > xMultiComponentFactoryClient( xComponentContext->getServiceManager() );
|
||||
uno::Reference< uno::XInterface > xInterface =
|
||||
xMultiComponentFactoryClient->createInstanceWithContext( "com.sun.star.i18n.BreakIterator_ja", xComponentContext );
|
||||
}
|
||||
|
@@ -17,7 +17,6 @@
|
||||
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
@@ -32,6 +31,22 @@ using std::vector;
|
||||
|
||||
using namespace ::rtl;
|
||||
|
||||
// For iOS, where we must strive for a minimal executable size, we
|
||||
// keep the data produced by this utility not as large const tables in
|
||||
// source code but instead as separate data files, to be bundled with
|
||||
// an app, and mmapped in at run time.
|
||||
|
||||
// To test this more easily on a desktop OS, just make sure
|
||||
// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool.
|
||||
|
||||
// Positions (as reported by ftell()) of the five tables inside the generated
// data file. Each print*() routine stores the current file position here
// right before it writes its table; main() appends the five values, in this
// declaration order, at the end of the file.
#ifdef DICT_JA_ZH_IN_DATAFILE
|
||||
static sal_Int64 dataAreaOffset = 0;
|
||||
static sal_Int64 lenArrayOffset = 0;
|
||||
static sal_Int64 index1Offset = 0;
|
||||
static sal_Int64 index2Offset = 0;
|
||||
static sal_Int64 existMarkOffset = 0;
|
||||
#endif
|
||||
|
||||
/* Utility gendict:
|
||||
|
||||
"BreakIterator_CJK provides input string caching and dictionary searching for
|
||||
@@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index)
|
||||
|
||||
/* Emit the preamble of the generated source file: the "generated file"
 * banner followed by the sal/types.h include.
 *
 * When DICT_JA_ZH_IN_DATAFILE is defined nothing is written and source_fp
 * is intentionally left unused. */
static inline void printIncludes(FILE* source_fp)
{
#ifdef DICT_JA_ZH_IN_DATAFILE
    (void) source_fp;
#else
    static const char banner[] =
        "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n";
    static const char includeLine[] = "#include <sal/types.h>\n\n";

    fputs(banner, source_fp);
    fputs(includeLine, source_fp);
#endif
}
|
||||
|
||||
static inline void printFunctions(FILE* source_fp, const char *lang)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp);
|
||||
fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
|
||||
fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp);
|
||||
@@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang)
|
||||
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang);
|
||||
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang);
|
||||
fputs ("#endif\n", source_fp);
|
||||
#else
|
||||
(void) source_fp;
|
||||
(void) lang;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
|
||||
{
|
||||
// generate main dict. data array
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
|
||||
#else
|
||||
dataAreaOffset = ftell(source_fp);
|
||||
#endif
|
||||
sal_Char str[1024];
|
||||
sal_uInt32 lenArrayCurr = 0;
|
||||
sal_Unicode current = 0;
|
||||
@@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
|
||||
// first character is stored in charArray, so start from second
|
||||
for (i = 1; i < len; i++, lenArrayCurr++) {
|
||||
set_exists(u[i]);
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "0x%04x, ", u[i]);
|
||||
if ((lenArrayCurr & 0x0f) == 0x0f)
|
||||
fputs("\n\t", source_fp);
|
||||
#else
|
||||
fwrite(&u[i], sizeof(u[i]), 1, source_fp);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
lenArray.push_back( lenArrayCurr ); // store last ending pointer
|
||||
charArray[current+1] = lenArray.size();
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("\n};\n", source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
|
||||
fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
|
||||
#else
|
||||
lenArrayOffset = ftell(source_fp);
|
||||
sal_uInt32 zero(0);
|
||||
fwrite(&zero, sizeof(zero), 1, source_fp);
|
||||
#endif
|
||||
for (size_t k = 0; k < lenArray.size(); k++)
|
||||
{
|
||||
if( !(k & 0xf) )
|
||||
fputs("\n\t", source_fp);
|
||||
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
|
||||
#else
|
||||
fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("\n};\n", source_fp );
|
||||
#endif
|
||||
}
|
||||
|
||||
/* FIXME?: what happens if in every range i there is at least one charArray != 0
|
||||
@@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA
|
||||
=> then in index2, the last range will be ignored incorrectly */
|
||||
static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
|
||||
#else
|
||||
index1Offset = ftell(source_fp);
|
||||
#endif
|
||||
|
||||
sal_Int16 count = 0;
|
||||
for (sal_Int32 i = 0; i < 0x100; i++) {
|
||||
sal_Int32 j = 0;
|
||||
while( j < 0x100 && charArray[(i<<8) + j] == 0)
|
||||
j++;
|
||||
|
||||
fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
|
||||
set[i] = (j < 0x100 ? count++ : 0xff);
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "0x%02x, ", set[i]);
|
||||
if ((i & 0x0f) == 0x0f)
|
||||
fputs ("\n\t", source_fp);
|
||||
#else
|
||||
fwrite(&set[i], sizeof(set[i]), 1, source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("};\n", source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
|
||||
#else
|
||||
index2Offset = ftell(source_fp);
|
||||
#endif
|
||||
sal_Int32 prev = 0;
|
||||
for (sal_Int32 i = 0; i < 0x100; i++) {
|
||||
if (set[i] != 0xff) {
|
||||
@@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
|
||||
k++;
|
||||
|
||||
prev = charArray[(i<<8) + j];
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0));
|
||||
if ((j & 0x0f) == 0x0f)
|
||||
fputs ("\n\t", source_fp);
|
||||
#else
|
||||
sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0);
|
||||
fwrite(&n, sizeof(n), 1, source_fp);
|
||||
#endif
|
||||
}
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs ("\n\t", source_fp);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs ("\n};\n", source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Generates a bitmask for the existance of sal_Unicode values in dictionary;
|
||||
it packs 8 sal_Bool values in 1 sal_uInt8 */
|
||||
static inline void printExistsMask(FILE *source_fp)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
|
||||
#else
|
||||
existMarkOffset = ftell(source_fp);
|
||||
#endif
|
||||
for (unsigned int i = 0; i < 0x2000; i++)
|
||||
{
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fprintf(source_fp, "0x%02x, ", exists[i]);
|
||||
if ( (i & 0xf) == 0xf )
|
||||
fputs("\n\t", source_fp);
|
||||
#else
|
||||
fwrite(&exists[i], sizeof(exists[i]), 1, source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("\n};\n", source_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
|
||||
@@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
|
||||
sal_Int16 set[0x100];
|
||||
|
||||
printIncludes(source_fp);
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("extern \"C\" {\n", source_fp);
|
||||
printDataArea(dictionary_fp, source_fp, lenArray);
|
||||
printLenArray(source_fp, lenArray);
|
||||
printIndex1(source_fp, set);
|
||||
printIndex2(source_fp, set);
|
||||
printExistsMask(source_fp);
|
||||
printFunctions(source_fp, argv[3]);
|
||||
#endif
|
||||
printDataArea(dictionary_fp, source_fp, lenArray);
|
||||
printLenArray(source_fp, lenArray);
|
||||
printIndex1(source_fp, set);
|
||||
printIndex2(source_fp, set);
|
||||
printExistsMask(source_fp);
|
||||
printFunctions(source_fp, argv[3]);
|
||||
#ifndef DICT_JA_ZH_IN_DATAFILE
|
||||
fputs("}\n", source_fp);
|
||||
#else
|
||||
// Put pointers to the tables at the end of the file...
|
||||
fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp);
|
||||
fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp);
|
||||
fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp);
|
||||
fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp);
|
||||
fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp);
|
||||
#endif
|
||||
|
||||
fclose(dictionary_fp);
|
||||
fclose(source_fp);
|
||||
|
@@ -17,28 +17,22 @@
|
||||
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
||||
*/
|
||||
|
||||
#include <config_folders.h>
|
||||
|
||||
// xdictionary.cpp: implementation of the xdictionary class.
|
||||
|
||||
|
||||
|
||||
|
||||
#include <osl/file.h>
|
||||
#include <rtl/ustrbuf.hxx>
|
||||
|
||||
#include <rtl/bootstrap.hxx>
|
||||
#include <com/sun/star/i18n/WordType.hpp>
|
||||
#include <xdictionary.hxx>
|
||||
#include <unicode/uchar.h>
|
||||
#include <string.h>
|
||||
#include <breakiteratorImpl.hxx>
|
||||
|
||||
|
||||
// Construction/Destruction
|
||||
|
||||
|
||||
|
||||
namespace com { namespace sun { namespace star { namespace i18n {
|
||||
|
||||
#ifndef DISABLE_DYNLOADING
|
||||
#ifdef DICT_JA_ZH_IN_DATAFILE
|
||||
|
||||
#elif !defined DISABLE_DYNLOADING
|
||||
|
||||
extern "C" { static void SAL_CALL thisModule() {} }
|
||||
|
||||
@@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) :
|
||||
boundary(),
|
||||
japaneseWordBreak( sal_False )
|
||||
{
|
||||
index1 = 0;
|
||||
#ifndef DISABLE_DYNLOADING
|
||||
existMark = NULL;
|
||||
index1 = NULL;
|
||||
index2 = NULL;
|
||||
lenArray = NULL;
|
||||
dataArea = NULL;
|
||||
|
||||
#ifdef DICT_JA_ZH_IN_DATAFILE
|
||||
|
||||
if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 )
|
||||
{
|
||||
OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" );
|
||||
rtl::Bootstrap::expandMacros(sUrl);
|
||||
|
||||
if( strcmp( lang, "ja" ) == 0 )
|
||||
sUrl += "ja.data";
|
||||
else if( strcmp( lang, "zh" ) == 0 )
|
||||
sUrl += "zh.data";
|
||||
|
||||
oslFileHandle aFileHandle;
|
||||
sal_uInt64 nFileSize;
|
||||
char *pMapping;
|
||||
if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None &&
|
||||
osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None &&
|
||||
osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None )
|
||||
{
|
||||
// We have the offsets to the parts of the file at its end, see gendict.cxx
|
||||
sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize);
|
||||
|
||||
existMark = (sal_uInt8*) (pMapping + pEOF[-1]);
|
||||
index2 = (sal_Int32*) (pMapping + pEOF[-2]);
|
||||
index1 = (sal_Int16*) (pMapping + pEOF[-3]);
|
||||
lenArray = (sal_Int32*) (pMapping + pEOF[-4]);
|
||||
dataArea = (sal_Unicode*) (pMapping + pEOF[-5]);
|
||||
}
|
||||
}
|
||||
|
||||
#elif !defined DISABLE_DYNLOADING
|
||||
|
||||
#ifdef SAL_DLLPREFIX
|
||||
OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh)
|
||||
aBuf.appendAscii( SAL_DLLPREFIX );
|
||||
@@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) :
|
||||
func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData );
|
||||
dataArea = (sal_Unicode*) (*func)();
|
||||
}
|
||||
else
|
||||
{
|
||||
existMark = NULL;
|
||||
index1 = NULL;
|
||||
index2 = NULL;
|
||||
lenArray = NULL;
|
||||
dataArea = NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if( strcmp( lang, "ja" ) == 0 ) {
|
||||
existMark = getExistMark_ja();
|
||||
index1 = getIndex1_ja();
|
||||
@@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) :
|
||||
lenArray = getLenArray_zh();
|
||||
dataArea = getDataArea_zh();
|
||||
}
|
||||
else
|
||||
{
|
||||
existMark = NULL;
|
||||
index1 = NULL;
|
||||
index2 = NULL;
|
||||
lenArray = NULL;
|
||||
dataArea = NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for (sal_Int32 i = 0; i < CACHE_MAX; i++)
|
||||
|
@@ -59,6 +59,9 @@ TiledLibreOffice_setup:
|
||||
mkdir -p $(TiledLibreOffice_resource)/share/config
|
||||
cp -R $(INSTDIR)/share/config/soffice.cfg $(TiledLibreOffice_resource)/share/config
|
||||
|
||||
# Japanese and Chinese dict files
|
||||
cp $(WORKDIR)/CustomTarget/i18npool/breakiterator/dict_*.data $(TiledLibreOffice_resource)/share
|
||||
|
||||
# "registry"
|
||||
cp -R $(INSTDIR)/share/registry $(TiledLibreOffice_resource)/share
|
||||
|
||||
|
Reference in New Issue
Block a user