/*************************************************************************
 *
 *  $RCSfile: ContextTables.cxx,v $
 *
 *  $Revision: 1.5 $
 *
 *  last change: $Author: abi $ $Date: 2001-07-05 18:50:40 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/

#ifndef _rtl_MEMORY_H_
#include <rtl/memory.h>
#endif
#ifndef _XMLSEARCH_QE_CONTEXTTABLES_HXX_
#include <qe/ContextTables.hxx>
#endif
#ifndef _XMLSEARCH_UTIL_BYTEARRAYDECOMPRESSOR_HXX_
#include <util/Decompressor.hxx>
#endif


using namespace xmlsearch;
using namespace xmlsearch::qe;

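/*
 * A Tables object is a cache entry: it snapshots the context tables
 * that a ContextTables instance currently holds for one document, so
 * that a later setMicroindex() call for the same document can restore
 * them without decompressing the data again.
 */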
Tables::Tables( ContextTables* p )
    : initialWordsCached_( new sal_Int32[ initialWordsCachedL_ = p->initialWordsL_ ] ),
      destsCached_( new sal_Int32[ destsCachedL_ = p->destsL_ ] ),
      linkTypesCached_( new sal_Int32[ linkTypesCachedL_ = p->linkTypesL_ ] ),
      seqNumbersCached_( new sal_Int32[ seqNumbersCachedL_ = p->seqNumbersL_ ] )
{
    rtl_copyMemory( (void*)initialWordsCached_,
                    (void*)p->initialWords_,
                    sizeof(sal_Int32) * p->initialWordsL_ );

    rtl_copyMemory( (void*)destsCached_,
                    (void*)p->dests_,
                    sizeof(sal_Int32) * p->destsL_ );

    rtl_copyMemory( (void*)linkTypesCached_,
                    (void*)p->linkTypes_,
                    sizeof(sal_Int32) * p->linkTypesL_ );

    rtl_copyMemory( (void*)seqNumbersCached_,
                    (void*)p->seqNumbers_,
                    sizeof(sal_Int32) * p->seqNumbersL_ );
}

Tables::~Tables()
{
    delete[] seqNumbersCached_;
    delete[] linkTypesCached_;
    delete[] destsCached_;
    delete[] initialWordsCached_;
}

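/*
 * Hands the cached arrays back to the owning ContextTables instance,
 * releasing whatever tables it currently holds.  Ownership of the
 * arrays is transferred outright: the cached pointers are cleared
 * here, so the same arrays are never freed twice.
 */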
void Tables::setTables( ContextTables* p )
{
    delete[] p->initialWords_;
    p->initialWordsL_ = initialWordsCachedL_;
    p->initialWords_ = initialWordsCached_;
    initialWordsCached_ = 0;

    delete[] p->dests_;
    p->destsL_ = destsCachedL_;
    p->dests_ = destsCached_;
    destsCached_ = 0;

    delete[] p->linkTypes_;
    p->linkTypesL_ = linkTypesCachedL_;
    p->linkTypes_ = linkTypesCached_;
    linkTypesCached_ = 0;

    delete[] p->seqNumbers_;
    p->seqNumbersL_ = seqNumbersCachedL_;
    p->seqNumbers_ = seqNumbersCached_;
    seqNumbersCached_ = 0;

    p->nTextNodes_ = initialWordsCachedL_;
}

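/*
 * offsets[docNo] is the position, within contextData, of the
 * compressed context tables of document docNo; the tables are only
 * decompressed on demand by setMicroindex().
 */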
ContextTables::ContextTables( const std::vector< sal_Int32 >& offsets,
                              sal_Int32 contextDataL,sal_Int8 *contextData,
                              sal_Int32 linkNamesL,rtl::OUString *linkNames )
    : kTable_( 5 ),
      auxArray_( 4096 ),
      lastDocNo_( -1 ),
      offsets_( offsets ),
      contextDataL_( contextDataL ),
      contextData_( contextData ),
      linkNamesL_( linkNamesL ),
      linkNames_( linkNames ),
      cache_( offsets.size() ),
      initialWordsL_( 0 ),
      initialWords_( 0 ),
      destsL_( 0 ),
      dests_( 0 ),
      linkTypesL_( 0 ),
      linkTypes_( 0 ),
      seqNumbersL_( 0 ),
      seqNumbers_( 0 ),
      markersL_( 0 ),
      markers_( 0 )
{
    for( sal_uInt32 i = 0; i < offsets_.size(); ++i )
        cache_[i] = 0;
}

ContextTables::~ContextTables()
{
    delete[] markers_;
    delete[] seqNumbers_;
    delete[] linkTypes_;
    delete[] dests_;
    delete[] initialWords_;

    for( sal_uInt32 i = 0; i < cache_.size(); ++i )
        delete cache_[i];
}

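/*
 * Makes the context tables of docNo current.  A cached Tables entry
 * is restored directly; otherwise the tables are decompressed from
 * contextData_: the byte at offsets_[docNo] is the k parameter for
 * decoding kTable_, which in turn holds the k parameters for the
 * individual streams (initialWords, dests, linkTypes, seqNumbers).
 * dests_ maps each context to its parent, with -1 appended as the
 * sentinel for the root.  The freshly built tables are then cached.
 */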
void ContextTables::setMicroindex( sal_Int32 docNo ) throw( excep::XmlSearchException )
{
    if( docNo != lastDocNo_ )
    {   // check if we need to do anything
        if( cache_[ docNo ] )
            cache_[ docNo ]->setTables( this );
        else
        {
            sal_Int32 offset = offsets_[ docNo ];
            sal_Int32 k0 = contextData_[ offset ] & 0xFF;
            util::ByteArrayDecompressor compr( contextDataL_,contextData_,offset + 1 );
            kTable_.clear();
            compr.decode( k0,kTable_ );

            // decompress initialWords into auxiliary array
            auxArray_.clear();
            compr.ascDecode( kTable_[0],auxArray_ ); // _initialWords

            delete[] initialWords_;
            initialWords_ = new sal_Int32[ initialWordsL_ = auxArray_.size() ];
            sal_Int32 k;
            for( k = 0; k < initialWordsL_; ++k ) //?opt
                initialWords_[k] = auxArray_[k];

            nTextNodes_ = initialWordsL_;

            // decompress destinations into auxiliary array
            auxArray_.clear();
            compr.decode( kTable_[1],auxArray_ ); // _dests
            auxArray_.push_back( -1 ); // sentinel, root

            delete[] dests_;
            dests_ = new sal_Int32[ destsL_ = auxArray_.size() ];
            for( k = 0; k < destsL_; ++k ) //?opt
                dests_[k] = auxArray_[k];

            delete[] linkTypes_;
            linkTypes_ = new sal_Int32[ linkTypesL_ = destsL_ - nTextNodes_ - 1 ];
            compr.decode( kTable_[2],linkTypes_ );

            delete[] seqNumbers_;
            seqNumbers_ = new sal_Int32[ seqNumbersL_ = destsL_ - 1 ];
            compr.decode( kTable_[ 3 ],seqNumbers_ );

            cache_[docNo] = new Tables( this );
        }

        lastDocNo_ = docNo;
        delete[] markers_;
        markers_ = new sal_Int32[ markersL_ = destsL_ ];
    }
    initialWordsIndex_ = 0;
}

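// dests_ holds the parent of every context; the root's parent is the
// -1 sentinel appended in setMicroindex().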
sal_Int32 ContextTables::parentContext( sal_Int32 context )
{
    return dests_[ context ];
}

rtl::OUString ContextTables::linkName( sal_Int32 context )
{
    return linkNames_[ linkTypes_[ context ] ];
}

sal_Int32 ContextTables::linkCode( const rtl::OUString& linkName )
{
    for( sal_Int32 i = 0; i < linkNamesL_; ++i )
        if( linkName == linkNames_[i] )
            return i;
    return -1; // when not found
}

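/*
 * Builds a bool set, indexed by link code, from a list of element
 * names; names that do not occur in linkNames_ are skipped.  The
 * caller owns the returned array (0 if no name was recognized).
 * A sketch of the intended use together with notIgnored() below
 * (hypothetical caller code):
 *
 *   sal_Int32 len = 0;
 *   bool* ignored = tables.getIgnoredElementsSet( len,namesL,names );
 *   if( ! ignored || tables.notIgnored( ctx,len,ignored ) )
 *       ; // process the context
 *   delete[] ignored;
 */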
bool* ContextTables::getIgnoredElementsSet( sal_Int32& len,
                                            const sal_Int32 ignoredElementsL,
                                            const rtl::OUString* ignoredElements )
{
    bool *result = 0;
    if( ignoredElements && ignoredElementsL > 0 )
    {
        for( sal_Int32 i = 0; i < ignoredElementsL; ++i )
        {
            sal_Int32 code = linkCode( ignoredElements[i] );
            if( code > -1 )
            {
                if( ! result )
                {
                    result = new bool[ len = linkNamesL_ ];
                    // zero the set first; the array was previously
                    // left with indeterminate values
                    rtl_zeroMemory( result,sizeof(bool) * linkNamesL_ );
                }

                result[ code ] = true;
            }
        }
    }
    return result;
}

bool ContextTables::notIgnored( sal_Int32 ctx,
                                sal_Int32 ignoredElementsL,bool* ignoredElements )
{
    do
    {
        if( ignoredElements[ linkTypes_[ ctx ] ] )
            return false;
    }
    while( ( ctx = dests_[ ctx ] ) > -1 ); // parentContext 'hand inlined'
    return true;
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with the given linkCode
 */

sal_Int32 ContextTables::firstParentWithCode( const sal_Int32 pos,const sal_Int32 linkCode )
{
    sal_Int32 ctx = dests_[ wordContextLin(pos) ]; // first parent of text node
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    while( linkTypes_[ ctx - shift ] != linkCode )
        if( ( ctx = dests_[ ctx ] ) == limit )
            return -1;
    return ctx;
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with the given linkCode and given parent code
 */

sal_Int32 ContextTables::firstParentWithCode2( sal_Int32 pos,const sal_Int32 linkCode,const sal_Int32 parentCode )
{
    sal_Int32 ctx = dests_[ wordContextLin( pos ) ]; // first parent of text node
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    for( sal_Int32 parent = dests_[ctx]; parent < limit; parent = dests_[ parent ] )
        if( linkTypes_[ parent - shift ] == parentCode && linkTypes_[ ctx - shift ] == linkCode )
            return ctx;
        else
            ctx = parent;
    return -1;
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with the given linkCode and given ancestor code
 */

sal_Int32 ContextTables::firstParentWithCode3( sal_Int32 pos,sal_Int32 linkCode,sal_Int32 ancestorCode )
{
    sal_Int32 ctx = dests_[ wordContextLin( pos ) ];
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    // find first instance of linkCode
    while( ctx < limit && linkTypes_[ ctx - shift ] != linkCode )
        ctx = dests_[ ctx ];
    if( ctx < limit ) // found linkCode, check ancestry
        for( sal_Int32 ancestor = dests_[ctx];
             ancestor < limit;
             ancestor = dests_[ancestor] )
            if( linkTypes_[ ancestor - shift ] == ancestorCode ) // ancestor confirmed
                return ctx; // match found, return successful ctx
    return -1; // match NOT found
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with any of the given linkCodes
 */

sal_Int32 ContextTables::firstParentWithCode4( sal_Int32 pos,sal_Int32 linkCodesL,sal_Int32* linkCodes )
{
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    for( sal_Int32 ctx = dests_[ wordContextLin(pos) ]; ctx < limit; ctx = dests_[ctx] )
    {
        const sal_Int32 code = linkTypes_[ ctx - shift ];
        for( sal_Int32 i = 0; i < linkCodesL; i++ )
            if( code == linkCodes[i] )
                return ctx;
    }
    return -1;
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with the given path
 */

sal_Int32 ContextTables::firstParentWithCode5( sal_Int32 pos,sal_Int32 pathCodesL,sal_Int32* pathCodes )
{
    const sal_Int32 lastCode = pathCodes[ pathCodesL - 1 ];
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    sal_Int32 ctx = dests_[ wordContextLin(pos) ];

    for( sal_Int32 parent = dests_[ctx]; parent < limit; parent = dests_[parent] )
    {
        if( linkTypes_[ ctx - shift ] == lastCode )
        {   // initial match
            // try to match the entire path
            sal_Int32 parent2 = parent;
            for( sal_Int32 i = pathCodesL - 2; i >= 0; i-- )
                if( linkTypes_[ parent2 - shift ] != pathCodes[i] ) // match failure
                    goto SEARCH; // try to match higher up
                else if( ( parent2 = dests_[ parent2 ] ) == limit )
                    return -1;
            return ctx;
        }
        else
            ctx = parent;
    SEARCH: ; // resume the ascent with the next parent; the label must sit at the
              // bottom of the loop, or a failed path match would never advance
    }
    return -1;
}

/*
 * starting with ctx and going up the ancestry tree look for the first
 * context with the given linkCode and sequence number
 */

sal_Int32 ContextTables::firstParentWithCode7( const sal_Int32 pos,const sal_Int32 linkCode,const sal_Int32 seq )
{
    sal_Int32 ctx = dests_[ wordContextLin(pos) ]; // first parent of text node
    const sal_Int32 shift = nTextNodes_;
    const sal_Int32 limit = destsL_ - 1;
    while( linkTypes_[ ctx - shift ] != linkCode || seqNumbers_[ ctx ] != seq )
        if( ( ctx = dests_[ ctx ] ) == limit )
            return -1;
    return ctx;
}

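// a context is 'governing' when its element is a TITLE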
bool ContextTables::isGoverning( sal_Int32 context )
{
    return linkName( context ).equalsAsciiL( RTL_CONSTASCII_STRINGPARAM("TITLE") ) != 0;
}

void ContextTables::resetContextSearch()
{
    initialWordsIndex_ = 0;
}

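/*
 * Maps a word number to the text-node context that contains it.
 * initialWords_ is ascending, so the containing node is the last one
 * whose first word number does not exceed wordNumber.  The scan
 * resumes at initialWordsIndex_, the cached result of the previous
 * call; that makes repeated lookups with non-decreasing word numbers
 * cheap, but callers must resetContextSearch() before starting a new
 * ascending pass.
 */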
sal_Int32 ContextTables::wordContextLin( sal_Int32 wordNumber )
{
    for( sal_Int32 i = initialWordsIndex_; i < nTextNodes_; ++i )
        if( initialWords_[i] > wordNumber )
        {   // first such i
            // - 1 if wordNumbers can be the same
            initialWordsIndex_ = i; // cached to speed up next search
            return i - 1;
        }
    return nTextNodes_ - 1;
}

// void ContextTables::appendSegment( sal_Int32 context,rtl::OUStringBuffer& result )
// {
//     result.append( context < nTextNodes_ ?
//                    rtl::OUString::createFromAscii( "text()" ) :
//                    linkNames_[ linkTypes_[ context - nTextNodes_ ] ] );
//     result.append( sal_Unicode( '[' ) );
//     result.append( seqNumbers_[ context ] );
//     result.append( sal_Unicode( "]/" ) );
// }

// /*
//  * XPath (forking) location of the hit
//  */

// void ContextTables::hitLocation( sal_Int32 termsL,rtl::OUString* terms,
//                                  sal_Int32 matchesL,sal_Int32* matches,
//                                  StringBuffer& result )
// {
//     const sal_Int32 N = termsL;
//     std::vector< sal_Int32 > stacks( N );
//     sal_Int32* wordNumbers = new sal_Int32[N];
//     std::vector< sal_Int32 > stack;
//     sal_Int32 lastInitialWordIndex = -1;
//     sal_Int32 pattern = 0,context = 0,nPopped = 0,pathStart = 0,pathEnd = 0;
//     for( sal_Int32 i = 0,marker = 1; i < N; i++,marker <<= 1 )
//         if ( terms[i] )
//         {
//             const sal_Int32 wordNumber = matches[i*2 + 1];
//             const sal_Int32 initialWordIndex = findIndexBin(wordNumber);
//             wordNumbers[i] = wordNumber - initialWords_[initialWordIndex] + 1;
//             if( initialWordIndex == lastInitialWordIndex ) // save work
//                 ; // do nothing, path will be reused
//             else
//             {
//                 pattern |= marker;
//                 stack = stacks[i] = new IntegerArray();
//
//                 context = initialWordIndex;
//                 do
//                 {
//                     const sal_Int32 parent = dests_[context];
//                     if( parent != -1 )
//                     {
//                         stack.add( context );
//                         markers_[context] |= marker;
//                         context = parent;
//                     }
//                     else
//                         break;
//                 }
//                 while( true );
//                 lastInitialWordIndex = initialWordIndex;
//             }
//         }
//
//     // find and output common path
//     // process first non-missing match
//
//     sal_Int32 i = 0, marker = 1, nMissing = 0;
//
//     // find first non-missing matching term
//     // there has to be at least one if the hit was built
//     // count potential leading missing terms to output appropriate elements
//     // before outputting elements for matches
//
//     for ( ; i < N; i++,marker <<= 1 )
//         if (terms[i] != null)
//         {
//             result.append( rtl::OUString::createFromAscii( "<Matches path=\"" ) );
//             stack = stacks[i];
//             while (stack.size() > 0)
//             {
//                 context = stack.popLast();
//                 if ( markers_[context] == pattern )
//                 {
//                     markers_[context] = 0;
//                     appendSegment( context,result ); // belongs to common
//                     context = -1; // used
//                     ++nPopped;
//                 }
//                 else
//                     break;
//             }
//             // end of 'matches' && common path
//             result.append("\">");
//             // output elements for any leading missingTerms
//             while (--nMissing >= 0)
//                 result.append("<MissingTerm/>");
//
//             result.append("<Match term=\"");
//             result.append(terms[i]);
//             result.append("\" path=\"");
//             pathStart = result.getLength();
//             if (context != -1)
//             {
//                 appendSegment(context, result);
//                 markers_[context] = 0;
//             }
//             while (stack.size() > 0 )
//             {
//                 context = stack.popLast();
//                 appendSegment(context, result);
//                 markers_[context] = 0;
//             }
//
//             pathEnd = result.length();
//
//             result.append("\" tokenNumber=\"");
//             result.append(wordNumbers[i]);
//             result.append("]\"/>");
//
//             break; // just the first non-zero
//         }
//         else
//             ++nMissing; // only count leading missing terms
//
//     // process the remaining matches
//     for (i++, marker <<= 1 ; i < N; i++, marker <<= 1)
//         if (terms[i] != null) {
//             result.append("<Match term=\"");
//             result.append(terms[i]);
//             result.append("\" path=\"");
//             stack = stacks[i];
//             if (stack == null) // reuse previously generated path
//                 result.append(result.substring(pathStart, pathEnd));
//             else {
//                 stack.pop(nPopped);
//                 pathStart = result.length();
//                 while (stack.cardinality() > 0) {
//                     context = stack.popLast();
//                     appendSegment(context, result);
//                     _markers[context] = 0;
//                 }
//                 pathEnd = result.length();
//             }
//             result.append("\" tokenNumber=\"");
//             result.append(wordNumbers[i]);
//             result.append("]\"/>");
//         }
//         else
//             result.append("<MissingTerm/>");
//     result.append("</Matches>");
// }

// /*
//  * QueryHitData is initialized in the caller
//  * this function fills the commonPath for all matching terms
//  * and relative paths for the individual terms
//  */

// void ContextTables::hitLocation(String[] terms, sal_Int32[] matches, QueryHitData data) {
//     StringBuffer buffer = new StringBuffer(512);
//     const sal_Int32 N = terms.length;
//     IntegerArray[] stacks = new IntegerArray[N];
//     sal_Int32[] wordNumbers = new sal_Int32[N];
//     IntegerArray stack;
//     sal_Int32 lastInitialWordIndex = -1;
//     sal_Int32 pattern = 0, nPopped = 0, pathStart = 0, pathEnd = 0;
//     for (sal_Int32 i = 0, marker = 1; i < N; i++, marker <<= 1)
//         if (terms[i] != null) {
//             const sal_Int32 wordNumber = matches[i*2 + 1];
//             const sal_Int32 initialWordIndex = findIndexBin(wordNumber);
//             wordNumbers[i] = wordNumber - _initialWords[initialWordIndex] + 1;
//             if (initialWordIndex == lastInitialWordIndex) // save work
//                 ; // do nothing, path will be reused
//             else {
//                 pattern |= marker;
//                 stack = stacks[i] = new IntegerArray();
//                 for (sal_Int32 ctx = initialWordIndex;;) {
//                     const sal_Int32 parent = _dests[ctx];
//                     if (parent != -1) {
//                         stack.add(ctx);
//                         _markers[ctx] |= marker;
//                         ctx = parent;
//                     }
//                     else
//                         break;
//                 }
//                 lastInitialWordIndex = initialWordIndex;
//             }
//         }
//     // find and output common path
//     // process first match
//     StringBuffer path = new StringBuffer(256);
//     String previousPath = null; // we may be copying subpaths from it
//     sal_Int32 i = 0, marker = 1;
//     for ( ; i < N; i++, marker <<= 1)
//         if (terms[i] != null) {
//             sal_Int32 context = 0;
//             stack = stacks[i];
//             while (stack.cardinality() > 0) {
//                 context = stack.popLast();
//                 if (_markers[context] == pattern) {
//                     _markers[context] = 0;
//                     appendSegment(context, path); // belongs to common
//                     context = -1; // used
//                     ++nPopped;
//                 }
//                 else
//                     break;
//             }
//             data.setCommonPath(path.toString());
//             // end of 'matches' && common path
//             path.setLength(0); // will now be used for relative paths
//             pathStart = 0;
//             if (context != -1) {
//                 appendSegment(context, path);
//                 _markers[context] = 0;
//             }
//             while (stack.cardinality() > 0) {
//                 context = stack.popLast();
//                 appendSegment(context, path);
//                 _markers[context] = 0;
//             }
//             pathEnd = path.length();
//             data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]);
//             break; // just the first non-zero
//         }
//
//     // process the remaining matches
//     for (i++, marker <<= 1 ; i < N; i++, marker <<= 1)
//         if (terms[i] != null) {
//             path.setLength(0);
//             stack = stacks[i];
//             if (stack == null) // reuse previously generated path
//                 path.append(previousPath.substring(pathStart, pathEnd));
//             else {
//                 stack.pop(nPopped);
//                 pathStart = path.length();
//                 while (stack.cardinality() > 0) {
//                     const sal_Int32 context = stack.popLast();
//                     appendSegment(context, path);
//                     _markers[context] = 0;
//                 }
//                 pathEnd = path.length();
//             }
//             data.setMatchLocation(i, previousPath = path.toString(), wordNumbers[i]);
//         }
// }

// private sal_Int32 ContextTables::findIndexBin(const sal_Int32 wordNumber) {
//     sal_Int32 i = 0, j = _nTextNodes - 1;
//     while (i <= j) {
//         const sal_Int32 k = (i + j) >>> 1;
//         if (_initialWords[k] < wordNumber)
//             i = k + 1;
//         else if (_initialWords[k] > wordNumber)
//             j = k - 1;
//         else
//             return k;
//     }
//     return i - 1;
// }

/*
public void addGoverningFiller(int query, RoleFiller rf, int parent) {
    // !!! for now assume just one query
    GoverningContext gc = null;
    if (_governingContexts[parent] == null) {
        // find parent governing context
        for (int c = _dests[parent]; ; c = _dests[c])
            if (_governingContexts[c] != null || c == 0) {
                // System.out.println("parent found at " + c);
                gc = new GoverningContext(c, rf);
                break;
            }
    }
    else
        gc = new GoverningContext(_governingContexts[parent], rf);
    _governingContexts[parent] = gc;
}
*/