Files
libreoffice/xmlhelp/source/cxxhelp/qe/Search.cxx
Vladimir Glazounov aad0d81520 INTEGRATION: CWS ab41_SRC680 (1.9.46); FILE MERGED
2007/08/29 09:44:26 ab 1.9.46.1: #i80952# Applied patch to avoid memory double free
2007-08-30 14:42:59 +00:00

726 lines
20 KiB
C++

/*************************************************************************
*
* OpenOffice.org - a multi-platform office productivity suite
*
* $RCSfile: Search.cxx,v $
*
* $Revision: 1.10 $
*
* last change: $Author: vg $ $Date: 2007-08-30 15:42:59 $
*
* The Contents of this file are made available subject to
* the terms of GNU Lesser General Public License Version 2.1.
*
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2005 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
************************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_xmlhelp.hxx"
#ifndef _XMLSEARCH_QE_SEARCH_HXX_
#include <qe/Search.hxx>
#endif
using namespace std;
using namespace xmlsearch;
using namespace xmlsearch::qe;
/******************************************************************************/
/* */
/* Emtpy ConceptData/Query */
/* */
/******************************************************************************/
class EmptyConceptData
: public ConceptData
{
public:
virtual void generateFillers( std::vector< RoleFiller* >& , sal_Int32 ) { }
}; // end class EmptyQuery
class EmptyQuery
: public Query
{
public:
EmptyQuery()
: Query( 0,0,0,0,0 ),
conceptDataInstance_( )
{
}
ConceptData* makeConceptData( sal_Int32 col,
sal_Int32 concept,
double penalty,
sal_Int32 queryNo )
{
(void)col;
(void)concept;
(void)penalty;
(void)queryNo;
return &conceptDataInstance_;
}
private:
EmptyConceptData conceptDataInstance_;
}; // end class EmptyQuery
/******************************************************************************/
/* */
/* ConceptData1/Query1 */
/* */
/******************************************************************************/
class ConceptData1
: public ConceptData
{
public:
ConceptData1( sal_Int32 id,
sal_Int32 role,
double score,
sal_Int32 queryNo,
sal_Int32 nColumns,
ContextTables* ctxInfo,
sal_Int32 code )
: ConceptData( id,role,score,queryNo,nColumns,ctxInfo ),
fieldCode_( code )
{
}
virtual void generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos );
private:
sal_Int32 fieldCode_;
}; // end class ConceptData1
class Query1
: public Query
{
public:
Query1( XmlIndex* env,
sal_Int32 nColumns,
sal_Int32 nHits,
sal_Int32 missingPenaltiesL,
double* missingPenalties,
sal_Int32 fieldCode )
: Query( env,nColumns,nHits,missingPenaltiesL,missingPenalties ),
searchFieldCode_( fieldCode )
{
}
virtual ConceptData* makeConceptData( sal_Int32 col,
sal_Int32 concept,
double score,
sal_Int32 query );
private:
sal_Int32 searchFieldCode_;
};
/********************************************************************************/
// Impl
/********************************************************************************/
void ConceptData1::generateFillers( std::vector< RoleFiller* >& array, sal_Int32 pos )
{
if( array[ queryNo_ ] != RoleFiller::STOP() )
{ // not 'prohibited'
// !!! candidate for a single _ctx op
sal_Int32 ancestor = ctx_->firstParentWithCode(pos,fieldCode_);
if( ancestor != -1 )
{
RoleFiller* p = new RoleFiller( nColumns_,
this,
role_,
pos,
ancestor,
pos + proximity_);
p->use( array,queryNo_ );
}
}
if( next_.is() )
next_->generateFillers( array,pos );
}
ConceptData* Query1::makeConceptData( sal_Int32 col,
sal_Int32 concept,
double score,
sal_Int32 query )
{
return new ConceptData1( concept,col,score,query,nColumns_,ctx_,searchFieldCode_ );
}
/******************************************************************************/
/* */
/* QueryFactoryImpl */
/* */
/******************************************************************************/
class QueryFactoryImpl
{
public:
Query* makeQuery( XmlIndex* env,const rtl::OUString& context,sal_Int32 nColumns,sal_Int32 nHits);
Query* empty() { return &emptyQueryInstance_; }
private:
EmptyQuery emptyQueryInstance_;
}; // end class QueryFactoryImpl
Query* QueryFactoryImpl::makeQuery( XmlIndex* env,
const rtl::OUString& context,
sal_Int32 nColumns,
sal_Int32 nHits )
{
if( ! context.getLength() )
return new Query( env,nColumns,nHits,0,0 );
else if( context.indexOf( sal_Unicode( '|' ) ) != -1 )
{
return 0; //t
}
else if( context.indexOf( rtl::OUString::createFromAscii( "//" ) ) != -1 )
{
return 0; //t
}
else if( context.indexOf( sal_Unicode( '/' ) ) != -1 )
{
return 0; //t
}
else if( context.indexOf( sal_Unicode( '@' ) ) != -1 )
{
return 0; //t
}
else if( context.indexOf( sal_Unicode( '[' ) ) != -1 )
{
return 0; //t
}
else
{
sal_Int32 code = env->getContextInfo()->linkCode( context );
if( code != -1 )
return new Query1( env,nColumns,nHits,0,0,code);
else
return &emptyQueryInstance_;
}
}
Search::Search( XmlIndex* env )
: env_( env ),
queryFactory_( 0 ),
nextDocGenHeap_(),
firstGenerator_(),
free2_( 0 ),
limit_( 0 ),
base_( 0 ),
concepts_( new sal_Int32[ ConceptGroupGenerator::NConceptsInGroup ] ),
dataL_( 0 ),
data_( 0 )
{
}
Search::~Search()
{
sal_uInt32 i;
Query* stopq = queryFactory_ ? queryFactory_->empty() : 0;
ConceptData* stopc = stopq ? stopq->makeConceptData( 0,0,0.0,0 ) : 0;
(void)stopc;
for( i = 0; i < queries_.size(); ++i )
if( queries_[i] != stopq )
delete queries_[i];
delete[] concepts_;
delete queryFactory_;
}
Query* Search::addQuery( const rtl::OUString& context,
sal_Int32 nValidTerms,sal_Int32 nMissingTerms,sal_Int32 nHits,
double variantPenalty,
const std::vector< sal_Int32 >& primary,
const std::vector< std::vector< sal_Int32 > >& columns )
{
// by now, scope == context
if( ! queryFactory_ )
queryFactory_ = new QueryFactoryImpl();
Query* query = queryFactory_->makeQuery( env_,context,nValidTerms,nHits );
query->missingTerms( nMissingTerms );
queries_.push_back( query );
for( sal_Int32 i = 0; i < nValidTerms; ++i )
{
if( primary[i] > 0 )
addTerm( i,primary[i],0.0 );
for( sal_uInt32 j = 0; j < columns[i].size(); ++j )
addTerm( i,columns[i][j],variantPenalty );
}
// start stop
query->addControlConceptData( this,queries_.size()-1 );
return query;
}
void Search::startSearch()
{
sal_Int32 i,j;
// set up ConceptData lists
// order search terms
quicksort( 0, free2_ - 1);
// remove duplicates
for (i = 0; i < free2_ - 1; i = j)
{
for (j = i + 1; j < free2_; j++)
{
if( conceptData_[i]->crqEquals( conceptData_[j].get() ) )
conceptData_[j] = 0;
else
i = j;
}
}
// create lists
for( i = 0; i < free2_ - 1; i = j )
{
for (j = i + 1; j < free2_; j++ )
{
if( conceptData_[j].is() )
if( conceptData_[i]->cEquals( conceptData_[j].get() ) )
{
conceptData_[i]->addLast( conceptData_[j].get() );
conceptData_[j] = 0;
}
else
i = j;
}
}
// densify
for( i = 0; i < free2_ - 1; i++)
{
if( ! conceptData_[i].is() )
{
for( j = i + 1; j < free2_; j++)
{
if (conceptData_[j].is() )
{
conceptData_[i] = conceptData_[j];
conceptData_[j] = 0;
break;
}
}
}
}
// set up new document generators
nextDocGenHeap_.reset();
for( i = 0; i < free2_ && conceptData_[i].is(); i++)
{
NextDocGenerator* gen = new NextDocGenerator( conceptData_[i].get(),env_ );
try
{
sal_Int32 doc;
gen->first();
if( ( doc = gen->getDocument() ) != NonnegativeIntegerGenerator::END )
{
/* !!! ??? is concept length used any more in any way
conceptData_[i].
setConceptLength(_env.
getConceptLength(conceptData_[i].getConcept()));
*/
nextDocGenHeap_.addGenerator( gen );
}
}
catch( ... )
{
}
}
nextDocGenHeap_.start();
env_->reset();
env_->resetContextSearch();
searchDocument();
}
void Search::addTerm( sal_Int32 col,sal_Int32 concept,double score )
{
if( env_->occursInText( concept ) )
{
ConceptData* cd = queries_[queries_.size()-1]->makeConceptData( col,concept,score,queries_.size()-1 );
if( sal_uInt32( free2_ ) == conceptData_.size() )
{
conceptData_.push_back( 0 );
// conceptVisitor_ = &conceptData_[0];
}
conceptData_[ free2_++ ] = cd;
}
}
void Search::searchDocument()
{
std::vector< RoleFiller* > start( queries_.size() );
do
{
try
{
switch( nextDocument( start ) )
{
case 0: // multi group
genHeap_.start( start );
while( genHeap_.next( start ) )
;
break;
case 1: // single group
while( firstGenerator_.next() )
firstGenerator_.generateFillers( start );
break;
case 2: // reached the end
return;
}
}
catch( const excep::XmlSearchException& )
{
continue;
}
RoleFiller* next;
for( sal_uInt32 i = 0; i < queries_.size(); ++i )
{
if( ( next = start[i] ) != 0 && next != RoleFiller::STOP() )
next->scoreList( queries_[i],document_ );
else if( queries_[i]->zoned() )
{
RoleFiller* rfs = queries_[i]->getRoleFillers();
if( rfs && rfs != RoleFiller::STOP() )
rfs->scoreList( queries_[i],document_ );
}
}
genHeap_.reset();
}
while( nextDocGenHeap_.isNonEmpty() );
// #i80952
#if 0
for( sal_uInt32 i = 0; i < start.size(); ++i )
if( start[i] != RoleFiller::STOP() )
delete start[i];
#endif
}
sal_Int32 Search::nextDocument( std::vector< RoleFiller* >& start ) throw( xmlsearch::excep::XmlSearchException )
{
while( nextDocGenHeap_.isNonEmpty() )
{ // still something to do
sal_uInt32 i;
for( i = 0; i < queries_.size(); ++i )
if( queries_[i] )
queries_[i]->resetForNextDocument();
// gather all concepts this document has
// and store associated conceptData
sal_Int32 index = 0;
document_ = nextDocGenHeap_.getDocument();
docConcepts_.clear();
queryMasks_.clear();
do
{
docConcepts_.push_back( nextDocGenHeap_.getConcept() );
queryMasks_.push_back( nextDocGenHeap_.getQueryMask() );
ConceptData *conceptData = ( conceptData_[ index++ ] = nextDocGenHeap_.getTerms() ).get();
conceptData->runBy( queries_ );
nextDocGenHeap_.step();
}
while( nextDocGenHeap_.atDocument( document_) );
// if there is no saturation model, some query will always vote YES
// and so every document will be opened
// even if this case, however, savings can be achieved by not generating fillers
// for some queries (not scoring, etc)
// and, with more care, creation of some GroupGenerators can be avoided
// saturating queries with lots of good hits will lead to best results
sal_Int32 voteMask = 0;
Query* query;
for( i = 0; i < queries_.size(); ++i )
{
query = queries_[i];
if( query )
{
query->saveRoleFillers( 0 );
if( query->vote() )
{
// normal reset
start[i] = query->zoned() ? RoleFiller::STOP() : 0;
voteMask |= 1 << i;
}
else
start[i] = RoleFiller::STOP(); // prohibit setting
}
}
// we may eliminate some ConceptGroupGenerators
// those which would be used only by Queries which voted NO
if( voteMask != 0 )
{ // need to open up document
ConceptGroupGenerator* gen;
// !!! don't gather Fillers for disinterested Queries
if( openDocumentIndex( document_ ) )
{ // multi group
// set up all needed generators
sal_Int32 j = 0;
while( ( queryMasks_[j] & voteMask ) == 0 )
++j;
// assert(j < index);
sal_Int32 c = docConcepts_[j];
sal_Int32 group = 0;
// find first group
while( /*group < maxConcepts_.size() &&*/
c > maxConcepts_[ group ] && ++group < limit_ )
;
gen = makeGenerator( group );
gen->addTerms( indexOf(c),conceptData_[j].get() );
for( ++j; j < index; j++ )
if( ( queryMasks_[j] & voteMask ) > 0 )
{
c = docConcepts_[j];
if( c > max_ )
{ // need to find another group
// assert(group < _limit);
while( /*group < maxConcepts_.size() &&*/
c > maxConcepts_[ group ] && ++group < limit_ )
;
gen = makeGenerator( group );
}
gen->addTerms( indexOf(c),conceptData_[j].get() );
}
return 0;
}
else
{ // single group
for( sal_Int32 j = 0; j < index; j++ )
if( queryMasks_[j] & voteMask )
firstGenerator_.addTerms( indexOf( docConcepts_[j] ),conceptData_[j].get() );
return 1;
}
}
}
return 2;
}
bool Search::openDocumentIndex( sal_Int32 docNo ) throw( excep::XmlSearchException )
{
data_ = env_->getPositions( dataL_,docNo );
base_ = env_->getDocumentIndex( docNo );
startingIndex_ = 0;
sal_Int32 kk = data_[ base_ ] & 0xFF, k2;
switch( kk >> 6 )
{ // get type
case 0: // single group, no extents
k2 = data_[base_ + 1];
firstGenerator_.init( dataL_,data_,base_ += 2,k2 );
// decode concept table
nConcepts_ = firstGenerator_.decodeConcepts( kk & 0x3F,0,concepts_ );
return false;
case 2: // multi group, no extents
{
kTable_.clear();
offsets_.clear();
maxConcepts_.clear();
util::ByteArrayDecompressor compr( dataL_,data_,base_ + 1 );
compr.decode( kk & 0x3F,kTable_ );
sal_Int32 last = kTable_.back();
kTable_.pop_back();
compr.ascDecode( last,offsets_ );
last = kTable_.back();
kTable_.pop_back();
compr.ascDecode( last,maxConcepts_ );
base_ += 1 + compr.bytesRead();
limit_ = maxConcepts_.size();
}
return true;
case 1: // single group, extents
case 3: // multi group, extents
throw excep::XmlSearchException( rtl::OUString::createFromAscii( "extents not yet implemented\n" ) );
}
return false;
}
ConceptGroupGenerator* Search::makeGenerator( sal_Int32 group )
throw( excep::XmlSearchException )
{
sal_Int32 shift,index;
if( group > 0 )
{
index = base_ + offsets_[ group - 1 ];
shift = maxConcepts_[ group - 1 ];
}
else
{
index = base_;
shift = 0;
}
// initialize generator
ConceptGroupGenerator* gen =
new ConceptGroupGenerator( dataL_,data_,index,kTable_[ 1 + 2*group ] );
// decode concept table
nConcepts_ = gen->decodeConcepts( kTable_[2*group],shift,concepts_ );
if( group < limit_ )
max_ = concepts_[ nConcepts_ ] = maxConcepts_[ group ];
else
max_ = concepts_[ nConcepts_ - 1 ];
genHeap_.addGenerator( gen );
startingIndex_ = 0; // in _concepts; lower search index
return gen;
}
sal_Int32 Search::indexOf(sal_Int32 concept) throw( excep::XmlSearchException )
{
sal_Int32 i = startingIndex_,j = nConcepts_,k;
while( i <= j )
if( concepts_[ k = (i + j)/2 ] < concept )
i = k + 1;
else if( concept < concepts_[k] )
j = k - 1;
else
{
startingIndex_ = k + 1;
return k;
}
throw excep::XmlSearchException( rtl::OUString::createFromAscii( "indexOf not found" ) );
}
sal_Int32 Search::partition( sal_Int32 p,sal_Int32 r )
{
rtl::Reference< ConceptData > x = conceptData_[ ((p + r) >> 1) & 0x7FFFFFFF ];
sal_Int32 i = p - 1, j = r + 1;
while( true )
{
while( x->compareWith( conceptData_[--j].get() ) )
;
while( conceptData_[++i]->compareWith( x.get() ) )
;
if( i < j )
{
rtl::Reference< ConceptData > t = conceptData_[i];
conceptData_[i] = conceptData_[j];
conceptData_[j] = t;
}
else
return j;
}
}
void Search::quicksort( sal_Int32 p,sal_Int32 r )
{
while (p < r)
{
sal_Int32 q = partition( p,r );
quicksort(p, q);
p = q + 1;
}
}