Files
libreoffice/xmlhelp/source/cxxhelp/qe/DocGenerator.cxx
Kurt Zenker 42b086698b INTEGRATION: CWS warningfixes02 (1.7.2); FILE MERGED
2006/06/30 12:30:30 sb 1.7.2.1: #i66577# Made the code compile (warning-free) on a unxlngi6.pro GCC 4.1.1 Linux box.
2006-07-19 16:13:08 +00:00

500 lines
13 KiB
C++

/*************************************************************************
*
* OpenOffice.org - a multi-platform office productivity suite
*
* $RCSfile: DocGenerator.cxx,v $
*
* $Revision: 1.8 $
*
* last change: $Author: kz $ $Date: 2006-07-19 17:13:08 $
*
* The Contents of this file are made available subject to
* the terms of GNU Lesser General Public License Version 2.1.
*
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2005 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
************************************************************************/
#ifndef _XMLSEARCH_QE_DOCGENERATOR_HXX_
#include <qe/DocGenerator.hxx>
#endif
#ifndef _XMLSEARCH_QE_QUERY_HXX_
#include <qe/Query.hxx>
#endif
using namespace xmlsearch;
using namespace xmlsearch::qe;
// Value that NextDocGeneratorHeap::step() compares with the result of
// NextDocGenerator::next() to detect that a generator has no further entries.
const sal_Int32 NonnegativeIntegerGenerator::END = -1;
// Number of slots in ConceptGroupGenerator::table_ (used by the constructors
// and by init()).
const sal_Int32 ConceptGroupGenerator::NConceptsInGroup = 16;
// Bit width stored in k2_ and handed to the decompressor's read() in
// ConceptGroupGenerator::next(); the value read is used as index into table_.
const sal_Int32 ConceptGroupGenerator::BitsInLabel = 4;
// Definition of the static RoleFiller instance; it is built with the default
// constructor. NOTE(review): its purpose is not visible in this file --
// presumably a sentinel/placeholder filler; confirm against the header.
RoleFiller RoleFiller::roleFiller_;
// Default constructor: initializes every listed member with 0 (no concept
// data, no successor, fillers_ constructed with argument 0). This is the
// constructor that builds the static RoleFiller::roleFiller_ instance.
RoleFiller::RoleFiller()
: m_nRefcount( 0 ),
fixedRole_( 0 ),
filled_( 0 ),
begin_( 0 ),
end_( 0 ),
parentContext_( 0 ),
limit_( 0 ),
next_( 0 ),
fillers_( 0 ),
conceptData_( 0 )
{
}
// Creates a filler describing one occurrence of a concept.
//
//   nColumns       number of slots allocated in fillers_
//   first          concept data of the occurrence (stored in conceptData_)
//   role           role of the concept; only its low four bits are kept
//                  (fixedRole_)
//   pos            start of the occurrence (stored in begin_); end_ is set
//                  to begin_ + 1
//   parentContext  stored in parentContext_; use() only combines fillers
//                  whose parentContext_ values are equal
//   limit          stored in limit_; use() compares the limit_ of already
//                  listed fillers with the begin_ of the filler being added
//
// penalty() and makeQueryHit() dereference fillers_[i] whenever bit i of
// filled_ is set, so the slot written here must be the one matching the bit
// set in filled_.
RoleFiller::RoleFiller( sal_Int32 nColumns,
                        ConceptData* first,
                        sal_Int32 role,
                        sal_Int32 pos,
                        sal_Int32 parentContext,
                        sal_Int32 limit )
    : m_nRefcount( 0 ),
      fixedRole_( sal_uInt8( role & 0xF ) ), // primary/constitutive concept/role
      next_( 0 ),
      fillers_( nColumns ),
      conceptData_( first )
{
    filled_ = sal_Int16( 1 << fixedRole_ );
    begin_ = pos;               // offset in file
    // _end = _begin + first.getConceptLength();
    end_ = begin_ + 1;
    limit_ = limit;
    parentContext_ = parentContext;

    for( sal_uInt32 i = 0; i < fillers_.size(); ++i )
        fillers_[i] = 0;

    // Use the masked role (fixedRole_) rather than the raw 'role' argument:
    // filled_ was derived from fixedRole_, and every reader looks the filler
    // up through the bit position. For role values in 0..15 this is the same
    // slot as before.
    fillers_[ fixedRole_ ] = this;
}
// Destructor: the body performs no explicit cleanup. In particular the raw
// pointers reachable through next_ and fillers_ are not deleted here; members
// are only destroyed by their own destructors.
RoleFiller::~RoleFiller()
{
}
// Walks the list that starts at this filler (the function is called on the
// head of the list) and reports hits for 'document' to 'query'.
//
// 'best' is the filler currently considered for conversion into a QueryHit
// and 'bestPenalty' is its penalty. 'probe' runs ahead through the next_
// chain: as long as a probed filler overlaps the candidate it may replace it
// when its penalty is not worse; as soon as a probed filler does not overlap,
// the candidate is emitted and the probed filler becomes the new candidate.
// The candidate left over when the chain ends is emitted last.
void RoleFiller::scoreList( Query* query,sal_Int32 document )
{
    const sal_Int32 columnCount = query->getNColumns();

    RoleFiller* best = this;
    double bestPenalty = best->penalty( query,columnCount );

    RoleFiller* probe = best->next_;
    while( probe )
    {
        if( probe->end_ >= best->begin_ )
        {
            // overlap: keep whichever of the two has the lower (or equal) penalty
            // !!! can be computed in two steps
            const double probePenalty = probe->penalty( query,columnCount );
            if( probePenalty <= bestPenalty )
            {
                bestPenalty = probePenalty;
                best = probe;
            }
        }
        else
        {
            // no overlap: the candidate is final, continue with the probed filler
            best->makeQueryHit( query,document,bestPenalty );
            best = probe;
            bestPenalty = best->penalty( query,columnCount );
        }
        probe = probe->next_;
    }

    best->makeQueryHit( query,document,bestPenalty );
}
// Asks 'query' for a QueryHit covering [begin_, end_] in document 'doc' with
// the given penalty. If the query hands one out, its match array is filled
// with one (concept, begin) pair per filled role; the pair for role r lives
// at indices 2*r and 2*r+1, and pairs of unfilled roles are left untouched.
void RoleFiller::makeQueryHit( Query* query,sal_Int32 doc,double penalty_ )
{
    QueryHit* hit = query->maybeCreateQueryHit( penalty_,doc,
                                                begin_,end_,parentContext_ );
    if( !hit )
        return;

    sal_Int32 matchLength;
    sal_Int32* matches = hit->getMatches( matchLength );
    const sal_Int32 pairCount = matchLength / 2;

    for( sal_Int32 role = 0; role < pairCount; ++role )
    {
        if( ( filled_ & ( 1 << role ) ) == 0 )
            continue;                       // role not filled: skip its pair

        const sal_Int32 slot = 2 * role;
        matches[ slot ]     = fillers_[ role ]->getConcept();
        matches[ slot + 1 ] = fillers_[ role ]->begin_;
    }
}
// Returns the concept reported by the concept data attached to this filler.
// conceptData_ is dereferenced without a check.
sal_Int32 RoleFiller::getConcept()
{
    const sal_Int32 conceptId = conceptData_->getConcept();
    return conceptId;
}
// Makes this filler the head of the list stored in place[ query ] and tries
// to extend the fillers that were already listed.
//
// Every older filler whose limit_ still reaches this filler's begin_ is
// visited (the walk stops at the first one that does not, or at the end of
// the chain). An older filler is extended only if it has a different fixed
// role and the same parent context: if it has no filler for this role yet,
// this filler is registered and the older filler's end_ is moved to this
// filler's end_; otherwise considerReplacementWith() decides.
void RoleFiller::use( std::vector< RoleFiller*>& place,sal_Int32 query )
{
    RoleFiller* const previousHead = place[ query ];
    place[ query ] = this;                  // put at the head of list
    if( !previousHead )
        return;

    next_ = previousHead;

    const int roleBit = 1 << fixedRole_;
    for( RoleFiller* older = previousHead;
         older && older->limit_ >= begin_;
         older = older->next_ )
    {
        // we don't ever replace a filler's fixed role
        if( fixedRole_ == older->fixedRole_ )
            continue;
        // must be in the same parent context, e.g. PARA
        if( older->parentContext_ != parentContext_ )
            continue;

        if( older->filled_ & roleBit )
        {
            older->considerReplacementWith( this );
        }
        else
        {
            // not filled yet: grow the older hit
            older->filled_ |= roleBit;
            older->fillers_[ fixedRole_ ] = this;
            older->end_ = end_;
        }
    }
}
// Replaces the filler registered for the replacement's fixed role when the
// replacement has a strictly higher score.
// !!! simplistic for now: gaps and out-of-order fillers are not considered
void RoleFiller::considerReplacementWith( RoleFiller* replacement )
{
    const sal_Int32 slot = replacement->fixedRole_;
    const bool scoresHigher =
        replacement->getScore() > fillers_[ slot ]->getScore();
    if( scoresHigher )
        fillers_[ slot ] = replacement;
}
// Computes the penalty of the hit headed by this filler.
//
// The result is the sum of
//   - query->lookupPenalty( filled_ ),
//   - the penalty of the concept data of every filled role below nColumns,
//   - one out-of-order penalty for each pair (role, later role) of filled
//     roles in which the later role's filler begins before this filler's
//     begin_,
//   - the gap penalty times the remaining length, where the length starts as
//     end_ - begin_ + 1 and is reduced by 2 per filled role.
// Negative results are clamped to 0.0.
double RoleFiller::penalty( Query* query,sal_Int32 nColumns )
{
    sal_Int32 gapLength = end_ - begin_ + 1;
    double total = query->lookupPenalty( filled_ );

    // !!! here is a chance to check against the query whether the hit is
    // worth scoring further; it might not be if the query already has lots
    // of good hits
    for( sal_Int32 role = 0; role < nColumns; ++role )
    {
        if( ( filled_ & ( 1 << role ) ) == 0 )
            continue;

        total += fillers_[ role ]->conceptData_->getPenalty();
        //length -= _fillers[i]._conceptData.getConceptLength() + 1;
        gapLength -= 2;     // !!! ??? c.length is not used ?

        // nothing filled above this role: no out-of-order check needed
        if( ( filled_ >> ( role + 1 ) ) == 0 )
            continue;

        for( sal_Int32 later = role + 1; later < nColumns; ++later )
        {
            const bool laterFilled = ( filled_ & ( 1 << later ) ) != 0;
            if( laterFilled && fillers_[ later ]->begin_ < begin_ )
                total += query->getOutOufOrderPenalty();
        }
    }

    const double score = total + gapLength * query->getGapPenalty();
    if( score < 0.0 )       // !!! quick fix
        return 0.0;
    return score;
}
// Builds a document generator for the concept described by 'cd'.
//
//   cd   concept data; may be null, in which case concept_ and queryMask_
//        are both set to -1 (terms_ then holds the null pointer)
//   env  index that supplies the document iterator; dereferenced without a
//        null check
//
// The iterator_ initializer reads concept_, so it depends on concept_ being
// initialized first. Members are initialized in declaration order --
// NOTE(review): confirm in the header that concept_ is declared before
// iterator_.
NextDocGenerator::NextDocGenerator( ConceptData* cd,XmlIndex* env )
: document_( 0 ),
concept_( cd ? cd->getConcept() : -1 ),
queryMask_( cd ? cd->getQueryMask() : -1 ),
terms_( cd ),
iterator_( env->getDocumentIterator( concept_ ) )
{
}
void NextDocGeneratorHeap::reset()
{
for( sal_Int32 i = 0; i < heapSize_; ++i )
{
delete heap_[i]; heap_[i] = 0;
}
free_ = 0;
nonEmpty_ = false;
}
// Stores 'gen' in the next free slot of the backing vector, growing the
// vector by one element when all existing slots are taken. The generator is
// later deleted by reset() or step(). The heap order is only established by
// start().
void NextDocGeneratorHeap::addGenerator( NextDocGenerator* gen )
{
    const sal_uInt32 slot = sal_uInt32( free_ );
    if( slot == heap_.size() )
        heap_.push_back( 0 );       // make room for exactly one more entry

    heap_[ slot ] = gen;
    ++free_;
}
void NextDocGeneratorHeap::start()
{
if( ( heapSize_ = free_ ) > 0 )
{
for( sal_Int32 i = heapSize_ / 2; i >= 0; --i )
heapify(i);
nonEmpty_ = true;
}
else
nonEmpty_ = false;
}
// Advances the generator at the root of the heap.
//
// If the root still has entries, the heap order is restored from the root.
// If it is exhausted and other generators remain, the root is deleted, the
// last generator takes its place (the vacated slot is cleared) and the heap
// order is restored. If the exhausted root was the only generator, it stays
// in its slot and the heap is just flagged as empty.
// heap_[0] is dereferenced without checking nonEmpty_.
void NextDocGeneratorHeap::step() throw( excep::XmlSearchException )
{
    const bool exhausted =
        heap_[0]->next() == NonnegativeIntegerGenerator::END;

    if( exhausted )
    {
        if( heapSize_ <= 1 )
        {
            nonEmpty_ = false;
            return;
        }

        delete heap_[0];
        const sal_Int32 last = heapSize_ - 1;
        heap_[0] = heap_[ last ];
        heap_[ last ] = 0;
        heapSize_ = last;
    }

    heapify( 0 );
}
// Sifts the generator at index 'i' down until neither of its children
// (indices 2*i+1 and 2*i+2, as far as they lie below heapSize_) is
// smallerThan() it.
void NextDocGeneratorHeap::heapify( sal_Int32 i )
{
    for( ;; )
    {
        const sal_Int32 left = 2 * i + 1;
        const sal_Int32 right = left + 1;

        sal_Int32 least = i;
        if( left < heapSize_ && heap_[ left ]->smallerThan( heap_[ i ] ) )
            least = left;
        if( right < heapSize_ && heap_[ right ]->smallerThan( heap_[ least ] ) )
            least = right;

        if( least == i )
            return;                 // heap property holds at this node

        NextDocGenerator* const moved = heap_[ least ];
        heap_[ least ] = heap_[ i ];
        heap_[ i ] = moved;
        i = least;                  // continue with the subtree we swapped into
    }
}
// Returns true when the heap is non-empty and the generator at its root
// currently reports 'document'.
bool NextDocGeneratorHeap::atDocument( sal_Int32 document )
{
    if( !nonEmpty_ )
        return false;
    return heap_[0]->getDocument() == document;
}
// Creates a generator that decodes from the given byte data.
//
//   dataL, data, index  forwarded unchanged to the util::ByteArrayDecompressor
//                       that is allocated here and stored in bits_
//   k                   stored in k1_; next() passes it to the decompressor's
//                       readNext()
//
// k2_ is set to BitsInLabel and table_ is constructed with NConceptsInGroup.
// The decompressor allocated here is released by the destructor or replaced
// by init(). cData_ is not listed and is therefore default-initialized.
ConceptGroupGenerator::ConceptGroupGenerator( sal_Int32 dataL,sal_Int8* data,sal_Int32 index,sal_Int32 k )
: last_( 0 ),
k1_( k ),
k2_( BitsInLabel ),
table_( NConceptsInGroup ),
bits_( new util::ByteArrayDecompressor( dataL,data,index ) )
{
}
// Creates a generator without a decompressor (bits_ is null, k1_ is 0).
// next() and decodeConcepts() dereference bits_, so init() has to supply a
// decompressor before either of them is called.
ConceptGroupGenerator::ConceptGroupGenerator()
: last_( 0 ),
k1_( 0 ),
k2_( BitsInLabel ),
table_( NConceptsInGroup ),
bits_( 0 )
{
}
// Releases the decompressor allocated by the constructor or by init().
// bits_ may be null (default-constructed generator); deleting a null pointer
// has no effect.
ConceptGroupGenerator::~ConceptGroupGenerator()
{
delete bits_;
}
// Lets the concept data selected by the last successful next() generate its
// fillers into 'array', passing last_ as the second argument.
//
// cData_ is only set by next(). GeneratorHeap::start() ignores the result of
// next() before calling this function, so cData_ may still be empty here;
// in that case nothing is generated instead of dereferencing an empty
// reference.
void ConceptGroupGenerator::generateFillers( std::vector< RoleFiller* >& array )
{
    if( cData_.is() )
        cData_->generateFillers( array,last_ );
}
// Advances to the next entry whose label refers to an occupied slot of
// table_.
//
// Each round asks the decompressor for the next entry (readNext with k1_ and
// this generator), then reads a k2_-bit label and selects table_[ label ] as
// the current concept data. Returns true as soon as the selected slot is
// set, and false once the decompressor reports no further entry.
bool ConceptGroupGenerator::next() throw( excep::XmlSearchException )
{
    for( ;; )
    {
        if( !bits_->readNext( k1_,this ) )
            return false;

        const sal_Int32 label = bits_->read( k2_ );
        cData_ = table_[ label ];
        if( cData_.is() )
            return true;
    }
}
// Forwards to the decompressor's ascendingDecode() with the same arguments
// and returns its result. bits_ is dereferenced without a check.
sal_Int32 ConceptGroupGenerator::decodeConcepts( sal_Int32 k,
                                                 sal_Int32 shift,
                                                 sal_Int32 *concepts )
    throw( excep::XmlSearchException )
{
    const sal_Int32 decoded = bits_->ascendingDecode( k,shift,concepts );
    return decoded;
}
// Re-targets the generator at new byte data.
//
//   bytesL, bytes, index  forwarded unchanged to the new
//                         util::ByteArrayDecompressor
//   k                     new value of k1_
//
// The previous decompressor is deleted, last_ is reset to 0 and all
// NConceptsInGroup slots of table_ are cleared; terms have to be registered
// again with addTerms().
//
// The new decompressor is allocated before the old one is released: if the
// allocation throws, the generator is left unchanged and bits_ does not
// dangle (the destructor would otherwise delete it a second time).
void ConceptGroupGenerator::init( sal_Int32 bytesL,sal_Int8* bytes,sal_Int32 index,sal_Int32 k )
{
    util::ByteArrayDecompressor* const fresh =
        new util::ByteArrayDecompressor( bytesL,bytes,index );

    delete bits_;
    bits_ = fresh;

    k1_ = k;
    last_ = 0;
    for( sal_Int32 i = 0; i < NConceptsInGroup; i++ )
        table_[i] = 0;
}
// Registers 'terms' in slot 'index' of table_; next() selects that slot when
// it reads 'index' as label. No bounds check is done here -- the constructors
// size table_ with NConceptsInGroup.
// NOTE(review): presumably callers pass 0 <= index < NConceptsInGroup; confirm.
void ConceptGroupGenerator::addTerms( sal_Int32 index,ConceptData* terms )
{
table_[ index ] = terms;
}
void GeneratorHeap::reset()
{
for( sal_Int32 i = 0; i < heapSize_; ++i )
{
delete heap_[i];
heap_[i] = 0;
}
free_ = 0;
}
// Stores 'cd' in the next free slot of the backing vector, growing the vector
// by one element when all existing slots are taken. The generator is later
// deleted by reset() or next(). The heap order is only established by start().
void GeneratorHeap::addGenerator( ConceptGroupGenerator* cd )
{
    const sal_uInt32 slot = sal_uInt32( free_ );
    if( slot == heap_.size() )
        heap_.push_back( 0 );       // make room for exactly one more entry

    heap_[ slot ] = cd;
    ++free_;
}
void GeneratorHeap::buildHeap()
{
for( sal_Int32 i = heapSize_/2; i >= 0; i-- )
heapify(i);
}
// Sifts the generator at index 'root' down until neither of its children
// (indices 2*root+1 and 2*root+2, as far as they lie below heapSize_) has a
// smaller position() than it.
void GeneratorHeap::heapify( sal_Int32 root )
{
    for( ;; )
    {
        const sal_Int32 left = 2 * root + 1;
        const sal_Int32 right = left + 1;

        sal_Int32 least = root;
        if( left < heapSize_ && heap_[ left ]->position() < heap_[ root ]->position() )
            least = left;
        if( right < heapSize_ && heap_[ right ]->position() < heap_[ least ]->position() )
            least = right;

        if( least == root )
            return;                 // heap property holds at this node

        ConceptGroupGenerator* const moved = heap_[ least ];
        heap_[ least ] = heap_[ root ];
        heap_[ root ] = moved;
        root = least;               // continue with the subtree we swapped into
    }
}
// Starts iterating over the generators added so far.
//
// heapSize_ takes over the number of added generators. If there is none,
// false is returned. Otherwise every generator is advanced once (the result
// of next() is not inspected), the heap order is built, the generator at the
// root generates its fillers into 'array' and true is returned.
bool GeneratorHeap::start( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException )
{
    heapSize_ = free_;
    if( heapSize_ <= 0 )
        return false;

    for( sal_Int32 slot = 0; slot < free_; ++slot )
        heap_[ slot ]->next();

    buildHeap();
    heap_[0]->generateFillers( array );
    return true;
}
// Advances the generator at the root of the heap and lets the new root
// generate its fillers into 'array'.
//
// Returns false when the heap is empty, or when the root was the last
// generator and has just run dry (heapSize_ becomes 0; that generator is not
// deleted here and stays in slot 0). If the root runs dry while other
// generators remain, it is deleted and the last generator takes its place
// (the vacated slot is cleared). In all remaining cases the heap order is
// restored from the root and true is returned.
bool GeneratorHeap::next( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException )
{
    if( heapSize_ <= 0 )
        return false;

    if( ! heap_[0]->next() )        // no more
    {
        if( heapSize_ <= 1 )
        {
            heapSize_ = 0;
            return false;
        }

        delete heap_[0];
        const sal_Int32 last = heapSize_ - 1;
        heap_[0] = heap_[ last ];
        heap_[ last ] = 0;
        heapSize_ = last;
    }

    heapify( 0 );
    heap_[0]->generateFillers( array );
    return true;
}