/************************************************************************* * * OpenOffice.org - a multi-platform office productivity suite * * $RCSfile: DocGenerator.cxx,v $ * * $Revision: 1.7 $ * * last change: $Author: hr $ $Date: 2006-06-20 00:41:11 $ * * The Contents of this file are made available subject to * the terms of GNU Lesser General Public License Version 2.1. * * * GNU Lesser General Public License Version 2.1 * ============================================= * Copyright 2005 by Sun Microsystems, Inc. * 901 San Antonio Road, Palo Alto, CA 94303, USA * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * ************************************************************************/ #ifndef _XMLSEARCH_QE_DOCGENERATOR_HXX_ #include #endif #ifndef _XMLSEARCH_QE_QUERY_HXX_ #include #endif using namespace xmlsearch; using namespace xmlsearch::qe; const sal_Int32 NonnegativeIntegerGenerator::END = -1; const sal_Int32 ConceptGroupGenerator::NConceptsInGroup = 16; const sal_Int32 ConceptGroupGenerator::BitsInLabel = 4; RoleFiller RoleFiller::roleFiller_; RoleFiller::RoleFiller() : m_nRefcount( 0 ), fixedRole_( 0 ), filled_( 0 ), begin_( 0 ), end_( 0 ), parentContext_( 0 ), limit_( 0 ), next_( 0 ), fillers_( 0 ), conceptData_( 0 ) { } RoleFiller::RoleFiller( sal_Int32 nColumns, ConceptData* first, sal_Int32 role, sal_Int32 pos, sal_Int32 parentContext, sal_Int32 limit ) : m_nRefcount( 0 ), fixedRole_( sal_uInt8( role & 0xF ) ), // primary/constitutive concept/role next_( 0 ), fillers_( nColumns ), conceptData_( first ) { filled_ = sal_Int16( 1 << fixedRole_ ); begin_ = pos; // offset in file // _end = _begin + first.getConceptLength(); end_ = begin_ + 1; limit_ = limit; parentContext_ = parentContext; next_ = 0; for( sal_uInt32 i = 0; i < fillers_.size(); ++i ) fillers_[i] = 0; fillers_[ role ] = this; } RoleFiller::~RoleFiller() { } void RoleFiller::scoreList( Query* query,sal_Int32 document ) { sal_Int32 nColumns = query->getNColumns(); RoleFiller* candidateHit = this; // function called for the head of list RoleFiller* next; // lookahead: if overlap, if so, is it better // 'candidateHit' always points at the current candidate to be converted to a QueryHit // 'penalty' is its penalty // 'next' is used to explore earlier overlapping fillers // the decision to emit a QueryHit is made when either there's no next // or next doesn't overlap the current candidate // the loop's logic makes sure that at emit time there's no better/earlier filler // to overlap with the candidate double penalty_ = candidateHit->penalty( query,nColumns ); for( next = candidateHit->next_; next; next = next->next_ ) if( next->end_ < candidateHit->begin_ ) { // no overlap candidateHit->makeQueryHit( query,document,penalty_ ); candidateHit = next; penalty_ = candidateHit->penalty( query,nColumns ); } else { // !!! can be computed in two steps double penalty2 = next->penalty( query,nColumns ); if( penalty2 <= penalty_ ) { // prefer next, disregard candidateHit penalty_ = penalty2; candidateHit = next; } } candidateHit->makeQueryHit(query,document,penalty_); } void RoleFiller::makeQueryHit( Query* query,sal_Int32 doc,double penalty_ ) { QueryHit* hit = query->maybeCreateQueryHit( penalty_,doc, begin_,end_,parentContext_ ); if( hit ) { sal_Int32 N; sal_Int32* matches = hit->getMatches( N ); N /= 2; for( sal_Int32 i = 0,j = 0; i < N; ++i ) if( filled_ & 1 << i ) { matches[ j++ ] = fillers_[ i ]->getConcept(); matches[ j++ ] = fillers_[ i ]->begin_; } else j += 2; } } sal_Int32 RoleFiller::getConcept() { return conceptData_->getConcept(); } void RoleFiller::use( std::vector< RoleFiller*>& place,sal_Int32 query ) { RoleFiller* rf; if( rf = place[ query ] ) { place[ query ] = this; // put at the head of list next_ = rf; while( rf->limit_ >= begin_ ) { // check if we can grow/improve a hit // we don't ever replace filler's fixed role if( fixedRole_ != rf->fixedRole_ && // in same parent context eg. PARA rf->parentContext_ == parentContext_ ) { if( ( rf->filled_ & ( 1 << fixedRole_ ) ) == 0 ) { // not filled yet rf->filled_ |= 1 << fixedRole_; rf->fillers_[ fixedRole_ ] = this; rf->end_ = end_; } else rf->considerReplacementWith( this ); } if( rf->next_ ) rf = rf->next_; else return; } } else place[query] = this; } void RoleFiller::considerReplacementWith( RoleFiller* replacement ) { // !!! simplistic for now // needs gap and out of order sal_Int32 role = replacement->fixedRole_; if( replacement->getScore() > fillers_[role]->getScore() ) fillers_[ role ] = replacement; } double RoleFiller::penalty( Query* query,sal_Int32 nColumns ) { sal_Int32 length = end_ - begin_ + 1; double penalty_ = query->lookupPenalty( filled_ ); // !!! here is a chance to check against query // if hit worth scoring further // might not be if query already has lots of good hits for( sal_Int32 i = 0; i < nColumns; ++i ) if( filled_ & ( 1 << i ) ) { penalty_ += fillers_[i]->conceptData_->getPenalty(); //length -= _fillers[i]._conceptData.getConceptLength() + 1; length -= 2; // !!! ??? c.length is not used ? if( filled_ >> (i + 1) ) for( sal_Int32 j = i + 1; j < nColumns; ++j ) if( ( filled_ & 1 << j ) && fillers_[j]->begin_ < begin_ ) penalty_ += query->getOutOufOrderPenalty(); } double result = penalty_ + length * query->getGapPenalty(); return result < 0.0 ? 0.0 : result; // !!! quick fix } NextDocGenerator::NextDocGenerator( ConceptData* cd,XmlIndex* env ) : document_( 0 ), concept_( cd ? cd->getConcept() : -1 ), queryMask_( cd ? cd->getQueryMask() : -1 ), terms_( cd ), iterator_( env->getDocumentIterator( concept_ ) ) { } void NextDocGeneratorHeap::reset() { for( sal_Int32 i = 0; i < heapSize_; ++i ) { delete heap_[i]; heap_[i] = 0; } free_ = 0; nonEmpty_ = false; } void NextDocGeneratorHeap::addGenerator( NextDocGenerator* gen ) { if( sal_uInt32( free_ ) == heap_.size() ) { heap_.push_back( 0 ); } heap_[free_++] = gen; } void NextDocGeneratorHeap::start() { if( ( heapSize_ = free_ ) > 0 ) { for( sal_Int32 i = heapSize_ / 2; i >= 0; --i ) heapify(i); nonEmpty_ = true; } else nonEmpty_ = false; } void NextDocGeneratorHeap::step() throw( excep::XmlSearchException ) { if( heap_[0]->next() != NonnegativeIntegerGenerator::END ) heapify(0); else if ( heapSize_ > 1 ) { delete heap_[0]; heap_[0] = heap_[--heapSize_]; heap_[ heapSize_ ] = 0; heapify(0); } else nonEmpty_ = false; } void NextDocGeneratorHeap::heapify( sal_Int32 i ) { NextDocGenerator* temp; for( sal_Int32 r,l,smallest; ; ) { r = ( i + 1 ) << 1; l = r - 1; smallest = ( l < heapSize_ && heap_[l]->smallerThan( heap_[i] ) ) ? l : i; if( r < heapSize_ && heap_[r]->smallerThan( heap_[ smallest ] ) ) smallest = r; if( smallest != i ) { temp = heap_[ smallest ]; heap_[ smallest ] = heap_[ i ]; heap_[i] = temp; i = smallest; } else break; } } bool NextDocGeneratorHeap::atDocument( sal_Int32 document ) { return nonEmpty_ && heap_[0]->getDocument() == document; } ConceptGroupGenerator::ConceptGroupGenerator( sal_Int32 dataL,sal_Int8* data,sal_Int32 index,sal_Int32 k ) : last_( 0 ), k1_( k ), k2_( BitsInLabel ), table_( NConceptsInGroup ), bits_( new util::ByteArrayDecompressor( dataL,data,index ) ) { } ConceptGroupGenerator::ConceptGroupGenerator() : last_( 0 ), k1_( 0 ), k2_( BitsInLabel ), table_( NConceptsInGroup ), bits_( 0 ) { } ConceptGroupGenerator::~ConceptGroupGenerator() { delete bits_; } void ConceptGroupGenerator::generateFillers( std::vector< RoleFiller* >& array ) { cData_->generateFillers( array,last_ ); } bool ConceptGroupGenerator::next() throw( excep::XmlSearchException ) { while( bits_->readNext( k1_,this ) ) { sal_Int32 bla = bits_->read( k2_ ); if( ( cData_ = table_[ bla ] ).is() ) return true; } return false; } sal_Int32 ConceptGroupGenerator::decodeConcepts( sal_Int32 k, sal_Int32 shift, sal_Int32 *concepts ) throw( excep::XmlSearchException ) { return bits_->ascendingDecode( k,shift,concepts ); } void ConceptGroupGenerator::init( sal_Int32 bytesL,sal_Int8* bytes,sal_Int32 index,sal_Int32 k ) { k1_ = k; delete bits_; bits_ = new util::ByteArrayDecompressor( bytesL,bytes,index ); last_ = 0; for( sal_Int32 i = 0;i < NConceptsInGroup; i++ ) table_[i] = 0; } void ConceptGroupGenerator::addTerms( sal_Int32 index,ConceptData* terms ) { table_[ index ] = terms; } void GeneratorHeap::reset() { for( sal_Int32 i = 0; i < heapSize_; ++i ) { delete heap_[i]; heap_[i] = 0; } free_ = 0; } void GeneratorHeap::addGenerator( ConceptGroupGenerator* cd ) { if( sal_uInt32( free_ ) == heap_.size() ) { heap_.push_back( 0 ); } heap_[free_++] = cd; } void GeneratorHeap::buildHeap() { for( sal_Int32 i = heapSize_/2; i >= 0; i-- ) heapify(i); } void GeneratorHeap::heapify( sal_Int32 root ) { for( sal_Int32 smallest = 0; ; ) { const sal_Int32 right = ( root + 1 ) << 1; const sal_Int32 left = right - 1; smallest = ( left < heapSize_ && heap_[left]->position() < heap_[ root ]->position() ) ? left : root; if( right< heapSize_ && heap_[right]->position() < heap_[smallest]->position() ) smallest = right; if( smallest != root ) { ConceptGroupGenerator* temp = heap_[smallest]; heap_[smallest] = heap_[root]; heap_[root] = temp; root = smallest; } else break; } } bool GeneratorHeap::start( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException ) { if( ( heapSize_ = free_ ) > 0 ) { for( sal_Int32 i = 0; i < free_; ++i ) heap_[i]->next(); buildHeap(); heap_[0]->generateFillers( array ); return true; } else return false; } bool GeneratorHeap::next( std::vector< RoleFiller* >& array ) throw( xmlsearch::excep::XmlSearchException ) { if( heapSize_ > 0 ) { if( ! heap_[0]->next() ) // no more if( heapSize_ > 1) { delete heap_[0]; heap_[0] = heap_[--heapSize_]; heap_[heapSize_] = 0; } else { heapSize_ = 0; return false; } heapify(0); heap_[0]->generateFillers( array ); return true; } else return false; }