Files
libreoffice/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx
2010-11-05 14:38:22 +00:00

453 lines
18 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
*
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_xmlhelp.hxx"
#include <com/sun/star/ucb/Command.hpp>
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
#include <com/sun/star/i18n/XExtendedTransliteration.hpp>
#include <com/sun/star/ucb/XCommandProcessor.hpp>
#include <com/sun/star/lang/Locale.hpp>
#include <com/sun/star/script/XInvocation.hpp>
#ifndef INCLUDED_STL_ALGORITHM
#include <algorithm>
#define INCLUDED_STL_ALGORITHM
#endif
#ifndef INCLUDED_STL_SET
#include <set>
#define INCLUDED_STL_SET
#endif
#include <qe/Query.hxx>
#include <qe/DocGenerator.hxx>
#include "resultsetforquery.hxx"
#include "databases.hxx"
// For testing
// #define LOGGING
using namespace std;
using namespace chelp;
using namespace xmlsearch::excep;
using namespace xmlsearch::qe;
using namespace com::sun::star;
using namespace com::sun::star::ucb;
using namespace com::sun::star::i18n;
using namespace com::sun::star::uno;
using namespace com::sun::star::lang;
struct HitItem
{
rtl::OUString m_aURL;
float m_fScore;
HitItem( void ) {}
HitItem( const rtl::OUString& aURL, float fScore )
: m_aURL( aURL )
, m_fScore( fScore )
{}
bool operator < ( const HitItem& rHitItem ) const
{
return rHitItem.m_fScore < m_fScore;
}
};
/** Builds the result set for a full-text help query.

    Steps performed:
    1. The query string of aURLParameter is split at blanks into terms; each
       term is transliterated with TransliterationModules_UPPERCASE_LOWERCASE
       if the transliteration service is available.
    2. For every index folder delivered by IndexFolderIterator, each term is
       passed to the "search" method of the com.sun.star.help.HelpSearch
       service (called through XInvocation).
    3. With more than one term, only URLs returned for every term are kept;
       their score is the sum of (term score / number of terms), i.e. the
       average, and the folder's hits are sorted by descending score.
    4. The per-folder hit lists are merged by descending score until
       get_hitCount() entries have been collected.
    5. The leading "#HLP#" of each hit is replaced by "vnd.sun.star.help://",
       language and system parameters are appended, and the property values
       of each resulting content are fetched into m_aItems.

    @param xMSF           service factory; used to create the transliteration
                          and search services and handed to ResultSetBase
    @param xProvider      handed to ResultSetBase
    @param nOpenMode      handed to ResultSetBase
    @param seq            handed to ResultSetBase
    @param seqSort        handed to ResultSetBase
    @param aURLParameter  supplies query, scope, hit count, module, language
                          and system
    @param pDatabases     dereferenced for the index folder iteration, so it
                          must not be NULL
 */
ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF,
                                      const uno::Reference< XContentProvider >& xProvider,
                                      sal_Int32 nOpenMode,
                                      const uno::Sequence< beans::Property >& seq,
                                      const uno::Sequence< NumberedSortingInfo >& seqSort,
                                      URLParameter& aURLParameter,
                                      Databases* pDatabases )
    : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
      m_pDatabases( pDatabases ),
      m_aURLParameter( aURLParameter )
{
    Reference< XTransliteration > xTrans(
        xMSF->createInstance( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.Transliteration" )) ),
        UNO_QUERY );
    Locale aLocale( aURLParameter.get_language(),
                    rtl::OUString(),
                    rtl::OUString() );
    if(xTrans.is())
        xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
                           aLocale );

    // Access Lucene via XInvocation
    Reference< script::XInvocation > xInvocation(
        xMSF->createInstance( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.help.HelpSearch" )) ),
        UNO_QUERY );

    // Split the query at blanks; every entry of queryList holds one term.
    vector< vector< rtl::OUString > > queryList;
    {
        sal_Int32 idx;
        rtl::OUString query = m_aURLParameter.get_query();
        while( query.getLength() )
        {
            idx = query.indexOf( sal_Unicode( ' ' ) );
            if( idx == -1 )
                idx = query.getLength();

            // idx == 0 means a leading or doubled blank. Such an empty term
            // must not be searched for: it cannot match anything sensible
            // and would wipe out the intersection of the real terms.
            if( idx > 0 )
            {
                vector< rtl::OUString > currentQuery;
                rtl::OUString tmp(query.copy( 0,idx ));
                rtl::OUString toliterate = tmp;
                if(xTrans.is()) {
                    Sequence<sal_Int32> aSeq;
                    toliterate = xTrans->transliterate(
                        tmp,0,tmp.getLength(),aSeq);
                }
                currentQuery.push_back( toliterate );
                queryList.push_back( currentQuery );
            }

            // Continue behind the blank, or stop if nothing is left.
            int nCpy = 1 + idx;
            if( nCpy >= query.getLength() )
                query = rtl::OUString();
            else
                query = query.copy( 1 + idx );
        }
    }

    vector< rtl::OUString > aCompleteResultVector;
    if( xInvocation.is() )
    {
        rtl::OUString scope = m_aURLParameter.get_scope();
        bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
        sal_Int32 hitCount = m_aURLParameter.get_hitCount();

#ifdef LOGGING
        FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
#endif

        IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
        rtl::OUString idxDir;
        bool bExtension = false;
        int iDir = 0;

        // One hit list per successfully searched index folder. The lists are
        // held by value so nothing has to be freed by hand.
        vector< vector<HitItem> > aIndexFolderResultVectorVector;

        bool bTemporary;
        while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
        {
            vector<HitItem> aIndexFolderResultVector;

            try
            {
                set< rtl::OUString > aSet,aCurrent,aResultSet;

                int nQueryListSize = queryList.size();
                if( nQueryListSize > 1 )
                    hitCount = 2000;

                // Per-term hit lists, only needed when several terms have to
                // be intersected. Owned by value: if the search throws, they
                // are released during unwinding instead of being leaked.
                vector< vector<HitItem> > aQueryListResultVectorVector;
                if( nQueryListSize > 1 )
                    aQueryListResultVectorVector.resize( nQueryListSize );

                // The index path is the same for every term of this folder.
                rtl::OUString aSystemPath;
                osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );

                for( int i = 0; i < nQueryListSize; ++i )
                {
                    // A single term writes straight into the folder result.
                    vector<HitItem>* pQueryResultVector =
                        ( nQueryListSize > 1 ) ? &aQueryListResultVectorVector[i]
                                               : &aIndexFolderResultVector;

                    // A non-positive count would convert to a huge size_t and
                    // make reserve() throw std::length_error, which the catch
                    // below does not handle.
                    if( hitCount > 0 )
                        pQueryResultVector->reserve( hitCount );

                    int nParamCount = bCaptionsOnly ? 7 : 6;
                    Sequence<uno::Any> aParamsSeq( nParamCount );

                    aParamsSeq[0] = uno::makeAny( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "-lang" )) );
                    aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );

                    aParamsSeq[2] = uno::makeAny( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "-index" )) );
                    aParamsSeq[3] = uno::makeAny( aSystemPath );

                    aParamsSeq[4] = uno::makeAny( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "-query" )) );
                    ::rtl::OUString aNewQueryStr = queryList[i][0];
                    aParamsSeq[5] = uno::makeAny( aNewQueryStr );

                    if( bCaptionsOnly )
                        aParamsSeq[6] = uno::makeAny( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "-caption" )) );

                    Sequence< sal_Int16 > aOutParamIndex;
                    Sequence< uno::Any > aOutParam;

                    uno::Any aRet = xInvocation->invoke( rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "search" )),
                        aParamsSeq, aOutParamIndex, aOutParam );

                    // The scores are taken from the single out parameter, if
                    // it is present and holds a sequence of floats.
                    Sequence< float > aScoreSeq;
                    int nScoreCount = 0;
                    int nOutParamCount = aOutParam.getLength();
                    if( nOutParamCount == 1 )
                    {
                        const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
                        if( pScoreAnySeq[0] >>= aScoreSeq )
                            nScoreCount = aScoreSeq.getLength();
                    }

                    // The return value carries the hit URLs.
                    Sequence<rtl::OUString> aRetSeq;
                    if( aRet >>= aRetSeq )
                    {
                        if( nQueryListSize > 1 )
                            aSet.clear();

                        const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
                        int nCount = aRetSeq.getLength();
                        if( nCount > hitCount )
                            nCount = hitCount;
                        for( int j = 0 ; j < nCount ; ++j )
                        {
                            // Hits without a matching score entry get 0.
                            float fScore = 0.0;
                            if( j < nScoreCount )
                                fScore = aScoreSeq[j];

                            rtl::OUString aURL = pRetSeq[j];
                            pQueryResultVector->push_back( HitItem( aURL, fScore ) );
                            if( nQueryListSize > 1 )
                                aSet.insert( aURL );

#ifdef LOGGING
                            if( pFile )
                            {
                                rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
                                fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
                            }
#endif
                        }
                    }

                    // intersect: aResultSet keeps the URLs found for every
                    // term processed so far
                    if( nQueryListSize > 1 )
                    {
                        if( i == 0 )
                        {
                            aResultSet = aSet;
                        }
                        else
                        {
                            aCurrent = aResultSet;
                            aResultSet.clear();
                            set_intersection( aSet.begin(),aSet.end(),
                                              aCurrent.begin(),aCurrent.end(),
                                              inserter(aResultSet,aResultSet.begin()));
                        }
                    }
                }

                // Combine results in aIndexFolderResultVector
                if( nQueryListSize > 1 )
                {
                    for( int n = 0 ; n < nQueryListSize ; ++n )
                    {
                        const vector<HitItem>& rQueryResultVector = aQueryListResultVectorVector[n];

                        int nItemCount = rQueryResultVector.size();
                        for( int i = 0 ; i < nItemCount ; ++i )
                        {
                            const HitItem& rItem = rQueryResultVector[ i ];

                            // Only URLs that matched every term survive.
                            if( aResultSet.find( rItem.m_aURL ) == aResultSet.end() )
                                continue;

                            HitItem aItemCopy( rItem );
                            aItemCopy.m_fScore /= nQueryListSize;   // To get average score
                            if( n == 0 )
                            {
                                // Use first pass to create entry
                                aIndexFolderResultVector.push_back( aItemCopy );

#ifdef LOGGING
                                if( pFile )
                                {
                                    rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
                                    fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
                                }
#endif
                            }
                            else
                            {
                                // Find entry in vector and add this term's
                                // share of the score to it
                                int nCount = aIndexFolderResultVector.size();
                                for( int j = 0 ; j < nCount ; ++j )
                                {
                                    HitItem& rFindItem = aIndexFolderResultVector[ j ];
                                    if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
                                    {
#ifdef LOGGING
                                        if( pFile )
                                        {
                                            rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
                                            fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
                                                rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
                                        }
#endif
                                        rFindItem.m_fScore += aItemCopy.m_fScore;
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    // HitItem::operator< orders by descending score.
                    sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
                }

                // Only folders that were searched without an exception
                // contribute a hit list.
                aIndexFolderResultVectorVector.push_back( aIndexFolderResultVector );
            }
            catch( const Exception& )
            {
                // Best effort: a folder whose search fails is left out.
            }

            ++iDir;

            if( bTemporary )
                aIndexFolderIt.deleteTempIndexFolder( idxDir );

        }   // Iterator

        int nVectorCount = aIndexFolderResultVectorVector.size();

        // Read position within each folder's hit list, all starting at 0.
        vector< vector<HitItem>::size_type > aCurrentVectorIndex( aIndexFolderResultVectorVector.size(), 0 );

#ifdef LOGGING
        if( pFile )
        {
            for( int k = 0 ; k < nVectorCount ; ++k )
            {
                vector<HitItem>& rIndexFolderVector = aIndexFolderResultVectorVector[k];
                int nItemCount = rIndexFolderVector.size();

                fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );

                for( int i = 0 ; i < nItemCount ; ++i )
                {
                    const HitItem& rItem = rIndexFolderVector[ i ];
                    rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
                    fprintf( pFile, "    Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
                }
            }
        }
#endif

        // Merge the folder lists: repeatedly take the best-scored head
        // element among all lists.
        // NOTE(review): this relies on each list being ordered by descending
        // score; for a single term that order is whatever the search
        // returned - confirm.
        sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
        sal_Int32 nHitCount = 0;
        while( nHitCount < nTotalHitCount )
        {
            int iVectorWithBestScore = -1;
            float fBestScore = 0.0;
            for( int k = 0 ; k < nVectorCount ; ++k )
            {
                const vector<HitItem>& rIndexFolderVector = aIndexFolderResultVectorVector[k];
                if( aCurrentVectorIndex[k] < rIndexFolderVector.size() )
                {
                    const HitItem& rItem = rIndexFolderVector[ aCurrentVectorIndex[k] ];

                    // Always accept the first candidate. Comparing against
                    // an initial best score of 0 alone would never select
                    // hits whose score is 0 (e.g. when the search delivered
                    // no scores) and would end the merge with hits left.
                    if( iVectorWithBestScore == -1 || fBestScore < rItem.m_fScore )
                    {
                        fBestScore = rItem.m_fScore;
                        iVectorWithBestScore = k;
                    }
                }
            }

            if( iVectorWithBestScore == -1 )    // No item left at all
                break;

            const vector<HitItem>& rIndexFolderVector = aIndexFolderResultVectorVector[iVectorWithBestScore];
            const HitItem& rItem = rIndexFolderVector[ aCurrentVectorIndex[iVectorWithBestScore] ];

            aCurrentVectorIndex[iVectorWithBestScore]++;

            aCompleteResultVector.push_back( rItem.m_aURL );
            ++nHitCount;
        }

#ifdef LOGGING
        // fopen may have failed; fclose must not be called with NULL.
        if( pFile )
            fclose( pFile );
#endif
    }

    // Replace the leading "#HLP#" of every hit by the help URL scheme.
    // NOTE(review): assumes every URL delivered by the search is at least as
    // long as "#HLP#" - confirm against the search service.
    sal_Int32 replIdx = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "#HLP#" )).getLength();
    rtl::OUString replWith = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "vnd.sun.star.help://" ));

    int nResultCount = aCompleteResultVector.size();
    for( int r = 0 ; r < nResultCount ; ++r )
    {
        rtl::OUString aURL = aCompleteResultVector[r];
        rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
        m_aPath.push_back( aResultStr );
    }

    m_aItems.resize( m_aPath.size() );
    m_aIdents.resize( m_aPath.size() );

    Command aCommand;
    aCommand.Name = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "getPropertyValues" ));
    aCommand.Argument <<= m_sProperty;

    // m_nRow itself is the loop counter.
    // NOTE(review): presumably queryContent() uses m_nRow to pick the row -
    // confirm in ResultSetBase.
    for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
    {
        m_aPath[m_nRow] =
            m_aPath[m_nRow] +
            rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "?Language=" )) +
            m_aURLParameter.get_language() +
            rtl::OUString(RTL_CONSTASCII_USTRINGPARAM( "&System=" )) +
            m_aURLParameter.get_system();

        uno::Reference< XContent > content = queryContent();
        if( content.is() )
        {
            uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
            // The interface query may fail; do not dereference an empty
            // reference.
            if( cmd.is() )
                cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
        }
    }
    m_nRow = 0xffffffff;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */