2012-02-15 12:55:11 +00:00
|
|
|
#include <l10ntools/HelpIndexer.hxx>
|
2012-02-19 13:49:08 +01:00
|
|
|
#include "LuceneHelper.hxx"
|
2012-02-14 20:19:37 +01:00
|
|
|
|
|
|
|
#define TODO
|
|
|
|
|
2012-02-14 19:31:18 +01:00
|
|
|
#ifdef TODO
|
|
|
|
#include <CLucene/analysis/LanguageBasedAnalyzer.h>
|
|
|
|
#endif
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
#include <rtl/string.hxx>
|
|
|
|
|
2012-02-14 19:31:18 +01:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
|
|
using namespace lucene::document;
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
// Creates an indexer for one help module.
//   lang       - language code; compared against "ja" in indexDocuments()
//   module     - module name, used when building the "#HLP#" document path
//   captionDir - directory scanned for caption files
//   contentDir - directory scanned for content files
//   indexDir   - directory handed to the index writer
// The error message and the set of discovered files start out empty.
HelpIndexer::HelpIndexer(rtl::OUString const &lang,
                         rtl::OUString const &module,
                         rtl::OUString const &captionDir,
                         rtl::OUString const &contentDir,
                         rtl::OUString const &indexDir)
    : d_lang(lang)
    , d_module(module)
    , d_captionDir(captionDir)
    , d_contentDir(contentDir)
    , d_indexDir(indexDir)
    , d_error()
    , d_files()
{
}
|
2012-02-14 19:31:18 +01:00
|
|
|
|
|
|
|
bool HelpIndexer::indexDocuments() {
|
|
|
|
if (!scanForFiles()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef TODO
|
|
|
|
// Construct the analyzer appropriate for the given language
|
|
|
|
lucene::analysis::Analyzer *analyzer = (
|
2012-02-14 21:56:08 +01:00
|
|
|
d_lang.compareToAscii("ja") == 0 ?
|
2012-02-14 19:31:18 +01:00
|
|
|
(lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
|
|
|
|
(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
|
|
|
|
#else
|
|
|
|
lucene::analysis::Analyzer *analyzer = (
|
|
|
|
(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
|
|
|
|
#endif
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
rtl::OString indexDirStr;
|
|
|
|
d_indexDir.convertToString(&indexDirStr, RTL_TEXTENCODING_ASCII_US, 0);
|
|
|
|
lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer, true);
|
2012-02-14 19:31:18 +01:00
|
|
|
|
|
|
|
// Index the identified help files
|
|
|
|
Document doc;
|
2012-02-14 21:56:08 +01:00
|
|
|
for (std::set<rtl::OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
|
2012-02-14 19:31:18 +01:00
|
|
|
doc.clear();
|
|
|
|
if (!helpDocument(*i, &doc)) {
|
|
|
|
delete analyzer;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
writer.addDocument(&doc);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Optimize the index
|
|
|
|
writer.optimize();
|
|
|
|
|
|
|
|
delete analyzer;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
// Gives read-only access to the stored error message (d_error).
rtl::OUString const & HelpIndexer::getErrorMessage()
{
    return this->d_error;
}
|
|
|
|
|
|
|
|
bool HelpIndexer::scanForFiles() {
|
|
|
|
if (!scanForFiles(d_contentDir)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (!scanForFiles(d_captionDir)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
// Adds the name of every regular file found directly inside `path` to
// d_files (names only, without the directory part; no recursion).
//
// Returns true when the directory could be opened and was read.
// Returns false when the directory cannot be opened; in that case d_error
// is set to "Error reading directory <path>: <system error text>".
bool HelpIndexer::scanForFiles(rtl::OUString const & path) {
    rtl::OString pathStr;
    path.convertToString(&pathStr, RTL_TEXTENCODING_ASCII_US, 0);

    DIR *dir = opendir(pathStr.getStr());
    if (dir == 0) {
        // Capture errno right away: the string operations below may call
        // into the C library and overwrite it.
        const int openError = errno;
        d_error = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Error reading directory ")) + path +
            rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(": ")) +
            rtl::OUString::createFromAscii(strerror(openError));
        // Report the failure so that callers checking the result see it.
        return false;
    }

    struct dirent *ent;
    struct stat info;
    while ((ent = readdir(dir)) != 0) {
        // stat() needs the entry's full path, not just its name.
        rtl::OString entPath(pathStr);
        entPath += rtl::OString(RTL_CONSTASCII_STRINGPARAM("/")) + rtl::OString(ent->d_name);
        // Entries that cannot be stat'ed or are not regular files are skipped.
        if (stat(entPath.getStr(), &info) == 0 && S_ISREG(info.st_mode)) {
            d_files.insert(rtl::OUString::createFromAscii(ent->d_name));
        }
    }

    closedir(dir);

    return true;
}
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
// Fills `doc` with the three fields that describe one help file:
//   "path"    - "#HLP#" + module + "/" + fileName, stored and indexed untokenized
//   "caption" - reader over <captionDir>/<fileName>, tokenized, not stored
//   "content" - reader over <contentDir>/<fileName>, tokenized, not stored
// Always returns true.
bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) {
    rtl::OUString const slash(RTL_CONSTASCII_USTRINGPARAM("/"));

    // Work out all three locations up front; this is pure string building.
    rtl::OUString const helpPath =
        rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) + d_module + slash + fileName;
    rtl::OUString const captionFile = d_captionDir + slash + fileName;
    rtl::OUString const contentFile = d_contentDir + slash + fileName;

    // The help path goes in as an indexed, untokenized, stored field.
    std::vector<TCHAR> helpPathChars = OUStringToTCHARVec(helpPath);
    doc->add(*new Field(_T("path"), &helpPathChars[0],
                        Field::STORE_YES | Field::INDEX_UNTOKENIZED));

    // Caption first, then content - both tokenized and not stored.
    // FIXME: does the Document take responsibility for the readers created
    // by helpFileReader(), or do they have to be freed somewhere?
    doc->add(*new Field(_T("caption"), helpFileReader(captionFile),
                        Field::STORE_NO | Field::INDEX_TOKENIZED));
    doc->add(*new Field(_T("content"), helpFileReader(contentFile),
                        Field::STORE_NO | Field::INDEX_TOKENIZED));

    return true;
}
|
|
|
|
|
2012-02-14 21:56:08 +01:00
|
|
|
// Creates a reader for the help file at `path`.
// If the file is readable (access() with R_OK succeeds) the result is a
// FileReader opened with the "UTF-8" encoding name; otherwise it is a
// StringReader over an empty string. The reader is allocated with `new`
// and returned as a raw pointer.
lucene::util::Reader *HelpIndexer::helpFileReader(rtl::OUString const & path) {
    rtl::OString asciiPath;
    path.convertToString(&asciiPath, RTL_TEXTENCODING_ASCII_US, 0);

    const char *const fileName = asciiPath.getStr();
    if (access(fileName, R_OK) != 0) {
        // Unreadable or missing file: hand back an empty reader instead.
        return new lucene::util::StringReader(L"");
    }
    return new lucene::util::FileReader(fileName, "UTF-8");
}
|