687 lines
23 KiB
Java
687 lines
23 KiB
Java
/*************************************************************************
|
|
*
|
|
* OpenOffice.org - a multi-platform office productivity suite
|
|
*
|
|
* $RCSfile: HelpIndexer.java,v $
|
|
*
|
|
* $Revision: 1.17 $
|
|
*
|
|
* last change: $Author: vg $ $Date: 2006-03-16 11:17:04 $
|
|
*
|
|
* The Contents of this file are made available subject to
|
|
* the terms of GNU Lesser General Public License Version 2.1.
|
|
*
|
|
*
|
|
* GNU Lesser General Public License Version 2.1
|
|
* =============================================
|
|
* Copyright 2005 by Sun Microsystems, Inc.
|
|
* 901 San Antonio Road, Palo Alto, CA 94303, USA
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software Foundation.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
* MA 02111-1307 USA
|
|
*
|
|
************************************************************************/
|
|
|
|
/**************************************************************************
|
|
TODO
|
|
**************************************************************************
|
|
|
|
*************************************************************************/
|
|
|
|
package com.sun.star.help;
|
|
|
|
import java.io.*;
|
|
import java.net.URL;
|
|
import java.util.*;
|
|
|
|
import org.w3c.dom.*;
|
|
import org.xml.sax.*;
|
|
import javax.xml.parsers.*;
|
|
import com.jclark.xsl.sax.*;
|
|
|
|
import com.sun.xmlsearch.util.*;
|
|
import com.sun.xmlsearch.xml.indexer.*;
|
|
|
|
import com.sleepycat.db.*;
|
|
|
|
public class HelpIndexer {
|
|
|
|
/** URL stream handler factory passed to the constructor. */
private HelpURLStreamHandlerFactory _urlHandler = null;
/** Language identifier; used to build database/index paths and help URLs. */
private String _language = null;
/** Module identifier; used to build database/index paths and help URLs. */
private String _module = null;
/** System identifier passed to the constructor. */
private String _system = null;

/**
 * Creates an indexer for one language/module/system combination.
 *
 * @param urlHandler the URL stream handler factory to remember
 * @param language   the language identifier
 * @param module     the module identifier
 * @param system     the system identifier
 */
public HelpIndexer(
    HelpURLStreamHandlerFactory urlHandler,
    String language,
    String module,
    String system) {
    this._urlHandler = urlHandler;
    this._language = language;
    this._module = module;
    this._system = system;
}
|
|
|
|
private final class TagInfo {
|
|
String _tag, _id;
|
|
|
|
public TagInfo(String id, String tag) {
|
|
_tag = tag;
|
|
_id = id;
|
|
}
|
|
|
|
public String get_tag() {
|
|
return _tag;
|
|
}
|
|
|
|
public String get_id() {
|
|
return _id;
|
|
}
|
|
}
|
|
|
|
private final class DocInfo {
|
|
private String _url = null, _id = null;
|
|
|
|
private ArrayList _helptags = new ArrayList();
|
|
|
|
public void append(String id, String tag) {
|
|
_helptags.add(new TagInfo(id, tag));
|
|
}
|
|
|
|
public String getURL() {
|
|
return _url;
|
|
}
|
|
|
|
public void setURL(String url) {
|
|
_url = url;
|
|
}
|
|
|
|
public void setId(String id) {
|
|
_id = id;
|
|
}
|
|
|
|
public String getId() {
|
|
return _id;
|
|
}
|
|
|
|
public ArrayList getAppendices() {
|
|
return _helptags;
|
|
}
|
|
}
|
|
|
|
Hashtable _hashDocInfo = new Hashtable();
|
|
Hashtable _hashHelptext = new Hashtable();
|
|
|
|
private void schnitzel() {
|
|
// Determine the location of the database
|
|
String installDirectory = HelpDatabases.getInstallDirectory();
|
|
Db table = null;
|
|
|
|
try {
|
|
table = new Db(null, 0);
|
|
table.set_error_stream(System.err);
|
|
table.set_errpfx("indexing");
|
|
|
|
// Create indexDirectory, if not existent
|
|
String indexDirectory =
|
|
installDirectory
|
|
+ _language
|
|
+ File.separator
|
|
+ _module
|
|
+ ".idx";
|
|
File indexDir = new File(indexDirectory);
|
|
if (indexDir.exists() && indexDir.isFile())
|
|
indexDir.delete();
|
|
|
|
if (!indexDir.exists())
|
|
indexDir.mkdir();
|
|
|
|
indexDir = null;
|
|
|
|
// Initialize the indexBuilder
|
|
XmlIndexBuilder builder = new XmlIndexBuilder(indexDirectory);
|
|
|
|
String[] translations = { "vnd.sun.star.help://", "#HLP#" };
|
|
PrefixTranslator translator =
|
|
PrefixTranslator.makePrefixTranslator(translations);
|
|
builder.setPrefixTranslator(translator);
|
|
|
|
builder.clearIndex(); // Build index from scratch
|
|
builder.setTransformLocation(
|
|
installDirectory + "..\\StyleSheets\\Indexing\\");
|
|
builder.init("index");
|
|
|
|
// Determine and read the database
|
|
String fileName =
|
|
installDirectory + _language + File.separator + _module + ".db";
|
|
|
|
table.open(null,fileName, null, Db.DB_BTREE, Db.DB_RDONLY, 0644);
|
|
Dbc cursor = table.cursor(null, 0);
|
|
StringDbt key = new StringDbt();
|
|
StringDbt data = new StringDbt();
|
|
|
|
boolean first = true;
|
|
// Initially the cursor must allocate the necessary memory
|
|
key.set_flags(Db.DB_DBT_MALLOC);
|
|
data.set_flags(Db.DB_DBT_MALLOC);
|
|
while (Db.DB_NOTFOUND != cursor.get(key, data, Db.DB_NEXT)) {
|
|
try {
|
|
String keyStr = key.getString();
|
|
String dataStr = data.getFile();
|
|
String tagStr = data.getHash();
|
|
|
|
DocInfo info = (DocInfo) _hashDocInfo.get(dataStr);
|
|
|
|
if (info == null) {
|
|
info = new DocInfo();
|
|
_hashDocInfo.put(dataStr, info);
|
|
}
|
|
|
|
if (!tagStr.equals(""))
|
|
info.append(keyStr, tagStr);
|
|
else {
|
|
String url =
|
|
"vnd.sun.star.help://"
|
|
+ _module
|
|
+ "/"
|
|
+ keyStr
|
|
+ "?Language="
|
|
+ _language;
|
|
info.setURL(url);
|
|
info.setId(keyStr);
|
|
}
|
|
} catch (Exception e) {
|
|
}
|
|
if (first) {
|
|
key.set_flags(Db.DB_DBT_REALLOC);
|
|
data.set_flags(Db.DB_DBT_REALLOC);
|
|
first = false;
|
|
}
|
|
}
|
|
cursor.close();
|
|
table.close(0);
|
|
|
|
System.out.println("Indexing...");
|
|
Enumeration enumer = _hashDocInfo.elements();
|
|
int cut = 0;
|
|
while (enumer.hasMoreElements() && cut < 100000000) {
|
|
try {
|
|
DocInfo info = (DocInfo) enumer.nextElement();
|
|
String url = info.getURL();
|
|
if (url == null) {
|
|
System.out.println(
|
|
"<----------------------------------->");
|
|
System.out.println(
|
|
"big error: found helptext without URL");
|
|
System.out.println(
|
|
"<----------------------------------->");
|
|
continue;
|
|
}
|
|
cut++;
|
|
|
|
HelpURLStreamHandlerFactory.setMode(null);
|
|
byte[] embResolved = getSourceDocument(url);
|
|
// InputSource in = new InputSource( new ByteArrayInputStream( embResolved ) );
|
|
|
|
ByteArrayInputStream inbyte =
|
|
new ByteArrayInputStream(embResolved);
|
|
InputStreamReader inread;
|
|
try {
|
|
inread = new InputStreamReader(inbyte, "UTF8");
|
|
} catch (UnsupportedEncodingException e) {
|
|
inread = new InputStreamReader(inbyte);
|
|
}
|
|
InputSource in = new InputSource(inread);
|
|
|
|
in.setEncoding("UTF8");
|
|
Document docResolved = null;
|
|
try {
|
|
DocumentBuilderFactory dFactory = DocumentBuilderFactory.newInstance();
|
|
dFactory.setValidating(false);
|
|
DocumentBuilder dBuilder = dFactory.newDocumentBuilder();
|
|
docResolved = dBuilder.parse(in);
|
|
} catch (Exception e) {
|
|
if (docResolved == null)
|
|
System.err.println("Nullpointer");
|
|
|
|
System.err.println(e.getMessage());
|
|
}
|
|
|
|
String id = info.getId();
|
|
if (id == null)
|
|
System.out.println("Found tag without valid id");
|
|
else
|
|
addKeywords(docResolved, info.getId());
|
|
|
|
Object[] attrList = info.getAppendices().toArray();
|
|
for (int i = 0; i < attrList.length; ++i) {
|
|
TagInfo tag = (TagInfo) attrList[i];
|
|
Node node = extractHelptext(docResolved, tag.get_tag());
|
|
if (node != null) {
|
|
String text = dump(node);
|
|
_hashHelptext.put(tag.get_id(), text);
|
|
}
|
|
}
|
|
HelpURLStreamHandlerFactory.setMode(embResolved);
|
|
|
|
int idx = url.indexOf('?');
|
|
if (idx != -1)
|
|
url = url.substring(0, idx);
|
|
System.out.println(url);
|
|
builder.indexDocument(new URL(url), "");
|
|
} catch (Exception e) {
|
|
System.err.println(e.getMessage());
|
|
}
|
|
}
|
|
|
|
try {
|
|
dumpHelptext();
|
|
_keywords.dump();
|
|
builder.close();
|
|
} catch (Exception e) {
|
|
System.err.println(e.getMessage());
|
|
}
|
|
} catch (DbRunRecoveryException e) {
|
|
System.out.println("Not able to create cursor: " + e.getMessage());
|
|
System.exit(1);
|
|
} catch (DbException e) {
|
|
System.out.println("Error initializing database");
|
|
System.exit(1);
|
|
} catch (FileNotFoundException fnfe) {
|
|
System.err.println("HelpAccess: " + fnfe.getMessage());
|
|
System.exit(1);
|
|
} catch (java.lang.Exception e) {
|
|
System.out.println("any other exception" + e.getMessage());
|
|
}
|
|
}
|
|
|
|
/**
 * Iterates over a DOM subtree using an explicit stack. A node is returned
 * only after all of its children have been returned, children being visited
 * in document order; the start node itself comes last.
 */
public final class NodeIterator {
    /** Stack entry: a node and whether its children were already pushed. */
    private final class StackElement {
        public boolean _isDone;
        public Node _node;

        StackElement(Node node) {
            this._node = node;
            this._isDone = false;
        }
    }

    private Stack stack = new Stack();

    /** @param node the root of the subtree to iterate over */
    public NodeIterator(Node node) {
        stack.push(new StackElement(node));
    }

    /**
     * Marks the top entry as expanded and pushes its children in reverse
     * order, so that the first child ends up on top of the stack.
     */
    void change() {
        StackElement current = (StackElement) stack.peek();
        current._isDone = true;
        NodeList children = current._node.getChildNodes();
        int index = children.getLength();
        while (--index >= 0) {
            stack.push(new StackElement(children.item(index)));
        }
    }

    /**
     * @return the next node of the traversal, or null once the stack is empty
     */
    public Node next() {
        if (stack.empty()) {
            return null;
        }
        // Expand until the entry on top has had its children pushed.
        for (;;) {
            StackElement top = (StackElement) stack.peek();
            if (top._isDone) {
                break;
            }
            change();
        }
        StackElement finished = (StackElement) stack.pop();
        return finished._node;
    }
}
|
|
|
|
/**
|
|
* Given a dom of the document, the next Help:Helptext following Help:HelpID value="tag" is extracted
|
|
*/
|
|
|
|
private Node extractHelptext(Node node, String tag) {
|
|
boolean found = false;
|
|
Node test;
|
|
NodeIterator it = new NodeIterator(node);
|
|
while ((test = it.next()) != null) {
|
|
if (!found
|
|
&& test.getNodeName().equals("help:help-id")
|
|
&& ((Element) test).getAttribute("value").equals(tag))
|
|
found = true;
|
|
if (found && test.getNodeName().equals("help:help-text"))
|
|
return test;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public final class Keywords {
|
|
private Hashtable _hash = new Hashtable();
|
|
|
|
class Data {
|
|
int pos = 0;
|
|
String[] _idList = new String[5];
|
|
|
|
void append(String id) {
|
|
if (pos == _idList.length) {
|
|
String[] buff = _idList;
|
|
_idList = new String[pos + 5];
|
|
for (int i = 0; i < buff.length; ++i)
|
|
_idList[i] = buff[i];
|
|
buff = null;
|
|
}
|
|
_idList[pos++] = id;
|
|
}
|
|
|
|
int getLength() {
|
|
return pos;
|
|
}
|
|
|
|
String getString() {
|
|
String ret = new String();
|
|
for (int i = 0; i < pos; ++i) {
|
|
ret += (_idList[i] + ";");
|
|
}
|
|
return ret;
|
|
}
|
|
} // end class data
|
|
|
|
public void insert(String key, String id) {
|
|
Data data = (Data) _hash.get(key);
|
|
if (data == null) {
|
|
data = new Data();
|
|
_hash.put(key, data);
|
|
}
|
|
data.append(id);
|
|
}
|
|
|
|
void dump() {
|
|
Enumeration enumer = _hash.keys();
|
|
int j = 0;
|
|
String[] list = new String[_hash.size()];
|
|
while (enumer.hasMoreElements()) {
|
|
list[j++] = (String) enumer.nextElement();
|
|
}
|
|
|
|
Db table;
|
|
try {
|
|
table = new Db(null, 0);
|
|
|
|
String fileName =
|
|
HelpDatabases.getInstallDirectory()
|
|
+ _language
|
|
+ File.separator
|
|
+ _module
|
|
+ ".key";
|
|
|
|
table.open(null,fileName, null, Db.DB_BTREE, Db.DB_CREATE, 0644);
|
|
|
|
for (int i = 0; i < list.length; ++i) {
|
|
Data data = (Data) _hash.get(list[i]);
|
|
StringDbt key = new StringDbt(list[i]);
|
|
StringDbt value = new StringDbt(data.getString());
|
|
table.put(null, key, value, 0);
|
|
}
|
|
table.close(0);
|
|
} catch (Exception e) {
|
|
System.out.println("error writing keydata");
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
void dumpHelptext() {
|
|
Enumeration enumer = _hashHelptext.keys();
|
|
int j = 0;
|
|
String[] list = new String[_hashHelptext.size()];
|
|
while (enumer.hasMoreElements()) {
|
|
list[j++] = (String) enumer.nextElement();
|
|
}
|
|
|
|
Db table;
|
|
try {
|
|
table = new Db(null, 0);
|
|
|
|
String fileName =
|
|
HelpDatabases.getInstallDirectory()
|
|
+ _language
|
|
+ File.separator
|
|
+ _module
|
|
+ ".ht";
|
|
|
|
table.open(null,fileName, null, Db.DB_BTREE, Db.DB_CREATE, 0644);
|
|
|
|
for (int i = 0; i < list.length; ++i) {
|
|
String data = (String) _hashHelptext.get(list[i]);
|
|
StringDbt key = new StringDbt(list[i]);
|
|
StringDbt value = new StringDbt(data);
|
|
table.put(null, key, value, 0);
|
|
}
|
|
table.close(0);
|
|
} catch (Exception e) {
|
|
System.out.println("error writing keydata");
|
|
}
|
|
}
|
|
|
|
Keywords _keywords = new Keywords();
|
|
|
|
private void addKeywords(Node node, String id) {
|
|
Node test;
|
|
NodeIterator it = new NodeIterator(node);
|
|
while ((test = it.next()) != null) {
|
|
if (test.getNodeName().equals("help:key-word")) {
|
|
Element element = (Element) test;
|
|
|
|
String isEmbedded = element.getAttribute("embedded");
|
|
|
|
if (isEmbedded != null && isEmbedded.equals("false")) {
|
|
String keyword = element.getAttribute("value");
|
|
String getJump = element.getAttribute("tag");
|
|
|
|
if (!keyword.equals("") && !id.equals(""))
|
|
_keywords.insert(keyword, id + "#" + getJump);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns a textual representation of
|
|
* the node
|
|
*/
|
|
|
|
private String dump(Node node) {
|
|
String app = new String();
|
|
if (node.hasChildNodes()) {
|
|
NodeList list = node.getChildNodes();
|
|
for (int i = 0; i < list.getLength(); ++i)
|
|
app += dump(list.item(i));
|
|
}
|
|
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
|
/*
|
|
String start = "<" + node.getNodeName();
|
|
NamedNodeMap attr = node.getAttributes();
|
|
for( int j = 0; j < attr.getLength(); ++j )
|
|
{
|
|
start += ( " " + ((Attr)attr.item(j)).getName()+"=\"" + ((Attr)attr.item(j)).getValue() + "\"");
|
|
}
|
|
start += ">";
|
|
String end = "</" + node.getNodeName() + ">";
|
|
|
|
return start + app + end;
|
|
*/
|
|
} else if (node.getNodeType() == Node.TEXT_NODE) {
|
|
return ((Text) node).toString();
|
|
}
|
|
return app;
|
|
}
|
|
|
|
// This is a configurable class, which capsulates the parser initialization stuff and all this things
|
|
|
|
public static final class ParseStuff {
|
|
private final XSLProcessor _processor;
|
|
private final OutputMethodHandlerImpl _output;
|
|
|
|
private ParseStuff() {
|
|
_processor = new XSLProcessorImpl();
|
|
// Determine the parser
|
|
SAXParserFactory spf = SAXParserFactory.newInstance();
|
|
spf.setValidating(false);
|
|
try {
|
|
_processor.setParser(spf.newSAXParser().getParser());
|
|
} catch (java.lang.Exception e) {
|
|
System.err.println(
|
|
"<!-- NO HELP AVAILABLE: no parser found -->");
|
|
System.exit(1);
|
|
}
|
|
|
|
// Determine the OutputMethodHandler
|
|
_output = new OutputMethodHandlerImpl(_processor);
|
|
_processor.setOutputMethodHandler(_output);
|
|
}
|
|
|
|
// Loading from a URL
|
|
public ParseStuff(URL url) {
|
|
this();
|
|
try {
|
|
_processor.loadStylesheet(
|
|
new InputSource(url.toExternalForm()));
|
|
} catch (SAXException e) {
|
|
System.err.println("<!-- Syntactic error in stylesheet -->");
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
} catch (java.io.IOException e) {
|
|
System.err.println("<!-- Style sheet not found -->");
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
}
|
|
// TODO System.out.println( "Using stylesheet: " + url.toExternalForm() );
|
|
}
|
|
|
|
// Loading from a file
|
|
public ParseStuff(String styleSheet) {
|
|
this();
|
|
try {
|
|
_processor.loadStylesheet(
|
|
new InputSource(new FileInputStream(styleSheet)));
|
|
} catch (SAXException e) {
|
|
System.err.println("<!-- Syntactic error in stylesheet -->");
|
|
System.exit(1);
|
|
} catch (java.io.IOException e) {
|
|
System.err.println("<!-- Style sheet not found: -->");
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
}
|
|
// System.out.println( "Using stylesheet: " + styleSheet );
|
|
}
|
|
|
|
public byte[] parse(String url) {
|
|
InputSource _in = new InputSource(url);
|
|
_in.setEncoding("UTF-8");
|
|
|
|
HelpOutputStream _out = new HelpOutputStream();
|
|
try {
|
|
// OutputStreamDestination _dest =
|
|
// new OutputStreamDestination( _out );
|
|
HelpProvider.ProviderDestination _dest =
|
|
new HelpProvider.ProviderDestination(_out, "UTF-8");
|
|
synchronized (this) {
|
|
_output.setDestination(_dest);
|
|
_processor.parse(_in);
|
|
_out.flush();
|
|
}
|
|
} catch ( SAXParseException e ){
|
|
e.printStackTrace();
|
|
System.err.println("url: " + url);
|
|
System.err.println(e.getMessage());
|
|
System.err.println("Line: "+e.getLineNumber()+" Column: "+e.getColumnNumber()+" public ID: "+e.getPublicId()+" System ID: "+e.getSystemId() );
|
|
System.exit(1);
|
|
|
|
}
|
|
catch (Exception e) {
|
|
e.printStackTrace();
|
|
System.err.println("url: " + url);
|
|
System.err.println(e.getMessage());
|
|
System.exit(1);
|
|
}
|
|
|
|
return _out.getBigBuffer();
|
|
}
|
|
|
|
public void setParameter(String key, Object value) {
|
|
_processor.setParameter(key, value);
|
|
}
|
|
} // end class ParseStuff
|
|
|
|
/**
|
|
* Returns the embedding resolved document
|
|
*/
|
|
|
|
ParseStuff _stuff = null;
|
|
|
|
byte[] getSourceDocument(String url) {
|
|
// Initialize
|
|
if (_stuff == null) {
|
|
String styleSheet =
|
|
HelpDatabases.getInstallDirectory()
|
|
+ "..\\StyleSheets\\Indexing\\ResEmb.xsl";
|
|
_stuff = new ParseStuff(styleSheet);
|
|
|
|
// Setting the parameters
|
|
_stuff.setParameter("Language", _language);
|
|
_stuff.setParameter("Database", _module);
|
|
}
|
|
|
|
// and parse
|
|
return _stuff.parse(url);
|
|
}
|
|
|
|
// Now has to be settled up
|
|
public static void main(String[] args) throws Exception {
|
|
System.setProperty("XMLSEARCH", HelpDatabases.getInstallDirectory());
|
|
if (args.length != 6) {
|
|
System.out.println(
|
|
"Usage example: main -language de -module swriter -system WIN");
|
|
System.exit(1);
|
|
}
|
|
|
|
String language = null, module = null, system = null;
|
|
|
|
for (int i = 0; i < 5; i += 2) {
|
|
if (args[i].trim().equals("-language"))
|
|
language = args[i + 1];
|
|
else if (args[i].trim().equals("-module"))
|
|
module = args[i + 1];
|
|
else if (args[i].trim().equals("-system"))
|
|
system = args[i + 1];
|
|
}
|
|
|
|
if (language == null || module == null || system == null) {
|
|
System.out.println(
|
|
"Usage example: main -language de -module swriter -system WIN");
|
|
System.exit(1);
|
|
} else {
|
|
System.out.println(" Configuring for \"system\" = " + system);
|
|
System.out.println(" \"module\" = " + module);
|
|
System.out.println(" \"language\" = " + language);
|
|
}
|
|
|
|
try {
|
|
String urlmode = HelpDatabases.getURLMode();
|
|
HelpURLStreamHandlerFactory urlHandler =
|
|
new HelpURLStreamHandlerFactory(urlmode);
|
|
URL.setURLStreamHandlerFactory(urlHandler);
|
|
|
|
HelpIndexer helpIndexer =
|
|
new HelpIndexer(urlHandler, language, module, system);
|
|
|
|
// helpIndexer.indexDatabase();
|
|
helpIndexer.schnitzel();
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|