/************************************************************************* * * $RCSfile: HelpIndexer.java,v $ * * $Revision: 1.11 $ * * last change: $Author: kz $ $Date: 2004-08-30 17:24:35 $ * * The Contents of this file are made available subject to tUhe terms of * either of the following licenses * * - GNU Lesser General Public License Version 2.1 * - Sun Industry Standards Source License Version 1.1 * * Sun Microsystems Inc., October, 2000 * * GNU Lesser General Public License Version 2.1 * ============================================= * Copyright 2000 by Sun Microsystems, Inc. * 901 San Antonio Road, Palo Alto, CA 94303, USA * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * * * Sun Industry Standards Source License Version 1.1 * ================================================= * The contents of this file are subject to the Sun Industry Standards * Source License Version 1.1 (the "License"); You may not use this file * except in compliance with the License. You may obtain a copy of the * License at http://www.openoffice.org/license.html. * * Software provided under this License is provided on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. * See the License for the specific provisions governing your rights and * obligations concerning the Software. * * The Initial Developer of the Original Code is: Sun Microsystems, Inc. * * Copyright: 2000 by Sun Microsystems, Inc. * * All Rights Reserved. * * Contributor(s): _______________________________________ * * ************************************************************************/ /************************************************************************** TODO ************************************************************************** *************************************************************************/ package com.sun.star.help; import java.io.*; import java.net.URL; import java.util.*; import org.w3c.dom.*; import com.sun.xml.tree.*; import org.xml.sax.*; import javax.xml.parsers.*; import com.jclark.xsl.sax.*; import com.sun.xmlsearch.util.*; import com.sun.xmlsearch.xml.indexer.*; import com.sleepycat.db.*; import com.sun.star.help.HelpURLStreamHandlerFactory; public class HelpIndexer { private HelpURLStreamHandlerFactory _urlHandler = null; private String _language = null, _module = null, _system = null; public HelpIndexer( HelpURLStreamHandlerFactory urlHandler, String language, String module, String system) { _urlHandler = urlHandler; _system = system; _language = language; _module = module; } private final class TagInfo { String _tag, _id; public TagInfo(String id, String tag) { _tag = tag; _id = id; } public String get_tag() { return _tag; } public String get_id() { return _id; } } private final class DocInfo { private String _url = null, _id = null; private ArrayList _helptags = new ArrayList(); public void append(String id, String tag) { _helptags.add(new TagInfo(id, tag)); } public String getURL() { return _url; } public void setURL(String url) { _url = url; } public void setId(String id) { _id = id; } public String getId() { return _id; } public ArrayList getAppendices() { return _helptags; } } Hashtable _hashDocInfo = new Hashtable(); Hashtable _hashHelptext = new Hashtable(); private void schnitzel() { // Determine the location of the database String installDirectory = HelpDatabases.getInstallDirectory(); Db table = null; try { table = new Db(null, 0); table.set_error_stream(System.err); table.set_errpfx("indexing"); // Create indexDirectory, if not existent String indexDirectory = installDirectory + _language + File.separator + _module + ".idx"; File indexDir = new File(indexDirectory); if (indexDir.exists() && indexDir.isFile()) indexDir.delete(); if (!indexDir.exists()) indexDir.mkdir(); indexDir = null; // Initialize the indexBuilder XmlIndexBuilder builder = new XmlIndexBuilder(indexDirectory); String[] translations = { "vnd.sun.star.help://", "#HLP#" }; PrefixTranslator translator = PrefixTranslator.makePrefixTranslator(translations); builder.setPrefixTranslator(translator); builder.clearIndex(); // Build index from scratch builder.setTransformLocation( installDirectory + "..\\StyleSheets\\Indexing\\"); builder.init("index"); // Determine and read the database String fileName = installDirectory + _language + File.separator + _module + ".db"; table.open(fileName, null, Db.DB_BTREE, Db.DB_RDONLY, 0644); Dbc cursor = table.cursor(null, 0); StringDbt key = new StringDbt(); StringDbt data = new StringDbt(); boolean first = true; // Initially the cursor must allocate the necessary memory key.set_flags(Db.DB_DBT_MALLOC); data.set_flags(Db.DB_DBT_MALLOC); while (Db.DB_NOTFOUND != cursor.get(key, data, Db.DB_NEXT)) { try { String keyStr = key.getString(); String dataStr = data.getFile(); String tagStr = data.getHash(); DocInfo info = (DocInfo) _hashDocInfo.get(dataStr); if (info == null) { info = new DocInfo(); _hashDocInfo.put(dataStr, info); } if (!tagStr.equals("")) info.append(keyStr, tagStr); else { String url = "vnd.sun.star.help://" + _module + "/" + keyStr + "?Language=" + _language; info.setURL(url); info.setId(keyStr); } } catch (Exception e) { } if (first) { key.set_flags(Db.DB_DBT_REALLOC); data.set_flags(Db.DB_DBT_REALLOC); first = false; } } cursor.close(); table.close(0); System.out.println("Indexing..."); Enumeration enum = _hashDocInfo.elements(); int cut = 0; while (enum.hasMoreElements() && cut < 100000000) { try { DocInfo info = (DocInfo) enum.nextElement(); String url = info.getURL(); if (url == null) { System.out.println( "<----------------------------------->"); System.out.println( "big error: found helptext without URL"); System.out.println( "<----------------------------------->"); continue; } cut++; HelpURLStreamHandlerFactory.setMode(null); byte[] embResolved = getSourceDocument(url); // InputSource in = new InputSource( new ByteArrayInputStream( embResolved ) ); ByteArrayInputStream inbyte = new ByteArrayInputStream(embResolved); InputStreamReader inread; try { inread = new InputStreamReader(inbyte, "UTF8"); } catch (UnsupportedEncodingException e) { inread = new InputStreamReader(inbyte); } InputSource in = new InputSource(inread); in.setEncoding("UTF8"); Document docResolved = null; try { docResolved = XmlDocument.createXmlDocument(in, false); } catch (Exception e) { if (docResolved == null) System.err.println("Nullpointer"); System.err.println(e.getMessage()); } String id = info.getId(); if (id == null) System.out.println("Found tag without valid id"); else addKeywords(docResolved, info.getId()); Object[] attrList = info.getAppendices().toArray(); for (int i = 0; i < attrList.length; ++i) { TagInfo tag = (TagInfo) attrList[i]; Node node = extractHelptext(docResolved, tag.get_tag()); if (node != null) { String text = dump(node); _hashHelptext.put(tag.get_id(), text); } } HelpURLStreamHandlerFactory.setMode(embResolved); int idx = url.indexOf('?'); if (idx != -1) url = url.substring(0, idx); System.out.println(url); builder.indexDocument(new URL(url), ""); } catch (Exception e) { System.err.println(e.getMessage()); } } try { dumpHelptext(); _keywords.dump(); builder.close(); } catch (Exception e) { System.err.println(e.getMessage()); } } catch (DbRunRecoveryException e) { System.out.println("Not able to create cursor: " + e.getMessage()); System.exit(1); } catch (DbException e) { System.out.println("Error initializing database"); System.exit(1); } catch (FileNotFoundException fnfe) { System.err.println("HelpAccess: " + fnfe.getMessage()); System.exit(1); } catch (java.lang.Exception e) { System.out.println("any other exception" + e.getMessage()); } } public final class NodeIterator { private final class StackElement { public boolean _isDone; public Node _node; StackElement(Node node) { _isDone = false; _node = node; } } private Stack stack = new Stack(); public NodeIterator(Node node) { stack.push(new StackElement(node)); } void change() { ((StackElement) (stack.peek()))._isDone = true; NodeList top = ((StackElement) (stack.peek()))._node.getChildNodes(); for (int i = top.getLength() - 1; i >= 0; --i) stack.push(new StackElement(top.item(i))); } public Node next() { if (stack.empty()) return null; while (!((StackElement) (stack.peek()))._isDone) change(); return ((StackElement) stack.pop())._node; } } /** * Given a dom of the document, the next Help:Helptext following Help:HelpID value="tag" is extracted */ private Node extractHelptext(Node node, String tag) { boolean found = false; Node test; NodeIterator it = new NodeIterator(node); while ((test = it.next()) != null) { if (!found && test.getNodeName().equals("help:help-id") && ((Element) test).getAttribute("value").equals(tag)) found = true; if (found && test.getNodeName().equals("help:help-text")) return test; } return null; } public final class Keywords { private Hashtable _hash = new Hashtable(); class Data { int pos = 0; String[] _idList = new String[5]; void append(String id) { if (pos == _idList.length) { String[] buff = _idList; _idList = new String[pos + 5]; for (int i = 0; i < buff.length; ++i) _idList[i] = buff[i]; buff = null; } _idList[pos++] = id; } int getLength() { return pos; } String getString() { String ret = new String(); for (int i = 0; i < pos; ++i) { ret += (_idList[i] + ";"); } return ret; } } // end class data public void insert(String key, String id) { Data data = (Data) _hash.get(key); if (data == null) { data = new Data(); _hash.put(key, data); } data.append(id); } void dump() { Enumeration enum = _hash.keys(); int j = 0; String[] list = new String[_hash.size()]; while (enum.hasMoreElements()) { list[j++] = (String) enum.nextElement(); } Db table; try { table = new Db(null, 0); String fileName = HelpDatabases.getInstallDirectory() + _language + File.separator + _module + ".key"; table.open(fileName, null, Db.DB_BTREE, Db.DB_CREATE, 0644); for (int i = 0; i < list.length; ++i) { Data data = (Data) _hash.get(list[i]); StringDbt key = new StringDbt(list[i]); StringDbt value = new StringDbt(data.getString()); table.put(null, key, value, 0); } table.close(0); } catch (Exception e) { System.out.println("error writing keydata"); } } } void dumpHelptext() { Enumeration enum = _hashHelptext.keys(); int j = 0; String[] list = new String[_hashHelptext.size()]; while (enum.hasMoreElements()) { list[j++] = (String) enum.nextElement(); } Db table; try { table = new Db(null, 0); String fileName = HelpDatabases.getInstallDirectory() + _language + File.separator + _module + ".ht"; table.open(fileName, null, Db.DB_BTREE, Db.DB_CREATE, 0644); for (int i = 0; i < list.length; ++i) { String data = (String) _hashHelptext.get(list[i]); StringDbt key = new StringDbt(list[i]); StringDbt value = new StringDbt(data); table.put(null, key, value, 0); } table.close(0); } catch (Exception e) { System.out.println("error writing keydata"); } } Keywords _keywords = new Keywords(); private void addKeywords(Node node, String id) { Node test; NodeIterator it = new NodeIterator(node); while ((test = it.next()) != null) { if (test.getNodeName().equals("help:key-word")) { Element element = (Element) test; String isEmbedded = element.getAttribute("embedded"); if (isEmbedded != null && isEmbedded.equals("false")) { String keyword = element.getAttribute("value"); String getJump = element.getAttribute("tag"); if (!keyword.equals("") && !id.equals("")) _keywords.insert(keyword, id + "#" + getJump); } } } } /** * Returns a textual representation of * the node */ private String dump(Node node) { String app = new String(); if (node.hasChildNodes()) { NodeList list = node.getChildNodes(); for (int i = 0; i < list.getLength(); ++i) app += dump(list.item(i)); } if (node.getNodeType() == Node.ELEMENT_NODE) { /* String start = "<" + node.getNodeName(); NamedNodeMap attr = node.getAttributes(); for( int j = 0; j < attr.getLength(); ++j ) { start += ( " " + ((Attr)attr.item(j)).getName()+"=\"" + ((Attr)attr.item(j)).getValue() + "\""); } start += ">"; String end = ""; return start + app + end; */ } else if (node.getNodeType() == Node.TEXT_NODE) { return ((Text) node).toString(); } return app; } // This is a configurable class, which capsulates the parser initialization stuff and all this things public static final class ParseStuff { private final XSLProcessor _processor; private final OutputMethodHandlerImpl _output; private ParseStuff() { _processor = new XSLProcessorImpl(); // Determine the parser SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setValidating(false); try { _processor.setParser(spf.newSAXParser().getParser()); } catch (java.lang.Exception e) { System.err.println( ""); System.exit(1); } // Determine the OutputMethodHandler _output = new OutputMethodHandlerImpl(_processor); _processor.setOutputMethodHandler(_output); } // Loading from a URL public ParseStuff(URL url) { this(); try { _processor.loadStylesheet( new InputSource(url.toExternalForm())); } catch (SAXException e) { System.err.println(""); System.err.println(e.getMessage()); System.exit(1); } catch (java.io.IOException e) { System.err.println(""); System.err.println(e.getMessage()); System.exit(1); } // TODO System.out.println( "Using stylesheet: " + url.toExternalForm() ); } // Loading from a file public ParseStuff(String styleSheet) { this(); try { _processor.loadStylesheet( new InputSource(new FileInputStream(styleSheet))); } catch (SAXException e) { System.err.println(""); System.exit(1); } catch (java.io.IOException e) { System.err.println(""); System.exit(1); } // System.out.println( "Using stylesheet: " + styleSheet ); } public byte[] parse(String url) { InputSource _in = new InputSource(url); _in.setEncoding("UTF-8"); HelpOutputStream _out = new HelpOutputStream(); try { // OutputStreamDestination _dest = // new OutputStreamDestination( _out ); HelpProvider.ProviderDestination _dest = new HelpProvider.ProviderDestination(_out, "UTF-8"); synchronized (this) { _output.setDestination(_dest); _processor.parse(_in); _out.flush(); } } catch (Exception e) { e.printStackTrace(); System.err.println("url: " + url); System.err.println(e.getMessage()); System.exit(1); } return _out.getBigBuffer(); } public void setParameter(String key, Object value) { _processor.setParameter(key, value); } } // end class ParseStuff /** * Returns the embedding resolved document */ ParseStuff _stuff = null; byte[] getSourceDocument(String url) { // Initialize if (_stuff == null) { String styleSheet = HelpDatabases.getInstallDirectory() + "..\\StyleSheets\\Indexing\\ResEmb.xsl"; _stuff = new ParseStuff(styleSheet); // Setting the parameters _stuff.setParameter("Language", _language); _stuff.setParameter("Database", _module); } // and parse return _stuff.parse(url); } // Now has to be settled up public static void main(String[] args) throws Exception { System.setProperty("XMLSEARCH", HelpDatabases.getInstallDirectory()); if (args.length != 6) { System.out.println( "Usage example: main -language de -module swriter -system WIN"); System.exit(1); } String language = null, module = null, system = null; for (int i = 0; i < 5; i += 2) { if (args[i].trim().equals("-language")) language = args[i + 1]; else if (args[i].trim().equals("-module")) module = args[i + 1]; else if (args[i].trim().equals("-system")) system = args[i + 1]; } if (language == null || module == null || system == null) { System.out.println( "Usage example: main -language de -module swriter -system WIN"); System.exit(1); } else { System.out.println(" Configuring for \"system\" = " + system); System.out.println(" \"module\" = " + module); System.out.println(" \"language\" = " + language); } try { String urlmode = HelpDatabases.getURLMode(); HelpURLStreamHandlerFactory urlHandler = new HelpURLStreamHandlerFactory(urlmode); URL.setURLStreamHandlerFactory(urlHandler); HelpIndexer helpIndexer = new HelpIndexer(urlHandler, language, module, system); // helpIndexer.indexDatabase(); helpIndexer.schnitzel(); } catch (Exception e) { e.printStackTrace(); } } }