XmlFilterAdaptor: use the fastparser API when possible

part of the process of making SvXMLImport fastparser-only

This uncovered several bugs, because I ended up stacking fast and
slow parsers not once, but twice.

Specifically, we have a problem here with default namespaces e.g.

    <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
    <semantics><mrow><mstyle mathsize="12pt">

where going from the slow parser to the fast parser loses the
default-namespace information, because there is no way to represent
it in the fastparser world, so we end up with nastiness when we
transition back to the slow parser, and then back again to the fast parser.

So I fixed a couple of places in XMLEmbeddedObjectImportContext
and in SvXMLLegacyToFastDocHandler, and then worked around some of
it by introducing a new XImportFilter2 interface so I could strip
out one of the slowparser -> fastparser transitions.

Change-Id: I491487b99271898da50dc999d3b9b9c39cbd97fd
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104514
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
This commit is contained in:
Noel
2020-10-19 09:36:04 +02:00
committed by Noel Grandin
parent 37a8142720
commit 2b946d245e
9 changed files with 354 additions and 36 deletions

View File

@@ -32,6 +32,7 @@ $(eval $(call gb_Library_use_libraries,xmlfa,\
sal \
tl \
utl \
xo \
))
$(eval $(call gb_Library_add_exception_objects,xmlfa,\

View File

@@ -24,6 +24,7 @@
#include <com/sun/star/beans/PropertyValue.hpp>
#include <com/sun/star/xml/XImportFilter.hpp>
#include <com/sun/star/xml/XImportFilter2.hpp>
#include <com/sun/star/xml/XExportFilter.hpp>
#include <com/sun/star/xml/sax/Parser.hpp>
#include <com/sun/star/xml/sax/InputSource.hpp>
@@ -55,7 +56,7 @@ namespace filter::odfflatxml {
* OdfFlatXml export and imports ODF flat XML documents by plugging a pass-through
* filter implementation into XmlFilterAdaptor.
*/
class OdfFlatXml : public WeakImplHelper<XImportFilter,
class OdfFlatXml : public WeakImplHelper<XImportFilter, XImportFilter2,
XExportFilter, DocumentHandlerAdapter, css::lang::XServiceInfo>
{
private:
@@ -74,6 +75,12 @@ namespace filter::odfflatxml {
const Reference< XDocumentHandler >& docHandler,
const Sequence< OUString >& userData) override;
// XImportFilter2
virtual sal_Bool SAL_CALL
importer(const Sequence< PropertyValue >& sourceData,
const Reference< XFastParser >& fastParser,
const Sequence< OUString >& userData) override;
// XExportFilter
virtual sal_Bool SAL_CALL
exporter(
@@ -126,25 +133,76 @@ OdfFlatXml::importer(
if (!inputStream.is())
return false;
Reference<XParser> saxParser = Parser::create(m_xContext);
InputSource inputSource;
inputSource.sSystemId = url;
inputSource.sPublicId = url;
inputSource.aInputStream = inputStream;
css::uno::Reference< css::xml::sax::XFastParser > xFastParser = dynamic_cast<
css::xml::sax::XFastParser* >( docHandler.get() );
saxParser->setDocumentHandler(docHandler);
try
{
css::uno::Reference< css::io::XSeekable > xSeekable( inputStream, css::uno::UNO_QUERY );
if ( xSeekable.is() )
xSeekable->seek( 0 );
css::uno::Reference< css::xml::sax::XFastParser > xFastParser (docHandler, UNO_QUERY );
if( xFastParser.is() )
xFastParser->parseStream( inputSource );
else
{
Reference<XParser> saxParser = Parser::create(m_xContext);
saxParser->setDocumentHandler(docHandler);
saxParser->parseStream(inputSource);
}
}
catch (const Exception &)
{
TOOLS_WARN_EXCEPTION("filter.odfflatxml", "");
return false;
}
catch (const std::exception &exc)
{
SAL_WARN("filter.odfflatxml", exc.what());
return false;
}
return true;
}
sal_Bool
OdfFlatXml::importer(
const Sequence< PropertyValue >& sourceData,
const Reference< XFastParser >& xFastParser,
const Sequence< OUString >& /* userData */)
{
// Read InputStream to read from and a URL used for the system id
// of the InputSource we create from the given sourceData sequence
Reference<XInputStream> inputStream;
OUString paramName;
OUString url;
sal_Int32 paramCount = sourceData.getLength();
for (sal_Int32 paramIdx = 0; paramIdx < paramCount; paramIdx++)
{
paramName = sourceData[paramIdx].Name;
if ( paramName == "InputStream" )
sourceData[paramIdx].Value >>= inputStream;
else if ( paramName == "URL" )
sourceData[paramIdx].Value >>= url;
}
OSL_ASSERT(inputStream.is());
if (!inputStream.is())
return false;
InputSource inputSource;
inputSource.sSystemId = url;
inputSource.sPublicId = url;
inputSource.aInputStream = inputStream;
try
{
css::uno::Reference< css::io::XSeekable > xSeekable( inputStream, css::uno::UNO_QUERY );
if ( xSeekable.is() )
xSeekable->seek( 0 );
xFastParser->parseStream( inputSource );
}
catch (const Exception &)
{

View File

@@ -24,8 +24,9 @@
#include <tools/urlobj.hxx>
#include "XmlFilterAdaptor.hxx"
#include <com/sun/star/io/XActiveDataSource.hpp>
#include <com/sun/star/xml/sax/XDocumentHandler.hpp>
#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
#include <com/sun/star/xml/XImportFilter.hpp>
#include <com/sun/star/xml/XImportFilter2.hpp>
#include <com/sun/star/xml/XExportFilter.hpp>
#include <com/sun/star/task/XStatusIndicator.hpp>
#include <com/sun/star/style/XStyleFamiliesSupplier.hpp>
@@ -42,6 +43,7 @@
#include <comphelper/scopeguard.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <unotools/pathoptions.hxx>
#include <xmloff/xmlimp.hxx>
using namespace comphelper;
using namespace com::sun::star::uno;
@@ -107,12 +109,13 @@ bool XmlFilterAdaptor::importImpl( const Sequence< css::beans::PropertyValue >&
aAnys[0] <<= xInfoSet;
Reference < XDocumentHandler > xHandler( mxContext->getServiceManager()->createInstanceWithArgumentsAndContext( sXMLImportService, aAnys, mxContext ), UNO_QUERY );
if (!xHandler.is()) {
SAL_WARN("filter.xmlfa", "XmlFilterAdaptor: unable to create service " << sXMLImportService);
return false;
}
// the underlying SvXMLImport implements XFastParser, XImporter, XFastDocumentHandler
Reference < XInterface > xFilter = mxContext->getServiceManager()->createInstanceWithArgumentsAndContext( sXMLImportService, aAnys, mxContext );
assert(xFilter);
Reference < XFastDocumentHandler > xHandler( xFilter, UNO_QUERY );
assert(xHandler);
Reference < XImporter > xImporter( xHandler, UNO_QUERY );
assert(xImporter);
xImporter->setTargetDocument ( mxDoc );
if (xStatusIndicator.is()){
@@ -122,7 +125,8 @@ bool XmlFilterAdaptor::importImpl( const Sequence< css::beans::PropertyValue >&
// Creating a ConverterBridge instance
Reference< XInterface > xConvBridge(mxContext->getServiceManager()->createInstanceWithContext(udConvertClass, mxContext), UNO_QUERY);
Reference< XInterface > xConvBridge(
mxContext->getServiceManager()->createInstanceWithContext(udConvertClass, mxContext), UNO_QUERY);
if (!xConvBridge.is()) {
SAL_WARN("filter.xmlfa", "XmlFilterAdaptor: unable to create service " << udConvertClass);
return false;
@@ -130,7 +134,8 @@ bool XmlFilterAdaptor::importImpl( const Sequence< css::beans::PropertyValue >&
if (xStatusIndicator.is())
xStatusIndicator->setValue(nSteps++);
Reference< XImportFilter > xConverter( xConvBridge, UNO_QUERY );
Reference< XImportFilter > xConverter1( xConvBridge, UNO_QUERY );
Reference< XImportFilter2 > xConverter2( xConvBridge, UNO_QUERY );
// prevent unnecessary broadcasting when loading
Reference< XModel > xModel( mxDoc, UNO_QUERY );
@@ -170,10 +175,24 @@ bool XmlFilterAdaptor::importImpl( const Sequence< css::beans::PropertyValue >&
// Calling Filtering Component
try {
if (!xConverter->importer(aDescriptor,xHandler,msUserData)) {
if (xStatusIndicator.is())
xStatusIndicator->end();
return false;
auto pImport = dynamic_cast<SvXMLImport*>(xHandler.get());
assert(pImport);
if (xConverter2)
{
if (!xConverter2->importer(aDescriptor,pImport,msUserData)) {
if (xStatusIndicator.is())
xStatusIndicator->end();
return false;
}
}
else
{
Reference<XDocumentHandler> xDocHandler = new SvXMLLegacyToFastDocHandler(pImport);
if (!xConverter1->importer(aDescriptor,xDocHandler,msUserData)) {
if (xStatusIndicator.is())
xStatusIndicator->end();
return false;
}
}
}
catch( const Exception& )

View File

@@ -47,6 +47,7 @@
#include <com/sun/star/xml/sax/XFastParser.hpp>
#include <com/sun/star/xml/sax/Writer.hpp>
#include <com/sun/star/xml/XImportFilter.hpp>
#include <com/sun/star/xml/XImportFilter2.hpp>
#include <com/sun/star/xml/XExportFilter.hpp>
#include <com/sun/star/util/theMacroExpander.hpp>
@@ -97,7 +98,7 @@ namespace XSLT
* supporting service from an extension for a specific filter; the
* service must support com.sun.star.xml.xslt.XSLT2Transformer.
*/
class XSLTFilter : public WeakImplHelper<XImportFilter, XExportFilter,
class XSLTFilter : public WeakImplHelper<XImportFilter, XImportFilter2, XExportFilter,
XStreamListener, ExtendedDocumentHandlerAdapter, XServiceInfo>
{
private:
@@ -151,6 +152,12 @@ namespace XSLT
XDocumentHandler>& xHandler,
const Sequence<OUString>& msUserData) override;
// XImportFilter2
virtual sal_Bool SAL_CALL
importer(const Sequence<PropertyValue>& aSourceData, const css::uno::Reference<
XFastParser>& xFastParser,
const Sequence<OUString>& msUserData) override;
// XExportFilter
virtual sal_Bool SAL_CALL
exporter(const Sequence<PropertyValue>& aSourceData, const Sequence<
@@ -314,10 +321,6 @@ namespace XSLT
if (!xInputStream.is())
return false;
// create SAX parser that will read the document file
// and provide events to xHandler passed to this call
css::uno::Reference<XParser> xSaxParser = Parser::create(m_xContext);
// create transformer
Sequence<Any> args(3);
NamedValue nv;
@@ -366,8 +369,6 @@ namespace XSLT
aInput.sPublicId = aURL;
aInput.aInputStream = pipein;
// set doc handler
xSaxParser->setDocumentHandler(xHandler);
css::uno::Reference< css::xml::sax::XFastParser > xFastParser = dynamic_cast<
css::xml::sax::XFastParser* >( xHandler.get() );
@@ -404,7 +405,14 @@ namespace XSLT
if( xFastParser.is() )
xFastParser->parseStream( aInput );
else
{
// create SAX parser that will read the document file
// and provide events to xHandler passed to this call
css::uno::Reference<XParser> xSaxParser = Parser::create(m_xContext);
// set doc handler
xSaxParser->setDocumentHandler(xHandler);
xSaxParser->parseStream( aInput );
}
}
m_tcontrol->terminate();
return !m_bError;
@@ -422,6 +430,134 @@ namespace XSLT
}
}
/**
 * XImportFilter2 entry point: runs the source document through the XSLT
 * transformer and feeds the transformed output to the supplied fast parser.
 *
 * @param aSourceData  media descriptor; the entries "InputStream", "URL" and
 *                     "InteractionHandler" are read, everything else is ignored
 * @param xFastParser  parser that consumes the transformed XML via parseStream()
 * @param msUserData   filter user data; must have at least 5 entries.
 *                     Entry [4] is the stylesheet location (resolved with rel2abs),
 *                     entry [1] is handed to impl_createTransformer.
 * @returns false if msUserData is too short, no input stream was supplied,
 *          the transformer could not be created, the user aborted after a
 *          timeout, or a UNO exception was caught; otherwise !m_bError.
 */
sal_Bool
XSLTFilter::importer(const Sequence<PropertyValue>& aSourceData,
const css::uno::Reference<XFastParser>& xFastParser, const Sequence<
OUString>& msUserData)
{
// the stylesheet is taken from msUserData[4], so fewer than 5 entries is unusable
if (msUserData.getLength() < 5)
return false;
OUString udStyleSheet = rel2abs(msUserData[4]);
// get information from media descriptor
// the input stream that represents the imported file
// is most important here since it is handed to the transformer,
// whose output is what the supplied fast parser reads
sal_Int32 nLength = aSourceData.getLength();
OUString aName, aURL;
css::uno::Reference<XInputStream> xInputStream;
css::uno::Reference<XInteractionHandler> xInterActionHandler;
for (sal_Int32 i = 0; i < nLength; i++)
{
aName = aSourceData[i].Name;
Any value = aSourceData[i].Value;
if ( aName == "InputStream" )
value >>= xInputStream;
else if ( aName == "URL" )
value >>= aURL;
else if ( aName == "InteractionHandler" )
value >>= xInterActionHandler;
}
OSL_ASSERT(xInputStream.is());
if (!xInputStream.is())
return false;
// create transformer
// three NamedValue arguments: stylesheet URL, source URL and source base URL
Sequence<Any> args(3);
NamedValue nv;
nv.Name = "StylesheetURL";
nv.Value <<= expandUrl(udStyleSheet);
args[0] <<= nv;
nv.Name = "SourceURL";
nv.Value <<= aURL;
args[1] <<= nv;
nv.Name = "SourceBaseURL";
nv.Value <<= INetURLObject(aURL).getBase();
args[2] <<= nv;
m_tcontrol = impl_createTransformer(msUserData[1], args);
assert(xFastParser.is());
// xInputStream was already verified above; this assertion is redundant
OSL_ASSERT(xInputStream.is());
OSL_ASSERT(m_tcontrol.is());
if (xFastParser.is() && xInputStream.is() && m_tcontrol.is())
{
try
{
// rewind the input if the stream supports seeking
css::uno::Reference<css::io::XSeekable> xSeek(xInputStream, UNO_QUERY);
if (xSeek.is())
xSeek->seek(0);
// we want to be notified when the processing is done...
m_tcontrol->addListener(css::uno::Reference<XStreamListener> (
this));
// connect input to transformer
m_tcontrol->setInputStream(xInputStream);
// create pipe
// the same pipe object is queried for both its write end and its read end
css::uno::Reference<XOutputStream> pipeout =
Pipe::create(m_xContext);
css::uno::Reference<XInputStream> pipein(pipeout, UNO_QUERY);
//connect transformer to pipe
m_tcontrol->setOutputStream(pipeout);
// describe the pipe's read end as the input source for the fast parser
InputSource aInput;
aInput.sSystemId = aURL;
aInput.sPublicId = aURL;
aInput.aInputStream = pipein;
// transform
m_tcontrol->start();
// wait for m_cTransformed in rounds of TRANSFORMATION_TIMEOUT_SEC
// (presumably signalled by the stream-listener callbacks; not visible here)
TimeValue timeout = { TRANSFORMATION_TIMEOUT_SEC, 0};
osl::Condition::Result result(m_cTransformed.wait(&timeout));
while (osl::Condition::result_timeout == result) {
// on timeout ask the user (if a handler was supplied) whether to retry or abort;
// without a handler the loop simply keeps waiting
if (xInterActionHandler.is()) {
Sequence<Any> excArgs(0);
css::ucb::InteractiveAugmentedIOException exc(
"Timeout!",
static_cast< OWeakObject * >( this ),
InteractionClassification_ERROR,
css::ucb::IOErrorCode_GENERAL,
excArgs);
Any r;
r <<= exc;
::comphelper::OInteractionRequest* pRequest = new ::comphelper::OInteractionRequest(r);
css::uno::Reference< XInteractionRequest > xRequest(pRequest);
::comphelper::OInteractionRetry* pRetry = new ::comphelper::OInteractionRetry;
::comphelper::OInteractionAbort* pAbort = new ::comphelper::OInteractionAbort;
pRequest->addContinuation(pRetry);
pRequest->addContinuation(pAbort);
xInterActionHandler->handle(xRequest);
// only an explicit abort ends the wait; any other outcome waits another round
if (pAbort->wasSelected()) {
m_bError = true;
// presumably makes the next wait() return immediately so the loop exits
m_cTransformed.set();
}
}
result = m_cTransformed.wait(&timeout);
};
// parse the transformed document from the pipe unless an error was flagged
if (!m_bError)
xFastParser->parseStream( aInput );
m_tcontrol->terminate();
return !m_bError;
}
catch( const Exception& )
{
// something went wrong
// NOTE(review): m_tcontrol->terminate() is not reached on this path
TOOLS_WARN_EXCEPTION("filter.xslt", "");
return false;
}
}
else
{
return false;
}
}
sal_Bool
XSLTFilter::exporter(const Sequence<PropertyValue>& aSourceData,
const Sequence<OUString>& msUserData)

View File

@@ -151,6 +151,7 @@ class XMLOFF_DLLPUBLIC SvXMLLegacyToFastDocHandler final : public ::cppu::WeakIm
private:
rtl::Reference< SvXMLImport > mrImport;
rtl::Reference< sax_fastparser::FastAttributeList > mxFastAttributes;
std::stack<sal_uInt16> maDefaultNamespaces;
public:
SvXMLLegacyToFastDocHandler( const rtl::Reference< SvXMLImport > & rImport );

View File

@@ -4245,6 +4245,7 @@ $(eval $(call gb_UnoApi_add_idlfiles,offapi,com/sun/star/xml,\
FastAttribute \
XExportFilter \
XImportFilter \
XImportFilter2 \
))
$(eval $(call gb_UnoApi_add_idlfiles,offapi,com/sun/star/xml/crypto,\
CipherID \

View File

@@ -0,0 +1,71 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef _COM_SUN_STAR_XML_XIMPORTFILTER2_IDL__
#define _COM_SUN_STAR_XML_XIMPORTFILTER2_IDL__
#include <com/sun/star/uno/RuntimeException.idl>
#include <com/sun/star/uno/XInterface.idl>
#include <com/sun/star/beans/PropertyValue.idl>
#include <com/sun/star/xml/sax/XFastParser.idl>
#include <com/sun/star/lang/IllegalArgumentException.idl>
module com { module sun { module star { module xml {
/** interface to implement for an XML-based import filter.
Enhanced vs XImportFilter to take a
com::sun::star::xml::sax::XFastParser instead of a document handler.
@since LibreOffice 7.1
*/
interface XImportFilter2: com::sun::star::uno::XInterface
{
/** performs the import.
<p>The source data (location indicated by <var>aSourceData</var>)
is read, and the XML representation of the document is handed to
<var>xFastParser</var> for parsing.</p>
@param aSourceData
com::sun::star::document::MediaDescriptor
which defines the data source
@param xFastParser
the fast parser for the XML document, i.e. an SvXMLImport subclass
@param msUserData
Sequence of strings which contains the user data defined in the
TypeDetection.xml
@returns
`TRUE` if import process is successful
@throws com::sun::star::lang::IllegalArgumentException
declared by this method; presumably raised for unusable arguments
(the exact conditions are left to the implementation)
*/
boolean importer(
[in] sequence< com::sun::star::beans::PropertyValue > aSourceData,
[in] com::sun::star::xml::sax::XFastParser xFastParser,
[in] sequence< string > msUserData )
raises( com::sun::star::lang::IllegalArgumentException );
};
}; }; }; };
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View File

@@ -260,7 +260,7 @@ void XMLEmbeddedObjectImportContext::StartElement(
sal_uInt16 nPos = rNamespaceMap.GetFirstKey();
while( USHRT_MAX != nPos )
{
OUString aAttrName( rNamespaceMap.GetAttrNameByKey( nPos ) );
OUString aAttrName = "xmlns:" + rNamespaceMap.GetPrefixByKey( nPos );
if( xAttrList->getValueByName( aAttrName ).isEmpty() )
{
pAttrList->AddAttribute( aAttrName,

View File

@@ -2248,6 +2248,9 @@ void SAL_CALL SvXMLLegacyToFastDocHandler::endDocument()
void SAL_CALL SvXMLLegacyToFastDocHandler::startElement( const OUString& rName,
const uno::Reference< xml::sax::XAttributeList >& xAttrList )
{
sal_uInt16 nDefaultNamespace = XML_NAMESPACE_UNKNOWN;
if (!maDefaultNamespaces.empty())
nDefaultNamespace = maDefaultNamespaces.top();
mrImport->processNSAttributes(xAttrList);
OUString aLocalName;
sal_uInt16 nPrefix = mrImport->mpNamespaceMap->GetKeyByAttrName( rName, &aLocalName );
@@ -2257,30 +2260,57 @@ void SAL_CALL SvXMLLegacyToFastDocHandler::startElement( const OUString& rName,
sal_Int16 nAttrCount = xAttrList.is() ? xAttrList->getLength() : 0;
for( sal_Int16 i=0; i < nAttrCount; i++ )
{
OUString aLocalAttrName;
OUString aNamespace;
const OUString& rAttrName = xAttrList->getNameByIndex( i );
const OUString& rAttrValue = xAttrList->getValueByIndex( i );
// don't add unknown namespaces to the map
sal_uInt16 const nAttrPrefix = mrImport->mpNamespaceMap->GetKeyByQName(
rAttrName, nullptr, &aLocalAttrName, &aNamespace, SvXMLNamespaceMap::QNameMode::AttrValue);
if( XML_NAMESPACE_XMLNS != nAttrPrefix )
if (rAttrName == "xmlns")
{
auto const nToken = SvXMLImport::getTokenFromName(aLocalAttrName);
if (XML_NAMESPACE_UNKNOWN == nAttrPrefix || nToken == xmloff::XML_TOKEN_INVALID)
sal_uInt16 nNamespaceKey = mrImport->mpNamespaceMap->GetKeyByName(rAttrValue);
if (nNamespaceKey != XML_NAMESPACE_UNKNOWN)
{
mxFastAttributes->addUnknown(aNamespace,
nDefaultNamespace = nNamespaceKey;
continue;
}
assert(false && "unknown namespace");
}
else if (rAttrName.indexOf(":") == -1 && nDefaultNamespace != XML_NAMESPACE_UNKNOWN)
{
auto const nToken = SvXMLImport::getTokenFromName(rAttrName);
if (nToken == xmloff::XML_TOKEN_INVALID)
{
mxFastAttributes->addUnknown(mrImport->mpNamespaceMap->GetNameByKey(nDefaultNamespace),
OUStringToOString(rAttrName, RTL_TEXTENCODING_UTF8),
OUStringToOString(rAttrValue, RTL_TEXTENCODING_UTF8));
}
else
{
sal_Int32 const nAttr = NAMESPACE_TOKEN(nAttrPrefix) | nToken;
sal_Int32 const nAttr = NAMESPACE_TOKEN(nDefaultNamespace) | nToken;
mxFastAttributes->add(nAttr, OUStringToOString(rAttrValue, RTL_TEXTENCODING_UTF8).getStr());
}
continue;
}
OUString aLocalAttrName;
OUString aNamespace;
// don't add unknown namespaces to the map
sal_uInt16 const nAttrPrefix = mrImport->mpNamespaceMap->GetKeyByQName(
rAttrName, nullptr, &aLocalAttrName, &aNamespace, SvXMLNamespaceMap::QNameMode::AttrValue);
if( XML_NAMESPACE_XMLNS == nAttrPrefix )
continue; // ignore
auto const nToken = SvXMLImport::getTokenFromName(aLocalAttrName);
if (XML_NAMESPACE_UNKNOWN == nAttrPrefix || nToken == xmloff::XML_TOKEN_INVALID)
{
mxFastAttributes->addUnknown(aNamespace,
OUStringToOString(rAttrName, RTL_TEXTENCODING_UTF8),
OUStringToOString(rAttrValue, RTL_TEXTENCODING_UTF8));
}
else
{
sal_Int32 const nAttr = NAMESPACE_TOKEN(nAttrPrefix) | nToken;
mxFastAttributes->add(nAttr, OUStringToOString(rAttrValue, RTL_TEXTENCODING_UTF8).getStr());
}
}
mrImport->startFastElement( mnElement, mxFastAttributes.get() );
maDefaultNamespaces.push(nDefaultNamespace);
}
void SAL_CALL SvXMLLegacyToFastDocHandler::endElement( const OUString& rName )
@@ -2289,6 +2319,7 @@ void SAL_CALL SvXMLLegacyToFastDocHandler::endElement( const OUString& rName )
sal_uInt16 nPrefix = mrImport->mpNamespaceMap->GetKeyByAttrName( rName, &aLocalName );
sal_Int32 mnElement = NAMESPACE_TOKEN( nPrefix ) | SvXMLImport::getTokenFromName(aLocalName);
mrImport->endFastElement( mnElement );
maDefaultNamespaces.pop();
}
void SAL_CALL SvXMLLegacyToFastDocHandler::characters( const OUString& aChars )