508 lines
16 KiB
C++
508 lines
16 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
|
|
|
|
#include "fastserializer.hxx"
|
|
|
|
#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
|
|
#include <rtl/math.hxx>
|
|
#include <comphelper/processfactory.hxx>
|
|
#include <comphelper/sequenceasvector.hxx>
|
|
|
|
#include <string.h>
|
|
|
|
#if OSL_DEBUG_LEVEL > 0
|
|
#include <iostream>
|
|
#include <set>
|
|
#endif
|
|
|
|
using ::comphelper::SequenceAsVector;
|
|
using ::com::sun::star::uno::Reference;
|
|
using ::com::sun::star::uno::Sequence;
|
|
using ::com::sun::star::xml::Attribute;
|
|
using ::com::sun::star::io::XOutputStream;
|
|
|
|
// A token packs a namespace id into its upper 16 bits and the local token id
// into its lower 16 bits. The argument is parenthesised so that expressions
// such as TOKEN(a | b) evaluate the whole argument before masking/shifting.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)

// Fixed markup fragments emitted by the serializer; always written together
// with N_CHARS() so the terminating 0 is not part of the output.
static const char sClosingBracket[] = ">";
static const char sSlashAndClosingBracket[] = "/>";
static const char sColon[] = ":";
static const char sOpeningBracket[] = "<";
static const char sOpeningBracketAndSlash[] = "</";
static const char sQuote[] = "\"";
static const char sEqualSignAndQuote[] = "=\"";
static const char sSpace[] = " ";
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
|
|
|
|
namespace sax_fastparser {
|
|
/** Creates a serializer that emits its output to the given stream.

    @param xOutputStream
        stream installed on the cached output stream; asserted to be set.
 */
FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
    : maCachedOutputStream()
    , maMarkStack()
    , mbMarkStackEmpty( true )
    , mpDoubleStr( NULL )
    , mnDoubleStrCapacity( RTL_STR_MAX_VALUEOFDOUBLE )
{
    // Scratch string that write(double) formats into and then resets.
    rtl_string_new_WithLength( &mpDoubleStr, mnDoubleStrCapacity );

    // Token handler used to turn token ids into their UTF-8 names.
    mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
        ::comphelper::getProcessComponentContext() );

    assert(xOutputStream.is()); // cannot do anything without that
    maCachedOutputStream.setOutputStream( xOutputStream );
}
|
|
|
|
/// Releases the scratch string used for formatting doubles.
FastSaxSerializer::~FastSaxSerializer()
{
    rtl_string_release( mpDoubleStr );
}
|
|
|
|
void FastSaxSerializer::startDocument()
|
|
{
|
|
writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
|
|
}
|
|
|
|
void FastSaxSerializer::write( double value )
|
|
{
|
|
rtl_math_doubleToString(
|
|
&mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
|
|
RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
|
|
0, sal_True);
|
|
|
|
write(mpDoubleStr->buffer, mpDoubleStr->length);
|
|
// and "clear" the string
|
|
mpDoubleStr->length = 0;
|
|
mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
|
|
}
|
|
|
|
void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
|
|
{
|
|
const sal_Int32 nLength = sOutput.getLength();
|
|
for (sal_Int32 i = 0; i < nLength; ++i)
|
|
{
|
|
const sal_Unicode cUnicode = sOutput[ i ];
|
|
const char cChar = cUnicode;
|
|
if (cUnicode & 0xff80)
|
|
{
|
|
write( OString(&cUnicode, 1, RTL_TEXTENCODING_UTF8) );
|
|
}
|
|
else if(bEscape) switch( cChar )
|
|
{
|
|
case '<': writeBytes( "<", 4 ); break;
|
|
case '>': writeBytes( ">", 4 ); break;
|
|
case '&': writeBytes( "&", 5 ); break;
|
|
case '\'': writeBytes( "'", 6 ); break;
|
|
case '"': writeBytes( """, 6 ); break;
|
|
case '\n': writeBytes( " ", 5 ); break;
|
|
case '\r': writeBytes( " ", 5 ); break;
|
|
default: writeBytes( &cChar, 1 ); break;
|
|
}
|
|
else
|
|
writeBytes( &cChar, 1 );
|
|
}
|
|
}
|
|
|
|
void FastSaxSerializer::write( const OString& sOutput, bool bEscape )
|
|
{
|
|
write( sOutput.getStr(), sOutput.getLength(), bEscape );
|
|
}
|
|
|
|
void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
|
|
{
|
|
if (nLen == -1)
|
|
nLen = pStr ? strlen(pStr) : 0;
|
|
|
|
if (!bEscape)
|
|
{
|
|
writeBytes( pStr, nLen );
|
|
return;
|
|
}
|
|
|
|
for (sal_Int32 i = 0; i < nLen; ++i)
|
|
{
|
|
char c = pStr[ i ];
|
|
switch( c )
|
|
{
|
|
case '<': writeBytes( "<", 4 ); break;
|
|
case '>': writeBytes( ">", 4 ); break;
|
|
case '&': writeBytes( "&", 5 ); break;
|
|
case '\'': writeBytes( "'", 6 ); break;
|
|
case '"': writeBytes( """, 6 ); break;
|
|
case '\n': writeBytes( " ", 5 ); break;
|
|
case '\r': writeBytes( " ", 5 ); break;
|
|
default: writeBytes( &c, 1 ); break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void FastSaxSerializer::endDocument()
|
|
{
|
|
assert(mbMarkStackEmpty && maMarkStack.empty());
|
|
maCachedOutputStream.flush();
|
|
}
|
|
|
|
/** Writes the name of a token, as "namespace:token" when the token id carries
    a namespace part and as the plain token name otherwise.

    @param nElement  token id to write
 */
void FastSaxSerializer::writeId( ::sal_Int32 nElement )
{
    if( !HAS_NAMESPACE( nElement ) )
    {
        writeBytes( mxFastTokenHandler->getUTF8Identifier( nElement ) );
        return;
    }

    writeBytes( mxFastTokenHandler->getUTF8Identifier( NAMESPACE( nElement ) ) );
    writeBytes( sColon, N_CHARS(sColon) );
    writeBytes( mxFastTokenHandler->getUTF8Identifier( TOKEN( nElement ) ) );
}
|
|
|
|
#ifdef DBG_UTIL
|
|
/** Returns the name of a token as a string, in the same "namespace:token" /
    plain form that writeId() writes.

    @param nElement  token id to look up
    @return the UTF-8 name
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(name.getConstArray()), name.getLength());
    }

    Sequence<sal_Int8> const ns(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const name(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));

    OString const sPrefix(reinterpret_cast<sal_Char const*>(ns.getConstArray()), ns.getLength());
    OString const sLocalName(reinterpret_cast<sal_Char const*>(name.getConstArray()), name.getLength());
    return sPrefix + OString(sColon, N_CHARS(sColon)) + sLocalName;
}
|
|
#endif
|
|
|
|
/** Writes the start tag of an element, including its attributes.

    @param Element    token id of the element
    @param pAttrList  attributes to write; when null, the pending token/value
                      pairs in maTokenValues are written instead
 */
void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
    if ( !mbMarkStackEmpty )
    {
        // Flush first, then tell the top mark which element starts now.
        maCachedOutputStream.flush();
        maMarkStack.top()->setCurrentElement( Element );
    }

#ifdef DBG_UTIL
    m_DebugStartedElements.push(Element);
#endif

    writeBytes( sOpeningBracket, N_CHARS(sOpeningBracket) );
    writeId( Element );

    if (!pAttrList)
        writeTokenValueList();
    else
        writeFastAttributeList( *pAttrList );

    writeBytes( sClosingBracket, N_CHARS(sClosingBracket) );
}
|
|
|
|
/** Writes the end tag of an element.

    @param Element  token id of the element being closed; in DBG_UTIL builds it
                    is asserted to match the most recently started element
 */
void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
{
#ifdef DBG_UTIL
    assert(!m_DebugStartedElements.empty());
    // Well-formedness constraint: Element Type Match
    assert(Element == m_DebugStartedElements.top());
    m_DebugStartedElements.pop();
#endif

    writeBytes( sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash) );
    writeId( Element );
    writeBytes( sClosingBracket, N_CHARS(sClosingBracket) );
}
|
|
|
|
/** Writes an empty-element tag ("<name .../>"), including its attributes.

    @param Element    token id of the element
    @param pAttrList  attributes to write; when null, the pending token/value
                      pairs in maTokenValues are written instead
 */
void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
    if ( !mbMarkStackEmpty )
    {
        // Flush first, then tell the top mark which element is written now.
        maCachedOutputStream.flush();
        maMarkStack.top()->setCurrentElement( Element );
    }

    writeBytes( sOpeningBracket, N_CHARS(sOpeningBracket) );
    writeId( Element );

    if (!pAttrList)
        writeTokenValueList();
    else
        writeFastAttributeList( *pAttrList );

    writeBytes( sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket) );
}
|
|
|
|
/// Returns the output stream held by the cached output stream.
::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
{
    return maCachedOutputStream.getOutputStream();
}
|
|
|
|
void FastSaxSerializer::writeTokenValueList()
|
|
{
|
|
#ifdef DBG_UTIL
|
|
::std::set<OString> DebugAttributes;
|
|
#endif
|
|
for (size_t j = 0; j < maTokenValues.size(); j++)
|
|
{
|
|
writeBytes(sSpace, N_CHARS(sSpace));
|
|
|
|
sal_Int32 nToken = maTokenValues[j].nToken;
|
|
writeId(nToken);
|
|
|
|
#ifdef DBG_UTIL
|
|
// Well-formedness constraint: Unique Att Spec
|
|
OString const nameId(getId(nToken));
|
|
assert(DebugAttributes.find(nameId) == DebugAttributes.end());
|
|
DebugAttributes.insert(nameId);
|
|
#endif
|
|
|
|
writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
|
|
|
|
write(maTokenValues[j].pValue, -1, true);
|
|
|
|
writeBytes(sQuote, N_CHARS(sQuote));
|
|
}
|
|
maTokenValues.clear();
|
|
}
|
|
|
|
/** Writes every attribute of the list as ` name="escaped value"`.

    @param rAttrList  attribute list providing the tokens and, by index, the
                      value and value length of each attribute
 */
void FastSaxSerializer::writeFastAttributeList(FastAttributeList& rAttrList)
{
#ifdef DBG_UTIL
    ::std::set<OString> DebugAttributes;
#endif
    const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
    const size_t nCount = Tokens.size();
    for (size_t nIdx = 0; nIdx < nCount; ++nIdx)
    {
        writeBytes( sSpace, N_CHARS(sSpace) );

        const sal_Int32 nToken = Tokens[nIdx];
        writeId( nToken );

#ifdef DBG_UTIL
        // Well-formedness constraint: Unique Att Spec
        OString const nameId(getId(nToken));
        assert(DebugAttributes.find(nameId) == DebugAttributes.end());
        DebugAttributes.insert(nameId);
#endif

        writeBytes( sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote) );

        write( rAttrList.getFastAttributeValue(nIdx), rAttrList.AttributeValueLength(nIdx), true );

        writeBytes( sQuote, N_CHARS(sQuote) );
    }
}
|
|
|
|
void FastSaxSerializer::mark( const Int32Sequence& aOrder )
|
|
{
|
|
if ( aOrder.hasElements() )
|
|
{
|
|
boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
|
|
maMarkStack.push( pSort );
|
|
maCachedOutputStream.setOutput( pSort );
|
|
}
|
|
else
|
|
{
|
|
boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
|
|
maMarkStack.push( pMerge );
|
|
maCachedOutputStream.setOutput( pMerge );
|
|
}
|
|
mbMarkStackEmpty = false;
|
|
}
|
|
|
|
/** Pops the top mark and merges its data into the mark below it, or into the
    real output stream when it was the last mark.

    @param eMergeType  how the popped data is combined with the mark below
                       (append / prepend / postpone), or MERGE_MARKS_IGNORE
                       to drop it
 */
void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
{
    SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
    if ( mbMarkStackEmpty )
        return;

    // flush, so that we get everything in getData()
    maCachedOutputStream.flush();

    // Take a copy of the top mark's data before the mark is popped.
    const Int8Sequence aMerge( maMarkStack.top()->getData() );
    maMarkStack.pop();

    if ( maMarkStack.empty() )
    {
        // That was the last mark: go back to writing to the stream and,
        // unless the data is to be ignored, write it there.
        mbMarkStackEmpty = true;
        maCachedOutputStream.resetOutputToStream();
        if ( eMergeType != MERGE_MARKS_IGNORE )
            maCachedOutputStream.writeBytes( aMerge.getConstArray(), aMerge.getLength() );
        return;
    }

    // There is still a mark below: it becomes the output target again and
    // receives the popped data according to the merge type.
    maCachedOutputStream.setOutput( maMarkStack.top() );

    switch ( eMergeType )
    {
        case MERGE_MARKS_APPEND:
            maMarkStack.top()->append( aMerge );
            break;
        case MERGE_MARKS_PREPEND:
            maMarkStack.top()->prepend( aMerge );
            break;
        case MERGE_MARKS_POSTPONE:
            maMarkStack.top()->postpone( aMerge );
            break;
        case MERGE_MARKS_IGNORE:
            break;
    }
}
|
|
|
|
void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
|
|
{
|
|
maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
|
|
}
|
|
|
|
void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
|
|
{
|
|
maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
|
|
}
|
|
|
|
/** Appends the postponed bytes to the data, empties the postponed buffer and
    returns the data.
 */
FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
{
    const bool bAppend = true;
    merge( maData, maPostponed, bAppend );
    maPostponed.realloc( 0 );

    return maData;
}
|
|
|
|
#if OSL_DEBUG_LEVEL > 0
|
|
void FastSaxSerializer::ForMerge::print( )
|
|
{
|
|
std::cerr << "Data: ";
|
|
for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
|
|
{
|
|
std::cerr << maData[i];
|
|
}
|
|
|
|
std::cerr << "\nPostponed: ";
|
|
for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
|
|
{
|
|
std::cerr << maPostponed[i];
|
|
}
|
|
|
|
std::cerr << "\n";
|
|
}
|
|
#endif
|
|
|
|
void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
|
|
{
|
|
merge( maData, rWhat, false );
|
|
}
|
|
|
|
void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
|
|
{
|
|
merge( maData, rWhat, true );
|
|
}
|
|
|
|
void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
|
|
{
|
|
merge( maPostponed, rWhat, true );
|
|
}
|
|
|
|
/** Grows rTop by the length of rMerge and copies rMerge to its end or start.

    @param rTop     sequence that receives the bytes
    @param rMerge   bytes to add; nothing happens when it is empty
    @param bAppend  true to add the bytes at the end, false to add them at the
                    front (the existing bytes are shifted back)
 */
void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
{
    const sal_Int32 nMergeLen = rMerge.getLength();
    if ( nMergeLen <= 0 )
        return;

    const sal_Int32 nTopLen = rTop.getLength();
    rTop.realloc( nTopLen + nMergeLen );

    sal_Int8* const pTop = rTop.getArray();
    if ( bAppend )
    {
        // append the rMerge to the rTop
        memcpy( pTop + nTopLen, rMerge.getConstArray(), nMergeLen );
        return;
    }

    // prepend the rMerge to the rTop: the ranges overlap, hence memmove
    memmove( pTop + nMergeLen, pTop, nTopLen );
    memcpy( pTop, rMerge.getConstArray(), nMergeLen );
}
|
|
|
|
void FastSaxSerializer::ForMerge::resetData( )
|
|
{
|
|
maData = Int8Sequence();
|
|
}
|
|
|
|
/** Makes nElement the element that subsequent appends are filed under, if it
    is one of the elements listed in maOrder; otherwise nothing changes.

    @param nElement  token id of the element that is about to be written
 */
void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
{
    // Search the order sequence in place instead of copying it into a
    // temporary vector on every call.
    const sal_Int32* const pOrderBegin = maOrder.getConstArray();
    const sal_Int32* const pOrderEnd = pOrderBegin + maOrder.getLength();
    if( std::find( pOrderBegin, pOrderEnd, nElement ) == pOrderEnd )
        return;

    mnCurrentElement = nElement;

    // Make sure there is an entry for the element, without touching one that
    // already holds data.
    if ( maData.find( nElement ) == maData.end() )
        maData[ nElement ] = Int8Sequence();
}
|
|
|
|
void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
|
|
{
|
|
append( rWhat );
|
|
}
|
|
|
|
void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
|
|
{
|
|
merge( maData[mnCurrentElement], rWhat, true );
|
|
}
|
|
|
|
void FastSaxSerializer::ForSort::sort()
|
|
{
|
|
// Clear the ForMerge data to avoid duplicate items
|
|
resetData();
|
|
|
|
// Sort it all
|
|
std::map< sal_Int32, Int8Sequence >::iterator iter;
|
|
for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
|
|
{
|
|
iter = maData.find( maOrder[i] );
|
|
if ( iter != maData.end() )
|
|
ForMerge::append( iter->second );
|
|
}
|
|
}
|
|
|
|
/// Sorts the per-element data into the ForMerge data, then returns that.
FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
{
    sort( );

    return ForMerge::getData();
}
|
|
|
|
#if OSL_DEBUG_LEVEL > 0
|
|
void FastSaxSerializer::ForSort::print( )
|
|
{
|
|
std::map< sal_Int32, Int8Sequence >::iterator iter = maData.begin();
|
|
while ( iter != maData.end( ) )
|
|
{
|
|
std::cerr << "pair: " << iter->first;
|
|
for ( sal_Int32 i=0, len=iter->second.getLength(); i < len; ++i )
|
|
std::cerr << iter->second[i];
|
|
std::cerr << "\n";
|
|
++iter;
|
|
}
|
|
|
|
sort( );
|
|
ForMerge::print();
|
|
}
|
|
#endif
|
|
|
|
} // namespace sax_fastparser
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|