2010-10-12 15:55:21 +02:00
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2012-10-01 16:08:38 +01:00
/*
* This file is part of the LibreOffice project .
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License , v . 2.0 . If a copy of the MPL was not distributed with this
* file , You can obtain one at http : //mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice :
*
* Licensed to the Apache Software Foundation ( ASF ) under one or more
* contributor license agreements . See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership . The ASF licenses this file to you under the Apache
* License , Version 2.0 ( the " License " ) ; you may not use this file
* except in compliance with the License . You may obtain a copy of
* the License at http : //www.apache.org/licenses/LICENSE-2.0 .
*/
2008-08-14 16:03:25 +00:00
2013-12-16 22:27:05 +01:00
# if defined _MSC_VER
2008-08-14 16:03:25 +00:00
# pragma warning(push, 1)
# endif
# include "pdfparse.hxx"
// workaround windows compiler: do not include multi_pass.hpp
CWS-TOOLING: integrate CWS boost134
2009-09-09 10:30:24 +0200 fs r275970 : #i100127# provided by rene: check whether system-boost's function.hpp compiles without exception support
2009-09-04 21:58:00 +0200 fs r275832 : indention
2009-09-04 21:57:10 +0200 fs r275831 : #i10000# type in boost/spirit check. Thanks to thb
2009-09-04 10:58:54 +0200 fs r275787 : #i100127# provided by tono@openoffice.org: mingw needs using directives for std::min/max
2009-09-04 10:31:54 +0200 fs r275782 : #i100127# (approved by hjs) on Solaris, set GNUTAR to /usr/sfw/bin/gtar, this is available on baseline machines (Solaris 10.x), but not necessarily available in LD_LIBRARY_PATH
2009-08-19 21:47:55 +0200 rene r275161 : #i10000# check for the header actually used now
2009-08-17 21:55:31 +0200 fs r275073 : CWS-TOOLING: rebase CWS boost134 to trunk@275001 (milestone: DEV300:m55)
2009-07-16 09:39:25 +0200 fs r274031 : #i100127# extract download/% with GNUTAR instead of TAR
2009-07-14 09:18:15 +0200 fs r273960 : CWS-TOOLING: rebase CWS boost134 to trunk@273858 (milestone: DEV300:m52)
2009-07-13 09:55:14 +0200 fs r273919 : #i100127# use boost 1.39, not 1.34
2009-07-13 09:45:57 +0200 fs r273918 : use GNUTAR when extracting download/%.tar.gz, not TAR (necessary with boost 1.39 tarball)
2009-07-10 21:10:52 +0200 fs r273905 : #i100127# spirit now part of boost
2009-07-06 12:12:13 +0200 fs r273732 : #i10000# lngconvex, used in src/wizards, is built in shell => added missing module dependency
2009-07-02 15:34:51 +0200 fs r273660 : #i100127#
2009-07-02 13:35:24 +0200 fs r273645 : #i100127# MSVC needs some hinting when using boost::bind on (some) member functions
2009-07-02 10:33:59 +0200 fs r273625 : removed unused include (lead to an internal compiler error with MSVC and boost 1.34+)
2009-07-02 09:44:35 +0200 fs r273619 : change some include orders, prevents 'internal compiler errors' with MSVC
2009-07-01 09:19:25 +0200 fs r273547 : #i100127# who the heck is defining max as something unusable?
2009-07-01 09:17:28 +0200 fs r273546 : #i100127# defining min/max is causing trouble now
2009-07-01 09:15:40 +0200 fs r273545 : #i100127# reverted previous patch, and properly fixed the problem by defining BOOST_MEM_FN_ENABLE_CDECL, and disabling warning 4180
2009-07-01 09:14:23 +0200 fs r273544 : #i100127# re-disable warning 4180, still needed for some files
2009-07-01 09:07:33 +0200 fs r273543 : #i100127# reverted previous patch, and properly fixed the problem by defining BOOST_MEM_FN_ENABLE_CDECL
2009-06-24 11:00:32 +0200 fs r273319 : oops, re-introduced some inc locations
2009-06-23 22:10:02 +0200 fs r273304 : #i100127# disable warning C4180
2009-06-23 22:08:36 +0200 fs r273303 : #i100127# disable warning C4180
2009-06-16 12:26:49 +0200 fs r273015 : #i10000# (provided by mst@openoffice.org)
2009-06-15 09:10:27 +0200 fs r272962 : #i10000# use -r instead of -a when copying the include files, this prevents the spurious 'failed to preserve ownership' errors
2009-06-11 23:55:06 +0200 fs r272902 : #i100127#
2009-06-11 23:54:47 +0200 fs r272901 : #i100127#
2009-06-11 23:54:05 +0200 fs r272900 : make compile on unxsol[i|s]4
2009-06-10 10:12:22 +0200 fs r272798 : #i100127# don't deliver from within unxlngi6/misc/build/boost, but from a 'normalized' location unxlngi6/inc
2009-06-10 09:48:00 +0200 fs r272795 : #i100127# use 1.34.1 unconditionally on all platforms
2009-09-17 10:11:42 +00:00
# include <boost/spirit/include/classic_core.hpp>
# include <boost/spirit/include/classic_utility.hpp>
# include <boost/spirit/include/classic_error_handling.hpp>
# include <boost/spirit/include/classic_file_iterator.hpp>
2008-08-14 16:03:25 +00:00
# include <boost/bind.hpp>
2012-09-02 13:44:33 +02:00
# include <string.h>
2008-08-14 16:03:25 +00:00
# include <rtl/strbuf.hxx>
2009-12-01 17:55:30 +01:00
# include <rtl/alloc.h>
2008-08-14 16:03:25 +00:00
// disable warnings again because someone along the line has enabled them
2013-12-16 22:27:05 +01:00
# if defined _MSC_VER
2008-08-14 16:03:25 +00:00
# pragma warning(push, 1)
# endif
using namespace boost : : spirit ;
using namespace pdfparse ;
2011-02-26 14:35:17 +01:00
2008-08-14 16:03:25 +00:00
class StringEmitContext : public EmitContext
{
OStringBuffer m_aBuf ;
public :
StringEmitContext ( ) : EmitContext ( ) , m_aBuf ( 256 ) { }
virtual ~ StringEmitContext ( ) { }
2014-03-26 16:37:00 +01:00
virtual bool write ( const void * pBuf , unsigned int nLen ) throw ( ) SAL_OVERRIDE
2008-08-14 16:03:25 +00:00
{
2015-03-28 19:06:30 +01:00
m_aBuf . append ( static_cast < const sal_Char * > ( pBuf ) , nLen ) ;
2008-08-14 16:03:25 +00:00
return true ;
}
2014-03-26 16:37:00 +01:00
virtual unsigned int getCurPos ( ) throw ( ) SAL_OVERRIDE { return m_aBuf . getLength ( ) ; }
virtual bool copyOrigBytes ( unsigned int nOrigOffset , unsigned int nLen ) throw ( ) SAL_OVERRIDE
2015-04-24 12:33:08 +02:00
{ return ( nOrigOffset + nLen < static_cast < unsigned int > ( m_aBuf . getLength ( ) ) ) & &
write ( m_aBuf . getStr ( ) + nOrigOffset , nLen ) ; }
2014-03-26 16:37:00 +01:00
virtual unsigned int readOrigBytes ( unsigned int nOrigOffset , unsigned int nLen , void * pBuf ) throw ( ) SAL_OVERRIDE
2008-08-14 16:03:25 +00:00
{
if ( nOrigOffset + nLen < static_cast < unsigned int > ( m_aBuf . getLength ( ) ) )
{
2012-09-02 13:44:33 +02:00
memcpy ( pBuf , m_aBuf . getStr ( ) + nOrigOffset , nLen ) ;
2008-08-14 16:03:25 +00:00
return nLen ;
}
return 0 ;
}
OString getString ( ) { return m_aBuf . makeStringAndClear ( ) ; }
} ;
template < class iteratorT >
class PDFGrammar : public grammar < PDFGrammar < iteratorT > >
{
public :
/** Creates a grammar for input that starts at @p first.

    The iterator is remembered in m_aGlobalBegin; the semantic actions
    subtract it from their match position to compute offsets.
*/
PDFGrammar( const iteratorT& first )
    : m_fDouble( 0.0 )
    , m_aGlobalBegin( first )
{
}
/** Deletes the bottom-most entry of the object stack, if there is one.

    Only the first element is deleted here; entries above it are also
    registered as sub elements of a container below them when they are
    pushed (see beginObject/beginTrailer).
    NOTE(review): presumably the containers delete their sub elements - confirm.
*/
~PDFGrammar()
{
    if( m_aObjectStack.empty() )
        return;
    delete m_aObjectStack.front();
}
double m_fDouble ;
std : : vector < unsigned int > m_aUIntStack ;
std : : vector < PDFEntry * > m_aObjectStack ;
2013-04-07 12:06:47 +02:00
OString m_aErrorString ;
2008-08-14 16:03:25 +00:00
iteratorT m_aGlobalBegin ;
public :
2010-04-27 19:27:46 +02:00
// Functor used through functor_parser<pdf_string_parser> in the stringtype
// rule, between the opening '(' and the closing ')' of a literal string.
// It consumes characters up to (not including) the ')' that closes the
// string, keeping track of nested, unescaped braces.
struct pdf_string_parser
{
typedef nil_t result_t ;
// Returns the number of characters consumed, or -1 if the input ended
// before the closing brace was found.
// NOTE(review): functor_parser is expected to treat a negative length as
// "no match" - see the Boost.Spirit Classic documentation.
template < typename ScannerT >
std : : ptrdiff_t
2010-10-05 13:49:45 +01:00
operator ( ) ( ScannerT const & scan , result_t & ) const
2010-04-27 19:27:46 +02:00
{
std : : ptrdiff_t len = 0 ;
// nesting depth of unescaped '(' seen so far; dropping below zero means
// the current ')' is the one that terminates the string
int nBraceLevel = 0 ;
while ( ! scan . at_end ( ) )
{
char c = * scan ;
if ( c = = ' ) ' )
{
nBraceLevel - - ;
if ( nBraceLevel < 0 )
break ;
}
else if ( c = = ' ( ' )
nBraceLevel + + ;
else if ( c = = ' \\ ' ) // ignore escaped braces
{
// count and step over the backslash itself here; the common code below
// then counts and steps over the escaped character without inspecting it
+ + len ;
2015-04-29 02:30:46 +10:00
+ + scan . first ; // tdf#63054: avoid skipping spaces
if ( scan . first = = scan . last ) // tdf#63054: avoid skipping spaces
2010-04-27 19:27:46 +02:00
break ;
}
+ + len ;
+ + scan ;
}
// reaching the end of input means the string was never closed
return scan . at_end ( ) ? - 1 : len ;
}
} ;
2008-08-14 16:03:25 +00:00
// Rule set of the grammar for one scanner type. The constructor wires up
// every rule and binds its semantic action to a member function of the
// owning PDFGrammar; start() names pdfrule as the top-level rule.
template < typename ScannerT >
struct definition
{
definition ( const PDFGrammar < iteratorT > & rSelf )
{
// the grammar arrives as a const reference, but the bound actions are
// non-const member functions, hence the const_cast
PDFGrammar < iteratorT > * pSelf = const_cast < PDFGrammar < iteratorT > * > ( & rSelf ) ;
// workaround workshop compiler: comment_p doesn't work
// comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
comment = lexeme_d [ ( ch_p ( ' % ' ) > > * ( ~ ch_p ( ' \r ' ) & ~ ch_p ( ' \n ' ) ) > > eol_p ) [ boost : : bind ( & PDFGrammar : : pushComment , pSelf , _1 , _2 ) ] ] ;
boolean = ( str_p ( " true " ) | str_p ( " false " ) ) [ boost : : bind ( & PDFGrammar : : pushBool , pSelf , _1 , _2 ) ] ;
// workaround workshop compiler: confix_p doesn't work
//stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
stream = ( str_p ( " stream " ) > > * ( anychar_p - str_p ( " endstream " ) ) > > str_p ( " endstream " ) ) [ boost : : bind ( & PDFGrammar : : emitStream , pSelf , _1 , _2 ) ] ;
// a name: the slash followed by any run of characters that are neither in
// the listed delimiter set nor a NUL byte
name = lexeme_d [
ch_p ( ' / ' )
> > ( * ( anychar_p - chset_p ( " \t \n \f \r ()<>[]{}/% " ) - ch_p ( ' \0 ' ) ) )
[ boost : : bind ( & PDFGrammar : : pushName , pSelf , _1 , _2 ) ] ] ;
// workaround workshop compiler: confix_p doesn't work
//stringtype = ( confix_p("(",*anychar_p, ")") |
// confix_p("<",*xdigit_p, ">") )
// [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
2010-04-27 19:27:46 +02:00
stringtype = ( ( ch_p ( ' ( ' ) > > functor_parser < pdf_string_parser > ( ) > > ch_p ( ' ) ' ) ) |
2008-08-14 16:03:25 +00:00
( ch_p ( ' < ' ) > > * xdigit_p > > ch_p ( ' > ' ) ) )
[ boost : : bind ( & PDFGrammar : : pushString , pSelf , _1 , _2 ) ] ;
null_object = str_p ( " null " ) [ boost : : bind ( & PDFGrammar : : pushNull , pSelf , _1 , _2 ) ] ;
// object reference: two unsigned ints followed by 'R'. Both numbers are
// collected on m_aUIntStack and taken off again by pushObjectRef.
# ifdef USE_ASSIGN_ACTOR
objectref = ( uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > ch_p ( ' R ' )
> > eps_p
) [ boost : : bind ( & PDFGrammar : : pushObjectRef , pSelf , _1 , _2 ) ] ;
# else
objectref = ( uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > ch_p ( ' R ' )
> > eps_p
) [ boost : : bind ( & PDFGrammar : : pushObjectRef , pSelf , _1 , _2 ) ] ;
# endif
// simple values; objectref is listed ahead of the plain number alternative.
// A number is first stored in m_fDouble and then turned into an entry by
// pushDouble.
# ifdef USE_ASSIGN_ACTOR
simple_type = objectref | name |
( real_p [ assign_a ( pSelf - > m_fDouble ) ] > > eps_p )
[ boost : : bind ( & PDFGrammar : : pushDouble , pSelf , _1 , _2 ) ]
| stringtype | boolean | null_object ;
# else
simple_type = objectref | name |
( real_p [ boost : : bind ( & PDFGrammar : : assign_action_double , pSelf , _1 ) ] > > eps_p )
[ boost : : bind ( & PDFGrammar : : pushDouble , pSelf , _1 , _2 ) ]
| stringtype | boolean | null_object ;
# endif
dict_begin = str_p ( " << " ) [ boost : : bind ( & PDFGrammar : : beginDict , pSelf , _1 , _2 ) ] ;
dict_end = str_p ( " >> " ) [ boost : : bind ( & PDFGrammar : : endDict , pSelf , _1 , _2 ) ] ;
array_begin = str_p ( " [ " ) [ boost : : bind ( & PDFGrammar : : beginArray , pSelf , _1 , _2 ) ] ;
array_end = str_p ( " ] " ) [ boost : : bind ( & PDFGrammar : : endArray , pSelf , _1 , _2 ) ] ;
// object header: object number, generation number, then the obj keyword;
// beginObject takes both numbers off m_aUIntStack
# ifdef USE_ASSIGN_ACTOR
object_begin = uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > str_p ( " obj " ) [ boost : : bind ( & PDFGrammar : : beginObject , pSelf , _1 , _2 ) ] ;
# else
object_begin = uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > str_p ( " obj " ) [ boost : : bind ( & PDFGrammar : : beginObject , pSelf , _1 , _2 ) ] ;
# endif
object_end = str_p ( " endobj " ) [ boost : : bind ( & PDFGrammar : : endObject , pSelf , _1 , _2 ) ] ;
// cross reference section: the xref keyword, two unsigned ints, then one or
// more entries of 10 digits, a blank, 5 digits, a blank, 'n' or 'f' and two
// whitespace characters. No semantic action is attached.
xref = str_p ( " xref " ) > > uint_p > > uint_p
> > lexeme_d [
+ ( repeat_p ( 10 ) [ digit_p ]
> > blank_p
> > repeat_p ( 5 ) [ digit_p ]
> > blank_p
> > ( ch_p ( ' n ' ) | ch_p ( ' f ' ) )
> > repeat_p ( 2 ) [ space_p ]
) ] ;
// begin and end tokens of dictionaries and arrays are plain elements of a
// flat sequence here; the nesting is tracked by the semantic actions on
// m_aObjectStack, not by the grammar
dict_element = dict_begin | comment | simple_type
| array_begin | array_end | dict_end ;
object = object_begin
> > * dict_element
> > ! stream
> > object_end ;
trailer = str_p ( " trailer " ) [ boost : : bind ( & PDFGrammar : : beginTrailer , pSelf , _1 , _2 ) ]
> > * dict_element
> > str_p ( " startxref " )
> > uint_p
> > str_p ( " %%EOF " ) [ boost : : bind ( & PDFGrammar : : endTrailer , pSelf , _1 , _2 ) ] ;
// top-level rule: an optional header line (major and minor version are
// collected on m_aUIntStack for haveFile), followed by any number of
// comments, objects and xref/trailer pairs
# ifdef USE_ASSIGN_ACTOR
pdfrule = ! ( lexeme_d [
str_p ( " %PDF- " )
> > uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > ch_p ( ' . ' )
> > uint_p [ push_back_a ( pSelf - > m_aUIntStack ) ]
> > * ( ( ~ ch_p ( ' \r ' ) & ~ ch_p ( ' \n ' ) ) )
> > eol_p
] ) [ boost : : bind ( & PDFGrammar : : haveFile , pSelf , _1 , _2 ) ]
> > * ( comment | object | ( xref > > trailer ) ) ;
# else
pdfrule = ! ( lexeme_d [
str_p ( " %PDF- " )
> > uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > ch_p ( ' . ' )
> > uint_p [ boost : : bind ( & PDFGrammar : : push_back_action_uint , pSelf , _1 ) ]
> > * ( ( ~ ch_p ( ' \r ' ) & ~ ch_p ( ' \n ' ) ) )
> > eol_p
] ) [ boost : : bind ( & PDFGrammar : : haveFile , pSelf , _1 , _2 ) ]
> > * ( comment | object | ( xref > > trailer ) ) ;
# endif
}
// note: array and value are declared but never assigned in the constructor above
rule < ScannerT > comment , stream , boolean , name , stringtype , null_object , simple_type ,
objectref , array , value , dict_element , dict_begin , dict_end ,
array_begin , array_end , object , object_begin , object_end ,
xref , trailer , pdfrule ;
// entry point of the grammar
const rule < ScannerT > & start ( ) const { return pdfrule ; }
} ;
# ifndef USE_ASSIGN_ACTOR
void push_back_action_uint ( unsigned int i )
{
m_aUIntStack . push_back ( i ) ;
}
void assign_action_double ( double d )
{
m_fDouble = d ;
}
# endif
2015-04-02 14:07:31 +02:00
static void parseError ( const char * pMessage , iteratorT pLocation )
2008-08-14 16:03:25 +00:00
{
throw_ ( pLocation , pMessage ) ;
}
2013-04-07 12:06:47 +02:00
/// Collects the characters of the range [first, last) into a string.
OString iteratorToString( iteratorT first, iteratorT last ) const
{
    OStringBuffer aCollected( 32 );
    for( ; first != last; ++first )
        aCollected.append( *first );
    return aCollected.makeStringAndClear();
}
2012-01-21 15:21:16 +01:00
/** Semantic action for the file header line.

    Creates the PDFFile root entry and puts it on the (so far empty) object
    stack. The version numbers are taken from m_aUIntStack: minor on top,
    major below it. A header found while the object stack already has
    content is reported as a parse error.
*/
void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
{
    if( !m_aObjectStack.empty() )
    {
        parseError( " found file header in unusual place ", pBegin );
        return;
    }

    PDFFile* pNewFile = new PDFFile();
    pNewFile->m_nMinor = m_aUIntStack.back();
    m_aUIntStack.pop_back();
    pNewFile->m_nMajor = m_aUIntStack.back();
    m_aUIntStack.pop_back();
    m_aObjectStack.push_back( pNewFile );
}
/** Semantic action for a comment: adds a PDFComment holding the matched
    text [first, last) to the container on top of the object stack.

    If the stack is still empty, a PDFPart is pushed first to act as the
    container. A top-of-stack entry that is not a container is reported as
    a parse error.
*/
void pushComment( iteratorT first, iteratorT last )
{
    if( m_aObjectStack.empty() )
        m_aObjectStack.push_back( new PDFPart() );

    // Validate the target before allocating the comment, so that nothing
    // is leaked when the error is raised.
    PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
    if( pContainer == NULL )
    {
        parseError( " comment without container ", first );
        return; // never dereference a NULL container
    }

    // add a comment to the current stack element
    pContainer->m_aSubElements.push_back(
        new PDFComment( iteratorToString( first, last ) ) );
}
/** Adds a freshly created entry to the container on top of the object stack.

    - A dictionary or array on top of the stack takes any value.
    - A PDFObject takes the value as its m_pObject, but only once.
    - A PDFTrailer takes a dictionary as its m_pDict, but only once.

    In each accepted case the value is also appended to the container's sub
    elements. Otherwise the value is deleted and a parse error is reported
    at @p pPos.
*/
void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
{
    const char* pError = NULL;
    PDFContainer* pTarget = m_aObjectStack.empty()
        ? NULL
        : dynamic_cast<PDFContainer*>(m_aObjectStack.back());

    const bool bNeedsSlot = pTarget != NULL
        && dynamic_cast<PDFDict*>(pTarget) == NULL
        && dynamic_cast<PDFArray*>(pTarget) == NULL;
    if( bNeedsSlot )
    {
        // neither dictionary nor array: the value has to fit a dedicated slot
        bool bAccepted = false;
        if( PDFObject* pObj = dynamic_cast<PDFObject*>(pTarget) )
        {
            if( pObj->m_pObject == NULL )
            {
                pObj->m_pObject = pNewValue;
                bAccepted = true;
            }
            else
                pError = " second value for object ";
        }
        else if( PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewValue) )
        {
            PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pTarget);
            if( pTrailer != NULL && pTrailer->m_pDict == NULL )
            {
                pTrailer->m_pDict = pNewDict;
                bAccepted = true;
            }
        }
        if( !bAccepted )
            pTarget = NULL;
    }

    if( pTarget != NULL )
    {
        pTarget->m_aSubElements.push_back( pNewValue );
        return;
    }

    if( pError == NULL )
    {
        pError = dynamic_cast<PDFContainer*>(pNewValue) != NULL
            ? " array without container "
            : " value without container ";
    }
    delete pNewValue;
    parseError( pError, pPos );
}
/// Semantic action for a name: inserts a PDFName built from the matched text.
void pushName( iteratorT first, iteratorT last )
{
    PDFName* pName = new PDFName( iteratorToString( first, last ) );
    insertNewValue( pName, first );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for a number: inserts a PDFNumber holding the value
/// previously stored in m_fDouble.
void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
{
    PDFNumber* pNumber = new PDFNumber( m_fDouble );
    insertNewValue( pNumber, first );
}
/// Semantic action for a string: inserts a PDFString built from the whole
/// matched text, delimiters included.
void pushString( iteratorT first, iteratorT last )
{
    PDFString* pString = new PDFString( iteratorToString( first, last ) );
    insertNewValue( pString, first );
}
/// Semantic action for a boolean: the value is derived from the length of
/// the match only - a match of length 4 yields true, any other length false.
void pushBool( iteratorT first, iteratorT last )
{
    const bool bValue = ( last - first == 4 );
    insertNewValue( new PDFBool( bValue ), first );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the null object: inserts a PDFNull.
void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    PDFNull* pNull = new PDFNull();
    insertNewValue( pNull, first );
}
2012-01-21 15:21:16 +01:00
/** Semantic action for an object header.

    Takes the generation and object number off m_aUIntStack (generation on
    top), creates the PDFObject, records its offset relative to the start
    of the input, adds it to the PDFFile or PDFPart on top of the object
    stack and makes it the new top of the stack. If the stack is still
    empty, a PDFPart is pushed first. Any other kind of entry on top of the
    stack is reported as a parse error.
*/
void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
{
    if( m_aObjectStack.empty() )
        m_aObjectStack.push_back( new PDFPart() );

    unsigned int nGeneration = m_aUIntStack.back();
    m_aUIntStack.pop_back();
    unsigned int nObject = m_aUIntStack.back();
    m_aUIntStack.pop_back();

    // Validate the parent before allocating the object, so that nothing is
    // leaked when the error is raised.
    PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
    if( pContainer == NULL ||
        ( dynamic_cast<PDFFile*>(pContainer) == NULL &&
          dynamic_cast<PDFPart*>(pContainer) == NULL ) )
    {
        parseError( " object in wrong place ", first );
        return;
    }

    PDFObject* pObj = new PDFObject( nObject, nGeneration );
    pObj->m_nOffset = first - m_aGlobalBegin;
    pContainer->m_aSubElements.push_back( pObj );
    m_aObjectStack.push_back( pObj );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the end of an object: takes the PDFObject off the
/// object stack; an empty stack or a different kind of entry on top is
/// reported as a parse error.
void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    if( m_aObjectStack.empty() )
    {
        parseError( " endobj without obj ", first );
        return;
    }
    if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
    {
        parseError( " spurious endobj ", first );
        return;
    }
    m_aObjectStack.pop_back();
}
2012-01-21 15:21:16 +01:00
/// Semantic action for an object reference: takes generation and object
/// number off m_aUIntStack (generation on top) and inserts a PDFObjectRef.
void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    const unsigned int nGen = m_aUIntStack.back();
    m_aUIntStack.pop_back();
    const unsigned int nObj = m_aUIntStack.back();
    m_aUIntStack.pop_back();

    PDFObjectRef* pRef = new PDFObjectRef( nObj, nGen );
    insertNewValue( pRef, first );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the start of a dictionary: creates a PDFDict,
/// records its offset relative to the start of the input, inserts it as a
/// value and makes it the new top of the object stack.
void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    PDFDict* pNewDict = new PDFDict();
    pNewDict->m_nOffset = first - m_aGlobalBegin;

    // a failed insertion leaves via an exception, so the push below only
    // happens for an accepted dictionary
    insertNewValue( pNewDict, first );
    m_aObjectStack.push_back( pNewDict );
}
2012-01-21 15:21:16 +01:00
/** Semantic action for the end of a dictionary.

    Takes the PDFDict off the object stack and lets it build its map. If
    buildMap() returns an offending element, that element is emitted into
    an error text and reported as a parse error. The text is kept in
    m_aErrorString so that the pointer handed to parseError stays valid.
    An empty stack or a different kind of entry on top is reported as a
    parse error as well.
*/
void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    PDFDict* pDict = m_aObjectStack.empty()
        ? NULL
        : dynamic_cast<PDFDict*>(m_aObjectStack.back());

    if( m_aObjectStack.empty() )
        parseError( " dictionary end without begin ", first );
    else if( pDict == NULL )
        parseError( " spurious dictionary end ", first );
    else
        m_aObjectStack.pop_back();

    PDFEntry* pOffender = pDict->buildMap();
    if( pOffender == NULL )
        return;

    StringEmitContext aCtx;
    aCtx.write( " offending dictionary element: ", 30 );
    pOffender->emit( aCtx );
    m_aErrorString = aCtx.getString();
    parseError( m_aErrorString.getStr(), first );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the start of an array: creates a PDFArray, records
/// its offset relative to the start of the input, inserts it as a value
/// and makes it the new top of the object stack.
void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    PDFArray* pNewArray = new PDFArray();
    pNewArray->m_nOffset = first - m_aGlobalBegin;

    // a failed insertion leaves via an exception, so the push below only
    // happens for an accepted array
    insertNewValue( pNewArray, first );
    m_aObjectStack.push_back( pNewArray );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the end of an array: takes the PDFArray off the
/// object stack; an empty stack or a different kind of entry on top is
/// reported as a parse error.
void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    if( m_aObjectStack.empty() )
    {
        parseError( " array end without begin ", first );
        return;
    }
    if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
    {
        parseError( " spurious array end ", first );
        return;
    }
    m_aObjectStack.pop_back();
}
/** Semantic action for a stream.

    The entry on top of the object stack has to be a PDFObject that already
    has a value; otherwise, or if the object already has a stream, a parse
    error is reported. If the object's value is a dictionary, a PDFStream
    covering the matched range (as offsets relative to the start of the
    input) is attached to the object; for any other kind of value nothing
    is added.
*/
void emitStream( iteratorT first, iteratorT last )
{
    if( m_aObjectStack.empty() )
        parseError( " stream without object ", first );

    PDFObject* pOwner = dynamic_cast<PDFObject*>(m_aObjectStack.back());
    if( pOwner == NULL || pOwner->m_pObject == NULL )
    {
        parseError( " stream without object ", first );
        return;
    }

    if( pOwner->m_pStream )
        parseError( " multiple streams in object ", first );

    PDFDict* pDict = dynamic_cast<PDFDict*>(pOwner->m_pObject);
    if( pDict == NULL )
        return;

    PDFStream* pNewStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
    pOwner->m_pStream = pNewStream;
    pOwner->m_aSubElements.push_back( pNewStream );
}
2012-01-21 15:21:16 +01:00
/** Semantic action for the start of a trailer.

    Creates a PDFTrailer, records its offset relative to the start of the
    input, adds it to the PDFFile or PDFPart on top of the object stack and
    makes it the new top of the stack. If the stack is still empty, a
    PDFPart is pushed first. Any other kind of entry on top of the stack is
    reported as a parse error.
*/
void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    if( m_aObjectStack.empty() )
        m_aObjectStack.push_back( new PDFPart() );

    // Validate the parent before allocating the trailer, so that nothing is
    // leaked when the error is raised.
    PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
    if( pContainer == NULL ||
        ( dynamic_cast<PDFFile*>(pContainer) == NULL &&
          dynamic_cast<PDFPart*>(pContainer) == NULL ) )
    {
        parseError( " trailer in wrong place ", first );
        return;
    }

    PDFTrailer* pTrailer = new PDFTrailer();
    pTrailer->m_nOffset = first - m_aGlobalBegin;
    pContainer->m_aSubElements.push_back( pTrailer );
    m_aObjectStack.push_back( pTrailer );
}
2012-01-21 15:21:16 +01:00
/// Semantic action for the end-of-file marker: takes the PDFTrailer off the
/// object stack; an empty stack or a different kind of entry on top is
/// reported as a parse error.
void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
{
    if( m_aObjectStack.empty() )
    {
        parseError( " %%EOF without trailer ", first );
        return;
    }
    if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
    {
        parseError( " spurious %%EOF ", first );
        return;
    }
    m_aObjectStack.pop_back();
}
} ;
2012-02-10 14:48:36 -05:00
# ifdef WIN32
// Parses a PDF that is held completely in memory (nLen bytes at pBuffer).
// Returns the root entry if the parse left exactly one entry on the
// grammar's object stack, NULL otherwise. Parse errors are not propagated
// to the caller.
PDFEntry * PDFReader : : read ( const char * pBuffer , unsigned int nLen )
{
PDFGrammar < const char * > aGrammar ( pBuffer ) ;
try
{
// the parse result is only captured when it is logged below
2013-03-20 23:47:45 +01:00
# if OSL_DEBUG_LEVEL > 1
2012-02-10 14:48:36 -05:00
boost : : spirit : : parse_info < const char * > aInfo =
2013-03-20 23:47:45 +01:00
# endif
2012-02-10 14:48:36 -05:00
boost : : spirit : : parse ( pBuffer ,
pBuffer + nLen ,
aGrammar ,
boost : : spirit : : space_p ) ;
2013-03-20 23:47:45 +01:00
# if OSL_DEBUG_LEVEL > 1
2013-09-20 07:51:29 -07:00
SAL_INFO ( " sdext.pdfimport.pdfparse " , " parseinfo: stop = " < < aInfo . stop < < " (buff= " < < pBuffer < < " , offset = " < < aInfo . stop - pBuffer < < " ), hit = " < < ( aInfo . hit ? OUString ( " true " ) : OUString ( " false " ) ) < < " , full = " < < ( aInfo . full ? OUString ( " true " ) : OUString ( " false " ) ) < < " , length = " < < ( int ) aInfo . length ) ;
2013-03-20 23:47:45 +01:00
# endif
2012-02-10 14:48:36 -05:00
}
// a parse error is swallowed here; it is only logged (together with the
// types on the object stack) when OSL_DEBUG_LEVEL > 1, and whatever is on
// the object stack is still evaluated below
catch ( const parser_error < const char * , const char * > & rError )
{
2013-03-20 23:47:45 +01:00
# if OSL_DEBUG_LEVEL > 1
2013-09-20 07:51:29 -07:00
OString aTmp ;
unsigned int nElem = aGrammar . m_aObjectStack . size ( ) ;
2012-02-10 14:48:36 -05:00
for ( unsigned int i = 0 ; i < nElem ; i + + )
2013-09-20 07:51:29 -07:00
aTmp + = " " + OString ( typeid ( * ( aGrammar . m_aObjectStack [ i ] ) ) . name ( ) ) ;
2013-03-20 23:47:45 +01:00
2013-03-30 04:57:16 +01:00
SAL_WARN ( " sdext.pdfimport.pdfparse " , " parse error: " < < rError . descriptor < < " at buffer pos " < < rError . where - pBuffer < < " , object stack: " < < aTmp ) ;
2013-03-20 23:47:45 +01:00
# endif
2012-02-10 14:48:36 -05:00
}
PDFEntry * pRet = NULL ;
unsigned int nEntries = aGrammar . m_aObjectStack . size ( ) ;
// exactly one remaining entry is the result; it is taken off the stack so
// that the grammar's destructor does not delete it
if ( nEntries = = 1 )
{
pRet = aGrammar . m_aObjectStack . back ( ) ;
aGrammar . m_aObjectStack . pop_back ( ) ;
}
2013-03-20 23:47:45 +01:00
# if OSL_DEBUG_LEVEL > 1
2012-02-10 14:48:36 -05:00
else if ( nEntries > 1 )
2013-03-20 23:47:45 +01:00
SAL_WARN ( " sdext.pdfimport.pdfparse " , " error got " < < nEntries < < " stack objects in parse " ) ;
# endif
2012-02-10 14:48:36 -05:00
return pRet ;
}
# endif
2008-08-14 16:03:25 +00:00
/** Parses the PDF file named pFileName.

    @return the root entry if the parse left exactly one entry on the
            grammar's object stack; NULL if the file could not be opened or
            read, or if the parse left no or more than one entry. Parse
            errors are logged, not propagated.
*/
PDFEntry* PDFReader::read( const char* pFileName )
{
#ifdef WIN32
    /* #i106583#
       Since the conversion to boost 1.39, file_iterator does not work
       anymore on all Windows systems, the C++ stdlib istream_iterator
       apparently does not allow "-", and spirit 2.0 does not work in our
       environment with MSC. So for the time being bite the bullet and read
       the whole file.
       FIXME: give Spirit 2.x another try when we upgrade boost again.
    */
    PDFEntry* pRet = NULL;
    FILE* fp = fopen( pFileName, " rb " );
    if( fp )
    {
        // Determine the file size; ftell reports failure as -1, which must
        // not be turned into a (huge) unsigned length.
        long nSize = -1;
        if( fseek( fp, 0, SEEK_END ) == 0 )
            nSize = ftell( fp );

        // An empty file cannot yield an entry, so it is skipped as well.
        if( nSize > 0 && fseek( fp, 0, SEEK_SET ) == 0 )
        {
            const unsigned int nLen = static_cast<unsigned int>(nSize);
            char* pBuf = static_cast<char*>(rtl_allocateMemory( nLen ));
            if( pBuf )
            {
                // only parse a completely read buffer; after a short read
                // the remainder of pBuf would be uninitialised
                if( fread( pBuf, 1, nLen, fp ) == nLen )
                    pRet = read( pBuf, nLen );
                rtl_freeMemory( pBuf );
            }
        }
        fclose( fp );
    }
    return pRet;
#else
    file_iterator<> file_start( pFileName );
    if( !file_start )
        return NULL;
    file_iterator<> file_end = file_start.make_end();
    PDFGrammar< file_iterator<> > aGrammar( file_start );

    try
    {
        // the parse result is only captured when it is logged below
#if OSL_DEBUG_LEVEL > 1
        boost::spirit::parse_info< file_iterator<> > aInfo =
#endif
            boost::spirit::parse( file_start,
                                  file_end,
                                  aGrammar,
                                  boost::spirit::space_p );
#if OSL_DEBUG_LEVEL > 1
        SAL_INFO( " sdext.pdfimport.pdfparse ", " parseinfo: stop at offset = " << aInfo.stop - file_start << " , hit = " << ( aInfo.hit ? " true " : " false " ) << " , full = " << ( aInfo.full ? " true " : " false " ) << " , length = " << aInfo.length );
#endif
    }
    catch( const parser_error< const char*, file_iterator<> >& rError )
    {
        // the error is logged only; whatever is on the object stack is
        // still evaluated below
        SAL_WARN( " sdext.pdfimport.pdfparse ", " parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start );
#if OSL_DEBUG_LEVEL > 1
        OUString aTmp;
        unsigned int nElem = aGrammar.m_aObjectStack.size();
        for( unsigned int i = 0; i < nElem; i++ )
        {
            aTmp += " ";
            aTmp += OUString( typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
                              strlen( typeid( *(aGrammar.m_aObjectStack[i]) ).name() ),
                              RTL_TEXTENCODING_ASCII_US );
        }
        SAL_WARN( " sdext.pdfimport.pdfparse ", " parse error object stack: " << aTmp );
#endif
    }

    PDFEntry* pRet = NULL;
    unsigned int nEntries = aGrammar.m_aObjectStack.size();
    // exactly one remaining entry is the result; it is taken off the stack
    // so that the grammar's destructor does not delete it
    if( nEntries == 1 )
    {
        pRet = aGrammar.m_aObjectStack.back();
        aGrammar.m_aObjectStack.pop_back();
    }
#if OSL_DEBUG_LEVEL > 1
    else if( nEntries > 1 )
    {
        SAL_WARN( " sdext.pdfimport.pdfparse ", " error got " << nEntries << " stack objects in parse " );
        for( unsigned int i = 0; i < nEntries; i++ )
        {
            SAL_WARN( " sdext.pdfimport.pdfparse ", typeid( *aGrammar.m_aObjectStack[i] ).name() );
            PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
            if( pObj )
                SAL_WARN( " sdext.pdfimport.pdfparse ", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration );
            else
                SAL_WARN( " sdext.pdfimport.pdfparse ", " (type " << typeid( *aGrammar.m_aObjectStack[i] ).name() << " ) " );
        }
    }
#endif
    return pRet;
#endif // WIN32
}
2013-12-16 22:27:05 +01:00
# if defined _MSC_VER
2008-08-14 16:03:25 +00:00
# pragma warning(pop)
# endif
2010-10-12 15:55:21 +02:00
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */