related: fdo#73682 Introduce HTML detection service
Change-Id: I66bb579019ce8411b821c623955a454fd81cf811 Reviewed-on: https://gerrit.libreoffice.org/7600 Reviewed-by: Kohei Yoshida <libreoffice@kohei.us> Tested-by: Kohei Yoshida <libreoffice@kohei.us>
This commit is contained in:
parent
6063555744
commit
cc2893834d
@ -270,6 +270,7 @@ $(eval $(call gb_Helper_register_libraries_for_install,OOOLIBS,ooo, \
|
||||
$(if $(ENABLE_DIRECTX),gdipluscanvas) \
|
||||
guesslang \
|
||||
$(if $(filter DESKTOP,$(BUILD_TYPE)),helplinker) \
|
||||
htmlfd \
|
||||
i18npool \
|
||||
i18nsearch \
|
||||
hyphen \
|
||||
|
36
filter/Library_htmlfd.mk
Normal file
36
filter/Library_htmlfd.mk
Normal file
@ -0,0 +1,36 @@
|
||||
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
|
||||
#*************************************************************************
|
||||
#
|
||||
# This file is part of the LibreOffice project.
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#
|
||||
#*************************************************************************
|
||||
|
||||
# Shared library that hosts the HTML filter detection service.
$(eval $(call gb_Library_Library,htmlfd))

$(eval $(call gb_Library_set_componentfile,htmlfd,filter/source/htmlfilterdetect/htmlfd))

# filterdetect.cxx includes <boost/scoped_ptr.hpp>, so the boost headers have
# to be attached to this library (htmlfd), not to xmlfd.
$(eval $(call gb_Library_use_external,htmlfd,boost_headers))

$(eval $(call gb_Library_use_sdk_api,htmlfd))

$(eval $(call gb_Library_use_libraries,htmlfd,\
	ucbhelper \
	cppuhelper \
	cppu \
	sal \
	tl \
	utl \
	svt \
	$(gb_UWINAPI) \
))

$(eval $(call gb_Library_add_exception_objects,htmlfd,\
	filter/source/htmlfilterdetect/fdcomp \
	filter/source/htmlfilterdetect/filterdetect \
))
|
||||
|
||||
# vim: set noet sw=4 ts=4:
|
@ -34,6 +34,7 @@ $(eval $(call gb_Module_add_targets,filter,\
|
||||
Library_exp) \
|
||||
Library_filterconfig \
|
||||
Library_flash \
|
||||
Library_htmlfd \
|
||||
Library_icd \
|
||||
Library_icg \
|
||||
Library_idx \
|
||||
|
@ -16,7 +16,7 @@
|
||||
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
||||
-->
|
||||
<node oor:name="generic_HTML" oor:op="replace" >
|
||||
<prop oor:name="DetectService"><value>com.sun.star.text.FormatDetector</value></prop>
|
||||
<prop oor:name="DetectService"><value>com.sun.star.comp.filters.HtmlFilterDetect</value></prop>
|
||||
<prop oor:name="URLPattern"><value>private:factory/swriter/web*</value></prop>
|
||||
<prop oor:name="Extensions"><value>html htm</value></prop>
|
||||
<prop oor:name="MediaType"><value>text/html</value></prop>
|
||||
|
36
filter/source/htmlfilterdetect/fdcomp.cxx
Normal file
36
filter/source/htmlfilterdetect/fdcomp.cxx
Normal file
@ -0,0 +1,36 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#include <sal/config.h>
|
||||
|
||||
#include <cppuhelper/factory.hxx>
|
||||
#include <cppuhelper/implementationentry.hxx>
|
||||
#include <sal/types.h>
|
||||
|
||||
#include "filterdetect.hxx"
|
||||
|
||||
namespace {
|
||||
|
||||
static cppu::ImplementationEntry const services[] = {
|
||||
{ &HtmlFilterDetect_createInstance, &HtmlFilterDetect_getImplementationName,
|
||||
&HtmlFilterDetect_getSupportedServiceNames,
|
||||
&cppu::createSingleComponentFactory, 0, 0 },
|
||||
{ 0, 0, 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Exported entry point of the htmlfd library (the "htmlfd" prefix matches the
// prefix attribute in htmlfd.component). Forwards its arguments, together
// with the local implementation table, to cppu::component_getFactoryHelper()
// and returns whatever that helper returns.
extern "C" SAL_DLLPUBLIC_EXPORT void * SAL_CALL htmlfd_component_getFactory(
    char const * pImplName, void * pServiceManager, void * pRegistryKey)
{
    void * const pFactory = cppu::component_getFactoryHelper(
        pImplName, pServiceManager, pRegistryKey, services);
    return pFactory;
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
232
filter/source/htmlfilterdetect/filterdetect.cxx
Normal file
232
filter/source/htmlfilterdetect/filterdetect.cxx
Normal file
@ -0,0 +1,232 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#include "filterdetect.hxx"
|
||||
|
||||
#include <svtools/htmltokn.h>
|
||||
#include <tools/urlobj.hxx>
|
||||
#include <ucbhelper/content.hxx>
|
||||
#include <unotools/mediadescriptor.hxx>
|
||||
#include <unotools/ucbstreamhelper.hxx>
|
||||
|
||||
#include <com/sun/star/io/XInputStream.hpp>
|
||||
#include <cppuhelper/supportsservice.hxx>
|
||||
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
using com::sun::star::io::XInputStream;
|
||||
using com::sun::star::uno::Sequence;
|
||||
using com::sun::star::uno::Reference;
|
||||
using com::sun::star::uno::Any;
|
||||
using com::sun::star::uno::XComponentContext;
|
||||
using com::sun::star::uno::XInterface;
|
||||
using com::sun::star::uno::Exception;
|
||||
using com::sun::star::uno::RuntimeException;
|
||||
using com::sun::star::ucb::XCommandEnvironment;
|
||||
|
||||
using namespace com::sun::star;
|
||||
using namespace com::sun::star::beans;
|
||||
|
||||
namespace {
|
||||
|
||||
enum DetectPhase {
|
||||
BeforeTag,
|
||||
TagOpened,
|
||||
InTagName
|
||||
};
|
||||
|
||||
bool isHTMLStream(const OString& aStreamHeader)
|
||||
{
|
||||
const char* pHeader = aStreamHeader.getStr();
|
||||
const int nLength = aStreamHeader.getLength();
|
||||
int nStartOfTagIndex = 0;
|
||||
int i = 0;
|
||||
|
||||
DetectPhase dp = BeforeTag;
|
||||
|
||||
for ( i = 0; i < nLength; ++i, ++pHeader )
|
||||
{
|
||||
char c = *pHeader;
|
||||
if ( c == ' ' || c == '\n' || c == '\t' )
|
||||
{
|
||||
if ( dp == TagOpened )
|
||||
return false; // Invalid: Should start with a tag name
|
||||
else if ( dp == InTagName )
|
||||
break; // End of tag name reached
|
||||
}
|
||||
else if ( c == '<' )
|
||||
{
|
||||
if ( dp == BeforeTag )
|
||||
dp = TagOpened;
|
||||
else
|
||||
return false; // Invalid: Nested '<'
|
||||
}
|
||||
else if ( c == '>' )
|
||||
{
|
||||
if ( dp == InTagName )
|
||||
break; // End of tag name reached
|
||||
else
|
||||
return false; // Invalid: Empty tag or before '<'
|
||||
}
|
||||
else if ( c == '!' )
|
||||
{
|
||||
if ( i == 1 && dp == TagOpened )
|
||||
return true; // "<!" at the very beginning of the file
|
||||
else
|
||||
return false; // Invalid: '!' before '<' or inside tag name
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( dp == BeforeTag )
|
||||
return false; // Invalid: Should start with a tag
|
||||
else if ( dp == TagOpened )
|
||||
{
|
||||
nStartOfTagIndex = i;
|
||||
dp = InTagName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The string following '<' has to be a known HTML token.
|
||||
if ( GetHTMLToken( OStringToOUString( aStreamHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex ),
|
||||
RTL_TEXTENCODING_ASCII_US ) ) != 0 )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// XExtendedFilterDetection
//
// Reads the URL, input stream and document service from the media
// descriptor, loads up to 4096 units from the start of the stream and hands
// the lower-cased text to isHTMLStream().
//
// Returns "generic_HTML" when the header is recognised, an empty string
// otherwise (also when the stream cannot be opened or a UNO Exception is
// caught). As a side effect, when the content is HTML, no document service
// was given and the URL extension is "xls", the DocumentService property of
// lDescriptor is set to the spreadsheet document service (appending the
// property if it was not present).
OUString SAL_CALL HtmlFilterDetect::detect(Sequence<PropertyValue>& lDescriptor)
    throw (RuntimeException)
{
    OUString aUrl;
    OUString aDocService;
    Reference<XInputStream> xInput;

    const sal_Int32 nPropCount = lDescriptor.getLength();
    // Slot of the DocumentService property; stays at nPropCount when the
    // descriptor does not carry one.
    sal_Int32 nDocServiceSlot = nPropCount;

    const PropertyValue* const pProps = lDescriptor.getConstArray();
    for ( sal_Int32 nProp = 0; nProp < nPropCount; ++nProp )
    {
        const PropertyValue& rProp = pProps[nProp];
        if ( rProp.Name == utl::MediaDescriptor::PROP_URL() )
        {
            rProp.Value >>= aUrl;
        }
        else if ( rProp.Name == utl::MediaDescriptor::PROP_INPUTSTREAM() )
        {
            rProp.Value >>= xInput;
        }
        else if ( rProp.Name == utl::MediaDescriptor::PROP_DOCUMENTSERVICE() )
        {
            nDocServiceSlot = nProp;
            rProp.Value >>= aDocService;
        }
    }

    try
    {
        // Without a stream in the descriptor, open one from the URL.
        if ( !xInput.is() )
        {
            ucbhelper::Content aContent( aUrl, Reference<XCommandEnvironment>(), mxCtx );
            xInput = aContent.openStream();
            if ( !xInput.is() )
                return OUString();
        }

        boost::scoped_ptr<SvStream> pStream( utl::UcbStreamHelper::CreateStream( xInput ) );
        if ( !pStream || pStream->GetError() )
            return OUString();

        pStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
        const sal_Size nStartPos = pStream->Tell();

        const sal_uInt16 nHeaderSize = 4096;

        // Position 0 or 3 means UTF-8 or non-Unicode text, which is read
        // byte-wise; any other position is treated as UTF-16.
        const bool bByteText = ( nStartPos == 3 || nStartPos == 0 );
        OString aHeader;
        if ( bByteText )
            aHeader = read_uInt8s_ToOString( *pStream, nHeaderSize );
        else
            aHeader = OUStringToOString( read_uInt16s_ToOUString( *pStream, nHeaderSize ), RTL_TEXTENCODING_ASCII_US );

        if ( !isHTMLStream( aHeader.toAsciiLowerCase() ) )
            return OUString(); // Failed

        // Some Apps/Web services use ".xls" extension to indicate that
        // the given file should be opened by a spreadsheet software
        if ( aDocService.isEmpty() )
        {
            INetURLObject aUrlObj( aUrl );
            const OUString aExtension =
                aUrlObj.getExtension( INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET ).toAsciiLowerCase();

            if ( aExtension == "xls" )
            {
                if ( nDocServiceSlot == lDescriptor.getLength() )
                {
                    // Property not present yet: append it.
                    lDescriptor.realloc( nDocServiceSlot + 1 );
                    lDescriptor[nDocServiceSlot].Name = utl::MediaDescriptor::PROP_DOCUMENTSERVICE();
                }
                lDescriptor[nDocServiceSlot].Value <<= OUString( "com.sun.star.sheet.SpreadsheetDocument" );
            }
        }
        return OUString( "generic_HTML" );
    }
    catch (const Exception &)
    {
        OSL_FAIL( "An Exception occurred while opening File stream" );
    }

    return OUString(); // Failed
}
|
||||
|
||||
// XInitialization
|
||||
|
||||
// XInitialization: the arguments are ignored and nothing is done.
void SAL_CALL HtmlFilterDetect::initialize(const Sequence<Any>& /*aArguments*/)
    throw (Exception, RuntimeException)
{
    // Intentionally empty.
}
|
||||
|
||||
// Returns the implementation name of the HTML filter detection component.
OUString HtmlFilterDetect_getImplementationName()
{
    const OUString aImplName( "com.sun.star.comp.filters.HtmlFilterDetect" );
    return aImplName;
}
|
||||
|
||||
// Returns the two service names supported by the component.
Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames()
{
    Sequence<OUString> aNames(2);
    aNames[0] = "com.sun.star.document.ExtendedTypeDetection";
    aNames[1] = "com.sun.star.comp.filters.HtmlFilterDetect";
    return aNames;
}
|
||||
|
||||
// Factory function: creates a new HtmlFilterDetect bound to the given
// component context and returns it as an XInterface reference.
Reference<XInterface> HtmlFilterDetect_createInstance(const Reference<XComponentContext>& rCtx)
{
    // Named upcast instead of a C-style cast, so the conversion stays a
    // checked derived-to-base cast.
    return static_cast<cppu::OWeakObject*>( new HtmlFilterDetect( rCtx ) );
}
|
||||
|
||||
// XServiceInfo
|
||||
|
||||
// XServiceInfo: delegates to the free function of the same purpose.
OUString SAL_CALL HtmlFilterDetect::getImplementationName()
    throw (RuntimeException)
{
    const OUString aImplName = HtmlFilterDetect_getImplementationName();
    return aImplName;
}
|
||||
|
||||
// XServiceInfo: answers via cppu::supportsService() for this object.
sal_Bool SAL_CALL HtmlFilterDetect::supportsService(const OUString& rServiceName)
    throw (RuntimeException)
{
    const sal_Bool bSupported = cppu::supportsService( this, rServiceName );
    return bSupported;
}
|
||||
|
||||
// XServiceInfo: delegates to the free function of the same purpose.
Sequence<OUString> SAL_CALL HtmlFilterDetect::getSupportedServiceNames()
    throw (RuntimeException)
{
    const Sequence<OUString> aNames = HtmlFilterDetect_getSupportedServiceNames();
    return aNames;
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
64
filter/source/htmlfilterdetect/filterdetect.hxx
Normal file
64
filter/source/htmlfilterdetect/filterdetect.hxx
Normal file
@ -0,0 +1,64 @@
|
||||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FILTER_SOURCE_HTMLFILTERDETECT_FILTERDETECT_HXX
|
||||
#define INCLUDED_FILTER_SOURCE_HTMLFILTERDETECT_FILTERDETECT_HXX
|
||||
|
||||
#include <com/sun/star/document/XExtendedFilterDetection.hpp>
|
||||
#include <com/sun/star/lang/XInitialization.hpp>
|
||||
#include <com/sun/star/lang/XServiceInfo.hpp>
|
||||
#include <com/sun/star/uno/XComponentContext.hpp>
|
||||
|
||||
#include <cppuhelper/implbase3.hxx>
|
||||
|
||||
/**
 * Filter detection component for HTML content.
 *
 * Implements XExtendedFilterDetection (detect()), XInitialization and
 * XServiceInfo on top of cppu::WeakImplHelper3.
 */
class HtmlFilterDetect : public cppu::WeakImplHelper3<
    com::sun::star::document::XExtendedFilterDetection,
    com::sun::star::lang::XInitialization,
    com::sun::star::lang::XServiceInfo>
{
    /// Component context supplied at construction time.
    com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext> mxCtx;

public:

    /// Stores the given component context. Explicit so that a context
    /// reference is never silently converted into a detector object.
    explicit HtmlFilterDetect(const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& xCtx) :
        mxCtx(xCtx) {}
    virtual ~HtmlFilterDetect() {}

    // XExtendedFilterDetection

    /// Inspects the media descriptor and returns the detected type name,
    /// or an empty string when the content is not recognised.
    virtual OUString SAL_CALL detect(com::sun::star::uno::Sequence<com::sun::star::beans::PropertyValue>& lDescriptor)
        throw (com::sun::star::uno::RuntimeException);

    // XInitialization

    virtual void SAL_CALL initialize(const ::com::sun::star::uno::Sequence<com::sun::star::uno::Any>& aArguments)
        throw (com::sun::star::uno::Exception, com::sun::star::uno::RuntimeException);

    // XServiceInfo

    virtual OUString SAL_CALL getImplementationName()
        throw (com::sun::star::uno::RuntimeException);

    virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName)
        throw (com::sun::star::uno::RuntimeException);

    virtual com::sun::star::uno::Sequence<OUString> SAL_CALL getSupportedServiceNames()
        throw (com::sun::star::uno::RuntimeException);
};
|
||||
|
||||
OUString HtmlFilterDetect_getImplementationName();
|
||||
|
||||
com::sun::star::uno::Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames();
|
||||
|
||||
com::sun::star::uno::Reference<com::sun::star::uno::XInterface>
|
||||
HtmlFilterDetect_createInstance(const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& rCtx);
|
||||
|
||||
#endif
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
15
filter/source/htmlfilterdetect/htmlfd.component
Normal file
15
filter/source/htmlfilterdetect/htmlfd.component
Normal file
@ -0,0 +1,15 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
-->
|
||||
|
||||
<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
|
||||
prefix="htmlfd" xmlns="http://openoffice.org/2010/uno-components">
|
||||
<implementation name="com.sun.star.comp.filters.HtmlFilterDetect">
|
||||
<service name="com.sun.star.document.ExtendedTypeDetection"/>
|
||||
</implementation>
|
||||
</component>
|
@ -29,6 +29,7 @@ $(eval $(call gb_Rdb_add_components,services,\
|
||||
filter/source/config/cache/filterconfig1 \
|
||||
filter/source/flash/flash \
|
||||
filter/source/graphic/graphicfilter \
|
||||
filter/source/htmlfilterdetect/htmlfd \
|
||||
filter/source/msfilter/msfilter \
|
||||
filter/source/odfflatxml/odfflatxml \
|
||||
filter/source/pdf/pdffilter \
|
||||
|
@ -46,6 +46,7 @@ gb_EXTRAMERGEDLIBS := \
|
||||
graphicfilter \
|
||||
guesslang \
|
||||
$(if $(ENABLE_JAVA),hsqldb) \
|
||||
htmlfd \
|
||||
hyphen \
|
||||
icd \
|
||||
icg \
|
||||
|
Loading…
x
Reference in New Issue
Block a user