From b262e6e5da0c2e66e00cd6d68d7c2df11a34ff5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=C5=BE=20Vajngerl?= Date: Tue, 26 Jun 2018 20:06:40 +0200 Subject: [PATCH] xmloff: prevent embedding same font files with checksumming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When embedding fonts into ODF documents we can get into a situation where the same font is embedded multiple times, which wastes space. To prevent this we calculate the checksum of the font first and track the fonts that are embedded. When a font has the same checksum as one that is already embedded, the path to the already embedded font is returned. Change-Id: I087c42d9a63b1697b9b4ea985dbce2fda760a996 Reviewed-on: https://gerrit.libreoffice.org/56507 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl --- include/xmloff/XMLFontAutoStylePool.hxx | 5 +- xmloff/source/style/XMLFontAutoStylePool.cxx | 80 +++++++++++++++++--- 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/include/xmloff/XMLFontAutoStylePool.hxx b/include/xmloff/XMLFontAutoStylePool.hxx index e747bb8b20d8..530d05b2762a 100644 --- a/include/xmloff/XMLFontAutoStylePool.hxx +++ b/include/xmloff/XMLFontAutoStylePool.hxx @@ -26,6 +26,7 @@ #include #include #include +#include #include class XMLFontAutoStylePool_Impl; @@ -33,13 +34,15 @@ class SvXMLExport; class XMLOFF_DLLPUBLIC XMLFontAutoStylePool : public salhelper::SimpleReferenceObject { +private: SvXMLExport& rExport; std::unique_ptr m_pFontAutoStylePool; std::set m_aNames; bool m_bTryToEmbedFonts; + std::unordered_map m_aEmbeddedFontFiles; - OUString embedFontFile( const OUString& fontUrl ); + OUString embedFontFile(OUString const & rFileUrl, OUString const & rFamilyName); protected: diff --git a/xmloff/source/style/XMLFontAutoStylePool.cxx b/xmloff/source/style/XMLFontAutoStylePool.cxx index 1c13ea75cff4..1fb06598f841 100644 --- a/xmloff/source/style/XMLFontAutoStylePool.cxx +++ b/xmloff/source/style/XMLFontAutoStylePool.cxx @@ -34,6 +34,7 @@ #include #include 
+#include using namespace ::com::sun::star; using namespace ::com::sun::star::uno; @@ -367,7 +368,7 @@ void XMLFontAutoStylePool::exportXML() if (!fontFilesMap.count(sFileUrl)) { const OUString docUrl = bExportFlat ? - lcl_checkFontFile(sFileUrl) : embedFontFile(sFileUrl); + lcl_checkFontFile(sFileUrl) : embedFontFile(sFileUrl, pEntry->GetFamilyName()); if (!docUrl.isEmpty()) fontFilesMap[sFileUrl] = docUrl; else @@ -429,10 +430,72 @@ void XMLFontAutoStylePool::exportXML() } } -OUString XMLFontAutoStylePool::embedFontFile( const OUString& fileUrl ) +OUString getFreeFontName(uno::Reference const & rxStorage, OUString const & rFamilyName) +{ + OUString sName; + int nIndex = 1; + do + { + sName = "Font_" + + rFamilyName.replaceAll(" ", "_") + "_" + + OUString::number(nIndex) + ".ttf"; + nIndex++; + } while (rxStorage->hasByName(sName)); + + return sName; +} + +OString convertToHashString(std::vector const & rHash) +{ + std::stringstream aStringStream; + for (auto const & rByte : rHash) + { + aStringStream << std::setw(2) << std::setfill('0') << std::hex << int(rByte); + } + + return OString(aStringStream.str().c_str()); +} + +OString getFileHash(OUString const & rFileUrl) +{ + OString aHash; + osl::File aFile(rFileUrl); + if (aFile.open(osl_File_OpenFlag_Read) != osl::File::E_None) + return aHash; + + comphelper::Hash aHashEngine(comphelper::HashType::SHA512); + for (;;) + { + sal_Int8 aBuffer[4096]; + sal_uInt64 nReadSize; + sal_Bool bEof; + if (aFile.isEndOfFile(&bEof) != osl::File::E_None) + { + SAL_WARN("xmloff", "Error reading font file " << rFileUrl); + return aHash; + } + if (bEof) + break; + if (aFile.read(aBuffer, 4096, nReadSize) != osl::File::E_None) + { + SAL_WARN("xmloff", "Error reading font file " << rFileUrl); + return aHash; + } + if (nReadSize == 0) + break; + aHashEngine.update(reinterpret_cast(aBuffer), nReadSize); + } + return convertToHashString(aHashEngine.finalize()); +} + +OUString XMLFontAutoStylePool::embedFontFile(OUString const & fileUrl, 
OUString const & rFamilyName) { try { + OString sHashString = getFileHash(fileUrl); + if (m_aEmbeddedFontFiles.find(sHashString) != m_aEmbeddedFontFiles.end()) + return m_aEmbeddedFontFiles.at(sHashString); + osl::File file( fileUrl ); if( file.open( osl_File_OpenFlag_Read ) != osl::File::E_None ) return OUString(); @@ -443,12 +506,9 @@ OUString XMLFontAutoStylePool::embedFontFile( const OUString& fileUrl ) uno::Reference< embed::XStorage > storage; storage.set( GetExport().GetTargetStorage()->openStorageElement( "Fonts", ::embed::ElementModes::WRITE ), uno::UNO_QUERY_THROW ); - int index = 0; - OUString name; - do - { - name = "font" + OUString::number( ++index ) + ".ttf"; - } while( storage->hasByName( name ) ); + + OUString name = getFreeFontName(storage, rFamilyName); + uno::Reference< io::XOutputStream > outputStream; outputStream.set( storage->openStreamElement( name, ::embed::ElementModes::WRITE ), UNO_QUERY_THROW ); uno::Reference < beans::XPropertySet > propertySet( outputStream, uno::UNO_QUERY ); @@ -484,7 +544,9 @@ OUString XMLFontAutoStylePool::embedFontFile( const OUString& fileUrl ) if( transaction.is()) { transaction->commit(); - return "Fonts/" + name; + OUString sInternalName = "Fonts/" + name; + m_aEmbeddedFontFiles.emplace(sHashString, sInternalName); + return sInternalName; } } } catch( const Exception& e )