sw floattable: fix lost tables around a floating table from DOCX

The DOCX file has 3 floating tables: the first one is immediately
followed by a second one, and the second one also has an inner table.

The problem is that somehow dmapper thinks it's fine to merge the two
outer table together, than the inner table invalidates some of its cell
start/end references, so at the end the outer table conversion fails,
which means we only have 1 table, not 3.

This is a bit similar to commit 1c99616f86
(sw floattable, crashtesting: fix PDF export of
forum-mso-en3-26783.docx, 2023-05-02), but here fixing the problem at
the dmapper level sounded too complex. Instead inject the text node
that'll serve as an anchor point at a tokenizer level.

With this, the DOCX version of the bnc#816603 bugdoc (the one that
started all workarounds in 2013) has one less overlapping table, but it
still has problems.

Change-Id: I54307b4c554da51bb55287e61bca700d8343d11c
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152599
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
Tested-by: Jenkins
This commit is contained in:
Miklos Vajna
2023-06-05 08:57:05 +02:00
parent 5841f73d51
commit 01ad8ec4bb
7 changed files with 149 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#*************************************************************************
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
#*************************************************************************
$(eval $(call gb_CppunitTest_CppunitTest,writerfilter_ooxml))
$(eval $(call gb_CppunitTest_use_externals,writerfilter_ooxml,\
boost_headers \
))
$(eval $(call gb_CppunitTest_add_exception_objects,writerfilter_ooxml, \
writerfilter/qa/cppunittests/ooxml/ooxml \
))
$(eval $(call gb_CppunitTest_use_libraries,writerfilter_ooxml, \
basegfx \
comphelper \
cppu \
cppuhelper \
oox \
sal \
subsequenttest \
test \
unotest \
utl \
tl \
vcl \
))
$(eval $(call gb_CppunitTest_use_sdk_api,writerfilter_ooxml))
$(eval $(call gb_CppunitTest_use_ure,writerfilter_ooxml))
$(eval $(call gb_CppunitTest_use_vcl,writerfilter_ooxml))
$(eval $(call gb_CppunitTest_use_rdb,writerfilter_ooxml,services))
$(eval $(call gb_CppunitTest_use_custom_headers,writerfilter_ooxml,\
officecfg/registry \
))
$(eval $(call gb_CppunitTest_use_configuration,writerfilter_ooxml))
# we need to explicitly depend on library writerfilter because it is not implied
# by a link relation
$(call gb_CppunitTest_get_target,writerfilter_ooxml) : $(call gb_Library_get_target,writerfilter)
# vim: set noet sw=4 ts=4:

View File

@@ -18,6 +18,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,writerfilter,\
CppunitTest_writerfilter_filters_test \
CppunitTest_writerfilter_misc \
CppunitTest_writerfilter_dmapper \
CppunitTest_writerfilter_ooxml \
CppunitTest_writerfilter_rtftok \
))

View File

@@ -0,0 +1,48 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <test/unoapi_test.hxx>
#include <com/sun/star/text/XTextTablesSupplier.hpp>
using namespace ::com::sun::star;
namespace
{
/// Tests for writerfilter/source/ooxml/.
class Test : public UnoApiTest
{
public:
Test()
: UnoApiTest("/writerfilter/qa/cppunittests/ooxml/data/")
{
}
};
CPPUNIT_TEST_FIXTURE(Test, testFloatingTablesLost)
{
// Given a document with 2 floating tables, the 2nd has an inner floating table as well:
loadFromURL(u"floattable-tables-lost.docx");
// When counting the created Writer tables:
uno::Reference<text::XTextTablesSupplier> xTextDocument(mxComponent, uno::UNO_QUERY);
uno::Reference<container::XIndexAccess> xTables(xTextDocument->getTextTables(), uno::UNO_QUERY);
// Then make sure that all 3 tables are imported:
// Without the accompanying fix in place, this test would have failed with:
// - Expected: 3
// - Actual : 1
// i.e. only the inner table was imported, the 2 others were lost.
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), xTables->getCount());
}
}
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View File

@@ -412,6 +412,11 @@ void OOXMLFastContextHandler::startParagraphGroup()
if (!isForwardEvents())
return;
if (mpParserState->GetFloatingTableEnded())
{
mpParserState->SetFloatingTableEnded(false);
}
if (mpParserState->isInParagraphGroup())
endParagraphGroup();
@@ -1612,6 +1617,14 @@ void OOXMLFastContextHandlerTextTable::lcl_startFastElement
(Token_t /*Element*/,
const uno::Reference< xml::sax::XFastAttributeList > & /*Attribs*/)
{
if (mpParserState->GetFloatingTableEnded())
{
// We're starting a new table, but the previous table was floating. Insert a dummy paragraph
// to ensure that the floating table has a suitable anchor.
startParagraphGroup();
endOfParagraph();
}
mpParserState->startTable();
mnTableDepth++;
@@ -1638,6 +1651,20 @@ void OOXMLFastContextHandlerTextTable::lcl_endFastElement
mpParserState->setCharacterProperties(pProps);
mnTableDepth--;
OOXMLPropertySet::Pointer_t pTableProps = mpParserState->GetTableProperties();
if (pTableProps)
{
for (const auto& rTableProp : *pTableProps)
{
if (rTableProp->getId() == NS_ooxml::LN_CT_TblPrBase_tblpPr)
{
mpParserState->SetFloatingTableEnded(true);
break;
}
}
}
mpParserState->endTable();
}

View File

@@ -207,6 +207,16 @@ void OOXMLParserState::setTableProperties(const OOXMLPropertySet::Pointer_t& pPr
}
}
OOXMLPropertySet::Pointer_t OOXMLParserState::GetTableProperties() const
{
if (mTableProps.empty())
{
return nullptr;
}
return mTableProps.top();
}
// tdf#108714
void OOXMLParserState::resolvePostponedBreak(Stream & rStream)
{

View File

@@ -58,6 +58,8 @@ class OOXMLParserState final : public virtual SvRefBase
std::vector<SavedAlternateState> maSavedAlternateStates;
std::vector<OOXMLPropertySet::Pointer_t> mvPostponedBreaks;
bool mbStartFootnote;
/// We just ended a floating table. Starting a paragraph or table resets this.
bool m_bFloatingTableEnded = false;
public:
typedef tools::SvRef<OOXMLParserState> Pointer_t;
@@ -101,6 +103,7 @@ public:
void setRowProperties(const OOXMLPropertySet::Pointer_t& pProps);
void resolveTableProperties(Stream& rStream);
void setTableProperties(const OOXMLPropertySet::Pointer_t& pProps);
OOXMLPropertySet::Pointer_t GetTableProperties() const;
// tdf#108714
void resolvePostponedBreak(Stream& rStream);
void setPostponedBreak(const OOXMLPropertySet::Pointer_t& pProps);
@@ -115,6 +118,12 @@ public:
void setStartFootnote(bool bStartFootnote);
bool isStartFootnote() const { return mbStartFootnote; }
void SetFloatingTableEnded(bool bFloatingTableEnded)
{
m_bFloatingTableEnded = bFloatingTableEnded;
}
bool GetFloatingTableEnded() const { return m_bFloatingTableEnded; }
};
}