tdf#150866: Add support for TEXTSPLIT function

Change-Id: I918b46c29791f865e841eb967b2799f7986538bc
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185027
Tested-by: Jenkins
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
Tested-by: Xisco Fauli <xiscofauli@libreoffice.org>
This commit is contained in:
Xisco Fauli 2025-05-07 16:55:55 +02:00
parent 88945b5afd
commit 4c2596a4b5
18 changed files with 4825 additions and 7 deletions

View File

@ -288,6 +288,7 @@ const std::pair<const char *, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_ODFF[] =
{ "COM.MICROSOFT.HSTACK" , SC_OPCODE_HSTACK },
{ "COM.MICROSOFT.VSTACK" , SC_OPCODE_VSTACK },
{ "COM.MICROSOFT.TAKE" , SC_OPCODE_TAKE },
{ "COM.MICROSOFT.TEXTSPLIT" , SC_OPCODE_TEXTSPLIT },
{ "COM.MICROSOFT.TOCOL" , SC_OPCODE_TOCOL },
{ "COM.MICROSOFT.TOROW" , SC_OPCODE_TOROW },
{ "COM.MICROSOFT.UNIQUE" , SC_OPCODE_UNIQUE },
@ -756,6 +757,7 @@ const std::pair<const char *, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_OOXML[] =
{ "_xlfn.HSTACK" , SC_OPCODE_HSTACK },
{ "_xlfn.VSTACK" , SC_OPCODE_VSTACK },
{ "_xlfn.TAKE" , SC_OPCODE_TAKE },
{ "_xlfn.TEXTSPLIT" , SC_OPCODE_TEXTSPLIT },
{ "_xlfn.TOCOL" , SC_OPCODE_TOCOL },
{ "_xlfn.TOROW" , SC_OPCODE_TOROW },
{ "_xlfn.UNIQUE" , SC_OPCODE_UNIQUE },
@ -1227,6 +1229,7 @@ const std::pair<const char *, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_PODF[] =
{ "HSTACK" , SC_OPCODE_HSTACK },
{ "VSTACK" , SC_OPCODE_VSTACK },
{ "TAKE" , SC_OPCODE_TAKE },
{ "TEXTSPLIT" , SC_OPCODE_TEXTSPLIT },
{ "TOCOL" , SC_OPCODE_TOCOL },
{ "TOROW" , SC_OPCODE_TOROW },
{ "UNIQUE" , SC_OPCODE_UNIQUE },
@ -1698,6 +1701,7 @@ const std::pair<const char *, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_API[] =
{ "HSTACK" , SC_OPCODE_HSTACK },
{ "VSTACK" , SC_OPCODE_VSTACK },
{ "TAKE" , SC_OPCODE_TAKE },
{ "TEXTSPLIT" , SC_OPCODE_TEXTSPLIT },
{ "TOCOL" , SC_OPCODE_TOCOL },
{ "TOROW" , SC_OPCODE_TOROW },
{ "UNIQUE" , SC_OPCODE_UNIQUE },
@ -2167,6 +2171,7 @@ const std::pair<const char *, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH[] =
{ "HSTACK" , SC_OPCODE_HSTACK },
{ "VSTACK" , SC_OPCODE_VSTACK },
{ "TAKE" , SC_OPCODE_TAKE },
{ "TEXTSPLIT" , SC_OPCODE_TEXTSPLIT },
{ "TOCOL" , SC_OPCODE_TOCOL },
{ "TOROW" , SC_OPCODE_TOROW },
{ "UNIQUE" , SC_OPCODE_UNIQUE },
@ -2617,6 +2622,7 @@ const std::pair<TranslateId, int> RID_STRLIST_FUNCTION_NAMES[] =
{ NC_("RID_STRLIST_FUNCTION_NAMES", "HSTACK") , SC_OPCODE_HSTACK },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "VSTACK") , SC_OPCODE_VSTACK },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "TAKE") , SC_OPCODE_TAKE },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "TEXTSPLIT") , SC_OPCODE_TEXTSPLIT },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "TOCOL") , SC_OPCODE_TOCOL },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "TOROW") , SC_OPCODE_TOROW },
{ NC_("RID_STRLIST_FUNCTION_NAMES", "UNIQUE") , SC_OPCODE_UNIQUE },

View File

@ -1284,6 +1284,7 @@ bool FormulaCompiler::IsMatrixFunction( OpCode eOpCode )
case ocHStack :
case ocVStack :
case ocTake :
case ocTextSplit :
case ocToCol :
case ocToRow :
case ocUnique :

View File

@ -524,12 +524,13 @@
#define SC_OPCODE_HSTACK 509
#define SC_OPCODE_VSTACK 510
#define SC_OPCODE_TAKE 511
#define SC_OPCODE_TOCOL 512
#define SC_OPCODE_TOROW 513
#define SC_OPCODE_UNIQUE 514
#define SC_OPCODE_WRAPCOLS 515
#define SC_OPCODE_WRAPROWS 516
#define SC_OPCODE_STOP_2_PAR 517 /* last function with two or more parameters' OpCode + 1 */
#define SC_OPCODE_TEXTSPLIT 512
#define SC_OPCODE_TOCOL 513
#define SC_OPCODE_TOROW 514
#define SC_OPCODE_UNIQUE 515
#define SC_OPCODE_WRAPCOLS 516
#define SC_OPCODE_WRAPROWS 517
#define SC_OPCODE_STOP_2_PAR 518 /* last function with two or more parameters' OpCode + 1 */
#define SC_OPCODE_STOP_FUNCTION SC_OPCODE_STOP_2_PAR /* last function's OpCode + 1 */
#define SC_OPCODE_LAST_OPCODE_ID (SC_OPCODE_STOP_FUNCTION - 1) /* last OpCode */

View File

@ -520,6 +520,7 @@ enum OpCode : sal_uInt16
ocHStack = SC_OPCODE_HSTACK,
ocVStack = SC_OPCODE_VSTACK,
ocTake = SC_OPCODE_TAKE,
ocTextSplit = SC_OPCODE_TEXTSPLIT,
ocToCol = SC_OPCODE_TOCOL,
ocToRow = SC_OPCODE_TOROW,
ocUnique = SC_OPCODE_UNIQUE,
@ -1016,6 +1017,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
case ocHStack: return "HStack";
case ocVStack: return "VStack";
case ocTake: return "Take";
case ocTextSplit: return "TextSplit";
case ocToCol: return "ToCol";
case ocToRow: return "ToRow";
case ocUnique: return "Unique";

View File

@ -62,6 +62,7 @@ https://docs.oasis-open.org/office/OpenDocument/v1.3/os/part4-formula/OpenDocume
* HSTACK
* VSTACK
* TAKE
* TEXTSPLIT
* TOCOL
* TOROW
* WRAPCOLS

View File

@ -607,6 +607,7 @@ inline constexpr OUString HID_FUNC_EXPAND_MS = u"SC_HID_FUNC_EXPAND_MS"_ustr;
inline constexpr OUString HID_FUNC_HSTACK_MS = u"SC_HID_FUNC_HSTACK_MS"_ustr;
inline constexpr OUString HID_FUNC_VSTACK_MS = u"SC_HID_FUNC_VSTACK_MS"_ustr;
inline constexpr OUString HID_FUNC_TAKE_MS = u"SC_HID_FUNC_TAKE_MS"_ustr;
inline constexpr OUString HID_FUNC_TEXTSPLIT_MS = u"SC_HID_FUNC_TEXTSPLIT_MS"_ustr;
inline constexpr OUString HID_FUNC_TOCOL_MS = u"SC_HID_FUNC_TOCOL_MS"_ustr;
inline constexpr OUString HID_FUNC_TOROW_MS = u"SC_HID_FUNC_TOROW_MS"_ustr;
inline constexpr OUString HID_FUNC_UNIQUE_MS = u"SC_HID_FUNC_UNIQUE_MS"_ustr;

View File

@ -4329,6 +4329,24 @@ const TranslateId SC_OPCODE_TAKE_ARY[] =
NC_("SC_OPCODE_TAKE", "The number of columns to take. A negative value takes from the end of the array.")
};
// -=*# Resource for function TEXTSPLIT #*=-
// Layout: the first entry is the function description, followed by one
// (parameter name, parameter description) pair per parameter, in order.
const TranslateId SC_OPCODE_TEXTSPLIT_ARY[] =
{
    NC_("SC_OPCODE_TEXTSPLIT", "Splits text by a given delimiter to an array of multiple cells."),
    NC_("SC_OPCODE_TEXTSPLIT", "Text"),
    NC_("SC_OPCODE_TEXTSPLIT", "The text to split."),
    NC_("SC_OPCODE_TEXTSPLIT", "Column Delimiter"),
    NC_("SC_OPCODE_TEXTSPLIT", "The text to delimit the columns. Multiple delimiters can be supplied."),
    NC_("SC_OPCODE_TEXTSPLIT", "Row Delimiter"),
    NC_("SC_OPCODE_TEXTSPLIT", "The text to delimit the rows. Multiple delimiters can be supplied."),
    NC_("SC_OPCODE_TEXTSPLIT", "Ignore Empty"),
    NC_("SC_OPCODE_TEXTSPLIT", "Set to TRUE to ignore consecutive delimiters otherwise creates an empty cell. The default is FALSE."),
    NC_("SC_OPCODE_TEXTSPLIT", "Match mode"),
    NC_("SC_OPCODE_TEXTSPLIT", "Set to 1 to perform a case-insensitive match otherwise does a case-sensitive match. The default is 0."),
    NC_("SC_OPCODE_TEXTSPLIT", "Pad with"),
    NC_("SC_OPCODE_TEXTSPLIT", "The value with which to pad. The default is #N/A.")
};
// -=*# Resource for function TOCOL #*=-
const TranslateId SC_OPCODE_TOCOL_ARY[] =
{

View File

@ -77,7 +77,7 @@ public:
ScFunctionListObj::ScFunctionListObj()
: UnoApiTest(u"/sc/qa/extras/testdocuments"_ustr)
, XElementAccess(cppu::UnoType<uno::Sequence<beans::PropertyValue>>::get())
, XIndexAccess(415)
, XIndexAccess(416)
, XNameAccess(u"IF"_ustr)
, XServiceInfo(u"stardiv.StarCalc.ScFunctionListObj"_ustr,
u"com.sun.star.sheet.FunctionDescriptions"_ustr)

File diff suppressed because it is too large Load Diff

View File

@ -3125,6 +3125,7 @@ CPPUNIT_TEST_FIXTURE(Test, testFunctionLists)
"SORTBY",
"STYLE",
"TAKE",
"TEXTSPLIT",
"TOCOL",
"TOROW",
"UNIQUE",

View File

@ -800,6 +800,7 @@ ScFunctionList::ScFunctionList( bool bEnglishFunctionNames )
{ SC_OPCODE_HSTACK, ENTRY(SC_OPCODE_HSTACK_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_HSTACK_MS, VAR_ARGS + 1, { 0, 0 }, 0 },
{ SC_OPCODE_VSTACK, ENTRY(SC_OPCODE_VSTACK_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_VSTACK_MS, VAR_ARGS + 1, { 0, 0 }, 0 },
{ SC_OPCODE_TAKE, ENTRY(SC_OPCODE_TAKE_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_TAKE_MS, 3, { 0, 1, 1 }, 0 },
{ SC_OPCODE_TEXTSPLIT, ENTRY(SC_OPCODE_TEXTSPLIT_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_TEXTSPLIT_MS, 6, { 0, 1, 1, 1, 1, 1 }, 0 },
{ SC_OPCODE_TOCOL, ENTRY(SC_OPCODE_TOCOL_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_TOCOL_MS, 3, { 0, 1, 1 }, 0 },
{ SC_OPCODE_TOROW, ENTRY(SC_OPCODE_TOROW_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_TOROW_MS, 3, { 0, 1, 1 }, 0 },
{ SC_OPCODE_UNIQUE, ENTRY(SC_OPCODE_UNIQUE_ARY), 0, ID_FUNCTION_GRP_TABLE, HID_FUNC_UNIQUE_MS, 3, { 0, 1, 1 }, 0 },

View File

@ -736,6 +736,7 @@ private:
void ScHStack();
void ScVStack();
void ScTake();
void ScTextSplit();
void ScToCol();
void ScToRow();
void ScUnique();

View File

@ -9295,6 +9295,207 @@ void ScInterpreter::ScTake()
ScTakeOrDrop(/*bTake*/ true);
}
/** Splits rText into tokens at every occurrence of any of the given delimiters.

    At each scan position the delimiter that occurs earliest in the remaining
    text is used; empty delimiters are skipped. Scanning stops once the scan
    position reaches the end of the text, so a delimiter at the very end of
    rText does not produce a trailing empty token.

    @param rText         The text to split.
    @param rDelimiters   Candidate delimiters. If the list is empty, or rText
                         is empty, the result is the single element rText.
    @param bIgnoreEmpty  If true, empty tokens (e.g. between two adjacent
                         delimiters) are dropped from the result.
    @param bMatchMode    If true, delimiters are located case-insensitively by
                         comparing lower-cased copies of text and delimiters.

    @return The tokens in order of appearance. May be empty when bIgnoreEmpty
            is true and rText consists of delimiters only.
 */
static std::vector<OUString> lcl_SplitText(const OUString& rText, const std::vector<svl::SharedString>& rDelimiters,
                                           bool bIgnoreEmpty, bool bMatchMode)
{
    std::vector<OUString> aResStr;
    if (rDelimiters.empty() || rText.isEmpty())
    {
        aResStr.push_back(rText);
        return aResStr;
    }

    // A delimiter prepared for searching: the pattern to look for and the
    // number of characters of rText to skip once it has been found.
    struct Needle
    {
        OUString aPattern;
        sal_Int32 nSkip;
    };

    // Case folding is independent of the scan position, so do it once up
    // front instead of once per delimiter per token.
    // NOTE(review): offsets found in the lower-cased text are applied to
    // rText, which presumes that lower-casing does not change the length of
    // the text - confirm for locales with special casing rules.
    const OUString aHaystack = bMatchMode ? ScGlobal::getCharClass().lowercase(rText) : rText;

    std::vector<Needle> aNeedles;
    aNeedles.reserve(rDelimiters.size());
    for (const auto& rDelimiter : rDelimiters)
    {
        if (rDelimiter.isEmpty())
            continue;

        OUString sDelimiter = rDelimiter.getString();
        // The skip width is always the length of the delimiter as supplied.
        const sal_Int32 nSkip = sDelimiter.getLength();
        if (bMatchMode)
            sDelimiter = ScGlobal::getCharClass().lowercase(sDelimiter);
        aNeedles.push_back({ sDelimiter, nSkip });
    }

    const sal_Int32 nLength = rText.getLength();
    sal_Int32 nStart = 0;
    while (nStart < nLength)
    {
        // Without any match the token extends to the end of the text.
        sal_Int32 nIndex = nLength;
        sal_Int32 nDelLength = 0;

        // Find the delimiter that occurs first at or after nStart; on a tie
        // the delimiter listed first wins because of the strict comparison.
        for (const Needle& rNeedle : aNeedles)
        {
            const sal_Int32 nFound = aHaystack.indexOf(rNeedle.aPattern, nStart);
            if (nFound != -1 && nFound < nIndex)
            {
                nDelLength = rNeedle.nSkip;
                nIndex = nFound;
            }
        }

        OUString sRes(rText.copy(nStart, nIndex - nStart));
        if (!bIgnoreEmpty || !sRes.isEmpty())
            aResStr.push_back(sRes);

        nStart = nIndex + nDelLength;
    }
    return aResStr;
}
void ScInterpreter::ScTextSplit()
{
sal_uInt8 nParamCount = GetByte();
if (!MustHaveParamCount(nParamCount, 1, 6))
return;
// 6rd argument optional - pad_with
std::optional<svl::SharedString> aPadWith;
if (nParamCount == 6)
aPadWith = GetString();
// 5rd argument optional - match_mode
bool bMatchMode = false;
if (nParamCount >= 5)
{
if (!IsMissing())
{
bMatchMode = GetBool();
}
else
Pop();
}
// 4rd argument optional - ignore_empty
bool bIgnoreEmpty = false;
if (nParamCount >= 4)
{
if (!IsMissing())
bIgnoreEmpty = GetBool();
else
Pop();
}
// 3rd argument optional - row_delimiter
std::vector<svl::SharedString> aRowDelimiters;
if (nParamCount >= 3)
{
ScMatrixRef pMatSource = nullptr;
SCSIZE nsC = 0, nsR = 0;
switch (GetStackType())
{
case svSingleRef:
case svDoubleRef:
case svMatrix:
case svExternalSingleRef:
case svExternalDoubleRef:
{
pMatSource = GetMatrix();
if (!pMatSource)
{
PushIllegalParameter();
return;
}
pMatSource->GetDimensions(nsC, nsR);
for (SCSIZE i = 0; i < nsC; i++)
{
for (SCSIZE j = 0; j < nsR; j++)
{
aRowDelimiters.push_back(pMatSource->GetString(i,j));
}
}
}
break;
default:
aRowDelimiters.push_back(GetString());
}
}
// 2nd argument optional - col_delimiter
std::vector<svl::SharedString> aColDelimiters;
if (nParamCount >= 2)
{
ScMatrixRef pMatSource = nullptr;
SCSIZE nsC = 0, nsR = 0;
switch (GetStackType())
{
case svSingleRef:
case svDoubleRef:
case svMatrix:
case svExternalSingleRef:
case svExternalDoubleRef:
{
pMatSource = GetMatrix();
if (!pMatSource)
{
PushIllegalParameter();
return;
}
pMatSource->GetDimensions(nsC, nsR);
for (SCSIZE i = 0; i < nsC; i++)
{
for (SCSIZE j = 0; j < nsR; j++)
{
aColDelimiters.push_back(pMatSource->GetString(i,j));
}
}
}
break;
default:
aColDelimiters.push_back(GetString());
}
}
// 1st argument: text
svl::SharedString sText = GetString();
if (sText.isEmpty())
{
PushIllegalParameter();
return;
}
std::vector<OUString> aRowStrs = lcl_SplitText(sText.getString(), aRowDelimiters, bIgnoreEmpty, bMatchMode);
std::vector<std::vector<OUString>> aRes;
SCSIZE nCols = 1;
SCSIZE nRows = aRowStrs.size();
for (auto& rRow : aRowStrs)
{
std::vector<OUString> aColStrs = lcl_SplitText(rRow, aColDelimiters, bIgnoreEmpty, bMatchMode);
nCols = std::max(nCols, aColStrs.size());
aRes.push_back(aColStrs);
}
ScMatrixRef pResMat = GetNewMat(nCols, nRows, /*bEmpty*/true);
for (SCSIZE col = 0; col < nCols; ++col)
{
for (SCSIZE row = 0; row < nRows; ++row)
{
if (col < aRes[row].size())
{
pResMat->PutString(mrStrPool.intern(aRes[row][col]), col, row);
}
else
{
if (!aPadWith.has_value())
pResMat->PutError(FormulaError::NotAvailable, col, row);
else
pResMat->PutString(aPadWith.value(), col, row);
}
}
}
PushMatrix(pResMat);
}
void ScInterpreter::ScToColOrRow(bool bCol)
{
sal_uInt8 nParamCount = GetByte();

View File

@ -4130,6 +4130,7 @@ StackVar ScInterpreter::Interpret()
case ocHStack : ScHStack(); break;
case ocVStack : ScVStack(); break;
case ocTake : ScTake(); break;
case ocTextSplit : ScTextSplit(); break;
case ocToCol : ScToCol(); break;
case ocToRow : ScToRow(); break;
case ocUnique : ScUnique(); break;

View File

@ -264,6 +264,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] =
{ ocTake, {{ ReferenceOrRefArray, Value, Value, }, 0, ForceArrayReturn } },
{ ocTTest, {{ ForceArray, ForceArray, Value, Value }, 0, Value }},
{ ocTextJoin_MS, {{ Reference, Value, Reference }, 1, Value }},
{ ocTextSplit, {{ ReferenceOrRefArray, ReferenceOrRefArray, ReferenceOrRefArray, Value, Value, Value }, 0, ForceArrayReturn } },
{ ocToCol, {{ ReferenceOrRefArray, Value, Value, }, 0, ForceArrayReturn } },
{ ocToRow, {{ ReferenceOrRefArray, Value, Value, }, 0, ForceArrayReturn } },
{ ocTrend, {{ Reference, Reference, Reference, Value }, 0, Value }},

View File

@ -1598,6 +1598,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
case ocHStack:
case ocVStack:
case ocTake:
case ocTextSplit:
case ocToCol:
case ocToRow:
case ocUnique:

View File

@ -626,6 +626,7 @@ const XclFunctionInfo saFuncTable_2024[] =
EXC_FUNCENTRY_V_VR( ocHStack, 1, MX, 0, "HSTACK" ),
EXC_FUNCENTRY_V_VR( ocVStack, 1, MX, 0, "VSTACK" ),
EXC_FUNCENTRY_V_VR( ocTake, 1, 3, 0, "TAKE" ),
EXC_FUNCENTRY_V_VR( ocTextSplit, 1, 6, 0, "TEXTSPLIT" ),
EXC_FUNCENTRY_V_VR( ocToCol, 1, 3, 0, "TOCOL" ),
EXC_FUNCENTRY_V_VR( ocToRow, 1, 3, 0, "TOROW" ),
EXC_FUNCENTRY_V_VR( ocWrapCols, 2, 3, 0, "WRAPCOLS"),

View File

@ -901,6 +901,7 @@ const FunctionData saFuncTable2024[] =
{ "COM.MICROSOFT.HSTACK", "HSTACK", NOID, NOID, 1, MX, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.VSTACK", "VSTACK", NOID, NOID, 1, MX, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.TAKE", "TAKE", NOID, NOID, 1, 3, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.TEXTSPLIT", "TEXTSPLIT", NOID, NOID, 1, 6, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.TOCOL", "TOCOL", NOID, NOID, 1, 3, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.TOROW", "TOROW", NOID, NOID, 1, 3, A, { VO }, FuncFlags::MACROCALL_NEW },
{ "COM.MICROSOFT.WRAPCOLS", "WRAPCOLS", NOID, NOID, 2, 3, A, { VO }, FuncFlags::MACROCALL_NEW },