tdf#145381 handle closing brackets in URLs correctly

The task presents a URL that ends with a closing bracket. When
pasted into LO, the closing bracket was interpreted as not being part
of the URL due to the heuristic interpretation of URLs in
urihelper.
Adapted this to handle matching brackets, so that a closing bracket
at the end will be added to the URI text when there is a matching
opening bracket.
Added unit test to testFindFirstURLInText with simplified uri
example.

Change-Id: I58dd460a37d0066ff46845832eabd2a790e4ccd1
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/126832
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
This commit is contained in:
Armin Le Grand (Allotropia)
2021-12-14 14:45:58 +01:00
committed by Stephan Bergmann
parent 4812c8df39
commit 76f2937618
2 changed files with 35 additions and 6 deletions

View File

@@ -322,6 +322,7 @@ void Test::testFindFirstURLInText() {
{ "{a:\\bla/bla/bla...}", "file:///a:/bla/bla/bla", 1, 15 },
{ "#b:/c/d#e#f#", "file:///b:/c/d", 1, 7 },
{ "a:/", "file:///a:/", 0, 3 },
{ "http://sun.com/R_(l_a)", "http://sun.com/R_(l_a)", 0, 22 },
{ ".component:", nullptr, 0, 0 },
{ ".uno:", nullptr, 0, 0 },
{ "cid:", nullptr, 0, 0 },

View File

@@ -347,9 +347,12 @@ bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
}
}
// tdf#145381 Added MatchingBracketDepth counter to detect matching closing
// brackets that are part of the URI
bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
sal_Int32 * pPos, sal_Int32 * pEnd, bool bBackslash = false,
bool bPipe = false)
sal_Int32 * pPos, sal_Int32 * pEnd,
sal_Int32 * pMatchingBracketDepth = nullptr,
bool bBackslash = false, bool bPipe = false)
{
sal_Unicode c = rStr[*pPos];
if (rtl::isAscii(c))
@@ -360,7 +363,7 @@ bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./
5, 6, 1, 1, 1, 4, 1, 4, // ()*+,-./
4, 4, 4, 4, 4, 4, 4, 4, // 01234567
4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
@@ -402,6 +405,24 @@ bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
// isBoundary1)
*pEnd = ++(*pPos);
return true;
case 5: // opening bracket
++(*pPos);
if(nullptr != pMatchingBracketDepth)
++(*pMatchingBracketDepth);
return true;
case 6: // closing bracket
++(*pPos);
if(nullptr != pMatchingBracketDepth && *pMatchingBracketDepth > 0)
{
--(*pMatchingBracketDepth);
// tdf#145381 When there was an opening bracket, detect this closing bracket
// as part of the uri
*pEnd = *pPos;
}
return true;
}
}
else if (rCharClass.isLetterNumeric(rStr, *pPos))
@@ -499,6 +520,11 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
// Productions 6--9 are only applicable if the FSysStyle::Dos bit is set in
// eStyle.
// tdf#145381: In addition to the productions, a mechanism was added to detect
// matching brackets. The task presents the case of a URL that ends with a
// closing bracket. This needs to be detected as part of the URI in the case
// that a matching opening bracket exists.
bool bBoundary1 = true;
bool bBoundary2 = true;
for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
@@ -516,7 +542,7 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
sal_Int32 nPrefixEnd = i;
sal_Int32 nUriEnd = i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd, true,
&& checkWChar(rCharClass, rText, &i, &nUriEnd, nullptr, true,
true)) ;
if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
{
@@ -544,8 +570,10 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
while (rText[i++] != ':') ;
sal_Int32 nPrefixEnd = i;
sal_Int32 nUriEnd = i;
sal_Int32 nMatchingBracketDepth = 0;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
&& checkWChar(rCharClass, rText, &i, &nUriEnd,
&nMatchingBracketDepth)) ;
if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
{
++i;
@@ -655,7 +683,7 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
sal_Int32 nUriEnd = ++i;
while (i != rEnd
&& checkWChar(rCharClass, rText, &i, &nUriEnd,
true)) ;
nullptr, true)) ;
if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
{
INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),