tdf#70833: IDNA support when exporting hyperlinks to PDF
Any URLs using non-ASCII IDNA syntax need to be resolved to ASCII-only, as PDF URI Action's URI needs to be "encoded in 7-bit ASCII." Introduce URIHelper::resolveIdnaHost (svl/urihelper.hxx), which internally uses icu::IDNA, which requires bumping the minimum --with-system-icu requirement from 4.2 to 4.6, which means ICU_RECLASSIFIED_CLOSE_PARENTHESIS is always true now. Change-Id: I0e20d9a20ed2b869fba0cc7c969721411db590b3 Reviewed-on: https://gerrit.libreoffice.org/19669 Reviewed-by: Stephan Bergmann <sbergman@redhat.com> Tested-by: Stephan Bergmann <sbergman@redhat.com>
This commit is contained in:
parent
b051510796
commit
a346dfccd7
@ -273,7 +273,6 @@ export ICU_CFLAGS=$(gb_SPACE)@ICU_CFLAGS@
|
||||
export ICU_LIBS=$(gb_SPACE)@ICU_LIBS@
|
||||
export ICU_MAJOR=@ICU_MAJOR@
|
||||
export ICU_MINOR=@ICU_MINOR@
|
||||
export ICU_RECLASSIFIED_CLOSE_PARENTHESIS=@ICU_RECLASSIFIED_CLOSE_PARENTHESIS@
|
||||
export ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER=@ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER@
|
||||
export ICU_RECLASSIFIED_HEBREW_LETTER=@ICU_RECLASSIFIED_HEBREW_LETTER@
|
||||
export ICU_RECLASSIFIED_PREPEND_SET_EMPTY=@ICU_RECLASSIFIED_PREPEND_SET_EMPTY@
|
||||
|
11
configure.ac
11
configure.ac
@ -8949,7 +8949,6 @@ SYSTEM_GENCMN=
|
||||
|
||||
ICU_MAJOR=56
|
||||
ICU_MINOR=1
|
||||
ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE"
|
||||
ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
|
||||
ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
|
||||
ICU_RECLASSIFIED_HEBREW_LETTER="TRUE"
|
||||
@ -8974,10 +8973,10 @@ if test "$with_system_icu" = "yes"; then
|
||||
ICU_MAJOR=`echo $ICU_VERSION | cut -d"." -f1`
|
||||
ICU_MINOR=`echo $ICU_VERSION | cut -d"." -f2`
|
||||
|
||||
if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "2" \); then
|
||||
if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "6" \); then
|
||||
AC_MSG_RESULT([OK, $ICU_VERSION])
|
||||
else
|
||||
AC_MSG_ERROR([not suitable, only >= 4.2 supported currently])
|
||||
AC_MSG_ERROR([not suitable, only >= 4.6 supported currently])
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -9013,11 +9012,6 @@ You can use --with-system-icu-for-build=force to use it anyway.])
|
||||
if test -z "$SYSTEM_GENCMN"; then
|
||||
AC_MSG_ERROR([\'gencmn\' not found in \$PATH, install the icu development tool \'gencmn\'])
|
||||
fi
|
||||
if test "$ICU_MAJOR" -ge "49" -o \( "$ICU_MAJOR" = "4" -a "$ICU_MINOR" -ge "4" \); then
|
||||
ICU_RECLASSIFIED_CLOSE_PARENTHESIS="TRUE"
|
||||
else
|
||||
ICU_RECLASSIFIED_CLOSE_PARENTHESIS=
|
||||
fi
|
||||
if test "$ICU_MAJOR" -ge "49"; then
|
||||
ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
|
||||
ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
|
||||
@ -9055,7 +9049,6 @@ AC_SUBST(SYSTEM_GENCCODE)
|
||||
AC_SUBST(SYSTEM_GENCMN)
|
||||
AC_SUBST(ICU_MAJOR)
|
||||
AC_SUBST(ICU_MINOR)
|
||||
AC_SUBST(ICU_RECLASSIFIED_CLOSE_PARENTHESIS)
|
||||
AC_SUBST(ICU_RECLASSIFIED_PREPEND_SET_EMPTY)
|
||||
AC_SUBST(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER)
|
||||
AC_SUBST(ICU_RECLASSIFIED_HEBREW_LETTER)
|
||||
|
@ -98,8 +98,7 @@ $(i18npool_BIDIR)/%.brk : $(i18npool_BIDIR)/%.txt $(call gb_ExternalExecutable_g
|
||||
# sed substitution...
|
||||
$(i18npool_BIDIR)/%.txt : \
|
||||
$(SRCDIR)/i18npool/source/breakiterator/data/%.txt | $(i18npool_BIDIR)/.dir
|
||||
sed -e ': dummy' \
|
||||
$(if $(ICU_RECLASSIFIED_CLOSE_PARENTHESIS),-e "s#\[:LineBreak = Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#") \
|
||||
sed -e "s#\[:LineBreak = Close_Punctuation:\]#\[& \[:LineBreak = Close_Parenthesis:\]\]#" \
|
||||
$(if $(ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER),,\
|
||||
-e '/\[:LineBreak = Conditional_Japanese_Starter:\]/d' \
|
||||
-e 's# $$CJ##' \
|
||||
|
@ -152,6 +152,23 @@ SVL_DLLPUBLIC OUString removePassword(OUString const & rURI,
|
||||
INetURLObject::EncodeMechanism eEncodeMechanism = INetURLObject::WAS_ENCODED,
|
||||
INetURLObject::DecodeMechanism eDecodeMechanism = INetURLObject::DECODE_TO_IURI,
|
||||
rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
|
||||
|
||||
/** Resolve a URL's host component domain name in IDNA syntax to plain DNS
|
||||
syntax.
|
||||
|
||||
For details, see RFC 5890 "Internationalized Domain Names for Applications
|
||||
(IDNA): Definitions and Document Framework."
|
||||
|
||||
@param url An arbitrary string, should be a URI.
|
||||
|
||||
@return If the input matches the syntax of a hierarchical URL, and it has
|
||||
a host component that matches the IDNA2008 domain name syntax, and that
|
||||
domain name contains any U-labels, return a version of the input URL with
|
||||
the host component resolved to plain DNS syntax. Otherwise, return the
|
||||
input unchanged.
|
||||
*/
|
||||
SVL_DLLPUBLIC OUString resolveIdnaHost(OUString const & url);
|
||||
|
||||
}
|
||||
|
||||
#endif // INCLUDED_SVL_URIHELPER_HXX
|
||||
|
@ -21,6 +21,8 @@ $(eval $(call gb_Library_Library,svl))
|
||||
|
||||
$(eval $(call gb_Library_use_externals,svl,\
|
||||
boost_headers \
|
||||
icu_headers \
|
||||
icuuc \
|
||||
mdds_headers \
|
||||
libxml2 \
|
||||
))
|
||||
|
@ -198,9 +198,12 @@ public:
|
||||
|
||||
void testFindFirstURLInText();
|
||||
|
||||
void testResolveIdnaHost();
|
||||
|
||||
CPPUNIT_TEST_SUITE(Test);
|
||||
CPPUNIT_TEST(testNormalizedMakeRelative);
|
||||
CPPUNIT_TEST(testFindFirstURLInText);
|
||||
CPPUNIT_TEST(testResolveIdnaHost);
|
||||
CPPUNIT_TEST(finish);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
|
||||
@ -423,6 +426,66 @@ void Test::testFindFirstURLInText() {
|
||||
}
|
||||
}
|
||||
|
||||
void Test::testResolveIdnaHost() {
|
||||
OUString input;
|
||||
|
||||
input.clear();
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("Foo.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://Muenchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://-M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://M\xC3\xBCnchen-.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://xn--M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://xy--M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://-bar.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://bar-.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://xn--bar.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
input = OUString::fromUtf8("foo://xy--bar.M\xC3\xBCnchen.de");
|
||||
CPPUNIT_ASSERT_EQUAL(input, URIHelper::resolveIdnaHost(input));
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(
|
||||
OUString::fromUtf8("foo://M\xC3\xBCnchen@xn--mnchen-3ya.de"),
|
||||
URIHelper::resolveIdnaHost(
|
||||
OUString::fromUtf8("foo://M\xC3\xBCnchen@M\xC3\xBCnchen.de")));
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(
|
||||
OUString::fromUtf8("foo://xn--mnchen-3ya.de."),
|
||||
URIHelper::resolveIdnaHost(
|
||||
OUString::fromUtf8("foo://M\xC3\xBCnchen.de.")));
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(
|
||||
OUString::fromUtf8("Foo://bar@xn--mnchen-3ya.de:123/?bar#baz"),
|
||||
URIHelper::resolveIdnaHost(
|
||||
OUString::fromUtf8("Foo://bar@M\xC3\xBCnchen.de:123/?bar#baz")));
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(
|
||||
OUString::fromUtf8("foo://xn--mnchen-3ya.de"),
|
||||
URIHelper::resolveIdnaHost(
|
||||
OUString::fromUtf8("foo://Mu\xCC\x88nchen.de")));
|
||||
}
|
||||
|
||||
css::uno::Reference< css::uno::XComponentContext > Test::m_context;
|
||||
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION(Test);
|
||||
|
@ -17,6 +17,10 @@
|
||||
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
||||
*/
|
||||
|
||||
#include <sal/config.h>
|
||||
|
||||
#include <unicode/idna.h>
|
||||
|
||||
#include <svl/urihelper.hxx>
|
||||
#include <com/sun/star/ucb/Command.hpp>
|
||||
#include <com/sun/star/ucb/IllegalIdentifierException.hpp>
|
||||
@ -725,4 +729,68 @@ OUString URIHelper::removePassword(OUString const & rURI,
|
||||
aObj.GetURLNoPass(eDecodeMechanism, eCharset);
|
||||
}
|
||||
|
||||
/** Resolve the host of a URL from IDNA syntax to ASCII-only DNS syntax.

    The authority component is taken apart as [userinfo "@"] host [":" port];
    only the host part is passed through icu::IDNA (UTS #46 processing with
    STD3, Bidi, ContextJ and ContextO checks), everything else is copied over
    verbatim.

    @param url An arbitrary string, should be a URI.

    @return The input with its host converted to ASCII. The input is returned
    unchanged when it cannot be parsed, has no scheme, has no (or an empty)
    authority, when the host consists of ASCII characters only, or when ICU
    reports any error for the host.
*/
OUString URIHelper::resolveIdnaHost(OUString const & url) {
    css::uno::Reference<css::uri::XUriReference> uri(
        css::uri::UriReferenceFactory::create(
            comphelper::getProcessComponentContext())
        ->parse(url));
    // The result is reassembled below as "<scheme>://<authority>...", so a
    // reference without a scheme (e.g. "//host/path") must be left alone
    // rather than being turned into "://host/path":
    if (!(uri.is() && uri->isAbsolute() && uri->hasAuthority())) {
        return url;
    }
    auto auth(uri->getAuthority());
    if (auth.isEmpty()) {
        // "foo://" has an authority, but there is no host to resolve:
        return url;
    }
    // The host is the half-open range [hostStart, hostEnd) of auth.  It starts
    // after the '@' that ends the user info (or at 0 if there is none, as
    // indexOf then returns -1):
    sal_Int32 hostStart = auth.indexOf('@') + 1;
    // Strip an optional trailing ":<digits>" port.  If the trailing digits
    // are not preceded by ':', they belong to the host itself:
    sal_Int32 hostEnd = auth.getLength();
    while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
        --hostEnd;
    }
    if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
        --hostEnd;
    } else {
        hostEnd = auth.getLength();
    }
    auto asciiOnly = true;
    for (auto i = hostStart; i != hostEnd; ++i) {
        if (!rtl::isAscii(auth[i])) {
            asciiOnly = false;
            break;
        }
    }
    if (asciiOnly) {
        // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
        return url;
    }
    UErrorCode e = U_ZERO_ERROR;
    std::unique_ptr<icu::IDNA> idna(
        icu::IDNA::createUTS46Instance(
            (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ
             | UIDNA_CHECK_CONTEXTO),
            e));
    if (U_FAILURE(e)) {
        // NOTE(review): the "vcl.gdi" log area looks carried over from the PDF
        // export caller; confirm whether an svl area is intended here.
        SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
        return url;
    }
    icu::UnicodeString ascii;
    icu::IDNAInfo info;
    idna->nameToASCII(
        icu::UnicodeString(
            reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
            hostEnd - hostStart),
        ascii, info, e);
    if (U_FAILURE(e) || info.hasErrors()) {
        // Not a valid IDNA domain name; hand back the input untouched:
        return url;
    }
    // Reassemble: scheme, user info (including '@'), resolved host, port
    // (including ':'), path, and the optional query and fragment:
    OUStringBuffer buf(uri->getScheme());
    buf.append("://").append(auth.getStr(), hostStart);
    buf.append(
        reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
        ascii.length());
    buf.append(auth.getStr() + hostEnd, auth.getLength() - hostEnd)
        .append(uri->getPath());
    if (uri->hasQuery()) {
        buf.append('?').append(uri->getQuery());
    }
    if (uri->hasFragment()) {
        buf.append('#').append(uri->getFragment());
    }
    return buf.makeStringAndClear();
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <rtl/crc.h>
|
||||
#include <rtl/digest.h>
|
||||
#include <rtl/ustrbuf.hxx>
|
||||
#include <svl/urihelper.hxx>
|
||||
#include <tools/debug.hxx>
|
||||
#include <tools/fract.hxx>
|
||||
#include <tools/stream.hxx>
|
||||
@ -4495,8 +4496,10 @@ we check in the following sequence:
|
||||
// are the correct one!!
|
||||
|
||||
// extract target file type
|
||||
auto url(URIHelper::resolveIdnaHost(rLink.m_aURL));
|
||||
|
||||
INetURLObject aDocumentURL( m_aContext.BaseURL );
|
||||
INetURLObject aTargetURL( rLink.m_aURL );
|
||||
INetURLObject aTargetURL( url );
|
||||
bool bSetGoToRMode = false;
|
||||
bool bTargetHasPDFExtension = false;
|
||||
INetProtocol eTargetProtocol = aTargetURL.GetProtocol();
|
||||
@ -4507,7 +4510,7 @@ we check in the following sequence:
|
||||
// getting the needed URL information from the current document path
|
||||
if( eTargetProtocol == INetProtocol::NotValid )
|
||||
{
|
||||
if( rLink.m_aURL.getLength() > 4 && rLink.m_aURL.startsWith("\\\\\\\\"))
|
||||
if( url.getLength() > 4 && url.startsWith("\\\\\\\\"))
|
||||
{
|
||||
bIsUNCPath = true;
|
||||
}
|
||||
@ -4516,7 +4519,7 @@ we check in the following sequence:
|
||||
INetURLObject aNewBase( aDocumentURL );//duplicate document URL
|
||||
aNewBase.removeSegment(); //remove last segment from it, obtaining the base URL of the
|
||||
//target document
|
||||
aNewBase.insertName( rLink.m_aURL );
|
||||
aNewBase.insertName( url );
|
||||
aTargetURL = aNewBase;//reassign the new target URL
|
||||
//recompute the target protocol, with the new URL
|
||||
//normal URL processing resumes
|
||||
@ -4564,7 +4567,7 @@ we check in the following sequence:
|
||||
{
|
||||
aLine.append( "/Launch/Win<</F" );
|
||||
// INetURLObject is not good with UNC paths, use original path
|
||||
appendLiteralStringEncrypt( rLink.m_aURL, rLink.m_nObject, aLine, osl_getThreadTextEncoding() );
|
||||
appendLiteralStringEncrypt( url, rLink.m_nObject, aLine, osl_getThreadTextEncoding() );
|
||||
aLine.append( ">>" );
|
||||
}
|
||||
else
|
||||
|
Loading…
x
Reference in New Issue
Block a user