From cd1b5d77332ad2ba38f87eeca5012af25795a802 Mon Sep 17 00:00:00 2001 From: Francis Dupont Date: Sat, 7 Jan 2017 05:21:19 +0100 Subject: [PATCH 1/8] [5105] Added an escape option to strutil tokens for delimiters *only* --- src/lib/dhcpsrv/parsers/dhcp_parsers.cc | 5 +- .../dhcpsrv/tests/dhcp_parsers_unittest.cc | 32 ++++++++ src/lib/util/strutil.cc | 80 ++++++++++++++----- src/lib/util/strutil.h | 8 +- src/lib/util/tests/strutil_unittest.cc | 32 +++++++- 5 files changed, 133 insertions(+), 24 deletions(-) diff --git a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc index cf3e116b76..f516595229 100644 --- a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc +++ b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc @@ -588,7 +588,10 @@ OptionDataParser::createOption(ConstElementPtr option_data) { // separated values then we need to split this string into // individual values - each value will be used to initialize // one data field of an option. - data_tokens = isc::util::str::tokens(data_param, ","); + // It is the only usage of the escape option: this allows + // to embed commas in individual values and to return + // for instance a string value with embedded commas. + data_tokens = isc::util::str::tokens(data_param, ",", true); } else { // Otherwise, the option data is specified as a string of diff --git a/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc b/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc index 203b496ad1..8fec9117f5 100644 --- a/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc +++ b/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1231,6 +1232,37 @@ TEST_F(ParseConfigTest, optionDataNoSubOpion) { ASSERT_EQ(0, opt->getOptions().size()); } +// This tests option-data in CSV format and embedded commas. +TEST_F(ParseConfigTest, commaCSVFormatOptionData) { + + // Configuration string. + std::string config = + "{ \"option-data\": [ {" + " \"csv-format\": true," + " \"code\": 41," + " \"data\": \"EST5EDT4\\\\,M3.2.0/02:00\\\\,M11.1.0/02:00\"," + " \"space\": \"dhcp6\"" + " } ]" + "}"; + + // Verify that the configuration string parses. + int rcode = parseConfiguration(config, true); + ASSERT_EQ(0, rcode); + + // Verify that the option can be retrieved. + OptionPtr opt = getOptionPtr(DHCP6_OPTION_SPACE, 41); + ASSERT_TRUE(opt); + + // Get the option as an option string. + OptionStringPtr opt_str = boost::dynamic_pointer_cast(opt); + ASSERT_TRUE(opt_str); + + + // Verify that the option data is correct. + string val = "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00"; + EXPECT_EQ(val, opt_str->getValue()); +} + /// The next set of tests check basic operation of the HooksLibrariesParser. // // Convenience function to set a configuration of zero or more hooks diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc index 208d158346..71181c3eaa 100644 --- a/src/lib/util/strutil.cc +++ b/src/lib/util/strutil.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -61,29 +61,69 @@ trim(const string& instring) { // another dependency on a Boost library. vector -tokens(const std::string& text, const std::string& delim) { +tokens(const std::string& text, const std::string& delim, bool escape) { vector result; - - // Search for the first non-delimiter character - size_t start = text.find_first_not_of(delim); - while (start != string::npos) { - - // Non-delimiter found, look for next delimiter - size_t end = text.find_first_of(delim, start); - if (end != string::npos) { - - // Delimiter found, so extract string & search for start of next - // non-delimiter segment. - result.push_back(text.substr(start, (end - start))); - start = text.find_first_not_of(delim, end); - + string token; + bool in_token = false; + bool escaped = false; + for (auto c = text.cbegin(); c != text.cend(); ++c) { + if (delim.find(*c) != string::npos) { + // Current character is a delimiter + if (!in_token) { + // Two or more delimiters, eat them + } else if (escaped) { + // Escaped delimiter in a token: reset escaped and keep it + escaped = false; + token.push_back(*c); + } else { + // End of the current token: save it if not empty + if (!token.empty()) { + result.push_back(token); + } + // Reset state + in_token = false; + token.clear(); + } + } else if (escape && (*c == '\\')) { + // Current character is the escape character + if (!in_token) { + // The escape character is the first character of a new token + in_token = true; + } + if (escaped) { + // Escaped escape: reset escaped and keep both characters + escaped = false; + token.push_back('\\'); + token.push_back(*c); + } else { + // Remember to keep the next character + escaped = true; + } } else { - - // End of string found, extract rest of string and flag to exit - result.push_back(text.substr(start)); - start = string::npos; + // Not a delimiter nor an escape + if (!in_token) { + // First character of a new token + in_token = true; + } + if (escaped) { + // Escaped common character: as there was no escape + escaped = false; + token.push_back('\\'); + token.push_back(*c); + } else { + // The common case: keep it + token.push_back(*c); + } } } + // End of input: close and save the current token if not empty + if (escaped) { + // Pending escape + token.push_back('\\'); + } + if (!token.empty()) { + result.push_back(token); + } return (result); } diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h index bacc828de5..2750591550 100644 --- a/src/lib/util/strutil.h +++ b/src/lib/util/strutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -69,6 +69,8 @@ std::string trim(const std::string& instring); /// invisible leading and trailing delimiter characters. Therefore both cases /// reduce to a set of contiguous delimiters, which are considered a single /// delimiter (so getting rid of the string). +/// Optional escape allows to escape delimiter characters (and *only* them) +/// using backslash. /// /// We could use Boost for this, but this (simple) function eliminates one /// dependency in the code. @@ -76,10 +78,12 @@ std::string trim(const std::string& instring); /// \param text String to be split. Passed by value as the internal copy is /// altered during the processing. /// \param delim Delimiter characters +/// \param escape Use backslash to escape delimiter characters /// /// \return Vector of tokens. std::vector tokens(const std::string& text, - const std::string& delim = std::string(" \t\n")); + const std::string& delim = std::string(" \t\n"), + bool escape = false); /// \brief Uppercase Character diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc index 887057d38e..48c24634bb 100644 --- a/src/lib/util/tests/strutil_unittest.cc +++ b/src/lib/util/tests/strutil_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -143,6 +143,36 @@ TEST(StringUtilTest, Tokens) { EXPECT_EQ(string("gamma"), result[3]); EXPECT_EQ(string("delta"), result[4]); EXPECT_EQ(string("epsilon"), result[5]); + + // Escaped delimiter + result = isc::util::str::tokens("foo\\,bar", ",", true); + EXPECT_EQ(1, result.size()); + EXPECT_EQ(string("foo,bar"), result[0]); + + // Escaped escape + result = isc::util::str::tokens("foo\\\\,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Escaped standard character + result = isc::util::str::tokens("fo\\o,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("fo\\o"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Escape at the end + result = isc::util::str::tokens("foo,bar\\", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string("bar\\"), result[1]); + + // Escape opening a token + result = isc::util::str::tokens("foo,\\,,bar", ",", true); + ASSERT_EQ(3, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string(","), result[1]); + EXPECT_EQ(string("bar"), result[2]); } // Changing case From 45e725696a80e403124b97e0c5228d634a9d3bad Mon Sep 17 00:00:00 2001 From: Tomek Mrugalski Date: Wed, 1 Feb 2017 20:57:48 +0100 Subject: [PATCH 2/8] [5105] Added examples for escaped options. --- doc/examples/kea4/multiple-options.json | 16 ++++++++----- doc/examples/kea6/multiple-options.json | 32 +++++++++++++++---------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/examples/kea4/multiple-options.json b/doc/examples/kea4/multiple-options.json index 940c448547..c5d284427d 100644 --- a/doc/examples/kea4/multiple-options.json +++ b/doc/examples/kea4/multiple-options.json @@ -27,10 +27,10 @@ # "renew-timer": 1000, # "rebind-timer": 2000, -# Defining a subnet. There are 3 DHCP options returned to the -# clients connected to this subnet. The first two options are -# identified by the name. The third option is identified by the -# option code. +// Defining a subnet. There are 3 DHCP options returned to the +// clients connected to this subnet. The first and third options are +// identified by the name. The third option is identified by the +// option code. "subnet4": [ { "pools": [ { "pool": "192.0.2.10 - 192.0.2.200" } ], @@ -46,8 +46,12 @@ "data": "192.0.2.1" }, { - "code": 15, - "data": "example.org" + // String options that have a comma in their values need to have + // it escaped (i.e. each comma is predeced by a backslash). That's + // because commas are reserved for separating fields in compound + // options. + "name": "boot-file-name", + "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" } ] } diff --git a/doc/examples/kea6/multiple-options.json b/doc/examples/kea6/multiple-options.json index b0d2cf1be3..3677da4c29 100644 --- a/doc/examples/kea6/multiple-options.json +++ b/doc/examples/kea6/multiple-options.json @@ -28,17 +28,17 @@ "renew-timer": 1000, "rebind-timer": 2000, -# Defining a subnet. There are 2 DHCP options returned to the -# clients connected to this subnet. The first option is identified -# by the name. The second option is identified by the code. -# There are two address pools defined within this subnet. Pool -# specific value for option 12 is defined for the pool: -# 2001:db8:1::1 - 2001:db8:1::100. Clients obtaining an address -# from this pool will be assigned option 12 with a value of -# 3001:cafe::21. Clients belonging to this subnet but obtaining -# addresses from the other pool, or the clients obtaining -# stateless configuration will be assigned subnet specific value -# of option 12, i.e. 2001:db8:1:0:ff00::1. +// Defining a subnet. There are 3 DHCP options returned to the +// clients connected to this subnet. The first option is identified +// by the name. The second option is identified by the code. +// There are two address pools defined within this subnet. Pool +// specific value for option 12 is defined for the pool: +// 2001:db8:1::1 - 2001:db8:1::100. Clients obtaining an address +// from this pool will be assigned option 12 with a value of +// 3001:cafe::21. Clients belonging to this subnet but obtaining +// addresses from the other pool, or the clients obtaining +// stateless configuration will be assigned subnet specific value +// of option 12, i.e. 2001:db8:1:0:ff00::1. "subnet6": [ { "option-data": [ @@ -49,6 +49,14 @@ { "code": 12, "data": "2001:db8:1:0:ff00::1" + }, + { + // String options that have a comma in their values need to have + // it escaped (i.e. each comma is predeced by a backslash). That's + // because commas are reserved for separating fields in compound + // options. + "name": "new-posix-timezone", + "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" } ], "pools": [ @@ -62,7 +70,7 @@ ] }, { - "pool": "2001:db8:1::500 - 2001:db8:2::1000" + "pool": "2001:db8:1::500 - 2001:db8:1::1000" } ], "subnet": "2001:db8:1::/64", From c33bbfc41827b03f48e9fe323bb1c85f42096e41 Mon Sep 17 00:00:00 2001 From: Tomek Mrugalski Date: Wed, 1 Feb 2017 21:08:26 +0100 Subject: [PATCH 3/8] [5105] User's Guide updated. --- doc/guide/dhcp4-srv.xml | 28 ++++++++++++++++++++++++++++ doc/guide/dhcp6-srv.xml | 27 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/doc/guide/dhcp4-srv.xml b/doc/guide/dhcp4-srv.xml index 6c74a2fada..f21ee4b2e2 100644 --- a/doc/guide/dhcp4-srv.xml +++ b/doc/guide/dhcp4-srv.xml @@ -1016,6 +1016,34 @@ temporarily override a list of interface names and listen on all interfaces. structures. "Type" designates the format of the data: the meanings of the various types is given in . + + When the data field is a string, and that string contains the comma + (,; U+002C) character, the comma must be escaped with a reverse solidus + character (\; U+005C). For example, the string + "foo,bar" would be represented as: + +"Dhcp4": { + "subnet4": [ + { + "pools": [ + { + "option-data": [ + { + "name": "boot-file-name", + "data": "foo\,bar" + } + ] + }, + ... + ], + ... + }, + ... + ], + ... +} + + Some options are designated as arrays, which means that more than one value is allowed in such an option. For example the option time-servers diff --git a/doc/guide/dhcp6-srv.xml b/doc/guide/dhcp6-srv.xml index 166c932e66..89a731767c 100644 --- a/doc/guide/dhcp6-srv.xml +++ b/doc/guide/dhcp6-srv.xml @@ -1050,6 +1050,33 @@ temporarily override a list of interface names and listen on all interfaces. which was not assigned by IANA) are listed in . + When the data field is a string, and that string contains the comma + (,; U+002C) character, the comma must be escaped with a reverse solidus + character (\; U+005C). For example, the string "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00" would be + represented as: + +"Dhcp6": { + "subnet6": [ + { + "pools": [ + { + "option-data": [ + { + "name": "new-posix-timezone", + "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" + } + ] + }, + ... + ], + ... + }, + ... + ], + ... +} + + Some options are designated as arrays, which means that more than one value is allowed in such an option. For example the option dns-servers From 3201a422f071edd93ae0295cd8e41fb00c6227ab Mon Sep 17 00:00:00 2001 From: Tomek Mrugalski Date: Thu, 2 Feb 2017 13:46:07 +0100 Subject: [PATCH 4/8] [5105] Updated examples and User's Guide to match double escapes. --- doc/examples/kea4/multiple-options.json | 8 +++++--- doc/examples/kea6/multiple-options.json | 6 ++++-- doc/guide/dhcp4-srv.xml | 7 ++++--- doc/guide/dhcp6-srv.xml | 9 ++++++--- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/doc/examples/kea4/multiple-options.json b/doc/examples/kea4/multiple-options.json index c5d284427d..1d18d0025f 100644 --- a/doc/examples/kea4/multiple-options.json +++ b/doc/examples/kea4/multiple-options.json @@ -47,11 +47,13 @@ }, { // String options that have a comma in their values need to have - // it escaped (i.e. each comma is predeced by a backslash). That's + // it escaped (i.e. each comma is predeced by two backslashes). That's // because commas are reserved for separating fields in compound - // options. + // options. At the same time, we need to be conformant with JSON spec, + // that does not allow "\,". Therefore the slightly uncommon double + // backslashes natation is needed. "name": "boot-file-name", - "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" + "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00" } ] } diff --git a/doc/examples/kea6/multiple-options.json b/doc/examples/kea6/multiple-options.json index 3677da4c29..75fd517bda 100644 --- a/doc/examples/kea6/multiple-options.json +++ b/doc/examples/kea6/multiple-options.json @@ -52,9 +52,11 @@ }, { // String options that have a comma in their values need to have - // it escaped (i.e. each comma is predeced by a backslash). That's + // it escaped (i.e. each comma is predeced by two backslashes). That's // because commas are reserved for separating fields in compound - // options. + // options. At the same time, we need to be conformant with JSON spec, + // that does not allow "\,". Therefore the slightly uncommon double + // backslashes natation is needed. "name": "new-posix-timezone", "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" } diff --git a/doc/guide/dhcp4-srv.xml b/doc/guide/dhcp4-srv.xml index f21ee4b2e2..7c21acfc28 100644 --- a/doc/guide/dhcp4-srv.xml +++ b/doc/guide/dhcp4-srv.xml @@ -1018,8 +1018,9 @@ temporarily override a list of interface names and listen on all interfaces. When the data field is a string, and that string contains the comma - (,; U+002C) character, the comma must be escaped with a reverse solidus - character (\; U+005C). For example, the string + (,; U+002C) character, the comma must be escaped with a double reverse solidus + character (\; U+005C). This double escape is required, because a + single escape (\,) would make the JSON invalid. For example, the string "foo,bar" would be represented as: "Dhcp4": { @@ -1030,7 +1031,7 @@ temporarily override a list of interface names and listen on all interfaces. "option-data": [ { "name": "boot-file-name", - "data": "foo\,bar" + "data": "foo\\,bar" } ] }, diff --git a/doc/guide/dhcp6-srv.xml b/doc/guide/dhcp6-srv.xml index 89a731767c..aa0ac27a66 100644 --- a/doc/guide/dhcp6-srv.xml +++ b/doc/guide/dhcp6-srv.xml @@ -1050,9 +1050,12 @@ temporarily override a list of interface names and listen on all interfaces. which was not assigned by IANA) are listed in . - When the data field is a string, and that string contains the comma - (,; U+002C) character, the comma must be escaped with a reverse solidus - character (\; U+005C). For example, the string "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00" would be + When the data field is a string, and that string contains + the comma (,; U+002C) character, the comma must be escaped with a + reverse solidus character (\; U+005C). This double escape is + required, because a single escape (\,) would make the JSON + invalid. For example, the string + "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00" would be represented as: "Dhcp6": { From 08e76963d598869e1177e40e4fc3e3f2a5d14032 Mon Sep 17 00:00:00 2001 From: Tomek Mrugalski Date: Thu, 2 Feb 2017 14:16:13 +0100 Subject: [PATCH 5/8] [5105] kea6 example corrected --- doc/examples/kea6/multiple-options.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/kea6/multiple-options.json b/doc/examples/kea6/multiple-options.json index 75fd517bda..3fd217792a 100644 --- a/doc/examples/kea6/multiple-options.json +++ b/doc/examples/kea6/multiple-options.json @@ -58,7 +58,7 @@ // that does not allow "\,". Therefore the slightly uncommon double // backslashes natation is needed. "name": "new-posix-timezone", - "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00" + "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00" } ], "pools": [ From 6252fce36437c07f1288fcc2231c2832c348358a Mon Sep 17 00:00:00 2001 From: Francis Dupont Date: Mon, 6 Feb 2017 00:53:58 +0100 Subject: [PATCH 6/8] [5105] Fixed the \\ in tokens and improved doc (guides and examples) --- doc/examples/kea4/multiple-options.json | 18 +++++++++++++----- doc/examples/kea6/multiple-options.json | 18 +++++++++++++----- doc/guide/dhcp4-srv.xml | 9 +++++---- doc/guide/dhcp6-srv.xml | 7 ++++--- src/lib/util/strutil.cc | 5 ++--- src/lib/util/strutil.h | 4 ++-- src/lib/util/tests/strutil_unittest.cc | 8 +++++++- 7 files changed, 46 insertions(+), 23 deletions(-) diff --git a/doc/examples/kea4/multiple-options.json b/doc/examples/kea4/multiple-options.json index 1d18d0025f..00544ec8b8 100644 --- a/doc/examples/kea4/multiple-options.json +++ b/doc/examples/kea4/multiple-options.json @@ -47,13 +47,21 @@ }, { // String options that have a comma in their values need to have - // it escaped (i.e. each comma is predeced by two backslashes). That's - // because commas are reserved for separating fields in compound - // options. At the same time, we need to be conformant with JSON spec, - // that does not allow "\,". Therefore the slightly uncommon double - // backslashes natation is needed. + // it escaped (i.e. each comma is predeced by two backslashes). + // That's because commas are reserved for separating fields in + // compound options. At the same time, we need to be conformant + // with JSON spec, that does not allow "\,". Therefore the + // slightly uncommon double backslashes notation is needed. "name": "boot-file-name", "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00" + + // Legal JSON escapes are \ followed by "\/bfnrt character + // or \u followed by 4 hexa-decimal numbers (currently Kea + // supports only \u0000 to \u00ff code points). + // CSV processing translates '\\' into '\' and '\,' into ',' + // only so for instance '\x' is translated into '\x'. But + // as it works on a JSON string value each of these '\' + // characters must be doubled on JSON input. } ] } diff --git a/doc/examples/kea6/multiple-options.json b/doc/examples/kea6/multiple-options.json index 3fd217792a..3e30487517 100644 --- a/doc/examples/kea6/multiple-options.json +++ b/doc/examples/kea6/multiple-options.json @@ -52,13 +52,21 @@ }, { // String options that have a comma in their values need to have - // it escaped (i.e. each comma is predeced by two backslashes). That's - // because commas are reserved for separating fields in compound - // options. At the same time, we need to be conformant with JSON spec, - // that does not allow "\,". Therefore the slightly uncommon double - // backslashes natation is needed. + // it escaped (i.e. each comma is predeced by two backslashes). + // That's because commas are reserved for separating fields in + // compound options. At the same time, we need to be conformant + // with JSON spec, that does not allow "\,". Therefore the + // slightly uncommon double backslashes notation is needed. "name": "new-posix-timezone", "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00" + + // Legal JSON escapes are \ followed by "\/bfnrt character + // or \u followed by 4 hexa-decimal numbers (currently Kea + // supports only \u0000 to \u00ff code points). + // CSV processing translates '\\' into '\' and '\,' into ',' + // only so for instance '\x' is translated into '\x'. But + // as it works on a JSON string value each of these '\' + // characters must be doubled on JSON input. } ], "pools": [ diff --git a/doc/guide/dhcp4-srv.xml b/doc/guide/dhcp4-srv.xml index 7c21acfc28..5997836577 100644 --- a/doc/guide/dhcp4-srv.xml +++ b/doc/guide/dhcp4-srv.xml @@ -1017,11 +1017,12 @@ temporarily override a list of interface names and listen on all interfaces. the various types is given in . - When the data field is a string, and that string contains the comma + When a data field is a string, and that string contains the comma (,; U+002C) character, the comma must be escaped with a double reverse solidus - character (\; U+005C). This double escape is required, because a - single escape (\,) would make the JSON invalid. For example, the string - "foo,bar" would be represented as: + character (\; U+005C). This double escape is required, because both the + routine splitting CSV data into fields and JSON use the same escape + character: a single escape (\,) would make the JSON invalid. + For example, the string "foo,bar" would be represented as: "Dhcp4": { "subnet4": [ diff --git a/doc/guide/dhcp6-srv.xml b/doc/guide/dhcp6-srv.xml index aa0ac27a66..650b6b0564 100644 --- a/doc/guide/dhcp6-srv.xml +++ b/doc/guide/dhcp6-srv.xml @@ -1050,11 +1050,12 @@ temporarily override a list of interface names and listen on all interfaces. which was not assigned by IANA) are listed in . - When the data field is a string, and that string contains + When a data field is a string, and that string contains the comma (,; U+002C) character, the comma must be escaped with a reverse solidus character (\; U+005C). This double escape is - required, because a single escape (\,) would make the JSON - invalid. For example, the string + required, because both the routine splitting CSV data into fields + and JSON use the same escape character: a single escape (\,) would + make the JSON invalid. For example, the string "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00" would be represented as: diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc index 71181c3eaa..b3f576be95 100644 --- a/src/lib/util/strutil.cc +++ b/src/lib/util/strutil.cc @@ -91,9 +91,8 @@ tokens(const std::string& text, const std::string& delim, bool escape) { in_token = true; } if (escaped) { - // Escaped escape: reset escaped and keep both characters + // Escaped escape: reset escaped and keep one character escaped = false; - token.push_back('\\'); token.push_back(*c); } else { // Remember to keep the next character @@ -106,7 +105,7 @@ tokens(const std::string& text, const std::string& delim, bool escape) { in_token = true; } if (escaped) { - // Escaped common character: as there was no escape + // Escaped common character: as escape was false escaped = false; token.push_back('\\'); token.push_back(*c); diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h index 2750591550..1dd5de01de 100644 --- a/src/lib/util/strutil.h +++ b/src/lib/util/strutil.h @@ -69,8 +69,8 @@ std::string trim(const std::string& instring); /// invisible leading and trailing delimiter characters. Therefore both cases /// reduce to a set of contiguous delimiters, which are considered a single /// delimiter (so getting rid of the string). -/// Optional escape allows to escape delimiter characters (and *only* them) -/// using backslash. +/// Optional escape allows to escape delimiter characters (and *only* them +/// and the escape character itself) using backslash. /// /// We could use Boost for this, but this (simple) function eliminates one /// dependency in the code. diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc index 48c24634bb..33ca6c8e0b 100644 --- a/src/lib/util/tests/strutil_unittest.cc +++ b/src/lib/util/tests/strutil_unittest.cc @@ -152,9 +152,15 @@ TEST(StringUtilTest, Tokens) { // Escaped escape result = isc::util::str::tokens("foo\\\\,bar", ",", true); ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("foo\\"), result[0]); EXPECT_EQ(string("bar"), result[1]); + // Double escapes + result = isc::util::str::tokens("foo\\\\\\\\,\\bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("\\bar"), result[1]); + // Escaped standard character result = isc::util::str::tokens("fo\\o,bar", ",", true); ASSERT_EQ(2, result.size()); From fa79ac2396aa94d7bac91bd12d3593ebaaa9386d Mon Sep 17 00:00:00 2001 From: Francis Dupont Date: Fri, 10 Feb 2017 19:40:55 +0100 Subject: [PATCH 7/8] [master] Added ChangeLog entry for #5105 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 186c28912a..d55e019380 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +1213. [bug] fdupont + Option string values containing comma can now be specified + correctly by preceding comma with double backslashes (e.g. + "foo\\,bar"). + (Trac #5105, git xxx) + 1212. [doc] andreipavelQ Many spelling corrections. (Github #47, git a6a7ca1ced8c63c1e11ef4c572f09272340afdd7) From 1504886636c72e178e4a33442185cc0860b7cd2e Mon Sep 17 00:00:00 2001 From: Francis Dupont Date: Fri, 10 Feb 2017 19:42:06 +0100 Subject: [PATCH 8/8] [master] Updated git hash --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d55e019380..1a31c1728f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,7 +2,7 @@ Option string values containing comma can now be specified correctly by preceding comma with double backslashes (e.g. "foo\\,bar"). - (Trac #5105, git xxx) + (Trac #5105, git fa79ac2396aa94d7bac91bd12d3593ebaaa9386d) 1212. [doc] andreipavelQ Many spelling corrections.