diff --git a/ChangeLog b/ChangeLog
index a8921b0f0a..62480be32a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,15 @@
-1213. [func] tomek
+1214. [func] tomek
Bison parser implemented for Control-agent. The code is able
to syntactically parse input configuration, but the output
is not used yet.
(Trac #5076, git d99048aa5b90efa7812a75cdae98a0913470f5a6)
+1213. [bug] fdupont
+ Option string values containing comma can now be specified
+ correctly by preceding comma with double backslashes (e.g.
+ "foo\\,bar").
+ (Trac #5105, git fa79ac2396aa94d7bac91bd12d3593ebaaa9386d)
+
1212. [doc] andreipavelQ
Many spelling corrections.
(Github #47, git a6a7ca1ced8c63c1e11ef4c572f09272340afdd7)
diff --git a/doc/examples/kea4/multiple-options.json b/doc/examples/kea4/multiple-options.json
index 940c448547..00544ec8b8 100644
--- a/doc/examples/kea4/multiple-options.json
+++ b/doc/examples/kea4/multiple-options.json
@@ -27,10 +27,10 @@
# "renew-timer": 1000,
# "rebind-timer": 2000,
-# Defining a subnet. There are 3 DHCP options returned to the
-# clients connected to this subnet. The first two options are
-# identified by the name. The third option is identified by the
-# option code.
+// Defining a subnet. There are 3 DHCP options returned to the
+// clients connected to this subnet. The first and third options are
+// identified by the name. The third option is identified by the
+// option code.
"subnet4": [
{
"pools": [ { "pool": "192.0.2.10 - 192.0.2.200" } ],
@@ -46,8 +46,22 @@
"data": "192.0.2.1"
},
{
- "code": 15,
- "data": "example.org"
+ // String options that have a comma in their values need to have
+ // it escaped (i.e. each comma is predeced by two backslashes).
+ // That's because commas are reserved for separating fields in
+ // compound options. At the same time, we need to be conformant
+ // with JSON spec, that does not allow "\,". Therefore the
+ // slightly uncommon double backslashes notation is needed.
+ "name": "boot-file-name",
+ "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00"
+
+ // Legal JSON escapes are \ followed by "\/bfnrt character
+ // or \u followed by 4 hexa-decimal numbers (currently Kea
+ // supports only \u0000 to \u00ff code points).
+ // CSV processing translates '\\' into '\' and '\,' into ','
+ // only so for instance '\x' is translated into '\x'. But
+ // as it works on a JSON string value each of these '\'
+ // characters must be doubled on JSON input.
}
]
}
diff --git a/doc/examples/kea6/multiple-options.json b/doc/examples/kea6/multiple-options.json
index 45b249dd7e..3ee58ef0de 100644
--- a/doc/examples/kea6/multiple-options.json
+++ b/doc/examples/kea6/multiple-options.json
@@ -28,7 +28,7 @@
"renew-timer": 1000,
"rebind-timer": 2000,
-# Defining a subnet. There are 2 DHCP options returned to the
+# Defining a subnet. There are 3 DHCP options returned to the
# clients connected to this subnet. The first option is identified
# by the name. The second option is identified by the code.
# There are two address pools defined within this subnet. Pool
@@ -51,6 +51,24 @@
{
"code": 12,
"data": "2001:db8:1:0:ff00::1"
+ },
+ {
+ // String options that have a comma in their values need to have
+ // it escaped (i.e. each comma is predeced by two backslashes).
+ // That's because commas are reserved for separating fields in
+ // compound options. At the same time, we need to be conformant
+ // with JSON spec, that does not allow "\,". Therefore the
+ // slightly uncommon double backslashes notation is needed.
+ "name": "new-posix-timezone",
+ "data": "EST5EDT4\\,M3.2.0/02:00\\,M11.1.0/02:00"
+
+ // Legal JSON escapes are \ followed by "\/bfnrt character
+ // or \u followed by 4 hexa-decimal numbers (currently Kea
+ // supports only \u0000 to \u00ff code points).
+ // CSV processing translates '\\' into '\' and '\,' into ','
+ // only so for instance '\x' is translated into '\x'. But
+ // as it works on a JSON string value each of these '\'
+ // characters must be doubled on JSON input.
}
],
"pools": [
@@ -64,7 +82,7 @@
]
},
{
- "pool": "2001:db8:1::500 - 2001:db8:2::1000"
+ "pool": "2001:db8:1::500 - 2001:db8:1::1000"
}
],
"pd-pools": [
diff --git a/doc/guide/dhcp4-srv.xml b/doc/guide/dhcp4-srv.xml
index dda110682c..d1bc86c7b2 100644
--- a/doc/guide/dhcp4-srv.xml
+++ b/doc/guide/dhcp4-srv.xml
@@ -1016,6 +1016,36 @@ temporarily override a list of interface names and listen on all interfaces.
structures. "Type" designates the format of the data: the meanings of
the various types is given in .
+
+ When a data field is a string, and that string contains the comma
+ (,; U+002C) character, the comma must be escaped with a double reverse solidus
+ character (\; U+005C). This double escape is required, because both the
+ routine splitting CSV data into fields and JSON use the same escape
+ character: a single escape (\,) would make the JSON invalid.
+ For example, the string "foo,bar" would be represented as:
+
+"Dhcp4": {
+ "subnet4": [
+ {
+ "pools": [
+ {
+ "option-data": [
+ {
+ "name": "boot-file-name",
+ "data": "foo\\,bar"
+ }
+ ]
+ },
+ ...
+ ],
+ ...
+ },
+ ...
+ ],
+ ...
+}
+
+
Some options are designated as arrays, which means that more than one
value is allowed in such an option. For example the option time-servers
diff --git a/doc/guide/dhcp6-srv.xml b/doc/guide/dhcp6-srv.xml
index 062608ca8f..44f8ddabb3 100644
--- a/doc/guide/dhcp6-srv.xml
+++ b/doc/guide/dhcp6-srv.xml
@@ -1050,6 +1050,37 @@ temporarily override a list of interface names and listen on all interfaces.
which was not assigned by IANA) are listed in
.
+ When a data field is a string, and that string contains
+ the comma (,; U+002C) character, the comma must be escaped with a
+ reverse solidus character (\; U+005C). This double escape is
+ required, because both the routine splitting CSV data into fields
+ and JSON use the same escape character: a single escape (\,) would
+ make the JSON invalid. For example, the string
+ "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00" would be
+ represented as:
+
+"Dhcp6": {
+ "subnet6": [
+ {
+ "pools": [
+ {
+ "option-data": [
+ {
+ "name": "new-posix-timezone",
+ "data": "EST5EDT4\,M3.2.0/02:00\,M11.1.0/02:00"
+ }
+ ]
+ },
+ ...
+ ],
+ ...
+ },
+ ...
+ ],
+ ...
+}
+
+
Some options are designated as arrays, which means that more than one
value is allowed in such an option. For example the option dns-servers
diff --git a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc
index e085c5443a..cafe509703 100644
--- a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc
+++ b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc
@@ -526,7 +526,10 @@ OptionDataParser::createOption(ConstElementPtr option_data) {
// separated values then we need to split this string into
// individual values - each value will be used to initialize
// one data field of an option.
- data_tokens = isc::util::str::tokens(data_param, ",");
+ // It is the only usage of the escape option: this allows
+ // to embed commas in individual values and to return
+ // for instance a string value with embedded commas.
+ data_tokens = isc::util::str::tokens(data_param, ",", true);
} else {
// Otherwise, the option data is specified as a string of
diff --git a/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc b/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc
index 0a293ef5e3..c7a729d5c9 100644
--- a/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc
+++ b/src/lib/dhcpsrv/tests/dhcp_parsers_unittest.cc
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -1210,6 +1211,37 @@ TEST_F(ParseConfigTest, optionDataNoSubOpion) {
ASSERT_EQ(0, opt->getOptions().size());
}
+// This tests option-data in CSV format and embedded commas.
+TEST_F(ParseConfigTest, commaCSVFormatOptionData) {
+
+ // Configuration string.
+ std::string config =
+ "{ \"option-data\": [ {"
+ " \"csv-format\": true,"
+ " \"code\": 41,"
+ " \"data\": \"EST5EDT4\\\\,M3.2.0/02:00\\\\,M11.1.0/02:00\","
+ " \"space\": \"dhcp6\""
+ " } ]"
+ "}";
+
+ // Verify that the configuration string parses.
+ int rcode = parseConfiguration(config, true);
+ ASSERT_EQ(0, rcode);
+
+ // Verify that the option can be retrieved.
+ OptionPtr opt = getOptionPtr(DHCP6_OPTION_SPACE, 41);
+ ASSERT_TRUE(opt);
+
+ // Get the option as an option string.
+ OptionStringPtr opt_str = boost::dynamic_pointer_cast(opt);
+ ASSERT_TRUE(opt_str);
+
+
+ // Verify that the option data is correct.
+ string val = "EST5EDT4,M3.2.0/02:00,M11.1.0/02:00";
+ EXPECT_EQ(val, opt_str->getValue());
+}
+
/// The next set of tests check basic operation of the HooksLibrariesParser.
//
// Convenience function to set a configuration of zero or more hooks
diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc
index 208d158346..b3f576be95 100644
--- a/src/lib/util/strutil.cc
+++ b/src/lib/util/strutil.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -61,29 +61,68 @@ trim(const string& instring) {
// another dependency on a Boost library.
vector
-tokens(const std::string& text, const std::string& delim) {
+tokens(const std::string& text, const std::string& delim, bool escape) {
vector result;
-
- // Search for the first non-delimiter character
- size_t start = text.find_first_not_of(delim);
- while (start != string::npos) {
-
- // Non-delimiter found, look for next delimiter
- size_t end = text.find_first_of(delim, start);
- if (end != string::npos) {
-
- // Delimiter found, so extract string & search for start of next
- // non-delimiter segment.
- result.push_back(text.substr(start, (end - start)));
- start = text.find_first_not_of(delim, end);
-
+ string token;
+ bool in_token = false;
+ bool escaped = false;
+ for (auto c = text.cbegin(); c != text.cend(); ++c) {
+ if (delim.find(*c) != string::npos) {
+ // Current character is a delimiter
+ if (!in_token) {
+ // Two or more delimiters, eat them
+ } else if (escaped) {
+ // Escaped delimiter in a token: reset escaped and keep it
+ escaped = false;
+ token.push_back(*c);
+ } else {
+ // End of the current token: save it if not empty
+ if (!token.empty()) {
+ result.push_back(token);
+ }
+ // Reset state
+ in_token = false;
+ token.clear();
+ }
+ } else if (escape && (*c == '\\')) {
+ // Current character is the escape character
+ if (!in_token) {
+ // The escape character is the first character of a new token
+ in_token = true;
+ }
+ if (escaped) {
+ // Escaped escape: reset escaped and keep one character
+ escaped = false;
+ token.push_back(*c);
+ } else {
+ // Remember to keep the next character
+ escaped = true;
+ }
} else {
-
- // End of string found, extract rest of string and flag to exit
- result.push_back(text.substr(start));
- start = string::npos;
+ // Not a delimiter nor an escape
+ if (!in_token) {
+ // First character of a new token
+ in_token = true;
+ }
+ if (escaped) {
+ // Escaped common character: as escape was false
+ escaped = false;
+ token.push_back('\\');
+ token.push_back(*c);
+ } else {
+ // The common case: keep it
+ token.push_back(*c);
+ }
}
}
+ // End of input: close and save the current token if not empty
+ if (escaped) {
+ // Pending escape
+ token.push_back('\\');
+ }
+ if (!token.empty()) {
+ result.push_back(token);
+ }
return (result);
}
diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h
index bacc828de5..1dd5de01de 100644
--- a/src/lib/util/strutil.h
+++ b/src/lib/util/strutil.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -69,6 +69,8 @@ std::string trim(const std::string& instring);
/// invisible leading and trailing delimiter characters. Therefore both cases
/// reduce to a set of contiguous delimiters, which are considered a single
/// delimiter (so getting rid of the string).
+/// Optional escape allows to escape delimiter characters (and *only* them
+/// and the escape character itself) using backslash.
///
/// We could use Boost for this, but this (simple) function eliminates one
/// dependency in the code.
@@ -76,10 +78,12 @@ std::string trim(const std::string& instring);
/// \param text String to be split. Passed by value as the internal copy is
/// altered during the processing.
/// \param delim Delimiter characters
+/// \param escape Use backslash to escape delimiter characters
///
/// \return Vector of tokens.
std::vector tokens(const std::string& text,
- const std::string& delim = std::string(" \t\n"));
+ const std::string& delim = std::string(" \t\n"),
+ bool escape = false);
/// \brief Uppercase Character
diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc
index 99aac9f360..7176e74c9d 100644
--- a/src/lib/util/tests/strutil_unittest.cc
+++ b/src/lib/util/tests/strutil_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011-2016 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -143,6 +143,42 @@ TEST(StringUtilTest, Tokens) {
EXPECT_EQ(string("gamma"), result[3]);
EXPECT_EQ(string("delta"), result[4]);
EXPECT_EQ(string("epsilon"), result[5]);
+
+ // Escaped delimiter
+ result = isc::util::str::tokens("foo\\,bar", ",", true);
+ EXPECT_EQ(1, result.size());
+ EXPECT_EQ(string("foo,bar"), result[0]);
+
+ // Escaped escape
+ result = isc::util::str::tokens("foo\\\\,bar", ",", true);
+ ASSERT_EQ(2, result.size());
+ EXPECT_EQ(string("foo\\"), result[0]);
+ EXPECT_EQ(string("bar"), result[1]);
+
+ // Double escapes
+ result = isc::util::str::tokens("foo\\\\\\\\,\\bar", ",", true);
+ ASSERT_EQ(2, result.size());
+ EXPECT_EQ(string("foo\\\\"), result[0]);
+ EXPECT_EQ(string("\\bar"), result[1]);
+
+ // Escaped standard character
+ result = isc::util::str::tokens("fo\\o,bar", ",", true);
+ ASSERT_EQ(2, result.size());
+ EXPECT_EQ(string("fo\\o"), result[0]);
+ EXPECT_EQ(string("bar"), result[1]);
+
+ // Escape at the end
+ result = isc::util::str::tokens("foo,bar\\", ",", true);
+ ASSERT_EQ(2, result.size());
+ EXPECT_EQ(string("foo"), result[0]);
+ EXPECT_EQ(string("bar\\"), result[1]);
+
+ // Escape opening a token
+ result = isc::util::str::tokens("foo,\\,,bar", ",", true);
+ ASSERT_EQ(3, result.size());
+ EXPECT_EQ(string("foo"), result[0]);
+ EXPECT_EQ(string(","), result[1]);
+ EXPECT_EQ(string("bar"), result[2]);
}
// Changing case