2
0
mirror of https://gitlab.isc.org/isc-projects/kea synced 2025-08-31 14:05:33 +00:00

[2382] unrelated fix to lexer: support empty qstring and nul termination.

An empty qstring previously caused an exception, which is a clear bug
and should be fixed.  Nul-terminating string regions is an extension,
but I found it useful when implementing RDATA parsers.
This commit is contained in:
JINMEI Tatuya
2012-11-30 13:51:33 -08:00
parent ca8fc9f414
commit f73f27474f
3 changed files with 27 additions and 3 deletions

View File

@@ -458,8 +458,11 @@ String::handle(MasterLexer& lexer) const {
if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
getLexerImpl(lexer)->source_->ungetChar();
// make sure it is nul-terminated as a c-str (the terminator is excluded
// from the token data).
data.push_back('\0');
getLexerImpl(lexer)->token_ =
MasterToken(&data.at(0), data.size());
MasterToken(&data.at(0), data.size() - 1);
return;
}
escaped = (c == '\\' && !escaped);
@@ -486,7 +489,10 @@ QString::handle(MasterLexer& lexer) const {
escaped = false;
data.back() = '"';
} else {
token = MasterToken(&data.at(0), data.size(), true);
// make sure it is nul-terminated as a c-str (the terminator is excluded
// from the token data). This also simplifies the case of an empty string.
data.push_back('\0');
token = MasterToken(&data.at(0), data.size() - 1, true);
return;
}
} else if (c == '\n' && !escaped) {
@@ -529,7 +535,8 @@ Number::handle(MasterLexer& lexer) const {
token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
}
} else {
token = MasterToken(&data.at(0), data.size());
data.push_back('\0'); // see String::handle()
token = MasterToken(&data.at(0), data.size() - 1);
}
return;
}

View File

@@ -90,6 +90,13 @@ public:
/// the region. On the other hand, it is not ensured that the string
/// is nul-terminated. So the usual string manipulation API may not work
/// as expected.
///
/// The `MasterLexer` implementation ensures that there are at least
/// len + 1 bytes of valid memory region starting from beg, and that
/// beg[len] is \0. This means the application can use the bytes as a
/// validly nul-terminated C string if there is no intermediate nul
/// character. Note also that due to this property beg is always
/// non-NULL; for an empty string len will be set to 0 and beg[0] is \0.
struct StringRegion {
const char* beg; ///< The start address of the string
size_t len; ///< The length of the string in bytes

View File

@@ -269,6 +269,10 @@ stringTokenCheck(const std::string& expected, const MasterToken& token,
token.getStringRegion().beg +
token.getStringRegion().len);
EXPECT_EQ(expected, actual);
// There should be a "hidden" nul-terminator after the string data.
ASSERT_NE(static_cast<const char*>(NULL), token.getStringRegion().beg);
EXPECT_EQ(0, *(token.getStringRegion().beg + token.getStringRegion().len));
}
TEST_F(MasterLexerStateTest, string) {
@@ -365,6 +369,7 @@ TEST_F(MasterLexerStateTest, stringEscape) {
TEST_F(MasterLexerStateTest, quotedString) {
ss << "\"ignore-quotes\"\n";
ss << "\"quoted string\" "; // space is part of the qstring
ss << "\"\" "; // empty quoted string
// also check other separator characters. note that \r doesn't cause
// UNBALANCED_QUOTES. Not sure if it's intentional, but that's how the
// BIND 9 version works, so we follow it (it should be too minor to matter
@@ -391,6 +396,11 @@ TEST_F(MasterLexerStateTest, quotedString) {
s_qstring.handle(lexer);
stringTokenCheck("quoted string", s_string.getToken(lexer), true);
// Empty string is okay as qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer);
stringTokenCheck("", s_string.getToken(lexer), true);
// Also checks other separator characters within a qstring
EXPECT_EQ(&s_qstring, State::start(lexer, options));
s_qstring.handle(lexer);