2
0
mirror of https://gitlab.isc.org/isc-projects/kea synced 2025-08-31 14:05:33 +00:00

Merge remote-tracking branch 'origin/trac2506' into base/loader

This commit is contained in:
Michal 'vorner' Vaner
2012-12-03 09:42:55 +01:00
6 changed files with 614 additions and 343 deletions

View File

@@ -36,7 +36,7 @@ using namespace master_lexer_internal;
struct MasterLexer::MasterLexerImpl {
MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
MasterLexerImpl() : source_(NULL), token_(MasterToken::NOT_STARTED),
paren_count_(0), last_was_eol_(false),
has_previous_(false),
previous_paren_count_(0),
@@ -82,7 +82,7 @@ struct MasterLexer::MasterLexerImpl {
std::vector<InputSourcePtr> sources_;
InputSource* source_; // current source (NULL if sources_ is empty)
Token token_; // currently recognized token (set by a state)
MasterToken token_; // currently recognized token (set by a state)
std::vector<char> data_; // placeholder for string data
// These are used in states, and defined here only as a placeholder.
@@ -165,9 +165,8 @@ MasterLexer::getSourceLine() const {
return (impl_->sources_.back()->getCurrentLine());
}
const MasterLexer::Token&
const MasterToken&
MasterLexer::getNextToken(Options options) {
// If the source is not available
if (impl_->source_ == NULL) {
isc_throw(isc::InvalidOperation, "No source to read tokens from");
}
@@ -178,7 +177,7 @@ MasterLexer::getNextToken(Options options) {
impl_->has_previous_ = true;
// Reset the token now. This is to check a token was actually produced.
// This is debugging aid.
impl_->token_ = Token(Token::NO_TOKEN_PRODUCED);
impl_->token_ = MasterToken(MasterToken::NO_TOKEN_PRODUCED);
// And get the token
// This actually handles EOF internally too.
@@ -188,8 +187,62 @@ MasterLexer::getNextToken(Options options) {
}
// Make sure a token was produced. Since this Can Not Happen, we assert
// here instead of throwing.
assert(impl_->token_.getType() != Token::ERROR ||
impl_->token_.getErrorCode() != Token::NO_TOKEN_PRODUCED);
assert(impl_->token_.getType() != MasterToken::ERROR ||
impl_->token_.getErrorCode() != MasterToken::NO_TOKEN_PRODUCED);
return (impl_->token_);
}
namespace {
// Translate the token type a caller expects into the lexer option that is
// handed to getNextToken(Options).  Only STRING, QSTRING and NUMBER are
// accepted; any other type results in an InvalidParameter exception.
inline MasterLexer::Options
optionsForTokenType(MasterToken::Type expect) {
    if (expect == MasterToken::STRING) {
        return (MasterLexer::NONE);
    }
    if (expect == MasterToken::QSTRING) {
        return (MasterLexer::QSTRING);
    }
    if (expect == MasterToken::NUMBER) {
        return (MasterLexer::NUMBER);
    }
    isc_throw(InvalidParameter,
              "expected type for getNextToken not supported: " << expect);
}
} // end unnamed namespace
const MasterToken&
MasterLexer::getNextToken(MasterToken::Type expect, bool eol_ok) {
    // Run the generic lexer with the option matching the expected type.
    // Its result is left in impl_->token_.
    getNextToken(optionsForTokenType(expect));
    const MasterToken::Type actual = impl_->token_.getType();

    // Lexer-level errors are converted to LexerError.  Only an out-of-range
    // number is pushed back first; other erroneous input stays consumed.
    if (actual == MasterToken::ERROR) {
        if (impl_->token_.getErrorCode() == MasterToken::NUMBER_OUT_OF_RANGE) {
            ungetToken();
        }
        throw LexerError(__FILE__, __LINE__, impl_->token_);
    }

    const bool reached_end = (actual == MasterToken::END_OF_LINE ||
                              actual == MasterToken::END_OF_FILE);

    // The token is acceptable if it is exactly what was asked for, if it is
    // an end-of-line/file and the caller allows that, or if it is an unquoted
    // string while a quoted string was expected.
    const bool acceptable =
        (actual == expect) ||
        (eol_ok && reached_end) ||
        (expect == MasterToken::QSTRING && actual == MasterToken::STRING);
    if (acceptable) {
        return (impl_->token_);
    }

    // Unexpected type: push the token back before reporting the problem.
    ungetToken();
    if (reached_end) {
        throw LexerError(__FILE__, __LINE__,
                         MasterToken(MasterToken::UNEXPECTED_END));
    }
    // The only remaining mismatch is a non-number where a number is expected.
    assert(expect == MasterToken::NUMBER);
    throw LexerError(__FILE__, __LINE__,
                     MasterToken(MasterToken::BAD_NUMBER));
}
@@ -212,16 +265,17 @@ const char* const error_text[] = {
"unexpected end of input", // UNEXPECTED_END
"unbalanced quotes", // UNBALANCED_QUOTES
"no token produced", // NO_TOKEN_PRODUCED
"number out of range" // NUMBER_OUT_OF_RANGE
"number out of range", // NUMBER_OUT_OF_RANGE
"not a valid number" // BAD_NUMBER
};
const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
} // end unnamed namespace
std::string
MasterLexer::Token::getErrorText() const {
MasterToken::getErrorText() const {
if (type_ != ERROR) {
isc_throw(InvalidOperation,
"Token::getErrorText() for non error type");
"MasterToken::getErrorText() for non error type");
}
// The class integrity ensures the following:
@@ -234,14 +288,12 @@ namespace master_lexer_internal {
// Note that these need to be defined here so that they can refer to
// the details of MasterLexerImpl.
typedef MasterLexer::Token Token; // convenience shortcut
bool
State::wasLastEOL(const MasterLexer& lexer) const {
    // Read the last_was_eol_ flag kept in the lexer's implementation object.
    const bool last_was_eol = lexer.impl_->last_was_eol_;
    return (last_was_eol);
}
const MasterLexer::Token&
const MasterToken&
State::getToken(const MasterLexer& lexer) const {
return (lexer.impl_->token_);
}
@@ -271,7 +323,7 @@ public:
if (c != '\n') {
getLexerImpl(lexer)->source_->ungetChar();
}
getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
getLexerImpl(lexer)->token_ = MasterToken(MasterToken::END_OF_LINE);
getLexerImpl(lexer)->last_was_eol_ = true;
}
};
@@ -342,24 +394,24 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
if (c == InputSource::END_OF_STREAM) {
lexerimpl.last_was_eol_ = false;
if (paren_count != 0) {
lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
lexerimpl.token_ = MasterToken(MasterToken::UNBALANCED_PAREN);
paren_count = 0; // reset to 0; this helps in lenient mode.
return (NULL);
}
lexerimpl.token_ = Token(Token::END_OF_FILE);
lexerimpl.token_ = MasterToken(MasterToken::END_OF_FILE);
return (NULL);
} else if (c == ' ' || c == '\t') {
// If requested and we are not in (), recognize the initial space.
if (lexerimpl.last_was_eol_ && paren_count == 0 &&
(options & MasterLexer::INITIAL_WS) != 0) {
lexerimpl.last_was_eol_ = false;
lexerimpl.token_ = Token(Token::INITIAL_WS);
lexerimpl.token_ = MasterToken(MasterToken::INITIAL_WS);
return (NULL);
}
} else if (c == '\n') {
lexerimpl.last_was_eol_ = true;
if (paren_count == 0) { // we don't recognize EOL if we are in ()
lexerimpl.token_ = Token(Token::END_OF_LINE);
lexerimpl.token_ = MasterToken(MasterToken::END_OF_LINE);
return (NULL);
}
} else if (c == '\r') {
@@ -375,7 +427,7 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
} else if (c == ')') {
lexerimpl.last_was_eol_ = false;
if (paren_count == 0) {
lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
lexerimpl.token_ = MasterToken(MasterToken::UNBALANCED_PAREN);
return (NULL);
}
--paren_count;
@@ -407,7 +459,7 @@ String::handle(MasterLexer& lexer) const {
if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
getLexerImpl(lexer)->source_->ungetChar();
getLexerImpl(lexer)->token_ =
MasterLexer::Token(&data.at(0), data.size());
MasterToken(&data.at(0), data.size());
return;
}
escaped = (c == '\\' && !escaped);
@@ -417,7 +469,7 @@ String::handle(MasterLexer& lexer) const {
void
QString::handle(MasterLexer& lexer) const {
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
MasterToken& token = getLexerImpl(lexer)->token_;
std::vector<char>& data = getLexerImpl(lexer)->data_;
data.clear();
@@ -425,7 +477,7 @@ QString::handle(MasterLexer& lexer) const {
while (true) {
const int c = getLexerImpl(lexer)->source_->getChar();
if (c == InputSource::END_OF_STREAM) {
token = Token(Token::UNEXPECTED_END);
token = MasterToken(MasterToken::UNEXPECTED_END);
return;
} else if (c == '"') {
if (escaped) {
@@ -434,12 +486,12 @@ QString::handle(MasterLexer& lexer) const {
escaped = false;
data.back() = '"';
} else {
token = MasterLexer::Token(&data.at(0), data.size(), true);
token = MasterToken(&data.at(0), data.size(), true);
return;
}
} else if (c == '\n' && !escaped) {
getLexerImpl(lexer)->source_->ungetChar();
token = Token(Token::UNBALANCED_QUOTES);
token = MasterToken(MasterToken::UNBALANCED_QUOTES);
return;
} else {
escaped = (c == '\\' && !escaped);
@@ -450,7 +502,7 @@ QString::handle(MasterLexer& lexer) const {
void
Number::handle(MasterLexer& lexer) const {
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
MasterToken& token = getLexerImpl(lexer)->token_;
// It may yet turn out to be a string, so we first
// collect all the data
@@ -470,15 +522,14 @@ Number::handle(MasterLexer& lexer) const {
try {
const uint32_t number32 =
boost::lexical_cast<uint32_t, const char*>(&data[0]);
token = MasterLexer::Token(number32);
token = MasterToken(number32);
} catch (const boost::bad_lexical_cast&) {
// Since we already know we have only digits,
// range should be the only possible problem.
token = Token(Token::NUMBER_OUT_OF_RANGE);
token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
}
} else {
token = MasterLexer::Token(&data.at(0),
data.size());
token = MasterToken(&data.at(0), data.size());
}
return;
}

View File

@@ -28,6 +28,236 @@ namespace master_lexer_internal {
class State;
}
/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
/// (if it has a value) its value.  Essentially, the class provides
/// constructors corresponding to different types of tokens, and corresponding
/// getter methods.  The type and value are fixed at the time of construction
/// and will never be modified throughout the lifetime of the object.
/// The getter methods are still provided to maximize the safety; an
/// application cannot refer to a value that is invalid for the type of token.
///
/// This class is intentionally implemented as copyable and assignable
/// (using the default version of copy constructor and assignment operator),
/// but it's mainly for internal implementation convenience.  Applications will
/// simply refer to a \c MasterToken object as a reference via the
/// \c MasterLexer class.
class MasterToken {
public:
    /// \brief Enumeration for token types
    ///
    /// \note At the time of initial implementation, all numeric tokens
    /// that would be extracted from \c MasterLexer should be represented
    /// as an unsigned 32-bit integer.  If we see the need for larger integers
    /// or negative numbers, we can then extend the token types.
    enum Type {
        END_OF_LINE, ///< End of line detected
        END_OF_FILE, ///< End of file detected
        INITIAL_WS,  ///< White spaces at the beginning of a line after an
                     ///< end of line (if asked for detecting it)
        NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                       /// no-value (type only) types.
                                       /// Mainly for internal use.
        STRING, ///< A single string
        QSTRING, ///< A single string quoted by double-quotes (").
        NUMBER,  ///< A decimal number (unsigned 32-bit)
        ERROR    ///< Error detected in getting a token
    };

    /// \brief Enumeration for lexer error codes
    enum ErrorCode {
        NOT_STARTED, ///< The lexer is just initialized and has no token
        UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
        UNEXPECTED_END, ///< The lexer reaches the end of line or file
                        /// unexpectedly
        UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
        NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
                           /// error and should never get out of the lexer.
        NUMBER_OUT_OF_RANGE, ///< Number was out of range
        BAD_NUMBER,    ///< Number is expected but not recognized
        MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
                       /// (excluding this one). Mainly for internal use.
    };

    /// \brief A simple representation of a range of a string.
    ///
    /// This is a straightforward pair of the start pointer of a string
    /// and its length.  The \c STRING and \c QSTRING types of tokens
    /// will be primarily represented in this form.
    ///
    /// Any character can be stored in the valid range of the region.
    /// In particular, there can be a nul character (\0) in the middle of
    /// the region.  On the other hand, it is not ensured that the string
    /// is nul-terminated.  So the usual string manipulation API may not work
    /// as expected.
    struct StringRegion {
        const char* beg;        ///< The start address of the string
        size_t len;             ///< The length of the string in bytes
    };

    /// \brief Constructor for non-value type of token.
    ///
    /// \throw InvalidParameter A value type token is specified.
    /// \param type The type of the token.  It must indicate a non-value
    /// type (not larger than \c NOVALUE_TYPE_MAX).
    explicit MasterToken(Type type) : type_(type) {
        if (type > NOVALUE_TYPE_MAX) {
            isc_throw(InvalidParameter, "Token per-type constructor "
                      "called with invalid type: " << type);
        }
    }

    /// \brief Constructor for string and quoted-string types of token.
    ///
    /// The optional \c quoted parameter specifies whether it's a quoted or
    /// non quoted string.
    ///
    /// The string is specified as a pair of a pointer to the start address
    /// and its length.  Any character can be contained in any position of
    /// the valid range (see \c StringRegion).
    ///
    /// When it's a quoted string, the quotation marks must be excluded
    /// from the specified range.
    ///
    /// \param str_beg The start address of the string
    /// \param str_len The size of the string in bytes
    /// \param quoted true if it's a quoted string; false otherwise.
    MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
        type_(quoted ? QSTRING : STRING)
    {
        // Only the pointer and length are stored; the characters themselves
        // are not copied.
        val_.str_region_.beg = str_beg;
        val_.str_region_.len = str_len;
    }

    /// \brief Constructor for number type of token.
    ///
    /// \param number An unsigned 32-bit integer corresponding to the token
    /// value.
    explicit MasterToken(uint32_t number) : type_(NUMBER) {
        val_.number_ = number;
    }

    /// \brief Constructor for error type of token.
    ///
    /// \throw InvalidParameter Invalid error code value is specified.
    /// \param error_code A pre-defined constant of \c ErrorCode.
    explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
        if (!(error_code < MAX_ERROR_CODE)) {
            isc_throw(InvalidParameter, "Invalid master lexer error code: "
                      << error_code);
        }
        val_.error_code_ = error_code;
    }

    /// \brief Return the token type.
    ///
    /// \throw none
    Type getType() const { return (type_); }

    /// \brief Return the value of a string-variant token.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \return A reference to \c StringRegion corresponding to the string
    ///         token value.
    const StringRegion& getStringRegion() const {
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
                      "Token::getStringRegion() for non string-variant type");
        }
        return (val_.str_region_);
    }

    /// \brief Return the value of a string-variant token as a string object.
    ///
    /// Note that the underlying string may contain a nul (\0) character
    /// in the middle.  The returned string object will contain all characters
    /// of the valid range of the underlying string.  So some string
    /// operations such as c_str() may not work as expected.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    /// string object.
    /// \return A std::string object corresponding to the string token value.
    std::string getString() const {
        std::string ret;
        getString(ret);
        return (ret);
    }

    /// \brief Fill in a string with the value of a string-variant token.
    ///
    /// This is similar to the other version of \c getString(), but
    /// the caller is supposed to pass a placeholder string object.
    /// This will be more efficient if the caller uses the same
    /// \c MasterLexer repeatedly and needs to get string token in the
    /// form of a string object many times as this version could reuse
    /// the existing internal storage of the passed string.
    ///
    /// Any existing content of the passed string will be removed.
    ///
    /// \throw InvalidOperation Called on a non string-variant types of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    /// string object.
    ///
    /// \param ret A string object to be filled with the token string.
    void getString(std::string& ret) const {
        if (type_ != STRING && type_ != QSTRING) {
            isc_throw(InvalidOperation,
                      "Token::getString() for non string-variant type");
        }
        ret.assign(val_.str_region_.beg,
                   val_.str_region_.beg + val_.str_region_.len);
    }

    /// \brief Return the value of a number type token.
    ///
    /// \throw InvalidOperation Called on a non number type of token.
    /// \return The integer corresponding to the number token value.
    uint32_t getNumber() const {
        if (type_ != NUMBER) {
            isc_throw(InvalidOperation,
                      "Token::getNumber() for non number type");
        }
        return (val_.number_);
    }

    /// \brief Return the error code of a error type token.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \return The error code of the token.
    ErrorCode getErrorCode() const {
        if (type_ != ERROR) {
            isc_throw(InvalidOperation,
                      "Token::getErrorCode() for non error type");
        }
        return (val_.error_code_);
    }

    /// \brief Return a textual description of the error of a error type token.
    ///
    /// The returned string would be useful to produce a log message when
    /// a zone file parser encounters an error.
    ///
    /// \throw InvalidOperation Called on a non error type of token.
    /// \throw std::bad_alloc Resource allocation failure in constructing the
    /// string object.
    /// \return A string object that describes the meaning of the error.
    std::string getErrorText() const;

private:
    Type type_;    // this is not const so the class can be assignable

    // We use a union to represent different types of token values via the
    // unified MasterToken class.  The class integrity should ensure valid
    // operation on the union; getter methods should only refer to the member
    // set at the construction.
    union {
        StringRegion str_region_;
        uint32_t number_;
        ErrorCode error_code_;
    } val_;
};
/// \brief Tokenizer for parsing DNS master files.
///
/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
@@ -71,13 +301,28 @@ class MasterLexer {
public:
/// \brief Exception thrown when we fail to read from the input
/// stream or file.
struct ReadError : public Unexpected {
class ReadError : public Unexpected {
public:
ReadError(const char* file, size_t line, const char* what) :
Unexpected(file, line, what)
{}
};
class Token; // we define it separately for better readability
/// \brief Exception thrown from a wrapper version of
/// \c MasterLexer::getNextToken() for non fatal errors.
///
/// See the method description for more details.
///
/// The exception carries a copy of the \c MasterToken it was built from in
/// the public, read-only \c token_ member; that token is of type ERROR and
/// indicates the reason for the error.
class LexerError : public Exception {
public:
    /// \brief The error token given to the constructor (read-only).
    const MasterToken token_;

    /// \brief Constructor.
    ///
    /// The exception message is taken from \c error_token.getErrorText(),
    /// so \c error_token must be of type ERROR.
    ///
    /// \param file Source file name passed on to the base exception.
    /// \param line Source line number passed on to the base exception.
    /// \param error_token The error token describing the failure.
    LexerError(const char* file, size_t line, MasterToken error_token) :
        Exception(file, line, error_token.getErrorText().c_str()),
        token_(error_token)
    {}
};
/// \brief Options for getNextToken.
///
@@ -213,7 +458,77 @@ public:
/// source (eg. I/O error in the file on the disk).
/// \throw std::bad_alloc in case allocation of some internal resources
/// or the token fail.
const Token& getNextToken(Options options = NONE);
const MasterToken& getNextToken(Options options = NONE);
/// \brief Parse the input for the expected type of token.
///
/// This method is a wrapper of the other version, customized for the case
/// where a particular type of token is expected as the next one.
/// More specifically, it's intended to be used to get tokens for RDATA
/// fields. Since most RDATA types are of fixed format, the token type is
/// often predictable and the method interface can be simplified.
///
/// This method basically works as follows: it gets the type of the
/// expected token, calls the other version of \c getNextToken(Options),
/// and returns the token if it's of the expected type (due to the usage
/// assumption this should be normally the case). There are some non
/// trivial details though:
///
/// - If the expected type is MasterToken::QSTRING, both quoted and
/// unquoted strings are recognized and returned.
/// - If the optional \c eol_ok parameter is \c true (very rare case),
/// MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
/// and returned if they are found instead of the expected type of
/// token.
/// - If the next token is not of the expected type (including the case
/// a number is expected but it's out of range), ungetToken() is
/// internally called so the caller can re-read that token.
/// - If other types or errors (such as unbalanced parentheses) are
/// detected, the erroneous part isn't "ungotten"; the caller can
/// continue parsing after that part.
///
/// In some very rare cases where the RDATA has an optional trailing field,
/// the \c eol_ok parameter would be set to \c true. This way the caller
/// can handle both cases (the field does or does not exist) by a single
/// call to this method. In all other cases \c eol_ok should be set to
/// \c false, and that is the default and can be omitted.
///
/// Unlike the other version of \c getNextToken(Options), this method
/// throws an exception of type \c LexerError for non fatal errors such as
/// broken syntax or encountering an unexpected type of token. This way
/// the caller can write RDATA parser code without bothering to handle
/// errors for each field. For example, pseudo parser code for MX RDATA
/// would look like this:
/// \code
/// const uint32_t pref =
/// lexer.getNextToken(MasterToken::NUMBER).getNumber();
/// // check if pref is the uint16_t range; no other check is needed.
/// const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
/// \endcode
///
/// In the case where \c LexerError exception is thrown, it's expected
/// to be handled comprehensively for the parser of the RDATA or at a
/// higher layer. The \c token_ member variable of the corresponding
/// \c LexerError exception object stores a token of type
/// \c MasterToken::ERROR that indicates the reason for the error.
///
/// Due to the specific intended usage of this method, only a subset
/// of \c MasterToken::Type values are acceptable for the \c expect
/// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
/// \c MasterToken::NUMBER. Specifying other values will result in
/// an \c InvalidParameter exception.
///
/// \throw InvalidParameter The expected token type is not allowed for
/// this method.
/// \throw LexerError The lexer finds non fatal error or it finds an
/// unexpected type of token.
/// \throw other Anything the other version of getNextToken() can throw.
///
/// \param expect Expected type of token. Must be either STRING, QSTRING,
/// or NUMBER.
/// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
/// \return The expected type of token.
const MasterToken& getNextToken(MasterToken::Type expect,
bool eol_ok = false);
/// \brief Return the last token back to the lexer.
///
@@ -247,235 +562,6 @@ operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
}
/// \brief Tokens for \c MasterLexer
///
/// This is a simple value-class encapsulating a type of a lexer token and
/// (if it has a value) its value. Essentially, the class provides
/// constructors corresponding to different types of tokens, and corresponding
/// getter methods. The type and value are fixed at the time of construction
/// and will never be modified throughout the lifetime of the object.
/// The getter methods are still provided to maximize the safety; an
/// application cannot refer to a value that is invalid for the type of token.
///
/// This class is intentionally implemented as copyable and assignable
/// (using the default version of copy constructor and assignment operator),
/// but it's mainly for internal implementation convenience. Applications will
/// simply refer to Token object as a reference via the \c MasterLexer class.
class MasterLexer::Token {
public:
/// \brief Enumeration for token types
///
/// \note At the time of initial implementation, all numeric tokens
/// that would be extracted from \c MasterLexer should be represented
/// as an unsigned 32-bit integer. If we see the need for larger integers
/// or negative numbers, we can then extend the token types.
enum Type {
END_OF_LINE, ///< End of line detected
END_OF_FILE, ///< End of file detected
INITIAL_WS, ///< White spaces at the beginning of a line after an
///< end of line (if asked for detecting it)
NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
/// no-value (type only) types.
/// Mainly for internal use.
STRING, ///< A single string
QSTRING, ///< A single string quoted by double-quotes (").
NUMBER, ///< A decimal number (unsigned 32-bit)
ERROR ///< Error detected in getting a token
};
/// \brief Enumeration for lexer error codes
enum ErrorCode {
NOT_STARTED, ///< The lexer is just initialized and has no token
UNBALANCED_PAREN, ///< Unbalanced parentheses detected
UNEXPECTED_END, ///< The lexer reaches the end of line or file
/// unexpectedly
UNBALANCED_QUOTES, ///< Unbalanced quotations detected
NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
/// error and should never get out of the lexer.
NUMBER_OUT_OF_RANGE, ///< Number was out of range
MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
/// (excluding this one). Mainly for internal use.
};
/// \brief A simple representation of a range of a string.
///
/// This is a straightforward pair of the start pointer of a string
/// and its length. The \c STRING and \c QSTRING types of tokens
/// will be primarily represented in this form.
///
/// Any character can be stored in the valid range of the region.
/// In particular, there can be a nul character (\0) in the middle of
/// the region. On the other hand, it is not ensured that the string
/// is nul-terminated. So the usual string manipulation API may not work
/// as expected.
struct StringRegion {
const char* beg; ///< The start address of the string
size_t len; ///< The length of the string in bytes
};
/// \brief Constructor for non-value type of token.
///
/// \throw InvalidParameter A value type token is specified.
/// \param type The type of the token. It must indicate a non-value
/// type (not larger than \c NOVALUE_TYPE_MAX).
explicit Token(Type type) : type_(type) {
if (type > NOVALUE_TYPE_MAX) {
isc_throw(InvalidParameter, "Token per-type constructor "
"called with invalid type: " << type);
}
}
/// \brief Constructor for string and quoted-string types of token.
///
/// The optional \c quoted parameter specifies whether it's a quoted or
/// non quoted string.
///
/// The string is specified as a pair of a pointer to the start address
/// and its length. Any character can be contained in any position of
/// the valid range (see \c StringRegion).
///
/// When it's a quoted string, the quotation marks must be excluded
/// from the specified range.
///
/// \param str_beg The start address of the string
/// \param str_len The size of the string in bytes
/// \param quoted true if it's a quoted string; false otherwise.
Token(const char* str_beg, size_t str_len, bool quoted = false) :
type_(quoted ? QSTRING : STRING)
{
val_.str_region_.beg = str_beg;
val_.str_region_.len = str_len;
}
/// \brief Constructor for number type of token.
///
/// \param number An unsigned 32-bit integer corresponding to the token
/// value.
explicit Token(uint32_t number) : type_(NUMBER) {
val_.number_ = number;
}
/// \brief Constructor for error type of token.
///
/// \throw InvalidParameter Invalid error code value is specified.
/// \param error_code A pre-defined constant of \c ErrorCode.
explicit Token(ErrorCode error_code) : type_(ERROR) {
if (!(error_code < MAX_ERROR_CODE)) {
isc_throw(InvalidParameter, "Invalid master lexer error code: "
<< error_code);
}
val_.error_code_ = error_code;
}
/// \brief Return the token type.
///
/// \throw none
Type getType() const { return (type_); }
/// \brief Return the value of a string-variant token.
///
/// \throw InvalidOperation Called on a non string-variant types of token.
/// \return A reference to \c StringRegion corresponding to the string
/// token value.
const StringRegion& getStringRegion() const {
if (type_ != STRING && type_ != QSTRING) {
isc_throw(InvalidOperation,
"Token::getStringRegion() for non string-variant type");
}
return (val_.str_region_);
}
/// \brief Return the value of a string-variant token as a string object.
///
/// Note that the underlying string may contain a nul (\0) character
/// in the middle. The returned string object will contain all characters
/// of the valid range of the underlying string. So some string
/// operations such as c_str() may not work as expected.
///
/// \throw InvalidOperation Called on a non string-variant types of token.
/// \throw std::bad_alloc Resource allocation failure in constructing the
/// string object.
/// \return A std::string object corresponding to the string token value.
std::string getString() const {
std::string ret;
getString(ret);
return (ret);
}
/// \brief Fill in a string with the value of a string-variant token.
///
/// This is similar to the other version of \c getString(), but
/// the caller is supposed to pass a placeholder string object.
/// This will be more efficient if the caller uses the same
/// \c MasterLexer repeatedly and needs to get string token in the
/// form of a string object many times as this version could reuse
/// the existing internal storage of the passed string.
///
/// Any existing content of the passed string will be removed.
///
/// \throw InvalidOperation Called on a non string-variant types of token.
/// \throw std::bad_alloc Resource allocation failure in constructing the
/// string object.
///
/// \param ret A string object to be filled with the token string.
void getString(std::string& ret) const {
if (type_ != STRING && type_ != QSTRING) {
isc_throw(InvalidOperation,
"Token::getString() for non string-variant type");
}
ret.assign(val_.str_region_.beg,
val_.str_region_.beg + val_.str_region_.len);
}
/// \brief Return the value of a number type token.
///
/// \throw InvalidOperation Called on a non number type of token.
/// \return The integer corresponding to the number token value.
uint32_t getNumber() const {
if (type_ != NUMBER) {
isc_throw(InvalidOperation,
"Token::getNumber() for non number type");
}
return (val_.number_);
}
/// \brief Return the error code of a error type token.
///
/// \throw InvalidOperation Called on a non error type of token.
/// \return The error code of the token.
ErrorCode getErrorCode() const {
if (type_ != ERROR) {
isc_throw(InvalidOperation,
"Token::getErrorCode() for non error type");
}
return (val_.error_code_);
};
/// \brief Return a textual description of the error of a error type token.
///
/// The returned string would be useful to produce a log message when
/// a zone file parser encounters an error.
///
/// \throw InvalidOperation Called on a non error type of token.
/// \throw std::bad_alloc Resource allocation failure in constructing the
/// string object.
/// \return A string object that describes the meaning of the error.
std::string getErrorText() const;
private:
Type type_; // this is not const so the class can be assignable
// We use a union to represent different types of token values via the
// unified Token class. The class integrity should ensure valid operation
// on the union; getter methods should only refer to the member set at
// the construction.
union {
StringRegion str_region_;
uint32_t number_;
ErrorCode error_code_;
} val_;
};
} // namespace dns
} // namespace isc
#endif // MASTER_LEXER_H

View File

@@ -43,10 +43,10 @@ namespace master_lexer_internal {
/// state, so it makes more sense to separate the interface for the transition
/// from the initial state.
///
/// When an object of a specific state class completes the session, it
/// normally sets the identified token in the lexer, and returns NULL;
/// if more transition is necessary, it returns a pointer to the next state
/// object.
/// If the whole lexer transition is completed within start(), it sets the
/// identified token and returns NULL; otherwise it returns a pointer to
/// an object of a specific state class that completes the session
/// on the call of handle().
///
/// As is usual in the state design pattern, the \c State class is made
/// a friend class of \c MasterLexer and can refer to its internal details.
@@ -119,7 +119,7 @@ public:
/// purposes.
///@{
bool wasLastEOL(const MasterLexer& lexer) const;
const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
const MasterToken& getToken(const MasterLexer& lexer) const;
size_t getParenCount(const MasterLexer& lexer) const;
///@}

View File

@@ -24,7 +24,7 @@ using namespace isc::dns;
using namespace master_lexer_internal;
namespace {
typedef MasterLexer::Token Token; // shortcut
typedef MasterToken Token; // shortcut
class MasterLexerStateTest : public ::testing::Test {
protected:
@@ -260,7 +260,7 @@ TEST_F(MasterLexerStateTest, crlf) {
// Commonly used check for string related test cases, checking if the given
// token has expected values.
void
stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
stringTokenCheck(const std::string& expected, const MasterToken& token,
bool quoted = false)
{
EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());

View File

@@ -31,27 +31,27 @@ const size_t TEST_STRING_LEN = sizeof(TEST_STRING) - 1;
class MasterLexerTokenTest : public ::testing::Test {
protected:
MasterLexerTokenTest() :
token_eof(MasterLexer::Token::END_OF_FILE),
token_eof(MasterToken::END_OF_FILE),
token_str(TEST_STRING, TEST_STRING_LEN),
token_num(42),
token_err(MasterLexer::Token::UNEXPECTED_END)
token_err(MasterToken::UNEXPECTED_END)
{}
const MasterLexer::Token token_eof; // an example of non-value type token
const MasterLexer::Token token_str;
const MasterLexer::Token token_num;
const MasterLexer::Token token_err;
const MasterToken token_eof; // an example of non-value type token
const MasterToken token_str;
const MasterToken token_num;
const MasterToken token_err;
};
TEST_F(MasterLexerTokenTest, strings) {
// basic construction and getter checks
EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
EXPECT_EQ(MasterToken::STRING, token_str.getType());
EXPECT_EQ(std::string("string token"), token_str.getString());
std::string strval = "dummy"; // this should be replaced
token_str.getString(strval);
EXPECT_EQ(std::string("string token"), strval);
const MasterLexer::Token::StringRegion str_region =
const MasterToken::StringRegion str_region =
token_str.getStringRegion();
EXPECT_EQ(TEST_STRING, str_region.beg);
EXPECT_EQ(TEST_STRING_LEN, str_region.len);
@@ -62,17 +62,17 @@ TEST_F(MasterLexerTokenTest, strings) {
std::string expected_str("string token");
expected_str.push_back('\0');
EXPECT_EQ(expected_str,
MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString(strval);
MasterToken(TEST_STRING, TEST_STRING_LEN + 1).getString());
MasterToken(TEST_STRING, TEST_STRING_LEN + 1).getString(strval);
EXPECT_EQ(expected_str, strval);
// Construct type of qstring
EXPECT_EQ(MasterLexer::Token::QSTRING,
MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), true).
EXPECT_EQ(MasterToken::QSTRING,
MasterToken(TEST_STRING, sizeof(TEST_STRING), true).
getType());
// if we explicitly set 'quoted' to false, it should be normal string
EXPECT_EQ(MasterLexer::Token::STRING,
MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), false).
EXPECT_EQ(MasterToken::STRING,
MasterToken(TEST_STRING, sizeof(TEST_STRING), false).
getType());
// getString/StringRegion() aren't allowed for non string(-variant) types
@@ -86,23 +86,23 @@ TEST_F(MasterLexerTokenTest, strings) {
TEST_F(MasterLexerTokenTest, numbers) {
EXPECT_EQ(42, token_num.getNumber());
EXPECT_EQ(MasterLexer::Token::NUMBER, token_num.getType());
EXPECT_EQ(MasterToken::NUMBER, token_num.getType());
// It's copyable and assignable.
MasterLexer::Token token(token_num);
MasterToken token(token_num);
EXPECT_EQ(42, token.getNumber());
EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
EXPECT_EQ(MasterToken::NUMBER, token.getType());
token = token_num;
EXPECT_EQ(42, token.getNumber());
EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
EXPECT_EQ(MasterToken::NUMBER, token.getType());
// it's okay to replace it with a different type of token
token = token_eof;
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token.getType());
EXPECT_EQ(MasterToken::END_OF_FILE, token.getType());
// Possible max value
token = MasterLexer::Token(0xffffffff);
token = MasterToken(0xffffffff);
EXPECT_EQ(4294967295u, token.getNumber());
// getNumber() isn't allowed for non number types
@@ -112,58 +112,52 @@ TEST_F(MasterLexerTokenTest, numbers) {
TEST_F(MasterLexerTokenTest, novalues) {
// Just checking we can construct them and getType() returns correct value.
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token_eof.getType());
EXPECT_EQ(MasterLexer::Token::END_OF_LINE,
MasterLexer::Token(MasterLexer::Token::END_OF_LINE).getType());
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
MasterLexer::Token(MasterLexer::Token::INITIAL_WS).getType());
EXPECT_EQ(MasterToken::END_OF_FILE, token_eof.getType());
EXPECT_EQ(MasterToken::END_OF_LINE,
MasterToken(MasterToken::END_OF_LINE).getType());
EXPECT_EQ(MasterToken::INITIAL_WS,
MasterToken(MasterToken::INITIAL_WS).getType());
// Special types of tokens cannot have value-based types
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::STRING),
isc::InvalidParameter);
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::QSTRING),
isc::InvalidParameter);
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::NUMBER),
isc::InvalidParameter);
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::ERROR),
isc::InvalidParameter);
EXPECT_THROW(MasterToken t(MasterToken::STRING), isc::InvalidParameter);
EXPECT_THROW(MasterToken t(MasterToken::QSTRING), isc::InvalidParameter);
EXPECT_THROW(MasterToken t(MasterToken::NUMBER), isc::InvalidParameter);
EXPECT_THROW(MasterToken t(MasterToken::ERROR), isc::InvalidParameter);
}
TEST_F(MasterLexerTokenTest, errors) {
EXPECT_EQ(MasterLexer::Token::ERROR, token_err.getType());
EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END, token_err.getErrorCode());
EXPECT_EQ(MasterToken::ERROR, token_err.getType());
EXPECT_EQ(MasterToken::UNEXPECTED_END, token_err.getErrorCode());
EXPECT_EQ("unexpected end of input", token_err.getErrorText());
EXPECT_EQ("lexer not started",
MasterLexer::Token(MasterLexer::Token::NOT_STARTED).
EXPECT_EQ("lexer not started", MasterToken(MasterToken::NOT_STARTED).
getErrorText());
EXPECT_EQ("unbalanced parentheses",
MasterLexer::Token(MasterLexer::Token::UNBALANCED_PAREN).
MasterToken(MasterToken::UNBALANCED_PAREN).
getErrorText());
EXPECT_EQ("unbalanced quotes",
MasterLexer::Token(MasterLexer::Token::UNBALANCED_QUOTES).
EXPECT_EQ("unbalanced quotes", MasterToken(MasterToken::UNBALANCED_QUOTES).
getErrorText());
EXPECT_EQ("no token produced",
MasterLexer::Token(MasterLexer::Token::NO_TOKEN_PRODUCED).
EXPECT_EQ("no token produced", MasterToken(MasterToken::NO_TOKEN_PRODUCED).
getErrorText());
EXPECT_EQ("number out of range",
MasterLexer::Token(MasterLexer::Token::NUMBER_OUT_OF_RANGE).
MasterToken(MasterToken::NUMBER_OUT_OF_RANGE).
getErrorText());
EXPECT_EQ("not a valid number",
MasterToken(MasterToken::BAD_NUMBER).getErrorText());
// getErrorCode/Text() isn't allowed for non error types
EXPECT_THROW(token_num.getErrorCode(), isc::InvalidOperation);
EXPECT_THROW(token_num.getErrorText(), isc::InvalidOperation);
// Only the pre-defined error code is accepted. Hardcoding '6' (max code
// Only the pre-defined error code is accepted. Hardcoding '7' (max code
// + 1) is intentional; it'd be actually better if we notice it when we
// update the enum list (which shouldn't happen too often).
EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(6)),
EXPECT_THROW(MasterToken(MasterToken::ErrorCode(7)),
isc::InvalidParameter);
// Check the coexistence of "from number" and "from error-code"
// constructors won't cause confusion.
EXPECT_EQ(MasterLexer::Token::NUMBER,
MasterLexer::Token(static_cast<uint32_t>(
MasterLexer::Token::NOT_STARTED)).
EXPECT_EQ(MasterToken::NUMBER,
MasterToken(static_cast<uint32_t>(MasterToken::NOT_STARTED)).
getType());
}
}

View File

@@ -141,19 +141,19 @@ TEST_F(MasterLexerTest, getNextToken) {
lexer.pushSource(ss);
// First, the newline should get out.
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Then the whitespace, if we specify the option.
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
// The newline
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// The (quoted) string
EXPECT_EQ(MasterLexer::Token::QSTRING,
EXPECT_EQ(MasterToken::QSTRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
// And the end of line and file
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
}
// Test we correctly find end of file.
@@ -162,12 +162,12 @@ TEST_F(MasterLexerTest, eof) {
lexer.pushSource(ss);
// The first one is found to be EOF
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
// And it stays on EOF for any following attempts
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
// And we can step back one token, but that is the EOF too.
lexer.ungetToken();
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
}
// Check we properly return error when there's an opened parentheses and no
@@ -177,12 +177,12 @@ TEST_F(MasterLexerTest, getUnbalancedParen) {
lexer.pushSource(ss);
// The string gets out first
EXPECT_EQ(MasterLexer::Token::STRING, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());
// Then an unbalanced parenthesis
EXPECT_EQ(MasterLexer::Token::UNBALANCED_PAREN,
EXPECT_EQ(MasterToken::UNBALANCED_PAREN,
lexer.getNextToken().getErrorCode());
// And then EOF
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
}
// Check we properly return error when there's an opened quoted string and no
@@ -192,10 +192,10 @@ TEST_F(MasterLexerTest, getUnbalancedString) {
lexer.pushSource(ss);
// Then an unbalanced qstring (reported as an unexpected end)
EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END,
EXPECT_EQ(MasterToken::UNEXPECTED_END,
lexer.getNextToken(MasterLexer::QSTRING).getErrorCode());
// And then EOF
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
}
// Test ungetting tokens works
@@ -204,28 +204,28 @@ TEST_F(MasterLexerTest, ungetToken) {
lexer.pushSource(ss);
// Try getting the newline
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Return it and get again
lexer.ungetToken();
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Get the string and return it back
EXPECT_EQ(MasterLexer::Token::QSTRING,
EXPECT_EQ(MasterToken::QSTRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
lexer.ungetToken();
// But if we change the options, it honors them
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::QSTRING |
MasterLexer::INITIAL_WS).getType());
// Get to the "more" string
EXPECT_EQ(MasterLexer::Token::QSTRING,
EXPECT_EQ(MasterToken::QSTRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
EXPECT_EQ(MasterLexer::Token::STRING,
EXPECT_EQ(MasterToken::STRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
// Return it back. It should get inside the parentheses.
// Upon next attempt to get it again, the newline inside the parentheses
// should be still ignored.
lexer.ungetToken();
EXPECT_EQ(MasterLexer::Token::STRING,
EXPECT_EQ(MasterToken::STRING,
lexer.getNextToken(MasterLexer::QSTRING).getType());
}
@@ -235,16 +235,16 @@ TEST_F(MasterLexerTest, ungetRealOptions) {
ss << "\n \n";
lexer.pushSource(ss);
// Skip the first newline
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// If we call it the usual way, it skips up to the newline and returns
// it
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Now we return it. If we call it again, but with different options,
// we get the initial whitespace.
lexer.ungetToken();
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
EXPECT_EQ(MasterToken::INITIAL_WS,
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
}
@@ -253,7 +253,7 @@ TEST_F(MasterLexerTest, ungetTwice) {
ss << "\n";
lexer.pushSource(ss);
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Unget the token. It can be done once
lexer.ungetToken();
// But not twice
@@ -271,17 +271,157 @@ TEST_F(MasterLexerTest, ungetBeforeGet) {
TEST_F(MasterLexerTest, ungetAfterSwitch) {
ss << "\n\n";
lexer.pushSource(ss);
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// Switch the source
std::stringstream ss2;
ss2 << "\n\n";
lexer.pushSource(ss2);
EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
// We can get from the new source
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
// And when we drop the current source, we can't unget again
lexer.popSource();
EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
}
// Shared helper for the cases where getNextToken() is supposed to fail:
// calls getNextToken(expect) and verifies that it throws
// MasterLexer::LexerError whose token carries the given error code.
void
lexerErrorCheck(MasterLexer& lexer, MasterToken::Type expect,
                MasterToken::ErrorCode expected_error)
{
    // Assume the exception is raised; cleared below if the call returns.
    bool thrown = true;
    try {
        lexer.getNextToken(expect);
        // Getting here means no exception was raised at all.
        thrown = false;
    } catch (const MasterLexer::LexerError& error) {
        EXPECT_EQ(expected_error, error.token_.getErrorCode());
    }
    EXPECT_TRUE(thrown);
}
// Shared checks for how getNextToken(expect, eol_ok) treats end-of-line,
// both when it is permitted and when it is not.
void
eolCheck(MasterLexer& lexer, MasterToken::Type expect) {
    // With eol_ok set to true, an EOL is simply handed back to the caller.
    const MasterToken::Type eol_type =
        lexer.getNextToken(expect, true).getType();
    EXPECT_EQ(MasterToken::END_OF_LINE, eol_type);

    // The second '\n' comes next; without eol_ok (the default) it's an error.
    EXPECT_THROW(lexer.getNextToken(expect), MasterLexer::LexerError);

    // Passing eol_ok = false explicitly behaves the same way.  That this
    // fails again also shows the offending '\n' was "ungotten".
    EXPECT_THROW(lexer.getNextToken(expect, false), MasterLexer::LexerError);

    // Finally, inspect the error token carried by the exception object.
    lexerErrorCheck(lexer, expect, MasterToken::UNEXPECTED_END);
}
// Shared checks for how getNextToken(expect, eol_ok) treats end-of-file,
// both when it is permitted and when it is not.
void
eofCheck(MasterLexer& lexer, MasterToken::Type expect) {
    // With eol_ok set to true, EOF is handed back as a normal token.
    const MasterToken::Type eof_type =
        lexer.getNextToken(expect, true).getType();
    EXPECT_EQ(MasterToken::END_OF_FILE, eof_type);

    // Otherwise (by default, or with an explicit false) it results in
    // LexerError.
    EXPECT_THROW(lexer.getNextToken(expect), MasterLexer::LexerError);
    EXPECT_THROW(lexer.getNextToken(expect, false), MasterLexer::LexerError);
}
TEST_F(MasterLexerTest, getNextTokenString) {
    // Input: a string, an empty line, and a string terminated by EOF.
    ss << "normal-string\n"
       << "\n"
       << "another-string";
    lexer.pushSource(ss);

    // Plain success: a string is expected and a string is found.
    const MasterToken& first = lexer.getNextToken(MasterToken::STRING);
    EXPECT_EQ("normal-string", first.getString());
    eolCheck(lexer, MasterToken::STRING);

    // Consume the 2nd '\n'
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // Repeat the same kind of checks, this time against end-of-file
    const MasterToken& second = lexer.getNextToken(MasterToken::STRING, true);
    EXPECT_EQ("another-string", second.getString());
    eofCheck(lexer, MasterToken::STRING);
}
TEST_F(MasterLexerTest, getNextTokenQString) {
    // Input: a quoted string, an empty line, and an unquoted string
    // terminated by EOF.
    ss << "\"quoted-string\"\n"
       << "\n"
       << "normal-string";
    lexer.pushSource(ss);

    // A quoted string is expected and one is found.
    const MasterToken& quoted = lexer.getNextToken(MasterToken::QSTRING);
    EXPECT_EQ("quoted-string", quoted.getString());
    eolCheck(lexer, MasterToken::QSTRING);

    // Consume the 2nd '\n'
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // A quoted string is expected but a normal string shows up.  This is
    // accepted.
    const MasterToken& unquoted = lexer.getNextToken(MasterToken::QSTRING);
    EXPECT_EQ("normal-string", unquoted.getString());
    eofCheck(lexer, MasterToken::QSTRING);
}
TEST_F(MasterLexerTest, getNextTokenNumber) {
    // Input: a valid number, an empty line, then a too-large number, a
    // non-numeric string and another valid number on the last line.
    ss << "3600\n"
       << "\n"
       << "4294967296 "         // =2^32, out of range
       << "not-a-number "
       << "86400";
    lexer.pushSource(ss);

    // A number is expected and a valid one is found.
    const MasterToken& number = lexer.getNextToken(MasterToken::NUMBER);
    EXPECT_EQ(3600, number.getNumber());
    eolCheck(lexer, MasterToken::NUMBER);

    // Consume the 2nd '\n'
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // A number is expected, but the value doesn't fit in uint32.
    lexerErrorCheck(lexer, MasterToken::NUMBER,
                    MasterToken::NUMBER_OUT_OF_RANGE);
    // The offending token should have been "ungotten".  Read it again
    // (as a string) to move past it.
    EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());

    // A number is expected, but a non-numeric string is found.
    lexerErrorCheck(lexer, MasterToken::NUMBER, MasterToken::BAD_NUMBER);
    // Likewise, the unexpected string should have been "ungotten".  Read it
    // again to move past it.
    EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());

    // A decimal number string is recognized as a plain string unless
    // NUMBER is specified.
    const MasterToken& as_string = lexer.getNextToken(MasterToken::STRING);
    EXPECT_EQ("86400", as_string.getString());

    eofCheck(lexer, MasterToken::NUMBER);
}
TEST_F(MasterLexerTest, getNextTokenErrors) {
    // Miscellaneous error cases.  Input: an unbalanced parenthesis
    // followed by an ordinary string.
    ss << ") "
       << "string-after-error ";
    lexer.pushSource(ss);

    // The "expected" type must be one of string/qstring/number; anything
    // else is rejected as an invalid parameter.
    EXPECT_THROW(lexer.getNextToken(MasterToken::END_OF_LINE),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::END_OF_FILE),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::INITIAL_WS),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::ERROR),
                 isc::InvalidParameter);

    // A syntax error in the input is reported as a LexerError exception.
    lexerErrorCheck(lexer, MasterToken::STRING, MasterToken::UNBALANCED_PAREN);

    // In contrast to the NUMBER_OUT_OF_RANGE case, getNextToken() has
    // already skipped the erroneous part, so the following token should be
    // available right away.
    const MasterToken& next = lexer.getNextToken(MasterToken::STRING);
    EXPECT_EQ("string-after-error", next.getString());
}
}