mirror of
https://gitlab.isc.org/isc-projects/kea
synced 2025-08-31 14:05:33 +00:00
Merge remote-tracking branch 'origin/trac2506' into base/loader
This commit is contained in:
@@ -36,7 +36,7 @@ using namespace master_lexer_internal;
|
||||
|
||||
|
||||
struct MasterLexer::MasterLexerImpl {
|
||||
MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
|
||||
MasterLexerImpl() : source_(NULL), token_(MasterToken::NOT_STARTED),
|
||||
paren_count_(0), last_was_eol_(false),
|
||||
has_previous_(false),
|
||||
previous_paren_count_(0),
|
||||
@@ -82,7 +82,7 @@ struct MasterLexer::MasterLexerImpl {
|
||||
|
||||
std::vector<InputSourcePtr> sources_;
|
||||
InputSource* source_; // current source (NULL if sources_ is empty)
|
||||
Token token_; // currently recognized token (set by a state)
|
||||
MasterToken token_; // currently recognized token (set by a state)
|
||||
std::vector<char> data_; // placeholder for string data
|
||||
|
||||
// These are used in states, and defined here only as a placeholder.
|
||||
@@ -165,9 +165,8 @@ MasterLexer::getSourceLine() const {
|
||||
return (impl_->sources_.back()->getCurrentLine());
|
||||
}
|
||||
|
||||
const MasterLexer::Token&
|
||||
const MasterToken&
|
||||
MasterLexer::getNextToken(Options options) {
|
||||
// If the source is not available
|
||||
if (impl_->source_ == NULL) {
|
||||
isc_throw(isc::InvalidOperation, "No source to read tokens from");
|
||||
}
|
||||
@@ -178,7 +177,7 @@ MasterLexer::getNextToken(Options options) {
|
||||
impl_->has_previous_ = true;
|
||||
// Reset the token now. This is to check a token was actually produced.
|
||||
// This is debugging aid.
|
||||
impl_->token_ = Token(Token::NO_TOKEN_PRODUCED);
|
||||
impl_->token_ = MasterToken(MasterToken::NO_TOKEN_PRODUCED);
|
||||
// And get the token
|
||||
|
||||
// This actually handles EOF internally too.
|
||||
@@ -188,8 +187,62 @@ MasterLexer::getNextToken(Options options) {
|
||||
}
|
||||
// Make sure a token was produced. Since this Can Not Happen, we assert
|
||||
// here instead of throwing.
|
||||
assert(impl_->token_.getType() != Token::ERROR ||
|
||||
impl_->token_.getErrorCode() != Token::NO_TOKEN_PRODUCED);
|
||||
assert(impl_->token_.getType() != MasterToken::ERROR ||
|
||||
impl_->token_.getErrorCode() != MasterToken::NO_TOKEN_PRODUCED);
|
||||
return (impl_->token_);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Translate the token type a caller expects into the lexer option that has
/// to be passed to getNextToken(Options) in order to recognize it.
///
/// A plain STRING needs no special option (NONE); QSTRING and NUMBER map to
/// the lexer option of the same name.  Any other token type is rejected
/// with an InvalidParameter exception.
inline MasterLexer::Options
optionsForTokenType(MasterToken::Type expect) {
    if (expect == MasterToken::STRING) {
        return (MasterLexer::NONE);
    }
    if (expect == MasterToken::QSTRING) {
        return (MasterLexer::QSTRING);
    }
    if (expect == MasterToken::NUMBER) {
        return (MasterLexer::NUMBER);
    }

    // Nothing else can be requested through the "expected type" interface.
    isc_throw(InvalidParameter,
              "expected type for getNextToken not supported: " << expect);
}
|
||||
}
|
||||
|
||||
const MasterToken&
MasterLexer::getNextToken(MasterToken::Type expect, bool eol_ok) {
    // Run the generic version with the option matching the expected type.
    // Its result is left in impl_->token_, which is what we inspect below.
    getNextToken(optionsForTokenType(expect));

    const MasterToken& token = impl_->token_;
    const MasterToken::Type actual = token.getType();

    // Lexer-level errors are reported as exceptions.  Only an out-of-range
    // number is put back first, so the caller can re-read it.
    if (actual == MasterToken::ERROR) {
        if (token.getErrorCode() == MasterToken::NUMBER_OUT_OF_RANGE) {
            ungetToken();
        }
        throw LexerError(__FILE__, __LINE__, token);
    }

    const bool at_line_or_file_end =
        (actual == MasterToken::END_OF_LINE ||
         actual == MasterToken::END_OF_FILE);

    // The token is returned as is in three cases:
    // - end of line/file when the caller explicitly allows it,
    // - an unquoted string when a quoted string was asked for,
    // - an exact match with the expected type.
    const bool acceptable =
        (eol_ok && at_line_or_file_end) ||
        (actual == MasterToken::STRING && expect == MasterToken::QSTRING) ||
        (actual == expect);
    if (acceptable) {
        return (token);
    }

    // Unexpected type: give the token back and report the reason.
    ungetToken();
    if (at_line_or_file_end) {
        throw LexerError(__FILE__, __LINE__,
                         MasterToken(MasterToken::UNEXPECTED_END));
    }
    // The only remaining mismatch is a non-number where a number was
    // expected.
    assert(expect == MasterToken::NUMBER);
    throw LexerError(__FILE__, __LINE__,
                     MasterToken(MasterToken::BAD_NUMBER));
}
|
||||
|
||||
@@ -212,16 +265,17 @@ const char* const error_text[] = {
|
||||
"unexpected end of input", // UNEXPECTED_END
|
||||
"unbalanced quotes", // UNBALANCED_QUOTES
|
||||
"no token produced", // NO_TOKEN_PRODUCED
|
||||
"number out of range" // NUMBER_OUT_OF_RANGE
|
||||
"number out of range", // NUMBER_OUT_OF_RANGE
|
||||
"not a valid number" // BAD_NUMBER
|
||||
};
|
||||
const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
|
||||
} // end unnamed namespace
|
||||
|
||||
std::string
|
||||
MasterLexer::Token::getErrorText() const {
|
||||
MasterToken::getErrorText() const {
|
||||
if (type_ != ERROR) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getErrorText() for non error type");
|
||||
"MasterToken::getErrorText() for non error type");
|
||||
}
|
||||
|
||||
// The class integrity ensures the following:
|
||||
@@ -234,14 +288,12 @@ namespace master_lexer_internal {
|
||||
// Note that these need to be defined here so that they can refer to
|
||||
// the details of MasterLexerImpl.
|
||||
|
||||
typedef MasterLexer::Token Token; // convenience shortcut
|
||||
|
||||
bool
|
||||
State::wasLastEOL(const MasterLexer& lexer) const {
|
||||
return (lexer.impl_->last_was_eol_);
|
||||
}
|
||||
|
||||
const MasterLexer::Token&
|
||||
// Give read-only access to the token currently stored in the lexer's
// private implementation object.
const MasterToken&
State::getToken(const MasterLexer& lexer) const {
    const MasterToken& current = lexer.impl_->token_;
    return (current);
}
|
||||
@@ -271,7 +323,7 @@ public:
|
||||
if (c != '\n') {
|
||||
getLexerImpl(lexer)->source_->ungetChar();
|
||||
}
|
||||
getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
|
||||
getLexerImpl(lexer)->token_ = MasterToken(MasterToken::END_OF_LINE);
|
||||
getLexerImpl(lexer)->last_was_eol_ = true;
|
||||
}
|
||||
};
|
||||
@@ -342,24 +394,24 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
|
||||
if (c == InputSource::END_OF_STREAM) {
|
||||
lexerimpl.last_was_eol_ = false;
|
||||
if (paren_count != 0) {
|
||||
lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
|
||||
lexerimpl.token_ = MasterToken(MasterToken::UNBALANCED_PAREN);
|
||||
paren_count = 0; // reset to 0; this helps in lenient mode.
|
||||
return (NULL);
|
||||
}
|
||||
lexerimpl.token_ = Token(Token::END_OF_FILE);
|
||||
lexerimpl.token_ = MasterToken(MasterToken::END_OF_FILE);
|
||||
return (NULL);
|
||||
} else if (c == ' ' || c == '\t') {
|
||||
// If requested and we are not in (), recognize the initial space.
|
||||
if (lexerimpl.last_was_eol_ && paren_count == 0 &&
|
||||
(options & MasterLexer::INITIAL_WS) != 0) {
|
||||
lexerimpl.last_was_eol_ = false;
|
||||
lexerimpl.token_ = Token(Token::INITIAL_WS);
|
||||
lexerimpl.token_ = MasterToken(MasterToken::INITIAL_WS);
|
||||
return (NULL);
|
||||
}
|
||||
} else if (c == '\n') {
|
||||
lexerimpl.last_was_eol_ = true;
|
||||
if (paren_count == 0) { // we don't recognize EOL if we are in ()
|
||||
lexerimpl.token_ = Token(Token::END_OF_LINE);
|
||||
lexerimpl.token_ = MasterToken(MasterToken::END_OF_LINE);
|
||||
return (NULL);
|
||||
}
|
||||
} else if (c == '\r') {
|
||||
@@ -375,7 +427,7 @@ State::start(MasterLexer& lexer, MasterLexer::Options options) {
|
||||
} else if (c == ')') {
|
||||
lexerimpl.last_was_eol_ = false;
|
||||
if (paren_count == 0) {
|
||||
lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
|
||||
lexerimpl.token_ = MasterToken(MasterToken::UNBALANCED_PAREN);
|
||||
return (NULL);
|
||||
}
|
||||
--paren_count;
|
||||
@@ -407,7 +459,7 @@ String::handle(MasterLexer& lexer) const {
|
||||
if (getLexerImpl(lexer)->isTokenEnd(c, escaped)) {
|
||||
getLexerImpl(lexer)->source_->ungetChar();
|
||||
getLexerImpl(lexer)->token_ =
|
||||
MasterLexer::Token(&data.at(0), data.size());
|
||||
MasterToken(&data.at(0), data.size());
|
||||
return;
|
||||
}
|
||||
escaped = (c == '\\' && !escaped);
|
||||
@@ -417,7 +469,7 @@ String::handle(MasterLexer& lexer) const {
|
||||
|
||||
void
|
||||
QString::handle(MasterLexer& lexer) const {
|
||||
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
|
||||
MasterToken& token = getLexerImpl(lexer)->token_;
|
||||
std::vector<char>& data = getLexerImpl(lexer)->data_;
|
||||
data.clear();
|
||||
|
||||
@@ -425,7 +477,7 @@ QString::handle(MasterLexer& lexer) const {
|
||||
while (true) {
|
||||
const int c = getLexerImpl(lexer)->source_->getChar();
|
||||
if (c == InputSource::END_OF_STREAM) {
|
||||
token = Token(Token::UNEXPECTED_END);
|
||||
token = MasterToken(MasterToken::UNEXPECTED_END);
|
||||
return;
|
||||
} else if (c == '"') {
|
||||
if (escaped) {
|
||||
@@ -434,12 +486,12 @@ QString::handle(MasterLexer& lexer) const {
|
||||
escaped = false;
|
||||
data.back() = '"';
|
||||
} else {
|
||||
token = MasterLexer::Token(&data.at(0), data.size(), true);
|
||||
token = MasterToken(&data.at(0), data.size(), true);
|
||||
return;
|
||||
}
|
||||
} else if (c == '\n' && !escaped) {
|
||||
getLexerImpl(lexer)->source_->ungetChar();
|
||||
token = Token(Token::UNBALANCED_QUOTES);
|
||||
token = MasterToken(MasterToken::UNBALANCED_QUOTES);
|
||||
return;
|
||||
} else {
|
||||
escaped = (c == '\\' && !escaped);
|
||||
@@ -450,7 +502,7 @@ QString::handle(MasterLexer& lexer) const {
|
||||
|
||||
void
|
||||
Number::handle(MasterLexer& lexer) const {
|
||||
MasterLexer::Token& token = getLexerImpl(lexer)->token_;
|
||||
MasterToken& token = getLexerImpl(lexer)->token_;
|
||||
|
||||
// It may yet turn out to be a string, so we first
|
||||
// collect all the data
|
||||
@@ -470,15 +522,14 @@ Number::handle(MasterLexer& lexer) const {
|
||||
try {
|
||||
const uint32_t number32 =
|
||||
boost::lexical_cast<uint32_t, const char*>(&data[0]);
|
||||
token = MasterLexer::Token(number32);
|
||||
token = MasterToken(number32);
|
||||
} catch (const boost::bad_lexical_cast&) {
|
||||
// Since we already know we have only digits,
|
||||
// range should be the only possible problem.
|
||||
token = Token(Token::NUMBER_OUT_OF_RANGE);
|
||||
token = MasterToken(MasterToken::NUMBER_OUT_OF_RANGE);
|
||||
}
|
||||
} else {
|
||||
token = MasterLexer::Token(&data.at(0),
|
||||
data.size());
|
||||
token = MasterToken(&data.at(0), data.size());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@@ -28,6 +28,236 @@ namespace master_lexer_internal {
|
||||
class State;
|
||||
}
|
||||
|
||||
/// \brief Tokens for \c MasterLexer
|
||||
///
|
||||
/// This is a simple value-class encapsulating a type of a lexer token and
|
||||
/// (if it has a value) its value. Essentially, the class provides
|
||||
/// constructors corresponding to different types of tokens, and corresponding
|
||||
/// getter methods. The type and value are fixed at the time of construction
|
||||
/// and will never be modified throughout the lifetime of the object.
|
||||
/// The getter methods are still provided to maximize the safety; an
|
||||
/// application cannot refer to a value that is invalid for the type of token.
|
||||
///
|
||||
/// This class is intentionally implemented as copyable and assignable
|
||||
/// (using the default version of copy constructor and assignment operator),
|
||||
/// but it's mainly for internal implementation convenience. Applications will
|
||||
/// simply refer to Token object as a reference via the \c MasterLexer class.
|
||||
class MasterToken {
|
||||
public:
|
||||
/// \brief Enumeration for token types
|
||||
///
|
||||
/// \note At the time of initial implementation, all numeric tokens
|
||||
/// that would be extracted from \c MasterLexer should be represented
|
||||
/// as an unsigned 32-bit integer. If we see the need for larger integers
|
||||
/// or negative numbers, we can then extend the token types.
|
||||
enum Type {
|
||||
END_OF_LINE, ///< End of line detected
|
||||
END_OF_FILE, ///< End of file detected
|
||||
INITIAL_WS, ///< White spaces at the beginning of a line after an
|
||||
///< end of line (if asked for detecting it)
|
||||
NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
|
||||
/// no-value (type only) types.
|
||||
/// Mainly for internal use.
|
||||
STRING, ///< A single string
|
||||
QSTRING, ///< A single string quoted by double-quotes (").
|
||||
NUMBER, ///< A decimal number (unsigned 32-bit)
|
||||
ERROR ///< Error detected in getting a token
|
||||
};
|
||||
|
||||
/// \brief Enumeration for lexer error codes
|
||||
enum ErrorCode {
|
||||
NOT_STARTED, ///< The lexer is just initialized and has no token
|
||||
UNBALANCED_PAREN, ///< Unbalanced parentheses detected
|
||||
UNEXPECTED_END, ///< The lexer reaches the end of line or file
|
||||
/// unexpectedly
|
||||
UNBALANCED_QUOTES, ///< Unbalanced quotations detected
|
||||
NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
|
||||
/// error and should never get out of the lexer.
|
||||
NUMBER_OUT_OF_RANGE, ///< Number was out of range
|
||||
BAD_NUMBER, ///< Number is expected but not recognized
|
||||
MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
|
||||
/// (excluding this one). Mainly for internal use.
|
||||
};
|
||||
|
||||
/// \brief A simple representation of a range of a string.
|
||||
///
|
||||
/// This is a straightforward pair of the start pointer of a string
|
||||
/// and its length. The \c STRING and \c QSTRING types of tokens
|
||||
/// will be primarily represented in this form.
|
||||
///
|
||||
/// Any character can be stored in the valid range of the region.
|
||||
/// In particular, there can be a nul character (\0) in the middle of
|
||||
/// the region. On the other hand, it is not ensured that the string
|
||||
/// is nul-terminated. So the usual string manipulation API may not work
|
||||
/// as expected.
|
||||
struct StringRegion {
|
||||
const char* beg; ///< The start address of the string
|
||||
size_t len; ///< The length of the string in bytes
|
||||
};
|
||||
|
||||
/// \brief Constructor for non-value type of token.
|
||||
///
|
||||
/// \throw InvalidParameter A value type token is specified.
|
||||
/// \param type The type of the token. It must indicate a non-value
|
||||
/// type (not larger than \c NOVALUE_TYPE_MAX).
|
||||
explicit MasterToken(Type type) : type_(type) {
|
||||
if (type > NOVALUE_TYPE_MAX) {
|
||||
isc_throw(InvalidParameter, "Token per-type constructor "
|
||||
"called with invalid type: " << type);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Constructor for string and quoted-string types of token.
|
||||
///
|
||||
/// The optional \c quoted parameter specifies whether it's a quoted or
|
||||
/// non quoted string.
|
||||
///
|
||||
/// The string is specified as a pair of a pointer to the start address
|
||||
/// and its length. Any character can be contained in any position of
|
||||
/// the valid range (see \c StringRegion).
|
||||
///
|
||||
/// When it's a quoted string, the quotation marks must be excluded
|
||||
/// from the specified range.
|
||||
///
|
||||
/// \param str_beg The start address of the string
|
||||
/// \param str_len The size of the string in bytes
|
||||
/// \param quoted true if it's a quoted string; false otherwise.
|
||||
MasterToken(const char* str_beg, size_t str_len, bool quoted = false) :
|
||||
type_(quoted ? QSTRING : STRING)
|
||||
{
|
||||
val_.str_region_.beg = str_beg;
|
||||
val_.str_region_.len = str_len;
|
||||
}
|
||||
|
||||
/// \brief Constructor for number type of token.
|
||||
///
|
||||
/// \brief number An unsigned 32-bit integer corresponding to the token
|
||||
/// value.
|
||||
explicit MasterToken(uint32_t number) : type_(NUMBER) {
|
||||
val_.number_ = number;
|
||||
}
|
||||
|
||||
/// \brief Constructor for error type of token.
|
||||
///
|
||||
/// \throw InvalidParameter Invalid error code value is specified.
|
||||
/// \brief error_code A pre-defined constant of \c ErrorCode.
|
||||
explicit MasterToken(ErrorCode error_code) : type_(ERROR) {
|
||||
if (!(error_code < MAX_ERROR_CODE)) {
|
||||
isc_throw(InvalidParameter, "Invalid master lexer error code: "
|
||||
<< error_code);
|
||||
}
|
||||
val_.error_code_ = error_code;
|
||||
}
|
||||
|
||||
/// \brief Return the token type.
|
||||
///
|
||||
/// \throw none
|
||||
Type getType() const { return (type_); }
|
||||
|
||||
/// \brief Return the value of a string-variant token.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant types of token.
|
||||
/// \return A reference to \c StringRegion corresponding to the string
|
||||
/// token value.
|
||||
const StringRegion& getStringRegion() const {
|
||||
if (type_ != STRING && type_ != QSTRING) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getStringRegion() for non string-variant type");
|
||||
}
|
||||
return (val_.str_region_);
|
||||
}
|
||||
|
||||
/// \brief Return the value of a string-variant token as a string object.
|
||||
///
|
||||
/// Note that the underlying string may contain a nul (\0) character
|
||||
/// in the middle. The returned string object will contain all characters
|
||||
/// of the valid range of the underlying string. So some string
|
||||
/// operations such as c_str() may not work as expected.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant types of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
/// \return A std::string object corresponding to the string token value.
|
||||
std::string getString() const {
|
||||
std::string ret;
|
||||
getString(ret);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/// \brief Fill in a string with the value of a string-variant token.
|
||||
///
|
||||
/// This is similar to the other version of \c getString(), but
|
||||
/// the caller is supposed to pass a placeholder string object.
|
||||
/// This will be more efficient if the caller uses the same
|
||||
/// \c MasterLexer repeatedly and needs to get string token in the
|
||||
/// form of a string object many times as this version could reuse
|
||||
/// the existing internal storage of the passed string.
|
||||
///
|
||||
/// Any existing content of the passed string will be removed.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant types of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
///
|
||||
/// \param ret A string object to be filled with the token string.
|
||||
void getString(std::string& ret) const {
|
||||
if (type_ != STRING && type_ != QSTRING) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getString() for non string-variant type");
|
||||
}
|
||||
ret.assign(val_.str_region_.beg,
|
||||
val_.str_region_.beg + val_.str_region_.len);
|
||||
}
|
||||
|
||||
/// \brief Return the value of a string-variant token as a string object.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non number type of token.
|
||||
/// \return The integer corresponding to the number token value.
|
||||
uint32_t getNumber() const {
|
||||
if (type_ != NUMBER) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getNumber() for non number type");
|
||||
}
|
||||
return (val_.number_);
|
||||
}
|
||||
|
||||
/// \brief Return the error code of a error type token.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non error type of token.
|
||||
/// \return The error code of the token.
|
||||
ErrorCode getErrorCode() const {
|
||||
if (type_ != ERROR) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getErrorCode() for non error type");
|
||||
}
|
||||
return (val_.error_code_);
|
||||
};
|
||||
|
||||
/// \brief Return a textual description of the error of a error type token.
|
||||
///
|
||||
/// The returned string would be useful to produce a log message when
|
||||
/// a zone file parser encounters an error.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non error type of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
/// \return A string object that describes the meaning of the error.
|
||||
std::string getErrorText() const;
|
||||
|
||||
private:
|
||||
Type type_; // this is not const so the class can be assignable
|
||||
|
||||
// We use a union to represent different types of token values via the
|
||||
// unified Token class. The class integrity should ensure valid operation
|
||||
// on the union; getter methods should only refer to the member set at
|
||||
// the construction.
|
||||
union {
|
||||
StringRegion str_region_;
|
||||
uint32_t number_;
|
||||
ErrorCode error_code_;
|
||||
} val_;
|
||||
};
|
||||
|
||||
/// \brief Tokenizer for parsing DNS master files.
|
||||
///
|
||||
/// The \c MasterLexer class provides tokenize interfaces for parsing DNS
|
||||
@@ -71,13 +301,28 @@ class MasterLexer {
|
||||
public:
|
||||
/// \brief Exception thrown when we fail to read from the input
|
||||
/// stream or file.
|
||||
struct ReadError : public Unexpected {
|
||||
class ReadError : public Unexpected {
|
||||
public:
|
||||
ReadError(const char* file, size_t line, const char* what) :
|
||||
Unexpected(file, line, what)
|
||||
{}
|
||||
};
|
||||
|
||||
class Token; // we define it separately for better readability
|
||||
/// \brief Exception thrown from a wrapper version of
|
||||
/// \c MasterLexer::getNextToken() for non fatal errors.
|
||||
///
|
||||
/// See the method description for more details.
|
||||
///
|
||||
/// The \c token_ member variable (read-only) is set to a \c MasterToken
|
||||
/// object of type ERROR indicating the reason for the error.
|
||||
class LexerError : public Exception {
|
||||
public:
|
||||
LexerError(const char* file, size_t line, MasterToken error_token) :
|
||||
Exception(file, line, error_token.getErrorText().c_str()),
|
||||
token_(error_token)
|
||||
{}
|
||||
const MasterToken token_;
|
||||
};
|
||||
|
||||
/// \brief Options for getNextToken.
|
||||
///
|
||||
@@ -213,7 +458,77 @@ public:
|
||||
/// source (eg. I/O error in the file on the disk).
|
||||
/// \throw std::bad_alloc in case allocation of some internal resources
|
||||
/// or the token fail.
|
||||
const Token& getNextToken(Options options = NONE);
|
||||
const MasterToken& getNextToken(Options options = NONE);
|
||||
|
||||
/// \brief Parse the input for the expected type of token.
|
||||
///
|
||||
/// This method is a wrapper of the other version, customized for the case
|
||||
/// where a particular type of token is expected as the next one.
|
||||
/// More specifically, it's intended to be used to get tokens for RDATA
|
||||
/// fields. Since most RDATA types are of fixed format, the token type is
|
||||
/// often predictable and the method interface can be simplified.
|
||||
///
|
||||
/// This method basically works as follows: it gets the type of the
|
||||
/// expected token, calls the other version of \c getNextToken(Options),
|
||||
/// and returns the token if it's of the expected type (due to the usage
|
||||
/// assumption this should be normally the case). There are some non
|
||||
/// trivial details though:
|
||||
///
|
||||
/// - If the expected type is MasterToken::QSTRING, both quoted and
|
||||
/// unquoted strings are recognized and returned.
|
||||
/// - If the optional \c eol_ok parameter is \c true (very rare case),
|
||||
/// MasterToken::END_OF_LINE and MasterToken::END_OF_FILE are recognized
|
||||
/// and returned if they are found instead of the expected type of
|
||||
/// token.
|
||||
/// - If the next token is not of the expected type (including the case
|
||||
/// a number is expected but it's out of range), ungetToken() is
|
||||
/// internally called so the caller can re-read that token.
|
||||
/// - If other types or errors (such as unbalanced parentheses) are
|
||||
/// detected, the erroneous part isn't "ungotten"; the caller can
|
||||
/// continue parsing after that part.
|
||||
///
|
||||
/// In some very rare cases where the RDATA has an optional trailing field,
|
||||
/// the \c eol_ok parameter would be set to \c true. This way the caller
|
||||
/// can handle both cases (the field does or does not exist) by a single
|
||||
/// call to this method. In all other cases \c eol_ok should be set to
|
||||
/// \c false, and that is the default and can be omitted.
|
||||
///
|
||||
/// Unlike the other version of \c getNextToken(Options), this method
|
||||
/// throws an exception of type \c LexerError for non fatal errors such as
|
||||
/// broken syntax or encountering an unexpected type of token. This way
|
||||
/// the caller can write RDATA parser code without bothering to handle
|
||||
/// errors for each field. For example, pseudo parser code for MX RDATA
|
||||
/// would look like this:
|
||||
/// \code
|
||||
/// const uint32_t pref =
|
||||
/// lexer.getNextToken(MasterToken::NUMBER).getNumber();
|
||||
/// // check if pref is in the uint16_t range; no other check is needed.
|
||||
/// const Name mx(lexer.getNextToken(MasterToken::STRING).getString());
|
||||
/// \endcode
|
||||
///
|
||||
/// In the case where \c LexerError exception is thrown, it's expected
|
||||
/// to be handled comprehensively for the parser of the RDATA or at a
|
||||
/// higher layer. The \c token_ member variable of the corresponding
|
||||
/// \c LexerError exception object stores a token of type
|
||||
/// \c MasterToken::ERROR that indicates the reason for the error.
|
||||
///
|
||||
/// Due to the specific intended usage of this method, only a subset
|
||||
/// of \c MasterToken::Type values are acceptable for the \c expect
|
||||
/// parameter: \c MasterToken::STRING, \c MasterToken::QSTRING, and
|
||||
/// \c MasterToken::NUMBER. Specifying other values will result in
|
||||
/// an \c InvalidParameter exception.
|
||||
///
|
||||
/// \throw InvalidParameter The expected token type is not allowed for
|
||||
/// this method.
|
||||
/// \throw LexerError The lexer finds a non fatal error or it finds an
/// unexpected type of token.
|
||||
/// \throw other Anything the other version of getNextToken() can throw.
|
||||
///
|
||||
/// \param expect Expected type of token. Must be either STRING, QSTRING,
|
||||
/// or NUMBER.
|
||||
/// \param eol_ok \c true iff END_OF_LINE or END_OF_FILE is acceptable.
|
||||
/// \return The expected type of token.
|
||||
const MasterToken& getNextToken(MasterToken::Type expect,
|
||||
bool eol_ok = false);
|
||||
|
||||
/// \brief Return the last token back to the lexer.
|
||||
///
|
||||
@@ -247,235 +562,6 @@ operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
|
||||
static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
|
||||
}
|
||||
|
||||
/// \brief Tokens for \c MasterLexer
|
||||
///
|
||||
/// This is a simple value-class encapsulating a type of a lexer token and
|
||||
/// (if it has a value) its value. Essentially, the class provides
|
||||
/// constructors corresponding to different types of tokens, and corresponding
|
||||
/// getter methods. The type and value are fixed at the time of construction
|
||||
/// and will never be modified throughout the lifetime of the object.
|
||||
/// The getter methods are still provided to maximize the safety; an
|
||||
/// application cannot refer to a value that is invalid for the type of token.
|
||||
///
|
||||
/// This class is intentionally implemented as copyable and assignable
|
||||
/// (using the default version of copy constructor and assignment operator),
|
||||
/// but it's mainly for internal implementation convenience. Applications will
|
||||
/// simply refer to Token object as a reference via the \c MasterLexer class.
|
||||
class MasterLexer::Token {
|
||||
public:
|
||||
/// \brief Enumeration for token types
|
||||
///
|
||||
/// \note At the time of initial implementation, all numeric tokens
|
||||
/// that would be extracted from \c MasterLexer should be represented
|
||||
/// as an unsigned 32-bit integer. If we see the need for larger integers
|
||||
/// or negative numbers, we can then extend the token types.
|
||||
enum Type {
|
||||
END_OF_LINE, ///< End of line detected
|
||||
END_OF_FILE, ///< End of file detected
|
||||
INITIAL_WS, ///< White spaces at the beginning of a line after an
|
||||
///< end of line (if asked for detecting it)
|
||||
NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
|
||||
/// no-value (type only) types.
|
||||
/// Mainly for internal use.
|
||||
STRING, ///< A single string
|
||||
QSTRING, ///< A single string quoted by double-quotes (").
|
||||
NUMBER, ///< A decimal number (unsigned 32-bit)
|
||||
ERROR ///< Error detected in getting a token
|
||||
};
|
||||
|
||||
/// \brief Enumeration for lexer error codes
|
||||
enum ErrorCode {
|
||||
NOT_STARTED, ///< The lexer is just initialized and has no token
|
||||
UNBALANCED_PAREN, ///< Unbalanced parentheses detected
|
||||
UNEXPECTED_END, ///< The lexer reaches the end of line or file
|
||||
/// unexpectedly
|
||||
UNBALANCED_QUOTES, ///< Unbalanced quotations detected
|
||||
NO_TOKEN_PRODUCED, ///< No token was produced. This means programmer
|
||||
/// error and should never get out of the lexer.
|
||||
NUMBER_OUT_OF_RANGE, ///< Number was out of range
|
||||
MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
|
||||
/// (excluding this one). Mainly for internal use.
|
||||
};
|
||||
|
||||
/// \brief A simple representation of a range of a string.
|
||||
///
|
||||
/// This is a straightforward pair of the start pointer of a string
|
||||
/// and its length. The \c STRING and \c QSTRING types of tokens
|
||||
/// will be primarily represented in this form.
|
||||
///
|
||||
/// Any character can be stored in the valid range of the region.
|
||||
/// In particular, there can be a nul character (\0) in the middle of
|
||||
/// the region. On the other hand, it is not ensured that the string
|
||||
/// is nul-terminated. So the usual string manipulation API may not work
|
||||
/// as expected.
|
||||
struct StringRegion {
|
||||
const char* beg; ///< The start address of the string
|
||||
size_t len; ///< The length of the string in bytes
|
||||
};
|
||||
|
||||
/// \brief Constructor for non-value type of token.
|
||||
///
|
||||
/// \throw InvalidParameter A value type token is specified.
|
||||
/// \param type The type of the token. It must indicate a non-value
|
||||
/// type (not larger than \c NOVALUE_TYPE_MAX).
|
||||
explicit Token(Type type) : type_(type) {
|
||||
if (type > NOVALUE_TYPE_MAX) {
|
||||
isc_throw(InvalidParameter, "Token per-type constructor "
|
||||
"called with invalid type: " << type);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Constructor for string and quoted-string types of token.
|
||||
///
|
||||
/// The optional \c quoted parameter specifies whether it's a quoted or
|
||||
/// non quoted string.
|
||||
///
|
||||
/// The string is specified as a pair of a pointer to the start address
|
||||
/// and its length. Any character can be contained in any position of
|
||||
/// the valid range (see \c StringRegion).
|
||||
///
|
||||
/// When it's a quoted string, the quotation marks must be excluded
|
||||
/// from the specified range.
|
||||
///
|
||||
/// \param str_beg The start address of the string
|
||||
/// \param str_len The size of the string in bytes
|
||||
/// \param quoted true if it's a quoted string; false otherwise.
|
||||
Token(const char* str_beg, size_t str_len, bool quoted = false) :
|
||||
type_(quoted ? QSTRING : STRING)
|
||||
{
|
||||
val_.str_region_.beg = str_beg;
|
||||
val_.str_region_.len = str_len;
|
||||
}
|
||||
|
||||
/// \brief Constructor for number type of token.
|
||||
///
|
||||
/// \brief number An unsigned 32-bit integer corresponding to the token
|
||||
/// value.
|
||||
explicit Token(uint32_t number) : type_(NUMBER) {
|
||||
val_.number_ = number;
|
||||
}
|
||||
|
||||
/// \brief Constructor for error type of token.
|
||||
///
|
||||
/// \throw InvalidParameter Invalid error code value is specified.
|
||||
/// \param error_code A pre-defined constant of \c ErrorCode.
|
||||
explicit Token(ErrorCode error_code) : type_(ERROR) {
    // Only codes strictly below MAX_ERROR_CODE are accepted.
    if (error_code >= MAX_ERROR_CODE) {
        isc_throw(InvalidParameter, "Invalid master lexer error code: "
                  << error_code);
    }
    val_.error_code_ = error_code;
}
|
||||
|
||||
/// \brief Return the token type.
|
||||
///
|
||||
/// \throw none
|
||||
Type getType() const {
    // Plain accessor for the type fixed at construction (or assignment).
    return (type_);
}
|
||||
|
||||
/// \brief Return the value of a string-variant token.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant type of token.
|
||||
/// \return A reference to \c StringRegion corresponding to the string
|
||||
/// token value.
|
||||
const StringRegion& getStringRegion() const {
    // Only STRING and QSTRING tokens may be queried for a string region.
    const bool is_string_variant = (type_ == STRING || type_ == QSTRING);
    if (!is_string_variant) {
        isc_throw(InvalidOperation,
                  "Token::getStringRegion() for non string-variant type");
    }
    return (val_.str_region_);
}
|
||||
|
||||
/// \brief Return the value of a string-variant token as a string object.
|
||||
///
|
||||
/// Note that the underlying string may contain a nul (\0) character
|
||||
/// in the middle. The returned string object will contain all characters
|
||||
/// of the valid range of the underlying string. So some string
|
||||
/// operations such as c_str() may not work as expected.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant type of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
/// \return A std::string object corresponding to the string token value.
|
||||
std::string getString() const {
|
||||
std::string ret;
|
||||
getString(ret);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/// \brief Fill in a string with the value of a string-variant token.
|
||||
///
|
||||
/// This is similar to the other version of \c getString(), but
|
||||
/// the caller is supposed to pass a placeholder string object.
|
||||
/// This will be more efficient if the caller uses the same
|
||||
/// \c MasterLexer repeatedly and needs to get string token in the
|
||||
/// form of a string object many times as this version could reuse
|
||||
/// the existing internal storage of the passed string.
|
||||
///
|
||||
/// Any existing content of the passed string will be removed.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non string-variant type of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
///
|
||||
/// \param ret A string object to be filled with the token string.
|
||||
void getString(std::string& ret) const {
|
||||
if (type_ != STRING && type_ != QSTRING) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getString() for non string-variant type");
|
||||
}
|
||||
ret.assign(val_.str_region_.beg,
|
||||
val_.str_region_.beg + val_.str_region_.len);
|
||||
}
|
||||
|
||||
/// \brief Return the value of a number type token as an integer.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non number type of token.
|
||||
/// \return The integer corresponding to the number token value.
|
||||
uint32_t getNumber() const {
|
||||
if (type_ != NUMBER) {
|
||||
isc_throw(InvalidOperation,
|
||||
"Token::getNumber() for non number type");
|
||||
}
|
||||
return (val_.number_);
|
||||
}
|
||||
|
||||
/// \brief Return the error code of an error type token.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non error type of token.
|
||||
/// \return The error code of the token.
|
||||
ErrorCode getErrorCode() const {
    // The error code is only meaningful for ERROR tokens; any other type
    // is reported as an invalid operation.
    if (type_ != ERROR) {
        isc_throw(InvalidOperation,
                  "Token::getErrorCode() for non error type");
    }
    return (val_.error_code_);
}
|
||||
|
||||
/// \brief Return a textual description of the error of an error type token.
|
||||
///
|
||||
/// The returned string would be useful to produce a log message when
|
||||
/// a zone file parser encounters an error.
|
||||
///
|
||||
/// \throw InvalidOperation Called on a non error type of token.
|
||||
/// \throw std::bad_alloc Resource allocation failure in constructing the
|
||||
/// string object.
|
||||
/// \return A string object that describes the meaning of the error.
|
||||
std::string getErrorText() const;
|
||||
|
||||
private:
|
||||
Type type_; // this is not const so the class can be assignable
|
||||
|
||||
// We use a union to represent different types of token values via the
|
||||
// unified Token class. The class integrity should ensure valid operation
|
||||
// on the union; getter methods should only refer to the member set at
|
||||
// the construction.
|
||||
union {
|
||||
StringRegion str_region_;
|
||||
uint32_t number_;
|
||||
ErrorCode error_code_;
|
||||
} val_;
|
||||
};
|
||||
|
||||
} // namespace dns
|
||||
} // namespace isc
|
||||
#endif // MASTER_LEXER_H
|
||||
|
@@ -43,10 +43,10 @@ namespace master_lexer_internal {
|
||||
/// state, so it makes more sense to separate the interface for the transition
|
||||
/// from the initial state.
|
||||
///
|
||||
/// When an object of a specific state class completes the session, it
|
||||
/// normally sets the identified token in the lexer, and returns NULL;
|
||||
/// if more transition is necessary, it returns a pointer to the next state
|
||||
/// object.
|
||||
/// If the whole lexer transition is completed within start(), it sets the
|
||||
/// identified token and returns NULL; otherwise it returns a pointer to
|
||||
/// an object of a specific state class that completes the session
|
||||
/// on the call of handle().
|
||||
///
|
||||
/// As is usual in the state design pattern, the \c State class is made
|
||||
/// a friend class of \c MasterLexer and can refer to its internal details.
|
||||
@@ -119,7 +119,7 @@ public:
|
||||
/// purposes.
|
||||
///@{
|
||||
bool wasLastEOL(const MasterLexer& lexer) const;
|
||||
const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
|
||||
const MasterToken& getToken(const MasterLexer& lexer) const;
|
||||
size_t getParenCount(const MasterLexer& lexer) const;
|
||||
///@}
|
||||
|
||||
|
@@ -24,7 +24,7 @@ using namespace isc::dns;
|
||||
using namespace master_lexer_internal;
|
||||
|
||||
namespace {
|
||||
typedef MasterLexer::Token Token; // shortcut
|
||||
typedef MasterToken Token; // shortcut
|
||||
|
||||
class MasterLexerStateTest : public ::testing::Test {
|
||||
protected:
|
||||
@@ -260,7 +260,7 @@ TEST_F(MasterLexerStateTest, crlf) {
|
||||
// Commonly used check for string related test cases, checking if the given
|
||||
// token has expected values.
|
||||
void
|
||||
stringTokenCheck(const std::string& expected, const MasterLexer::Token& token,
|
||||
stringTokenCheck(const std::string& expected, const MasterToken& token,
|
||||
bool quoted = false)
|
||||
{
|
||||
EXPECT_EQ(quoted ? Token::QSTRING : Token::STRING, token.getType());
|
||||
|
@@ -31,27 +31,27 @@ const size_t TEST_STRING_LEN = sizeof(TEST_STRING) - 1;
|
||||
class MasterLexerTokenTest : public ::testing::Test {
|
||||
protected:
|
||||
MasterLexerTokenTest() :
|
||||
token_eof(MasterLexer::Token::END_OF_FILE),
|
||||
token_eof(MasterToken::END_OF_FILE),
|
||||
token_str(TEST_STRING, TEST_STRING_LEN),
|
||||
token_num(42),
|
||||
token_err(MasterLexer::Token::UNEXPECTED_END)
|
||||
token_err(MasterToken::UNEXPECTED_END)
|
||||
{}
|
||||
|
||||
const MasterLexer::Token token_eof; // an example of non-value type token
|
||||
const MasterLexer::Token token_str;
|
||||
const MasterLexer::Token token_num;
|
||||
const MasterLexer::Token token_err;
|
||||
const MasterToken token_eof; // an example of non-value type token
|
||||
const MasterToken token_str;
|
||||
const MasterToken token_num;
|
||||
const MasterToken token_err;
|
||||
};
|
||||
|
||||
|
||||
TEST_F(MasterLexerTokenTest, strings) {
|
||||
// basic construction and getter checks
|
||||
EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
|
||||
EXPECT_EQ(MasterToken::STRING, token_str.getType());
|
||||
EXPECT_EQ(std::string("string token"), token_str.getString());
|
||||
std::string strval = "dummy"; // this should be replaced
|
||||
token_str.getString(strval);
|
||||
EXPECT_EQ(std::string("string token"), strval);
|
||||
const MasterLexer::Token::StringRegion str_region =
|
||||
const MasterToken::StringRegion str_region =
|
||||
token_str.getStringRegion();
|
||||
EXPECT_EQ(TEST_STRING, str_region.beg);
|
||||
EXPECT_EQ(TEST_STRING_LEN, str_region.len);
|
||||
@@ -62,17 +62,17 @@ TEST_F(MasterLexerTokenTest, strings) {
|
||||
std::string expected_str("string token");
|
||||
expected_str.push_back('\0');
|
||||
EXPECT_EQ(expected_str,
|
||||
MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
|
||||
MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString(strval);
|
||||
MasterToken(TEST_STRING, TEST_STRING_LEN + 1).getString());
|
||||
MasterToken(TEST_STRING, TEST_STRING_LEN + 1).getString(strval);
|
||||
EXPECT_EQ(expected_str, strval);
|
||||
|
||||
// Construct type of qstring
|
||||
EXPECT_EQ(MasterLexer::Token::QSTRING,
|
||||
MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), true).
|
||||
EXPECT_EQ(MasterToken::QSTRING,
|
||||
MasterToken(TEST_STRING, sizeof(TEST_STRING), true).
|
||||
getType());
|
||||
// if we explicitly set 'quoted' to false, it should be normal string
|
||||
EXPECT_EQ(MasterLexer::Token::STRING,
|
||||
MasterLexer::Token(TEST_STRING, sizeof(TEST_STRING), false).
|
||||
EXPECT_EQ(MasterToken::STRING,
|
||||
MasterToken(TEST_STRING, sizeof(TEST_STRING), false).
|
||||
getType());
|
||||
|
||||
// getString/StringRegion() aren't allowed for non string(-variant) types
|
||||
@@ -86,23 +86,23 @@ TEST_F(MasterLexerTokenTest, strings) {
|
||||
|
||||
TEST_F(MasterLexerTokenTest, numbers) {
|
||||
EXPECT_EQ(42, token_num.getNumber());
|
||||
EXPECT_EQ(MasterLexer::Token::NUMBER, token_num.getType());
|
||||
EXPECT_EQ(MasterToken::NUMBER, token_num.getType());
|
||||
|
||||
// It's copyable and assignable.
|
||||
MasterLexer::Token token(token_num);
|
||||
MasterToken token(token_num);
|
||||
EXPECT_EQ(42, token.getNumber());
|
||||
EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
|
||||
EXPECT_EQ(MasterToken::NUMBER, token.getType());
|
||||
|
||||
token = token_num;
|
||||
EXPECT_EQ(42, token.getNumber());
|
||||
EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
|
||||
EXPECT_EQ(MasterToken::NUMBER, token.getType());
|
||||
|
||||
// it's okay to replace it with a different type of token
|
||||
token = token_eof;
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token.getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, token.getType());
|
||||
|
||||
// Possible max value
|
||||
token = MasterLexer::Token(0xffffffff);
|
||||
token = MasterToken(0xffffffff);
|
||||
EXPECT_EQ(4294967295u, token.getNumber());
|
||||
|
||||
// getNumber() isn't allowed for non number types
|
||||
@@ -112,58 +112,52 @@ TEST_F(MasterLexerTokenTest, numbers) {
|
||||
|
||||
TEST_F(MasterLexerTokenTest, novalues) {
|
||||
// Just checking we can construct them and getType() returns correct value.
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token_eof.getType());
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE,
|
||||
MasterLexer::Token(MasterLexer::Token::END_OF_LINE).getType());
|
||||
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
|
||||
MasterLexer::Token(MasterLexer::Token::INITIAL_WS).getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, token_eof.getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE,
|
||||
MasterToken(MasterToken::END_OF_LINE).getType());
|
||||
EXPECT_EQ(MasterToken::INITIAL_WS,
|
||||
MasterToken(MasterToken::INITIAL_WS).getType());
|
||||
|
||||
// Special types of tokens cannot have value-based types
|
||||
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::STRING),
|
||||
isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::QSTRING),
|
||||
isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::NUMBER),
|
||||
isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::ERROR),
|
||||
isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterToken t(MasterToken::STRING), isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterToken t(MasterToken::QSTRING), isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterToken t(MasterToken::NUMBER), isc::InvalidParameter);
|
||||
EXPECT_THROW(MasterToken t(MasterToken::ERROR), isc::InvalidParameter);
|
||||
}
|
||||
|
||||
TEST_F(MasterLexerTokenTest, errors) {
|
||||
EXPECT_EQ(MasterLexer::Token::ERROR, token_err.getType());
|
||||
EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END, token_err.getErrorCode());
|
||||
EXPECT_EQ(MasterToken::ERROR, token_err.getType());
|
||||
EXPECT_EQ(MasterToken::UNEXPECTED_END, token_err.getErrorCode());
|
||||
EXPECT_EQ("unexpected end of input", token_err.getErrorText());
|
||||
EXPECT_EQ("lexer not started",
|
||||
MasterLexer::Token(MasterLexer::Token::NOT_STARTED).
|
||||
EXPECT_EQ("lexer not started", MasterToken(MasterToken::NOT_STARTED).
|
||||
getErrorText());
|
||||
EXPECT_EQ("unbalanced parentheses",
|
||||
MasterLexer::Token(MasterLexer::Token::UNBALANCED_PAREN).
|
||||
MasterToken(MasterToken::UNBALANCED_PAREN).
|
||||
getErrorText());
|
||||
EXPECT_EQ("unbalanced quotes",
|
||||
MasterLexer::Token(MasterLexer::Token::UNBALANCED_QUOTES).
|
||||
EXPECT_EQ("unbalanced quotes", MasterToken(MasterToken::UNBALANCED_QUOTES).
|
||||
getErrorText());
|
||||
EXPECT_EQ("no token produced",
|
||||
MasterLexer::Token(MasterLexer::Token::NO_TOKEN_PRODUCED).
|
||||
EXPECT_EQ("no token produced", MasterToken(MasterToken::NO_TOKEN_PRODUCED).
|
||||
getErrorText());
|
||||
EXPECT_EQ("number out of range",
|
||||
MasterLexer::Token(MasterLexer::Token::NUMBER_OUT_OF_RANGE).
|
||||
MasterToken(MasterToken::NUMBER_OUT_OF_RANGE).
|
||||
getErrorText());
|
||||
EXPECT_EQ("not a valid number",
|
||||
MasterToken(MasterToken::BAD_NUMBER).getErrorText());
|
||||
|
||||
// getErrorCode/Text() isn't allowed for non error types
|
||||
EXPECT_THROW(token_num.getErrorCode(), isc::InvalidOperation);
|
||||
EXPECT_THROW(token_num.getErrorText(), isc::InvalidOperation);
|
||||
|
||||
// Only the pre-defined error code is accepted. Hardcoding '6' (max code
|
||||
// Only the pre-defined error code is accepted. Hardcoding '7' (max code
|
||||
// + 1) is intentional; it'd be actually better if we notice it when we
|
||||
// update the enum list (which shouldn't happen too often).
|
||||
EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(6)),
|
||||
EXPECT_THROW(MasterToken(MasterToken::ErrorCode(7)),
|
||||
isc::InvalidParameter);
|
||||
|
||||
// Check the coexistence of "from number" and "from error-code"
|
||||
// constructors won't cause confusion.
|
||||
EXPECT_EQ(MasterLexer::Token::NUMBER,
|
||||
MasterLexer::Token(static_cast<uint32_t>(
|
||||
MasterLexer::Token::NOT_STARTED)).
|
||||
EXPECT_EQ(MasterToken::NUMBER,
|
||||
MasterToken(static_cast<uint32_t>(MasterToken::NOT_STARTED)).
|
||||
getType());
|
||||
}
|
||||
}
|
||||
|
@@ -141,19 +141,19 @@ TEST_F(MasterLexerTest, getNextToken) {
|
||||
lexer.pushSource(ss);
|
||||
|
||||
// First, the newline should get out.
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// Then the whitespace, if we specify the option.
|
||||
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
|
||||
EXPECT_EQ(MasterToken::INITIAL_WS,
|
||||
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
|
||||
// The newline
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// The (quoted) string
|
||||
EXPECT_EQ(MasterLexer::Token::QSTRING,
|
||||
EXPECT_EQ(MasterToken::QSTRING,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getType());
|
||||
|
||||
// And the end of line and file
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
}
|
||||
|
||||
// Test we correctly find end of file.
|
||||
@@ -162,12 +162,12 @@ TEST_F(MasterLexerTest, eof) {
|
||||
lexer.pushSource(ss);
|
||||
|
||||
// The first one is found to be EOF
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
// And it stays on EOF for any following attempts
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
// And we can step back one token, but that is the EOF too.
|
||||
lexer.ungetToken();
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
}
|
||||
|
||||
// Check we properly return error when there's an opened parentheses and no
|
||||
@@ -177,12 +177,12 @@ TEST_F(MasterLexerTest, getUnbalancedParen) {
|
||||
lexer.pushSource(ss);
|
||||
|
||||
// The string gets out first
|
||||
EXPECT_EQ(MasterLexer::Token::STRING, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());
|
||||
// Then an unbalanced parenthesis
|
||||
EXPECT_EQ(MasterLexer::Token::UNBALANCED_PAREN,
|
||||
EXPECT_EQ(MasterToken::UNBALANCED_PAREN,
|
||||
lexer.getNextToken().getErrorCode());
|
||||
// And then EOF
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
}
|
||||
|
||||
// Check we properly return error when there's an opened quoted string and no
|
||||
@@ -192,10 +192,10 @@ TEST_F(MasterLexerTest, getUnbalancedString) {
|
||||
lexer.pushSource(ss);
|
||||
|
||||
// Then an unbalanced qstring (reported as an unexpected end)
|
||||
EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END,
|
||||
EXPECT_EQ(MasterToken::UNEXPECTED_END,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getErrorCode());
|
||||
// And then EOF
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_FILE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE, lexer.getNextToken().getType());
|
||||
}
|
||||
|
||||
// Test ungetting tokens works
|
||||
@@ -204,28 +204,28 @@ TEST_F(MasterLexerTest, ungetToken) {
|
||||
lexer.pushSource(ss);
|
||||
|
||||
// Try getting the newline
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// Return it and get again
|
||||
lexer.ungetToken();
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// Get the string and return it back
|
||||
EXPECT_EQ(MasterLexer::Token::QSTRING,
|
||||
EXPECT_EQ(MasterToken::QSTRING,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getType());
|
||||
lexer.ungetToken();
|
||||
// But if we change the options, it honors them
|
||||
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
|
||||
EXPECT_EQ(MasterToken::INITIAL_WS,
|
||||
lexer.getNextToken(MasterLexer::QSTRING |
|
||||
MasterLexer::INITIAL_WS).getType());
|
||||
// Get to the "more" string
|
||||
EXPECT_EQ(MasterLexer::Token::QSTRING,
|
||||
EXPECT_EQ(MasterToken::QSTRING,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getType());
|
||||
EXPECT_EQ(MasterLexer::Token::STRING,
|
||||
EXPECT_EQ(MasterToken::STRING,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getType());
|
||||
// Return it back. It should get inside the parentheses.
|
||||
// Upon next attempt to get it again, the newline inside the parentheses
|
||||
// should be still ignored.
|
||||
lexer.ungetToken();
|
||||
EXPECT_EQ(MasterLexer::Token::STRING,
|
||||
EXPECT_EQ(MasterToken::STRING,
|
||||
lexer.getNextToken(MasterLexer::QSTRING).getType());
|
||||
}
|
||||
|
||||
@@ -235,16 +235,16 @@ TEST_F(MasterLexerTest, ungetRealOptions) {
|
||||
ss << "\n \n";
|
||||
lexer.pushSource(ss);
|
||||
// Skip the first newline
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
|
||||
// If we call it the usual way, it skips up to the newline and returns
|
||||
// it
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
|
||||
// Now we return it. If we call it again, but with different options,
|
||||
// we get the initial whitespace.
|
||||
lexer.ungetToken();
|
||||
EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
|
||||
EXPECT_EQ(MasterToken::INITIAL_WS,
|
||||
lexer.getNextToken(MasterLexer::INITIAL_WS).getType());
|
||||
}
|
||||
|
||||
@@ -253,7 +253,7 @@ TEST_F(MasterLexerTest, ungetTwice) {
|
||||
ss << "\n";
|
||||
lexer.pushSource(ss);
|
||||
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// Unget the token. It can be done once
|
||||
lexer.ungetToken();
|
||||
// But not twice
|
||||
@@ -271,17 +271,157 @@ TEST_F(MasterLexerTest, ungetBeforeGet) {
|
||||
TEST_F(MasterLexerTest, ungetAfterSwitch) {
|
||||
ss << "\n\n";
|
||||
lexer.pushSource(ss);
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// Switch the source
|
||||
std::stringstream ss2;
|
||||
ss2 << "\n\n";
|
||||
lexer.pushSource(ss2);
|
||||
EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
|
||||
// We can get from the new source
|
||||
EXPECT_EQ(MasterLexer::Token::END_OF_LINE, lexer.getNextToken().getType());
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());
|
||||
// And when we drop the current source, we can't unget again
|
||||
lexer.popSource();
|
||||
EXPECT_THROW(lexer.ungetToken(), isc::InvalidOperation);
|
||||
}
|
||||
|
||||
// Common checks for the case when getNextToken() should result in LexerError
|
||||
void
|
||||
lexerErrorCheck(MasterLexer& lexer, MasterToken::Type expect,
|
||||
MasterToken::ErrorCode expected_error)
|
||||
{
|
||||
bool thrown = false;
|
||||
try {
|
||||
lexer.getNextToken(expect);
|
||||
} catch (const MasterLexer::LexerError& error) {
|
||||
EXPECT_EQ(expected_error, error.token_.getErrorCode());
|
||||
thrown = true;
|
||||
}
|
||||
EXPECT_TRUE(thrown);
|
||||
}
|
||||
|
||||
// Common checks regarding expected/unexpected end-of-line
|
||||
void
|
||||
eolCheck(MasterLexer& lexer, MasterToken::Type expect) {
|
||||
// If EOL is found and eol_ok is true, we get it.
|
||||
EXPECT_EQ(MasterToken::END_OF_LINE,
|
||||
lexer.getNextToken(expect, true).getType());
|
||||
// We'll see the second '\n'; by default it will fail.
|
||||
EXPECT_THROW(lexer.getNextToken(expect), MasterLexer::LexerError);
|
||||
// Same if eol_ok is explicitly set to false. This also checks the
|
||||
// offending '\n' was "ungotten".
|
||||
EXPECT_THROW(lexer.getNextToken(expect, false), MasterLexer::LexerError);
|
||||
|
||||
// And also check the error token set in the exception object.
|
||||
lexerErrorCheck(lexer, expect, MasterToken::UNEXPECTED_END);
|
||||
}
|
||||
|
||||
// Common checks regarding expected/unexpected end-of-file
|
||||
void
|
||||
eofCheck(MasterLexer& lexer, MasterToken::Type expect) {
|
||||
EXPECT_EQ(MasterToken::END_OF_FILE,
|
||||
lexer.getNextToken(expect, true).getType());
|
||||
EXPECT_THROW(lexer.getNextToken(expect), MasterLexer::LexerError);
|
||||
EXPECT_THROW(lexer.getNextToken(expect, false), MasterLexer::LexerError);
|
||||
}
|
||||
|
||||
TEST_F(MasterLexerTest, getNextTokenString) {
    // Input: a string, an empty line, then a string with no trailing newline.
    ss << "normal-string\n"
       << "\n"
       << "another-string";
    lexer.pushSource(ss);

    // A string is expected and a string is found.
    EXPECT_EQ("normal-string",
              lexer.getNextToken(MasterToken::STRING).getString());
    eolCheck(lexer, MasterToken::STRING);

    // Consume the second '\n'.
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // Repeat the same kind of checks against end-of-file.
    EXPECT_EQ("another-string",
              lexer.getNextToken(MasterToken::STRING, true).getString());
    eofCheck(lexer, MasterToken::STRING);
}
|
||||
|
||||
TEST_F(MasterLexerTest, getNextTokenQString) {
    // Input: a quoted string, an empty line, then an unquoted string.
    ss << "\"quoted-string\"\n"
       << "\n"
       << "normal-string";
    lexer.pushSource(ss);

    // A quoted string is expected and found.
    EXPECT_EQ("quoted-string",
              lexer.getNextToken(MasterToken::QSTRING).getString());
    eolCheck(lexer, MasterToken::QSTRING);

    // Consume the second '\n'.
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // A quoted string is expected but an unquoted one appears; that is
    // accepted.
    EXPECT_EQ("normal-string",
              lexer.getNextToken(MasterToken::QSTRING).getString());
    eofCheck(lexer, MasterToken::QSTRING);
}
|
||||
|
||||
TEST_F(MasterLexerTest, getNextTokenNumber) {
    ss << "3600\n"
       << "\n"
       << "4294967296 "     // =2^32, out of range
       << "not-a-number "
       << "86400";
    lexer.pushSource(ss);

    // A number is expected and found.
    EXPECT_EQ(3600,
              lexer.getNextToken(MasterToken::NUMBER).getNumber());
    eolCheck(lexer, MasterToken::NUMBER);

    // Consume the second '\n'.
    EXPECT_EQ(MasterToken::END_OF_LINE, lexer.getNextToken().getType());

    // A number is expected, but the value does not fit in uint32.
    lexerErrorCheck(lexer, MasterToken::NUMBER,
                    MasterToken::NUMBER_OUT_OF_RANGE);
    // The rejected token should have been "ungotten"; read it again as a
    // string to move past it.
    EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());

    // A number is expected, but a non-numeric string appears.
    lexerErrorCheck(lexer, MasterToken::NUMBER, MasterToken::BAD_NUMBER);
    // Again the unexpected string should have been "ungotten"; re-read
    // and skip it.
    EXPECT_EQ(MasterToken::STRING, lexer.getNextToken().getType());

    // When NUMBER is not requested, a decimal number string is recognized
    // as an ordinary string.
    EXPECT_EQ("86400",
              lexer.getNextToken(MasterToken::STRING).getString());
    eofCheck(lexer, MasterToken::NUMBER);
}
|
||||
|
||||
TEST_F(MasterLexerTest, getNextTokenErrors) {
    // Miscellaneous error cases.

    ss << ") "      // unbalanced parenthesis
       << "string-after-error ";
    lexer.pushSource(ss);

    // Only string/qstring/number may be given as the "expected" type.
    EXPECT_THROW(lexer.getNextToken(MasterToken::END_OF_LINE),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::END_OF_FILE),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::INITIAL_WS),
                 isc::InvalidParameter);
    EXPECT_THROW(lexer.getNextToken(MasterToken::ERROR),
                 isc::InvalidParameter);

    // A syntax error in the input surfaces as a LexerError exception.
    lexerErrorCheck(lexer, MasterToken::STRING, MasterToken::UNBALANCED_PAREN);

    // Unlike the NUMBER_OUT_OF_RANGE case, the erroneous part has been
    // skipped within getNextToken(), so the following token is readable.
    EXPECT_EQ("string-after-error",
              lexer.getNextToken(MasterToken::STRING).getString());
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user