first commit
This commit is contained in:
772
extern/stdcxx/4.2.1/util/scanner.cpp
vendored
Normal file
772
extern/stdcxx/4.2.1/util/scanner.cpp
vendored
Normal file
@@ -0,0 +1,772 @@
|
||||
/***************************************************************************
|
||||
*
|
||||
* scanner.cpp
|
||||
*
|
||||
* $Id: scanner.cpp 648752 2008-04-16 17:01:56Z faridz $
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed
|
||||
* with this work for additional information regarding copyright
|
||||
* ownership. The ASF licenses this file to you under the Apache
|
||||
* License, Version 2.0 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* Copyright 2001-2006 Rogue Wave Software.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "scanner.h"
|
||||
|
||||
#include "diagnostic.h"
|
||||
#include "loc_exception.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cassert> // for assert()
|
||||
#include <climits> // for UCHAR_MAX
|
||||
#include <cstdlib> // for strtol()
|
||||
#include <cstring> // for strcmp()
|
||||
|
||||
|
||||
// Per-file scanning state: the open stream, the comment and escape
// characters in effect for it, and the line currently being tokenized.
struct ScannerContext
{
    // opens the file `name' for reading; `cc' and `ec' become the
    // initial comment and escape characters for the file
    ScannerContext (const char *name, char cc = '#', char ec = '\\');

    // input stream and the name it was opened with
    std::ifstream file;
    std::string   filename;

    // comment and escape characters in effect for this file
    char comment_char;
    char escape_char;

    // number of lines read from the file so far
    int line;

    // text of the current line and the scan position within it
    std::string  line_;
    const char  *pos_;

private:

    // declared but not defined: a context can be neither copied
    // nor assigned
    ScannerContext (const ScannerContext&);
    void operator= (ScannerContext&);
};
|
||||
|
||||
/**************************************************************************/
|
||||
// helpers
|
||||
|
||||
// Rewrites, in place, every '/' and '\\' in `s' to a single directory
// separator: a backslash when built with MSVC, a forward slash otherwise.
static void normal_path (std::string& s)
{
#if defined (_MSC_VER)
    const char separator = '\\';
#else
    const char separator = '/';
#endif

    for (std::string::size_type i = 0; i != s.size (); ++i) {
        const char c = s [i];

        if ('/' == c || '\\' == c)
            s [i] = separator;
    }
}
|
||||
|
||||
/**************************************************************************/
|
||||
// ScannerContext class definitions
|
||||
|
||||
// Opens `name' for reading and records the comment (`cc') and escape
// (`ec') characters to use for it.  An error diagnostic is issued when
// the file cannot be opened; an informational one names the file.
ScannerContext::
ScannerContext (const char* name, char cc, char ec)
    : file (name),
      filename (name),
      comment_char (cc),
      escape_char (ec),
      line (0)
{
    // nothing has been read yet: point at the start of the (still
    // empty) line buffer
    pos_ = line_.c_str ();

    const bool opened = file.is_open ();

    if (!opened) {
        issue_diag (500, true, 0,
                    "%s could not be opened for reading\n", name);
    }

    issue_diag (I_OPENRD, false, 0, "reading %s\n", name);
}
|
||||
|
||||
/**************************************************************************/
|
||||
// Scanner class definitions
|
||||
|
||||
// Constructs a scanner with no file open and zeroed line and token
// counters.
Scanner::
Scanner ()
    : context_ (0),
      nlines_ (0),
      ntokens_ (0),
      escaped_newline_ (false)
{
}
|
||||
|
||||
|
||||
// Destroys the active context followed by every context still waiting
// on the stack.
Scanner::
~Scanner()
{
    delete context_;

    for (; !context_stack_.empty (); context_stack_.pop ())
        delete context_stack_.top ();
}
|
||||
|
||||
|
||||
char Scanner::
|
||||
escape_char () const
|
||||
{
|
||||
return context_ ? context_->escape_char : 0;
|
||||
}
|
||||
|
||||
void Scanner::
|
||||
ignore_line ()
|
||||
{
|
||||
while (next_token ().token != tok_nl);
|
||||
}
|
||||
|
||||
|
||||
void Scanner::
|
||||
open (std::string name, char cc, char ec)
|
||||
{
|
||||
normal_path (name);
|
||||
|
||||
if (context_)
|
||||
context_stack_.push (context_);
|
||||
|
||||
try {
|
||||
context_ = new ScannerContext (name.c_str (), cc, ec);
|
||||
}
|
||||
catch (loc_exception&) {
|
||||
context_ = 0;
|
||||
|
||||
if (!context_stack_.empty ()) {
|
||||
context_ = context_stack_.top ();
|
||||
context_stack_.pop ();
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
nlines_ = 0;
|
||||
ntokens_ = 0;
|
||||
}
|
||||
|
||||
|
||||
void Scanner::
|
||||
close ()
|
||||
{
|
||||
assert (0 != context_);
|
||||
|
||||
issue_diag (I_OPENRD, false, 0,
|
||||
"%s: %u tokens, %u lines\n",
|
||||
context_->filename.c_str (), ntokens_, nlines_);
|
||||
|
||||
delete context_;
|
||||
|
||||
if (context_stack_.empty ())
|
||||
context_ = 0;
|
||||
else {
|
||||
context_ = context_stack_.top ();
|
||||
context_stack_.pop ();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Classifies the token text `name'.
//
// Text starting with the current escape character is tok_char_value
// when the next character is an octal digit, 'd' or 'x', and tok_ndef
// otherwise.  Any other text is looked up in the keyword table and
// yields the keyword's token, or tok_ndef when it is not a keyword.
Scanner::token_id Scanner::
process_token (const char* name)
{
    assert (0 != name);

    if (*name == context_->escape_char) {
        const char kind = name [1];

        // escaped numeric character value: octal digit, or a decimal
        // or hexadecimal prefix
        const bool is_octal_digit = '0' <= kind && kind <= '7';

        if (is_octal_digit || 'd' == kind || 'x' == kind)
            return tok_char_value;

        return tok_ndef;
    }

    // predefined tokens
    static const struct {
        const char*       name;
        Scanner::token_id token;
    } tok_map [] = {
        // the search below requires ascending strcmp() order
        { "CHARMAP", tok_charmap },
        { "END", tok_end },
        { "IGNORE", tok_ignore },
        { "LC_ADDRESS", tok_addr },
        { "LC_COLLATE", tok_collate },
        { "LC_CTYPE", tok_ctype },
        { "LC_IDENTIFICATION", tok_ident },
        { "LC_MEASUREMENT", tok_measure },
        { "LC_MESSAGES", tok_messages },
        { "LC_MONETARY", tok_monetary },
        { "LC_NAME", tok_name },
        { "LC_NUMERIC", tok_numeric },
        { "LC_PAPER", tok_paper },
        { "LC_TELEPHONE", tok_phone },
        { "LC_TIME", tok_time },
        { "UNDEFINED", tok_undefined },
        { "WIDTH", tok_width },
        { "abday", tok_abday },
        { "abmon", tok_abmon },
        { "alpha", tok_alpha },
        { "alt_digits", tok_alt_digits },
        { "am_pm", tok_am_pm },
        { "backward", tok_backward },
        { "blank", tok_blank },
        { "cntrl", tok_cntrl },
        { "collating-element", tok_coll_elem },
        { "collating-symbol", tok_coll_sym },
        { "comment_char", tok_comment_char },
        { "copy", tok_copy },
        { "currency_symbol", tok_currency_symbol },
        { "d_fmt", tok_d_fmt },
        { "d_t_fmt", tok_d_t_fmt },
        { "day", tok_day },
        { "decimal_point", tok_decimal_point },
        { "digit", tok_digit },
        { "era", tok_era },
        { "era_d_fmt", tok_era_d_fmt },
        { "era_d_t_fmt", tok_era_d_t_fmt },
        { "era_t_fmt", tok_era_t_fmt },
        { "escape_char", tok_escape_char },
        { "falsename", tok_falsename },
        { "forward", tok_forward },
        { "frac_digits", tok_frac_digits },
        { "from", tok_from },
        { "graph", tok_graph },
        { "grouping", tok_grouping },
        { "include", tok_include },
        { "int_curr_symbol", tok_int_curr_symbol },
        { "int_frac_digits", tok_int_frac_digits },
        { "int_n_cs_precedes", tok_int_n_cs_precedes },
        { "int_n_sep_by_space", tok_int_n_sep_by_space },
        { "int_n_sign_posn", tok_int_n_sign_posn },
        { "int_p_cs_precedes", tok_int_p_cs_precedes },
        { "int_p_sep_by_space", tok_int_p_sep_by_space },
        { "int_p_sign_posn", tok_int_p_sign_posn },
        { "lower", tok_lower },
        { "mon", tok_mon },
        { "mon_decimal_point", tok_mon_decimal_point },
        { "mon_grouping", tok_mon_grouping },
        { "mon_thousands_sep", tok_mon_thousands_sep },
        { "n_cs_precedes", tok_n_cs_precedes },
        { "n_sep_by_space", tok_n_sep_by_space },
        { "n_sign_posn", tok_n_sign_posn },
        { "negative_sign", tok_negative_sign },
        { "noexpr", tok_noexpr },
        { "order_end", tok_order_end },
        { "order_start", tok_order_start },
        { "p_cs_precedes", tok_p_cs_precedes },
        { "p_sep_by_space", tok_p_sep_by_space },
        { "p_sign_posn", tok_p_sign_posn },
        { "position", tok_position },
        { "positive_sign", tok_positive_sign },
        { "print", tok_print },
        { "punct", tok_punct },
        { "reorder-after", tok_reorder },
        { "reorder-end", tok_reorder_end },
        { "reorder-section-after", tok_reorder_section },
        { "reorder-section-end", tok_reorder_section_end },
        { "script", tok_script },
        { "space", tok_space },
        { "t_fmt", tok_t_fmt },
        { "t_fmt_ampm", tok_t_fmt_ampm },
        { "thousands_sep", tok_thousands_sep },
        { "tolower", tok_tolower },
        { "toupper", tok_toupper },
        { "translit_end", tok_xlit_end },
        { "translit_start", tok_xlit_start },
        { "truename", tok_truename },
        { "upper", tok_upper },
        { "xdigit", tok_xdigit },
        { "yesexpr", tok_yesexpr }
    };

    // binary search over the half-open range [first, last)
    std::size_t first = 0;
    std::size_t last  = sizeof tok_map / sizeof *tok_map;

    while (first < last) {

        const std::size_t mid = first + (last - first) / 2;

        const int order = std::strcmp (name, tok_map [mid].name);

        if (order < 0)
            last = mid;
        else if (0 < order)
            first = mid + 1;
        else
            return tok_map [mid].token;
    }

    return tok_ndef;
}
|
||||
|
||||
|
||||
void Scanner::
|
||||
read_line ()
|
||||
{
|
||||
context_->line_.clear ();
|
||||
|
||||
std::getline (context_->file, context_->line_);
|
||||
|
||||
context_->line_ += '\n';
|
||||
|
||||
context_->pos_ = context_->line_.c_str ();
|
||||
|
||||
++context_->line;
|
||||
// context_->column = 0;
|
||||
|
||||
++nlines_;
|
||||
|
||||
assert (context_->line_.size ());
|
||||
}
|
||||
|
||||
|
||||
// Extracts and returns the next token from the file being scanned.
//
// The returned token carries its text (name), its kind (token), the
// name of the file it came from and the line and column at which it
// starts.  tok_end_tokens is returned once the last line of the file
// has been consumed in full.  A file must be open.
//
// Recognized forms, selected by the first character at the current
// position:
//   '<'                 symbolic name or one of the <...> keywords
//   ' ', '\t', ';'      separators, skipped
//   '\n'                tok_nl
//   comment character   rest of the line is dropped
//   '('                 grouping, up to the matching ')'
//   '.'                 one of the ellipsis forms
//   '"'                 string, may continue over escaped newlines
//   escape character    escaped newline or escaped character value
//   anything else       keyword or undefined text
//
// The escape_char/comment_char statements (in either spelling) update
// the current context and are reported to the caller as tok_nl.
//
// NOTE(review): the error diagnostics issued below (issue_diag with
// its second argument true) are presumed not to return; confirm
// against issue_diag.  The breaks that follow some of them keep the
// scan position inside the line buffer in case they do.
Scanner::token_t Scanner::
next_token ()
{
    assert (0 != context_);
    assert (context_->file.is_open ());

    // token
    token_t next_tok;

    next_tok.name   = "";
    next_tok.token  = tok_ndef;
    next_tok.line   = 0;
    next_tok.column = 0;
    next_tok.file   = 0;

    while (true) {

        // store the *current* file name
        next_tok.file = context_->filename.c_str ();

        // if we exhausted the current line, advance
        if (   context_->line_.size ()
            <= std::size_t (context_->pos_ - context_->line_.c_str ())) {

            // End of input is reported only once the buffered line has
            // been consumed: reading a last line that has no trailing
            // newline already sets eofbit, and testing it any earlier
            // would drop every token on that line after the first.
            if (context_->file.eof ()) {
                next_tok.token = tok_end_tokens;
                return next_tok;
            }

            read_line ();
        }

        // line and column for the token start; they are set at each
        // iteration; the finding of a token breaks and next_tok leaves
        // this loop having the line/col info
        next_tok.line   = context_->line;
        next_tok.column = context_->pos_ - context_->line_.c_str ();

        // plug in the pointer to current position
        const char*& next = context_->pos_;

        if (*next != context_->comment_char)
            escaped_newline_ = false;

        if (*next == '<') {
            // beginning of a symbolic name or keyword
            const char* tok_begin = next++;

            for (; '>' != *next; ++next) {

                // if has an escaped close angular, pass
                if (*next == context_->escape_char) {

                    // append symbol name up to but not including the escape
                    next_tok.name.append (tok_begin, next - tok_begin);

                    // advance the next pointer to skip the escape
                    tok_begin = ++next;
                }

                // tested after an escape has been skipped as well: an
                // escape right before the newline must not carry the
                // position over the newline and out of the line buffer
                if ('\n' == *next) {
                    // past the end of the line
                    issue_diag (E_SYNTAX, true, &next_tok,
                                " unterminated symbolic name\n");
                    break;
                }
            }

            next_tok.name.append (tok_begin, ++next - tok_begin);

            // check the name fetched so far
            if (next_tok.name == "<code_set_name>") {
                next_tok.token = tok_code_set_name;
            }
            else if (   next_tok.name == "<escape_char>"
                     || next_tok.name == "<comment_char>") {

                // eat away spaces
                while (' ' == *next || '\t' == *next) {
                    ++next;
                }

                // test for end of line
                if (*next == '\n')
                    issue_diag (E_SYNTAX, true, &next_tok,
                                "missing value for %s\n",
                                next_tok.name.c_str ());

                // store character
                if (next_tok.name == "<escape_char>")
                    context_->escape_char = *next;
                else
                    context_->comment_char = *next;

                // adjust positions;
                context_->pos_ =
                    context_->line_.c_str () + context_->line_.size ();

                // set token to a newline
                next_tok.name  = "";
                next_tok.token = tok_nl;
            }
            else if (next_tok.name == "<mb_cur_max>") {
                next_tok.token = tok_mb_cur_max;
            }
            else if (next_tok.name == "<mb_cur_min>") {
                next_tok.token = tok_mb_cur_min;
            }
            else {
                next_tok.token = tok_sym_name;
            }
            break;
        }
        else if (*next == ' ' || *next == '\t' || *next == ';') {
            // ignore whitespace and separators
            while (*next == ' ' || *next == '\t' || *next == ';') {
                ++next;
            }
        }
        else if (*next == '\n') {
            ++next;
            next_tok.token = tok_nl;
            break;
        }
        else if (*next == context_->comment_char) {
            // start of a comment - check as early as necessary
            // adjust to end of line
            context_->pos_ = context_->line_.c_str () + context_->line_.size ();

            // a comment line following an escaped newline does not
            // end the logical line
            if (escaped_newline_)
                continue;

            next_tok.token = tok_nl;
            next_tok.name  = "\n";
            break;
        }
        else if (*next == '(') {
            // push open parenthesis
            next_tok.name.push_back (*next++);

            // start of a grouping
            while (*next != ')') {

                // tested before anything is consumed so that a '('
                // that is the last character on the line is caught
                // before the position steps over the newline
                if (*next == '\n') {
                    issue_diag (E_SYNTAX, true, &next_tok,
                                " unterminated grouping ");
                    break;
                }

                // contains a symbolic name
                if (*next == '<') {
                    // push open angular parenthesis
                    next_tok.name.push_back (*next++);

                    while (*next != '\n') {
                        // if has an escaped close angular, pass
                        if (next [0] == context_->escape_char) {
                            next_tok.name.push_back (*next++);

                            // never consume the terminating newline
                            if (*next != '\n')
                                next_tok.name.push_back (*next++);
                            continue;
                        }

                        // if we have reached the end of the sym name
                        if (*next == '>') {
                            next_tok.name.push_back (*next);
                            break;
                        }

                        // still inside the sym name/keyword
                        next_tok.name.push_back (*next++);
                    }

                    // check if we have gone past the end of the line
                    if (*next == '\n') {
                        issue_diag (E_SYNTAX, true, &next_tok,
                                    " unterminated symbolic name");
                        break;
                    }

                    // step over the closing '>'
                    ++next;
                }
                else {
                    // fetch the character
                    next_tok.name.push_back (*next++);
                }
            }

            next_tok.name.push_back (*next++);
            next_tok.token = tok_grouping;
            break;
        }
        else if (*next == '.') {
            // ellipsis (see ISO/IEC TR 14652)
            int ellipsis_count = 0;

            // start of an interval
            while (*next == '.') {
                next_tok.name.push_back (*next++);
                ++ellipsis_count;
            }

            switch (ellipsis_count) {
            case 2: {
                // "..(2).." is consumed as a whole; && stops at the
                // first mismatch so tmp never runs past the newline
                const char* tmp = next;
                if (   *tmp++ == '(' && *tmp++ == '2' && *tmp++ == ')'
                    && *tmp++ == '.' && *tmp++ == '.') {
                    // double increment hexadecimal symbolic ellipsis
                    next_tok.token = tok_dbl_ellipsis;
                    next = tmp;
                }
                else {
                    // hexadecimal symbolic ellipsis
                    next_tok.token = tok_hex_ellipsis;
                }
                break;
            }

            case 3:
                // absolute symbolic ellipsis
                next_tok.token = tok_abs_ellipsis;
                break;

            case 4:
                // decimal symbolic ellipsis
                next_tok.token = tok_dec_ellipsis;
                break;

            default:
                issue_diag (E_SYNTAX, true, &next_tok, "illegal ellipsis\n");
            }
            break;

        }
        else if (*next == '\"') {

            // start of a string
            next_tok.name.push_back (*next++);
            const char ec = context_->escape_char;

            while (next[0] != '\n') {

                // escaped newline; continue on the next line (next
                // refers to the context's position, which read_line()
                // resets to the start of the new line)
                if (next [0] == ec && next [1] == '\n') {
                    read_line ();
                    continue;
                }

                // escaped quote
                if (next[0] == ec) {
                    next_tok.name.push_back (*next++);
                    next_tok.name.push_back (*next++);
                    continue;
                }

                if (next [0] == '\"') {
                    next_tok.name.push_back (*next);
                    break;
                }

                // still inside the string
                next_tok.name.push_back (*next++);
            }

            // test for closure
            if (*next == '\n')
                issue_diag (E_SYNTAX, true, &next_tok, "unterminated string");

            ++next;
            next_tok.token = tok_string;
            break;

        }
        else if (*next == context_->escape_char) {
            // start of an escape sequence
            // escaped new line
            if (next [1] == '\n') {
                // adjust to end of line
                context_->pos_ =
                    context_->line_.c_str () + context_->line_.size ();

                escaped_newline_ = true;
                continue;
            }

            // or
            while (   *next != ' ' && *next != '\t'
                   && *next != ';' && *next != '\n') {
                next_tok.name.push_back (*next++);
            }

            // retrieve token based on value
            next_tok.token = process_token (next_tok.name.c_str ());
            break;
        }
        else {
            // the rest of it
            for (const char ec = context_->escape_char; ; ) {

                // stop at esc-newline or at first "separator"
                if (   (next [0] == ec && next [1] == '\n')
                    || next [0] == ' '
                    || next [0] == '\t'
                    || next [0] == '\n'
                    || next [0] == ';') {
                    // continuation of a line, separators
                    break;
                }

                // fetch characters
                next_tok.name.push_back (*next++);
            }

            // assert length of input
            assert (next_tok.name.size ());

            // it wasn't a locale definition keyword so call process_token
            // and add the result to the list
            next_tok.token = process_token (next_tok.name.c_str ());


            // an extra bit of processing since we keep comment and escape
            // characters in the scanner for a faster processing
            if (   next_tok.token == tok_escape_char
                || next_tok.token == tok_comment_char) {

                // eat away spaces
                while (' ' == *next || '\t' == *next) {
                    ++next;
                }

                // test for end of line
                if (*next == '\n')
                    issue_diag (E_SYNTAX, true, &next_tok,
                                "unterminated statement");

                // store character
                if (next_tok.token == tok_escape_char)
                    context_->escape_char = next [0];
                else
                    context_->comment_char = next [0];

                // adjust positions;
                context_->pos_ =
                    context_->line_.c_str () + context_->line_.size ();

                // return the token
                next_tok.name  = "";
                next_tok.token = tok_nl;
            }

            break;
        }
    }

    ++ntokens_;

    return next_tok;
}
|
||||
|
||||
|
||||
unsigned long Scanner::
|
||||
convert_escape (const char *esc,
|
||||
const char **pend /* = 0 */,
|
||||
bool multi /* = false */) const
|
||||
{
|
||||
assert (0 != esc);
|
||||
|
||||
const char escape = escape_char ();
|
||||
|
||||
if (escape != *esc)
|
||||
issue_diag (E_SYNTAX, true, 0,
|
||||
"expected the escape character ('%c'), got \"%s\"\n",
|
||||
escape, esc);
|
||||
|
||||
unsigned long value = 0;
|
||||
|
||||
for (const char *s = esc; ; ) {
|
||||
|
||||
// escaped characters are octal by default
|
||||
const char *basename = "octal";
|
||||
int base = 8;
|
||||
|
||||
switch (*++s) {
|
||||
case 'd': ++s; base = 10; basename = "decimal"; break;
|
||||
case 'x': ++s; base = 16; basename = "hexadecimal"; break;
|
||||
|
||||
case 'o': ++s;
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
break;
|
||||
|
||||
default:
|
||||
issue_diag (E_SYNTAX, true, 0,
|
||||
"one of { 'o', 'd', 'x' } expected following "
|
||||
"the escape character: %s\n", esc);
|
||||
}
|
||||
|
||||
char *end = 0;
|
||||
|
||||
const unsigned long byte = std::strtoul (s, &end, base);
|
||||
|
||||
if (pend)
|
||||
*pend = end;
|
||||
|
||||
// cast away constness below to work around an MSVC 7.0 bug:
|
||||
// causing error C2446: '==' : no conversion from 'char ** '
|
||||
// to 'const char ** ' Conversion loses qualifiers
|
||||
if (!multi && _RWSTD_CONST_CAST (char**, pend) == &end && **pend)
|
||||
issue_diag (E_SYNTAX, true, 0,
|
||||
"%s constant expected: %s\n", basename, esc);
|
||||
|
||||
if (UCHAR_MAX < byte)
|
||||
issue_diag (E_INVAL, true, 0,
|
||||
"%s byte value must be in the range [0, %d]: %s\n",
|
||||
basename, int (UCHAR_MAX), esc);
|
||||
|
||||
if (value >> (sizeof (unsigned long) - 1) * CHAR_BIT)
|
||||
issue_diag (E_INVAL, true, 0, "integer overflow: %s\n", esc);
|
||||
|
||||
value = (value << CHAR_BIT) | byte;
|
||||
|
||||
if (**pend != escape || !multi)
|
||||
break;
|
||||
|
||||
s = *pend;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
Reference in New Issue
Block a user