/***************************************************************************
 *
 * scanner.h
 *
 * $Id: scanner.h 648752 2008-04-16 17:01:56Z faridz $
 *
 ***************************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright
 * ownership. The ASF licenses this file to you under the Apache
 * License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * Copyright 2001-2006 Rogue Wave Software.
 *
 **************************************************************************/

#ifndef RWSTD_UTIL_SCANNER_H_INCLUDED
#define RWSTD_UTIL_SCANNER_H_INCLUDED

#include <string>
#include <stack>
#include <climits>   // for ULONG_MAX


// Only pointers to ScannerContext are stored by Scanner, so a forward
// declaration is all this header needs; the definition lives elsewhere.
struct ScannerContext;


// Scanner declares the set of tokens recognized in character map and
// locale definition files, a small record type (token_t) describing one
// scanned token, and the interface used to pull tokens one at a time
// (next_token) from a source attached with open().
//
// The class is non-copyable: the copy constructor and the copy assignment
// operator are declared private and intentionally left undefined.
class Scanner
{
public:

    // enumeration of all tokens in the character map
    // and locale definition file
    //
    // NOTE: enumerators carry no explicit values, so their numeric
    // values are determined solely by the order below; do not reorder
    // or insert in the middle if anything depends on those values.
    enum token_id {
        tok_code_set_name,        // <code_set_name>
        tok_mb_cur_max,           // <mb_cur_max>
        tok_mb_cur_min,           // <mb_cur_min>

        // sections
        tok_charmap,              // beginning of CHARMAP section
        tok_collate,              // beginning of LC_COLLATE section
        tok_ctype,                // beginning of LC_CTYPE section
        tok_messages,             // beginning of LC_MESSAGES section
        tok_monetary,             // beginning of LC_MONETARY section
        tok_numeric,              // beginning of LC_NUMERIC section
        tok_time,                 // beginning of LC_TIME section

        // ISO/IEC TR 14652 extensions:
        tok_addr,                 // beginning of LC_ADDRESS section
        tok_ident,                // beginning of LC_IDENTIFICATION section
        tok_measure,              // beginning of LC_MEASUREMENT section
        tok_name,                 // beginning of LC_NAME section
        tok_paper,                // beginning of LC_PAPER section
        tok_phone,                // beginning of LC_TELEPHONE section

        //
        tok_end,                  // END of a section

        // LC_CTYPE-specific tokens
        tok_upper,                // upper section of LC_CTYPE
        tok_lower,                // lower section of LC_CTYPE
        tok_digit,                // digit section of LC_CTYPE
        tok_space,                // space section of LC_CTYPE
        tok_alpha,                // alpha section of LC_CTYPE
        tok_graph,                // graph section of LC_CTYPE
        tok_print,                // print section of LC_CTYPE
        tok_cntrl,                // cntrl section of LC_CTYPE
        tok_punct,                // punct section of LC_CTYPE
        tok_xdigit,               // xdigit section of LC_CTYPE
        tok_blank,                // blank section of LC_CTYPE
        tok_tolower,              // tolower section of LC_CTYPE
        tok_toupper,              // toupper section of LC_CTYPE

        // LC_COLLATE-specific tokens
        tok_script,
        tok_coll_elem,            // collating-element
        tok_coll_sym,             // collating symbol
        tok_from,
        tok_xlit_start,           // translit_start
        tok_xlit_end,             // translit_end
        tok_reorder,
        tok_reorder_end,
        tok_reorder_section,
        tok_reorder_section_end,
        tok_order_start,
        tok_order_end,
        tok_forward,
        tok_backward,
        tok_position,
        tok_undefined,

        //
        tok_string,
        tok_ignore,

        // absolute, hexadecimal, decimal, and double-increment
        // ellipses (see ISO/IEC TR 14652)
        tok_abs_ellipsis,         // "..."
        tok_hex_ellipsis,         // ".."
        tok_dec_ellipsis,         // "...."
        tok_dbl_ellipsis,         // "..(N).."

        tok_width,

        // LC_MONETARY-specific tokens
        tok_int_curr_symbol,
        tok_currency_symbol,
        tok_mon_decimal_point,
        tok_mon_thousands_sep,
        tok_mon_grouping,
        tok_positive_sign,
        tok_negative_sign,
        tok_int_frac_digits,
        tok_frac_digits,
        tok_p_cs_precedes,
        tok_p_sep_by_space,
        tok_n_cs_precedes,
        tok_n_sep_by_space,
        tok_p_sign_posn,
        tok_n_sign_posn,
        tok_int_p_cs_precedes,
        tok_int_n_cs_precedes,
        tok_int_p_sep_by_space,
        tok_int_n_sep_by_space,
        tok_int_p_sign_posn,
        tok_int_n_sign_posn,

        // LC_NUMERIC-specific tokens
        tok_decimal_point,        // decimal point
        tok_thousands_sep,        // thousands_sep
        tok_grouping,             // grouping
        tok_truename,             // truename (C++ extension)
        tok_falsename,            // falsename (C++ extension)

        // LC_TIME-specific tokens
        tok_abday,
        tok_day,
        tok_abmon,
        tok_mon,
        tok_d_t_fmt,
        tok_d_fmt,
        tok_t_fmt,
        tok_am_pm,
        tok_t_fmt_ampm,
        tok_era,
        tok_era_d_fmt,
        tok_era_t_fmt,
        tok_era_d_t_fmt,
        tok_alt_digits,

        // LC_MESSAGES-specific tokens
        tok_yesexpr,
        tok_noexpr,

        // no tokens are declared yet for the TR 14652 categories:
        // LC_ADDRESS-specific tokens
        // LC_IDENTIFICATION-specific tokens
        // LC_MEASUREMENT-specific tokens
        // LC_NAME-specific tokens
        // LC_PAPER-specific tokens
        // LC_TELEPHONE-specific tokens

        // other:
        tok_sym_name,             // symbolic character name
        tok_char_value,           // character value (octal, decimal, or hex)
        tok_comment,              // comment
        tok_comment_char,         // <comment_char>
        tok_escape_char,          // <escape_char>
        tok_copy,                 // copy directive
        tok_include,              // include directive
        tok_nl,                   // newline
        tok_ndef,                 // unknown/undefined token
        tok_end_tokens            // end of input
    };

    // scanner states
    // enum {valid, invalid};

    // a structure that represents a token
    //
    // NOTE: token_t has no constructor or member initializers, so a
    // default-initialized object has an empty name but indeterminate
    // token, line, column, and file until they are assigned.
    struct token_t {
        std::string name;     // text associated with the token
        token_id    token;    // kind of token

        // file position
        int         line;
        int         column;

        // file name pointer
        // NOTE(review): a raw, non-owning-looking pointer; the storage it
        // refers to must outlive the token -- confirm against the
        // implementation
        const char* file;
    };

    // construction and destruction (defined out of line)
    Scanner ();
    virtual ~Scanner();

    // public interface

    // returns the next token read from the current input
    token_t next_token ();

    // attaches the scanner to an input source
    // NOTE(review): the string is presumably the name of the file to
    // scan, and the two char arguments presumably the initial comment
    // character (default '#') and escape character (default '\\') --
    // confirm against the definition
    void open (std::string, char = '#', char = '\\');

    void close ();

    // returns the escape character of the scanner
    // NOTE(review): presumably the one in effect for the current
    // context -- confirm against the definition
    char escape_char () const;

    // NOTE(review): presumably discards the remainder of the current
    // input line -- confirm against the definition
    void ignore_line ();

    // converts an octal, decimal, or hexadecimal escape sequence
    // (or a multibyte sequence of such things) to a numeric value
    // NOTE(review): the optional const char** presumably receives the
    // position where conversion stopped, and the bool presumably selects
    // the multibyte form -- confirm against the definition
    unsigned long
    convert_escape (const char*, const char** = 0, bool = false) const;

private:

    // copying is disabled: declared private and deliberately left
    // without a definition
    Scanner (const Scanner&);           // not defined
    void operator= (const Scanner&);    // not defined

    // helper function that identifies a token from a string and
    // returns the corresponding token_id
    token_id process_token (const char* name);

    // read a line from stream
    void read_line ();

    // current file context and stack of context objects
    ScannerContext*             context_;
    std::stack<ScannerContext*> context_stack_;

    unsigned nlines_;     // number of lines read
    unsigned ntokens_;    // number of tokens read

    // was the last token an escaped newline
    bool escaped_newline_;
};


#endif   // RWSTD_UTIL_SCANNER_H_INCLUDED