first commit

2025-06-07 11:34:38 -04:00
commit 0eb2d7c07d
4708 changed files with 1500614 additions and 0 deletions
--- a/extern/stdcxx/4.2.1/util/scanner.cpp
+++ b/extern/stdcxx/4.2.1/util/scanner.cpp
@@ -0,0 +1,772 @@
+/***************************************************************************
+ *
+ * scanner.cpp
+ *
+ * $Id: scanner.cpp 648752 2008-04-16 17:01:56Z faridz $
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
+ * 
+ **************************************************************************/
+
+#include "scanner.h"
+
+#include "diagnostic.h"
+#include "loc_exception.h"
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <cassert>   // for assert()
+#include <climits>   // for UCHAR_MAX
+#include <cstdlib>   // for strtol()
+#include <cstring>   // for strcmp()
+
+
+struct ScannerContext
+{
+    // opens the named file for reading; cc and ec are the comment
+    // and escape characters the file starts out with
+    ScannerContext (const char* name, char cc = '#', char ec = '\\');
+
+    // input stream for the file and the name it was opened under
+    std::ifstream file;
+    std::string   filename;
+
+    // comment and escape characters in effect for this file
+    char comment_char;
+    char escape_char;
+
+    // number of lines read from the file so far
+    int line;
+
+    // text of the current line and the scan position within it
+    std::string line_;
+    const char* pos_;
+
+private:
+    // declared but not defined: contexts are neither copied nor assigned
+    ScannerContext (const ScannerContext&);
+    void operator= (ScannerContext&);
+};
+
+/**************************************************************************/
+// helpers
+
+// rewrites every forward slash and backslash in s to the separator
+// selected at compile time: '\\' when _MSC_VER is defined, '/' otherwise
+static void normal_path (std::string& s)
+{
+#if defined (_MSC_VER)
+    const char separator = '\\';
+#else
+    const char separator = '/';
+#endif
+
+    for (std::string::size_type i = 0; i != s.size (); ++i) {
+        if ('/' == s [i] || '\\' == s [i])
+            s [i] = separator;
+    }
+}
+
+/**************************************************************************/
+// ScannerContext class definitions
+
+ScannerContext::
+ScannerContext (const char* name, char cc, char ec)
+    : file (name),
+      filename (name),
+      comment_char (cc),
+      escape_char (ec),
+      line (0)
+{
+    // nothing has been read yet: point at the (empty) current line
+    pos_ = line_.c_str ();
+
+    const bool opened = file.is_open ();
+
+    if (!opened) {
+        issue_diag (500, true, 0, 
+                    "%s could not be opened for reading\n", name);
+    }
+
+    issue_diag (I_OPENRD, false, 0, "reading %s\n", name);
+}
+
+/**************************************************************************/
+// Scanner class definitions
+
+Scanner::
+Scanner ()
+    : context_ (0),
+      nlines_ (0),
+      ntokens_ (0),
+      escaped_newline_ (false)
+{
+    // starts out with no current context and zeroed counters
+}
+
+
+Scanner::
+~Scanner() 
+{
+    // the current context is not on the stack; dispose of it first
+    delete context_;
+
+    // then release every context still on the stack
+    for (; !context_stack_.empty (); context_stack_.pop ())
+        delete context_stack_.top ();
+}
+
+
+// returns the escape character of the current context,
+// or 0 when there is no current context
+char Scanner::
+escape_char () const
+{
+    if (0 == context_)
+        return 0;
+
+    return context_->escape_char;
+}
+
+// discards tokens up to and including the next newline token
+//
+// also stops when next_token() reports the end of the input: from
+// then on it keeps returning tok_end_tokens and never tok_nl, so
+// waiting for tok_nl alone would loop forever
+void Scanner::
+ignore_line ()
+{
+    for (; ; ) {
+        const token_id tok = next_token ().token;
+
+        if (tok_nl == tok || tok_end_tokens == tok)
+            break;
+    }
+}
+
+
+// makes the named file the current input; the context being scanned
+// (if any) is pushed on the context stack, from where close() pops it
+//
+// name  path of the file; slashes are normalized by normal_path()
+// cc    comment character the new file starts out with
+// ec    escape character the new file starts out with
+//
+// any exception raised while the new context is being set up is
+// propagated to the caller with the scanner's state left unchanged
+void Scanner::
+open (std::string name, char cc, char ec)
+{
+    normal_path (name);
+
+    // create the new context before touching the scanner's state so
+    // that a failure of any kind (the constructor reports a file that
+    // cannot be opened via issue_diag()) leaves the current context
+    // and the stack exactly as they were; pushing first and undoing
+    // the push only for one exception type would leave context_ both
+    // current and on the stack, and the destructor would delete it twice
+    ScannerContext* const ctx = new ScannerContext (name.c_str (), cc, ec);
+
+    if (context_) {
+        try {
+            context_stack_.push (context_);
+        }
+        catch (...) {
+            // don't leak the new context if the stack cannot grow
+            delete ctx;
+            throw;
+        }
+    }
+
+    context_ = ctx;
+
+    // the counters reported by close() start over for the new file
+    nlines_ = 0;
+    ntokens_ = 0;
+}
+
+
+// reports the token and line counts, destroys the current context
+// and makes the one on top of the stack (if any) current again
+void Scanner::
+close ()
+{
+    assert (0 != context_);
+
+    issue_diag (I_OPENRD, false, 0,
+                "%s: %u tokens, %u lines\n",
+                context_->filename.c_str (), ntokens_, nlines_);
+
+    delete context_;
+    context_ = 0;
+
+    // go back to the most recently pushed context, if there is one
+    if (!context_stack_.empty ()) {
+        context_ = context_stack_.top ();
+        context_stack_.pop ();
+    }
+}
+
+
+// classifies the text of a token: returns tok_char_value for an
+// escaped numeric character value, the matching token id for one of
+// the keywords in the table below, and tok_ndef for anything else
+Scanner::token_id Scanner::
+process_token (const char* name)
+{
+    assert (0 != name);
+
+    if (*name == context_->escape_char) {
+        // an escape introduces a numeric character value when it is
+        // followed by an octal digit or by one of the letters 'd', 'x'
+        const char kind = name [1];
+
+        const bool numeric =
+            ('0' <= kind && kind <= '7') || 'd' == kind || 'x' == kind;
+
+        return numeric ? tok_char_value : tok_ndef;
+    }
+
+    // keywords and their token ids
+
+    static const struct {
+        const char*       keyword;
+        Scanner::token_id id;
+    } keywords [] = {
+        // the search below relies on the entries being sorted
+        // in ascending strcmp() order
+        { "CHARMAP", tok_charmap },
+        { "END", tok_end },
+        { "IGNORE", tok_ignore },
+        { "LC_ADDRESS", tok_addr },
+        { "LC_COLLATE", tok_collate },
+        { "LC_CTYPE", tok_ctype },
+        { "LC_IDENTIFICATION", tok_ident },
+        { "LC_MEASUREMENT", tok_measure },
+        { "LC_MESSAGES", tok_messages },
+        { "LC_MONETARY", tok_monetary },
+        { "LC_NAME", tok_name },
+        { "LC_NUMERIC", tok_numeric },
+        { "LC_PAPER", tok_paper },
+        { "LC_TELEPHONE", tok_phone },
+        { "LC_TIME", tok_time },
+        { "UNDEFINED", tok_undefined },
+        { "WIDTH", tok_width },
+        { "abday", tok_abday },
+        { "abmon", tok_abmon },
+        { "alpha", tok_alpha },
+        { "alt_digits", tok_alt_digits },
+        { "am_pm", tok_am_pm },
+        { "backward", tok_backward },
+        { "blank", tok_blank },
+        { "cntrl", tok_cntrl },
+        { "collating-element", tok_coll_elem },
+        { "collating-symbol", tok_coll_sym },
+        { "comment_char", tok_comment_char },
+        { "copy", tok_copy },
+        { "currency_symbol", tok_currency_symbol },
+        { "d_fmt", tok_d_fmt },
+        { "d_t_fmt", tok_d_t_fmt },
+        { "day", tok_day },
+        { "decimal_point", tok_decimal_point },
+        { "digit", tok_digit },
+        { "era", tok_era },
+        { "era_d_fmt", tok_era_d_fmt },
+        { "era_d_t_fmt", tok_era_d_t_fmt },
+        { "era_t_fmt", tok_era_t_fmt },
+        { "escape_char", tok_escape_char },
+        { "falsename", tok_falsename },
+        { "forward", tok_forward },
+        { "frac_digits", tok_frac_digits },
+        { "from", tok_from },
+        { "graph", tok_graph },
+        { "grouping", tok_grouping },
+        { "include", tok_include },
+        { "int_curr_symbol", tok_int_curr_symbol },
+        { "int_frac_digits", tok_int_frac_digits },
+        { "int_n_cs_precedes", tok_int_n_cs_precedes },
+        { "int_n_sep_by_space", tok_int_n_sep_by_space },
+        { "int_n_sign_posn", tok_int_n_sign_posn },
+        { "int_p_cs_precedes", tok_int_p_cs_precedes },
+        { "int_p_sep_by_space", tok_int_p_sep_by_space },
+        { "int_p_sign_posn", tok_int_p_sign_posn },
+        { "lower", tok_lower },
+        { "mon", tok_mon },
+        { "mon_decimal_point", tok_mon_decimal_point },
+        { "mon_grouping", tok_mon_grouping },
+        { "mon_thousands_sep", tok_mon_thousands_sep },
+        { "n_cs_precedes", tok_n_cs_precedes },
+        { "n_sep_by_space", tok_n_sep_by_space },
+        { "n_sign_posn", tok_n_sign_posn },
+        { "negative_sign", tok_negative_sign },
+        { "noexpr", tok_noexpr },
+        { "order_end", tok_order_end },
+        { "order_start", tok_order_start },
+        { "p_cs_precedes", tok_p_cs_precedes },
+        { "p_sep_by_space", tok_p_sep_by_space },
+        { "p_sign_posn", tok_p_sign_posn },
+        { "position", tok_position },
+        { "positive_sign", tok_positive_sign },
+        { "print", tok_print },
+        { "punct", tok_punct },
+        { "reorder-after", tok_reorder },
+        { "reorder-end", tok_reorder_end },
+        { "reorder-section-after", tok_reorder_section },
+        { "reorder-section-end", tok_reorder_section_end },
+        { "script", tok_script },
+        { "space", tok_space },
+        { "t_fmt", tok_t_fmt },
+        { "t_fmt_ampm", tok_t_fmt_ampm },
+        { "thousands_sep", tok_thousands_sep },
+        { "tolower", tok_tolower },
+        { "toupper", tok_toupper },
+        { "translit_end", tok_xlit_end },
+        { "translit_start", tok_xlit_start },
+        { "truename", tok_truename },
+        { "upper", tok_upper },
+        { "xdigit", tok_xdigit },
+        { "yesexpr", tok_yesexpr }
+    };
+
+    // binary search over the half-open range [first, last)
+    std::size_t first = 0;
+    std::size_t last  = sizeof keywords / sizeof *keywords;
+
+    while (first < last) {
+
+        const std::size_t mid = first + (last - first) / 2;
+
+        const int order = std::strcmp (name, keywords [mid].keyword);
+
+        if (order < 0)
+            last = mid;
+        else if (0 < order)
+            first = mid + 1;
+        else
+            return keywords [mid].id;
+    }
+
+    // not a keyword
+    return tok_ndef;
+}
+
+
+// replaces the current line with the next one from the file and
+// moves the scan position to its beginning
+void Scanner::
+read_line ()
+{
+    ScannerContext& ctx = *context_;
+
+    // the line is emptied up front so that it ends up holding just
+    // the newline appended below when nothing can be extracted
+    ctx.line_.clear ();
+    std::getline (ctx.file, ctx.line_);
+
+    // getline() drops the delimiter; the scanner expects every
+    // line to end in a newline
+    ctx.line_.append (1, '\n');
+
+    ctx.pos_ = ctx.line_.c_str ();
+
+    // bump the per-file line number and the scanner's line count
+    ++ctx.line;
+    ++nlines_;
+
+    assert (!ctx.line_.empty ());
+}
+
+
+// extracts and returns the next token from the current file
+//
+// the returned token holds its text (name), its kind (token), the
+// name of the file it came from and the line and column where it
+// starts; once the file has been read to its end and the last line
+// has been fully consumed a token of kind tok_end_tokens is returned
+//
+// statements that redefine the escape or the comment character
+// (<escape_char>, <comment_char>, escape_char, comment_char) update
+// the current context, consume the rest of the line and are returned
+// as a tok_nl token with an empty name
+//
+// syntax errors are reported by calling issue_diag() with E_SYNTAX;
+// the code below presumably relies on that call not returning
+Scanner::token_t Scanner::
+next_token ()
+{
+    assert (0 != context_);
+    assert (context_->file.is_open ());
+
+    // token
+    token_t next_tok;
+
+    next_tok.name   = "";
+    next_tok.token  = tok_ndef;
+    next_tok.line   = 0;
+    next_tok.column = 0;
+    next_tok.file   = 0;
+
+    while (true) {
+
+        // store the *current* file name
+        next_tok.file = context_->filename.c_str ();
+
+        // if we exhausted the current line, advance
+        if (   context_->line_.size ()
+            <= std::size_t (context_->pos_ - context_->line_.c_str ())) {
+
+            // report the end of the input only once the whole line has
+            // been consumed: reading a last line that has no terminating
+            // newline sets eofbit while its tokens are still pending
+            if (context_->file.eof ()) {
+                next_tok.token = tok_end_tokens;
+                return next_tok;
+            }
+
+            read_line ();
+        }
+
+        // line and column for the token start; they are set at each
+        // iteration; the finding of a token breaks and next_tok leaves
+        // this loop having the line/col info
+        next_tok.line   = context_->line;
+        next_tok.column = context_->pos_ - context_->line_.c_str ();
+
+        // plug in the pointer to current position
+        const char*& next = context_->pos_;
+
+        // a comment right after an escaped newline is skipped without
+        // producing a token (see the comment branch below); anything
+        // else ends the continuation
+        if (*next != context_->comment_char)
+            escaped_newline_ = false;
+
+        if (*next == '<') {
+            // beginning of a symbolic name or keyword
+            const char* tok_begin = next++;
+
+            for (; '>' != *next; ++next) {
+
+                // if has an escaped close angular, pass
+                if (*next == context_->escape_char) {
+
+                    // append symbol name up to but not including the escape
+                    next_tok.name.append (tok_begin, next - tok_begin);
+
+                    // advance the next pointer to skip the escape
+                    tok_begin = ++next;
+                }
+
+                // deliberately not an else branch: the character after
+                // an escape must be checked as well or the loop would
+                // step over the newline and past the end of the line
+                if ('\n' == *next) {
+                    // past the end of the line
+                    issue_diag (E_SYNTAX, true, &next_tok, 
+                                " unterminated symbolic name\n");
+                    break;
+                }
+            }
+
+            next_tok.name.append (tok_begin, ++next - tok_begin);
+
+            // check the name fetched so far
+            if (next_tok.name == "<code_set_name>") {
+                next_tok.token = tok_code_set_name;
+            }
+            else if (   next_tok.name == "<escape_char>"
+                     || next_tok.name == "<comment_char>") {
+
+                // eat away spaces
+                while (' ' == *next || '\t' == *next) {
+                    ++next;
+                }
+
+                // test for end of line
+                if (*next == '\n')
+                    issue_diag (E_SYNTAX, true, &next_tok, 
+                                "missing value for %s\n",
+                                next_tok.name.c_str ());
+
+                // store character
+                if (next_tok.name == "<escape_char>")
+                    context_->escape_char = *next;
+                else
+                    context_->comment_char = *next;
+
+                // adjust positions;
+                context_->pos_ =
+                    context_->line_.c_str () + context_->line_.size ();
+
+                // set token to a newline
+                next_tok.name = "";
+                next_tok.token = tok_nl;
+            }
+            else if (next_tok.name == "<mb_cur_max>") {
+                next_tok.token = tok_mb_cur_max;
+            }
+            else if (next_tok.name == "<mb_cur_min>") {
+                next_tok.token = tok_mb_cur_min;
+            }
+            else {
+                next_tok.token = tok_sym_name;
+            }
+            break;
+        }
+        else if (*next == ' ' || *next == '\t' || *next == ';') {
+            // ignore whitespace and separators
+            while (*next == ' ' || *next == '\t' || *next == ';') {
+                ++next;
+            }
+        }
+        else if (*next == '\n') {
+            ++next;
+            next_tok.token = tok_nl;
+            break;
+        }
+        else if (*next == context_->comment_char) {
+            // start of a comment - check as early as necessary
+            // adjust to end of line
+            context_->pos_ = context_->line_.c_str () + context_->line_.size ();
+
+            // a comment line that follows an escaped newline is
+            // skipped without producing a token
+            if (escaped_newline_)
+                continue;
+
+            next_tok.token = tok_nl;
+            next_tok.name = "\n";
+            break;
+        }
+        else if (*next == '(') {
+            // push open parenthesis
+            next_tok.name.push_back (*next++);
+
+            // start of a grouping
+            while (*next != ')') {
+
+                // the closing parenthesis must be on the same line; this
+                // is checked before each element, including the first, so
+                // that a '(' at the very end of a line is caught as well
+                if (*next == '\n')
+                    issue_diag (E_SYNTAX, true, &next_tok, 
+                                " unterminated grouping ");
+
+                // contains a symbolic name
+                if (*next == '<') {
+                    // push open angular parenthesis
+                    next_tok.name.push_back (*next++);
+
+                    while (*next != '\n') {
+                        // if has an escaped close angular, pass; an
+                        // escape right before the newline is copied as
+                        // an ordinary character so that the newline is
+                        // not stepped over
+                        if (   next [0] == context_->escape_char
+                            && next [1] != '\n') {
+                            next_tok.name.push_back (*next++);
+                            next_tok.name.push_back (*next++);
+                            continue;
+                        }
+
+                        // if we have reached the end of the sym name
+                        if (*next == '>') {
+                            next_tok.name.push_back (*next);
+                            break;
+                        }
+
+                        // still inside the sym name/keyword
+                        next_tok.name.push_back (*next++);
+                    }
+
+                    // check if we have gone past the end of the line
+                    if (*next == '\n')
+                        issue_diag (E_SYNTAX, true, &next_tok, 
+                                    " unterminated symbolic name");
+
+                    ++next;
+                }
+                else {
+                    // fetch the character
+                    next_tok.name.push_back (*next++);
+                }
+            }
+
+            next_tok.name.push_back (*next++);
+            next_tok.token = tok_grouping;
+            break;
+        } 
+        else if (*next == '.') {
+            // ellipsis (see ISO/IEC TR 14652)
+            int ellipsis_count = 0;
+            // start of an interval
+            while (*next == '.') {
+                next_tok.name.push_back (*next++);
+                ++ellipsis_count;
+            }
+
+            switch (ellipsis_count) {
+            case 2: {
+                // two dots followed by "(2).." make up a double
+                // increment ellipsis; the comparisons stop at the
+                // first mismatch so tmp never moves past the newline
+                const char* tmp = next;
+                if (*tmp++ == '(' && *tmp++ == '2' && *tmp++ == ')'
+                    && *tmp++ == '.' && *tmp++ == '.') {
+                    // double increment hexadecimal symbolic ellipsis
+                    next_tok.token = tok_dbl_ellipsis;
+                    next = tmp;
+                }
+                else {
+                    // hexadecimal symbolic ellipsis
+                    next_tok.token = tok_hex_ellipsis;
+                }
+                break;
+            }
+
+            case 3:
+                // absolute symbolic ellipsis
+                next_tok.token = tok_abs_ellipsis;
+                break;
+
+            case 4:
+                // decimal symbolic ellipsis
+                next_tok.token = tok_dec_ellipsis;
+                break;
+
+            default:
+                issue_diag (E_SYNTAX, true, &next_tok, "illegal ellipsis\n");
+            }
+            break;
+
+        } 
+        else if (*next == '\"') {
+
+            // start of a string
+            next_tok.name.push_back (*next++);
+            const char ec = context_->escape_char;
+
+            while (next[0] != '\n') {
+
+                // escaped newline; continue on the next line (next is
+                // a reference to the position that read_line() resets)
+                if (next [0] == ec && next [1] == '\n') {
+                    read_line ();
+                    continue;
+                }
+
+                // escaped quote
+                if (next[0] == ec) {
+                    next_tok.name.push_back (*next++);
+                    next_tok.name.push_back (*next++);
+                    continue;
+                }
+
+                if (next [0] == '\"') {
+                    next_tok.name.push_back (*next);
+                    break;
+                }
+
+                // still inside the string
+                next_tok.name.push_back (*next++);
+            }
+
+            // test for closure
+            if (*next == '\n')
+                issue_diag (E_SYNTAX, true, &next_tok, "unterminated string");
+            
+            ++next;
+            next_tok.token = tok_string;
+            break;
+
+        }
+        else if (*next == context_->escape_char) {
+            // start of an escape sequence
+            // escaped new line
+            if (next [1] == '\n') {
+                // adjust to end of line
+                context_->pos_ =
+                    context_->line_.c_str () + context_->line_.size ();
+
+                escaped_newline_ = true;
+                continue;
+            }
+
+            // or an escaped value: collect it up to the next separator
+            while (   *next != ' ' && *next != '\t'
+                   && *next != ';' && *next != '\n') {
+                next_tok.name.push_back (*next++);
+            }
+
+            // retrieve token based on value
+            next_tok.token = process_token (next_tok.name.c_str ());
+            break;
+        }
+        else {
+            // the rest of it
+            for (const char ec = context_->escape_char; ; ) {
+
+                // stop at esc-newline or at first "separator"
+                if (   (next [0] == ec && next [1] == '\n')
+                    || next [0] == ' '
+                    || next [0] == '\t' 
+                    || next [0] == '\n'
+                    || next [0] == ';') {
+                    // continuation of a line, separators
+                    break;
+                } 
+                
+                // fetch characters
+                next_tok.name.push_back (*next++);
+            }
+
+            // assert length of input
+            assert (next_tok.name.size ());
+
+            // classify the word: a keyword known to process_token()
+            // or tok_ndef
+            next_tok.token = process_token (next_tok.name.c_str ());
+            
+
+            // an extra bit of processing since we keep comment and escape
+            // characters in the scanner for a faster processing
+            if (   next_tok.token == tok_escape_char
+                || next_tok.token == tok_comment_char) {
+
+                // eat away spaces
+                while (' ' == *next || '\t' == *next) {
+                    ++next;
+                }
+
+                // test for end of line
+                if (*next == '\n')
+                    issue_diag (E_SYNTAX, true, &next_tok,
+                                "unterminated statement");
+
+                // store character
+                if (next_tok.token == tok_escape_char)
+                    context_->escape_char = next [0];
+                else
+                    context_->comment_char = next [0];
+
+                // adjust positions;
+                context_->pos_ =
+                    context_->line_.c_str () + context_->line_.size ();
+
+                // return the token
+                next_tok.name = "";
+                next_tok.token = tok_nl;
+            }
+
+            break;
+        }
+    }
+
+    ++ntokens_;
+    
+    return next_tok;
+}
+
+
+// converts an escaped numeric character value to a number
+//
+// esc    text starting with the current escape character followed by
+//        an octal constant (optionally introduced by 'o'), or by 'd'
+//        and a decimal constant, or by 'x' and a hexadecimal constant
+// pend   when non-null, *pend is set to the first character that is
+//        not part of the converted sequence; when null (and multi is
+//        false) any such trailing character is reported as an error
+// multi  when true, a run of consecutive escapes is converted, each
+//        contributing one byte to the result, first escape in the
+//        most significant position
+//
+// returns the converted value; problems are reported via issue_diag()
+// (E_SYNTAX for malformed input, E_INVAL for a byte greater than
+// UCHAR_MAX or a sequence too long to fit in unsigned long)
+unsigned long Scanner::
+convert_escape (const char  *esc,
+                const char **pend  /* = 0 */,
+                bool         multi /* = false */) const
+{
+    assert (0 != esc);
+
+    const char escape = escape_char ();
+
+    if (escape != *esc)
+        issue_diag (E_SYNTAX, true, 0,
+                    "expected the escape character ('%c'), got \"%s\"\n",
+                    escape, esc);
+
+    unsigned long value = 0;
+
+    for (const char *s = esc; ; ) {
+
+        // escaped characters are octal by default
+        const char *basename = "octal";
+        int         base     = 8;
+
+        // step over the escape character and look at what follows
+        switch (*++s) {
+        case 'd': ++s; base = 10; basename = "decimal"; break;
+        case 'x': ++s; base = 16; basename = "hexadecimal"; break;
+
+        case 'o': ++s;   // explicit octal prefix; fall through
+        case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7':
+            break;
+
+        default:
+            issue_diag (E_SYNTAX, true, 0,
+                        "one of { 'o', 'd', 'x' } expected following "
+                        "the escape character: %s\n", esc);
+        }
+
+        char *end = 0;
+
+        const unsigned long byte = std::strtoul (s, &end, base);
+
+        // first character not consumed by the conversion; kept in a
+        // local so that the code below never has to dereference pend,
+        // which is null unless the caller asked for the end position
+        const char* const stop = end;
+
+        if (pend)
+            *pend = stop;
+
+        // a caller that doesn't ask where the conversion stopped has no
+        // way of noticing trailing characters; diagnose them here
+        if (!multi && 0 == pend && *stop)
+            issue_diag (E_SYNTAX, true, 0,
+                        "%s constant expected: %s\n", basename, esc);
+
+        if (UCHAR_MAX < byte)
+            issue_diag (E_INVAL, true, 0,
+                        "%s byte value must be in the range [0, %d]: %s\n",
+                        basename, int (UCHAR_MAX), esc);
+
+        // the most significant byte must still be clear for the value
+        // to have room for one more byte
+        if (value >> (sizeof (unsigned long) - 1) * CHAR_BIT)
+            issue_diag (E_INVAL, true, 0, "integer overflow: %s\n", esc);
+
+        value = (value << CHAR_BIT) | byte;
+
+        // continue only in multi mode and only when the conversion
+        // stopped at another escape character
+        if (!multi || *stop != escape)
+            break;
+
+        s = stop;
+    }
+
+    return value;
+}