first commit

2025-06-07 11:34:38 -04:00
commit 0eb2d7c07d
4708 changed files with 1500614 additions and 0 deletions
--- a/extern/stdcxx/4.2.1/util/def.cpp
+++ b/extern/stdcxx/4.2.1/util/def.cpp
@@ -0,0 +1,610 @@
+/***************************************************************************
+ *
+ * def.cpp
+ *
+ * $Id: def.cpp 522614 2007-03-26 20:25:09Z sebor $
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
+ * 
+ **************************************************************************/
+
+// #ifndef _RWSTD_NO_PURE_C_HEADERS
+// #  define _RWSTD_NO_PURE_C_HEADERS
+// #endif   // _RWSTD_NO_PURE_C_HEADERS
+
+// #ifndef _RWSTD_NO_DEPRECATED_C_HEADERS
+// #  define _RWSTD_NO_DEPRECATED_C_HEADERS
+// #endif   // _RWSTD_NO_DEPRECATED_C_HEADERS
+
+#ifdef __DECCXX
+#  undef __PURE_CNAME
+#endif   // __DECCXX
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <locale>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <cassert>
+#include <cctype>
+#include <cerrno>
+#include <climits>
+#include <clocale>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>   // for memset()
+
+#include "aliases.h"
+#include "def.h"
+#include "diagnostic.h"
+#include "loc_exception.h"
+#include "path.h"
+
+
+#define UTF8_MAX_SIZE 6
+
+
+// convert_to_ext maps the wide character val to the narrow character
+// string stored for it in the map returned by charmap_.get_rmb_cmap()
+//
+// when val has no entry in that map an E_CVT2EXT diagnostic is issued
+// and, should issue_diag() return, the result is the empty string
+std::string Def::convert_to_ext (wchar_t val)
+{
+    const rmb_cmap_iter pos (charmap_.get_rmb_cmap ().find (val));
+
+    if (pos == charmap_.get_rmb_cmap ().end ()) {
+        issue_diag (E_CVT2EXT, true, 0,
+                    "unable to convert character %d to external "
+                    "representation\n", val);
+
+        return std::string ();
+    }
+
+    return pos->second;
+}
+
+
+// encodes the wide character wc as a UTF-8 byte sequence
+//
+// values below 0x80 are returned as a single byte; larger values use
+// the shortest multibyte form that holds all their bits, up to
+// UTF8_MAX_SIZE bytes: a lead byte whose high-order bits are set, one
+// for each byte of the sequence, followed by continuation bytes of
+// the form 10xxxxxx, each of which carries 6 bits of the value
+std::string Def::utf8_encode (wchar_t wc)
+{
+    unsigned int bits = _RWSTD_STATIC_CAST (unsigned int, wc);
+
+    // 7-bit values are their own encoding
+    if (bits < 0x80)
+        return std::string (1, _RWSTD_STATIC_CAST (char, bits));
+
+    // a sequence of N bytes has room for 5 * N + 1 bits: find the
+    // smallest N that leaves no bit of the value above that limit
+    // the mask is built from an unsigned constant because shifting
+    // a negative value to the left has undefined behavior
+    std::size_t nbytes = 2;
+    while (   nbytes < UTF8_MAX_SIZE
+           && (bits & (~0U << (5 * nbytes + 1))) != 0)
+        ++nbytes;
+
+    char buf [UTF8_MAX_SIZE];
+
+    // fill in the continuation bytes, from the last one to the second,
+    // consuming the 6 low-order bits of the value each time
+    for (std::size_t i = nbytes - 1; i != 0; --i) {
+        buf [i] = _RWSTD_STATIC_CAST (char, 0x80 | (bits & 0x3f));
+        bits >>= 6;
+    }
+
+    // lead byte: nbytes one bits followed by what is left of the value
+    buf [0] = _RWSTD_STATIC_CAST (char, 0xff & ((0xff00U >> nbytes) | bits));
+
+    return std::string (buf, nbytes);
+}
+
+// copies the contents of the file called name to the file called outname
+//
+// both files are opened in binary mode; a source that cannot be opened
+// is reported as E_OPENRD and a destination that cannot be created as
+// E_OPENWR, in both cases passing the address of next to issue_diag()
+// NOTE: the destination stream throws on failbit, and inserting a stream
+// buffer that yields no characters sets failbit, so copying an empty
+// source file results in an exception
+void Def::copy_file (const std::string& name, const std::string& outname)
+{
+    assert (!name.empty ());
+    assert (!outname.empty ());
+
+    std::ifstream src (name.c_str (), std::ios::binary);
+    if (src.fail ()) {
+        issue_diag (E_OPENRD, true,
+                    &next, "unable to open locale database %s\n",
+                    name.c_str ());
+    }
+    // report unrecoverable read errors by throwing
+    src.exceptions (std::ios::badbit);
+
+    std::ofstream dst (outname.c_str (), std::ios::binary);
+    if (dst.fail ()) {
+        issue_diag (E_OPENWR, true,
+                    &next, "unable to create locale database %s\n",
+                    outname.c_str ());
+    }
+    // report any write failure by throwing
+    dst.exceptions (std::ios::failbit | std::ios::badbit);
+
+    // transfer the whole source through its stream buffer
+    dst << src.rdbuf ();
+}
+
+
+// copies the database file of the given locale category from the
+// directory name to the output directory output_name_
+//
+// the output directory is created first; the file name of the category
+// (e.g., "LC_CTYPE") is then appended to both paths and the file is
+// copied with copy_file()
+// the xxx_written_ flag of the category is set afterwards so that
+// write_xxx does not overwrite the file that is written here
+// categories other than the ones listed below are ignored
+void Def::copy_category(int category, std::string name)
+{
+    assert (!name.empty ());
+
+    std::string outname (output_name_);
+
+    makedir (outname.c_str ());
+
+    // name of the file that holds the category, 0 if there is none
+    const char* fname = 0;
+
+    switch (category) {
+    case LC_CTYPE:    fname = "LC_CTYPE";    break;
+    case LC_COLLATE:  fname = "LC_COLLATE";  break;
+    case LC_MONETARY: fname = "LC_MONETARY"; break;
+    case LC_NUMERIC:  fname = "LC_NUMERIC";  break;
+    case LC_TIME:     fname = "LC_TIME";     break;
+#ifdef LC_MESSAGES
+    case LC_MESSAGES: fname = "LC_MESSAGES"; break;
+#endif   // LC_MESSAGES
+    default:          break;
+    }
+
+    if (0 == fname)
+        return;
+
+    (name += _RWSTD_PATH_SEP) += fname;
+    (outname += _RWSTD_PATH_SEP) += fname;
+    copy_file (name, outname);
+
+    // the flag is set only after the copy has been made
+    switch (category) {
+    case LC_CTYPE:    ctype_written_    = true; break;
+    case LC_COLLATE:  collate_written_  = true; break;
+    case LC_MONETARY: mon_written_      = true; break;
+    case LC_NUMERIC:  num_written_      = true; break;
+    case LC_TIME:     time_written_     = true; break;
+#ifdef LC_MESSAGES
+    case LC_MESSAGES: messages_written_ = true; break;
+#endif   // LC_MESSAGES
+    default:          break;
+    }
+}
+
+
+// splits tok, a pair of the form '(<sym>,<sym2>)', into its two halves
+//
+// the characters of the first element are appended to sym and those
+// of the second to sym2, both including the enclosing '<' and '>'
+// tok is left alone, and nothing is appended, unless it starts with '('
+// an element that does not start with '<' contributes just its first
+// character
+// a missing ',' and a symbolic name that is not closed by '>' before
+// the end of tok are reported as E_PAIR; parsing then stops so that
+// tok is never read past its end even if issue_diag() returns
+// NOTE(review): the escape character is dropped from sym but kept
+// in sym2 -- confirm whether the asymmetry is intended
+void Def::strip_pair (const std::string &tok, std::string &sym,
+                      std::string &sym2)
+{
+    std::size_t i = 0;
+
+    if (tok [i] != '(')
+        return;
+
+    // first element: the index is only advanced past characters
+    // other than NUL, which keeps it within [0, tok.size ()]
+    if (tok [++i] == '<') {
+        while (tok [i] != '>') {
+            // skip the escape character, keep what follows it
+            if ('\0' != tok [i] && tok [i] == scanner_.escape_char ())
+                ++i;
+
+            if ('\0' == tok [i]) {
+                issue_diag (E_PAIR, true, &next,
+                            "invalid pair %s\n", tok.c_str ());
+                return;
+            }
+
+            sym.push_back (tok [i++]);
+        }
+    }
+
+    // the closing '>' (or the only character of a non-symbolic element)
+    sym.push_back (tok [i]);
+
+    // the elements must be separated by a comma
+    if ('\0' == tok [i] || tok [++i] != ',') {
+        issue_diag (E_PAIR, true, &next,
+                    "invalid pair %s\n", tok.c_str ());
+        return;
+    }
+    ++i;
+
+    // second element
+    if (tok [i] == '<') {
+        while (tok [i] != '>') {
+            // keep the escape character as well as what follows it
+            if ('\0' != tok [i] && tok [i] == scanner_.escape_char ())
+                sym2.push_back (tok [i++]);
+
+            if ('\0' == tok [i]) {
+                issue_diag (E_PAIR, true, &next,
+                            "invalid pair %s\n", tok.c_str ());
+                return;
+            }
+
+            sym2.push_back (tok [i++]);
+        }
+    }
+
+    // the closing '>' (or the only character of a non-symbolic element)
+    sym2.push_back (tok [i]);
+}
+
+// converts str1, a string of the form "[<sym_name>][char]..." including
+// the quotes, to a narrow character string
+//
+// each symbolic name is looked up in the wide character map and its
+// value converted with convert_to_ext(); every other character is
+// copied unchanged
+// when the end of the text is reached before the closing quote the
+// string is taken to span multiple lines and the next token is read
+// from the scanner into next
+// returns the empty string if a symbolic name is not in the map
+std::string Def::convert_string (const std::string &str1)
+{
+    assert (str1[0] == '\"');
+
+    std::string ret;
+    std::string sym;
+
+    // the index starts at 1 so that we ignore the initial '"'
+    int idx = 1;
+
+    const char* str = str1.c_str ();
+
+    while (str [idx] != '\"') {
+        sym.clear ();
+
+        // if we reach the null-terminator before we see an end-quote
+        // then we must have a multi-line string, so get the next token
+        // NOTE(review): the loop ends when that token IS a tok_string,
+        // otherwise its text is processed -- confirm against Scanner
+        if (str [idx] == '\0') {
+            next = scanner_.next_token ();
+            if (next.token == Scanner::tok_string)
+                break;
+            str = next.name.c_str ();
+            idx = 0;
+        }
+
+        if (str [idx] != '<') {
+            // the definition file contains a string with non-symbol
+            // names: process each character as its actual character
+            // value (locale definitions that use this may not be portable)
+            // NOTE(review): a continuation token with empty text is not
+            // handled and makes the index step past its terminator
+            ret += str [idx++];
+            continue;
+        }
+
+        // '<' marks the beginning of a symbolic name
+        // construct the name and look up its value in the cmap
+        while (str [idx] && str [idx] != '>') {
+            if (str [idx] == scanner_.escape_char ()) {
+                // drop the escape character but do not step over
+                // the terminator when nothing follows it
+                ++idx;
+                if ('\0' == str [idx])
+                    break;
+            }
+            sym += str [idx++];
+        }
+
+        // append the closing '>' unless the text ended without one
+        if (str [idx])
+            sym += str [idx++];
+
+        const w_cmap_iter w_pos (charmap_.get_w_cmap ().find (sym));
+        if (w_pos == charmap_.get_w_cmap ().end ())
+            return std::string ();
+
+        ret += convert_to_ext (w_pos->second);
+    }
+
+    return ret;
+}
+
+#ifndef _RWSTD_NO_WCHAR_T
+// converts a collating element definition to an array of wide characters
+// (the wide characters the collating element is composed of).
+
+// this overload handles collating elements given as a sequence of
+// symbolic names that is NOT enclosed within quotes: the value of
+// each name in sym_array is looked up in the wide character map and
+// appended to the result
+// returns the empty string as soon as a name is not found in the map
+std::wstring 
+Def::convert_wstring (const StringVector& sym_array)
+{
+    std::wstring ret;
+
+    for (StringVector::const_iterator it = sym_array.begin ();
+         it != sym_array.end (); ++it) {
+
+        const w_cmap_iter w_pos (charmap_.get_w_cmap ().find (*it));
+
+        // give up on the whole element if any of its characters
+        // is missing from the character map
+        if (w_pos == charmap_.get_w_cmap ().end ())
+            return std::wstring ();
+
+        ret += w_pos->second;
+    }
+
+    return ret;
+}
+
+// this overload deals with collating elements defined through
+// a sequence of characters or symbolic names, enclosed within quotes.
+//
+// the text of the token t is scanned up to the closing quote or, when
+// it does not start with a quote or the closing quote is missing, up
+// to its end; the value of each symbolic name is looked up in the wide
+// character map, every other character is widened as is
+// returns the empty string if a symbolic name is not in the map or
+// is not terminated by '>' (the latter is also reported as E_SYMEND)
+std::wstring 
+Def::convert_wstring (const token_t& t)
+{
+    std::wstring ret;
+    std::string  sym;
+
+    const std::string &text = t.name;
+    const char* const  str  = text.c_str ();
+
+    int  idx  = 0;
+    char term = '\0';
+
+    // skip first character if quote
+    if (str [idx] == '\"') {
+        term = '\"';
+        ++idx;
+    }
+
+    // the test for NUL prevents reading past the end of the text
+    // when the closing quote is missing
+    while (str [idx] != term && str [idx] != '\0') {
+
+        if (str [idx] != '<') {
+            // the definition file contains a string with non-symbol
+            // names: process each character as its actual character
+            // value (locale definitions that use this may not be portable)
+            // NOTE(review): where char is signed, bytes above 0x7f are
+            // negative before they are widened -- confirm this is intended
+            ret += (wchar_t)str [idx++];
+            continue;
+        }
+
+        // '<' marks the beginning of a symbolic name
+        // construct the name and look up its value in the cmap
+        sym.clear ();
+
+        while (str [idx] != '>') {
+            // drop the escape character, keep what follows it
+            if ('\0' != str [idx] && str [idx] == scanner_.escape_char ())
+                ++idx;
+
+            if ('\0' == str [idx]) {
+                issue_diag (E_SYMEND, true, &t,
+                            "end of symbolic name not found\n");
+
+                // only reached if issue_diag() returns: give up rather
+                // than loop on the terminator forever
+                return std::wstring ();
+            }
+
+            sym += str [idx++];
+        }
+
+        // the loop above ended with str [idx] == '>'
+        sym += str [idx++];
+
+        // lookup the symbol we just constructed
+        const w_cmap_iter w_pos (charmap_.get_w_cmap ().find (sym));
+
+        // if we can't find a symbol then return an empty string,
+        // most likely this will happen if inside a collating-element
+        // the user uses a character that is not in the current
+        // codeset, in this case the collating element will be ignored
+        if (w_pos == charmap_.get_w_cmap ().end ())
+            return std::wstring ();
+
+        ret += w_pos->second;
+    }
+
+    return ret;
+}
+
+#endif  // _RWSTD_NO_WCHAR_T
+
+
+// sets in the character mask m the bits that follow from the bits
+// it already has, in this order:
+//   print if any of upper, lower, alpha, digit, xdigit, punct is set
+//   alpha if upper or lower is set
+//   graph if any of upper, lower, alpha, digit, xdigit, punct is set
+template <class MaskT>
+static void fill_implied_bits (MaskT &m)
+{
+    const int classified =   std::ctype_base::upper
+                           | std::ctype_base::lower
+                           | std::ctype_base::alpha
+                           | std::ctype_base::digit
+                           | std::ctype_base::xdigit
+                           | std::ctype_base::punct;
+
+    if (m & classified)
+        m |= std::ctype_base::print;
+
+    if (m & (std::ctype_base::upper | std::ctype_base::lower))
+        m |= std::ctype_base::alpha;
+
+    if (m & classified)
+        m |= std::ctype_base::graph;
+}
+
+
+// automatically fill any categories that depend on other categories
+//
+// the same rules are applied to each of the UCHAR_MAX + 1 entries
+// of ctype_out_.mask_tab and to the mask of every element of mask_
+void Def::auto_fill ()
+{
+    for (std::size_t i = 0; i <= UCHAR_MAX; ++i)
+        fill_implied_bits (ctype_out_.mask_tab [i]);
+
+    for (mask_iter it = mask_.begin (); it != mask_.end (); ++it)
+        fill_implied_bits (it->second);
+}
+
+
+// reads tokens from the scanner into next until tok_end_tokens is seen,
+// passing control to the process_xxx function of each category keyword
+//
+// newline tokens are skipped; for comments and for any token that is
+// not handled here the rest of the line is ignored
+// auto_fill() is called once all of the input has been consumed
+void Def::process_input ()
+{
+    for (; ; ) {
+
+        next = scanner_.next_token ();
+
+        if (next.token == Scanner::tok_end_tokens)
+            break;
+
+        switch (next.token) {
+
+        case Scanner::tok_ctype:    process_ctype ();    break;
+        case Scanner::tok_collate:  process_collate ();  break;
+        case Scanner::tok_monetary: process_monetary (); break;
+        case Scanner::tok_numeric:  process_numeric ();  break;
+        case Scanner::tok_time:     process_time ();     break;
+        case Scanner::tok_messages: process_messages (); break;
+
+        case Scanner::tok_nl:
+            // nothing to do
+            break;
+
+        case Scanner::tok_comment:
+        default:
+            // nothing of interest on this line
+            scanner_.ignore_line ();
+            break;
+        }
+    }
+
+    auto_fill ();
+}
+
+
+// constructs a Def that reads the locale definition file filename
+//
+// out_name is stored in output_name_, char_map in charmap_, and
+// no_position in no_position_; every xxx_written_ and xxx_def_found_
+// flag starts out false
+// the output structures are set to their initial values and the
+// scanner is opened on filename; no input is processed here
+Def::Def (const char* filename, const char* out_name, Charmap& char_map,
+          bool no_position)
+    : warnings_occurred_ (false),
+      scan_ahead_ (false),
+      next_offset_ (0),
+      output_name_ (out_name),
+      charmap_ (char_map),
+      ctype_written_ (false),
+      codecvt_written_ (false),
+      collate_written_ (false),
+      time_written_ (false),
+      num_written_ (false),
+      mon_written_ (false),
+      messages_written_ (false),
+      ctype_def_found_ (false),
+      collate_def_found_ (false),
+      time_def_found_ (false),
+      num_def_found_ (false),
+      mon_def_found_ (false),
+      messages_def_found_ (false),
+      undefined_keyword_found_ (false),
+      no_position_ (no_position)
+{
+    // start out with all-zero ctype and time output objects
+    std::memset (&ctype_out_, 0, sizeof ctype_out_);
+    std::memset (&time_out_, 0, sizeof time_out_);
+
+    // invalidate format characters by setting each to CHAR_MAX
+    // as specified by the C function localeconv()
+    // element 1 of each array holds the int'l format
+    for (int i = 0; i != 2; ++i) {
+        mon_out_.frac_digits    [i] = CHAR_MAX;
+        mon_out_.p_cs_precedes  [i] = CHAR_MAX;
+        mon_out_.p_sep_by_space [i] = CHAR_MAX;
+        mon_out_.n_cs_precedes  [i] = CHAR_MAX;
+        mon_out_.n_sep_by_space [i] = CHAR_MAX;
+        mon_out_.p_sign_posn    [i] = CHAR_MAX;
+        mon_out_.n_sign_posn    [i] = CHAR_MAX;
+    }
+
+    // the groupings start out as the single character CHAR_MAX
+    mon_st_.mon_grouping += CHAR_MAX;
+    num_st_.grouping     += CHAR_MAX;
+
+    collate_out_.largest_ce     = 1;
+    collate_out_.longest_weight = 1;
+    collate_out_.num_wchars     = 0;
+    std::memset (collate_out_.weight_type, 0,
+                 sizeof collate_out_.weight_type);
+
+    // initialize all extensions to 0
+    ctype_out_.ctype_ext_off     = 0;
+    num_out_.numeric_ext_off     = 0;
+    collate_out_.collate_ext_off = 0;
+    mon_out_.monetary_ext_off    = 0;
+    time_out_.time_ext_off       = 0;
+
+    // get the scanner ready to read the definition file
+    scanner_.open (filename);
+}
+
+
+// releases the weights array held by every element of coll_map_
+// (the arrays are freed with delete[], so they are presumably
+// allocated with new[] where the elements are created)
+Def::~Def ()
+{
+    for (coll_map_iter it = coll_map_.begin ();
+         it != coll_map_.end (); ++it)
+        delete[] it->second.weights;
+}