first commit
This commit is contained in:
610
extern/stdcxx/4.2.1/util/def.cpp
vendored
Normal file
610
extern/stdcxx/4.2.1/util/def.cpp
vendored
Normal file
@@ -0,0 +1,610 @@
|
||||
/***************************************************************************
|
||||
*
|
||||
* def.cpp
|
||||
*
|
||||
* $Id: def.cpp 522614 2007-03-26 20:25:09Z sebor $
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed
|
||||
* with this work for additional information regarding copyright
|
||||
* ownership. The ASF licenses this file to you under the Apache
|
||||
* License, Version 2.0 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* Copyright 2001-2006 Rogue Wave Software.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
// #ifndef _RWSTD_NO_PURE_C_HEADERS
|
||||
// # define _RWSTD_NO_PURE_C_HEADERS
|
||||
// #endif // _RWSTD_NO_PURE_C_HEADERS
|
||||
|
||||
// #ifndef _RWSTD_NO_DEPRECATED_C_HEADERS
|
||||
// # define _RWSTD_NO_DEPRECATED_C_HEADERS
|
||||
// #endif // _RWSTD_NO_DEPRECATED_C_HEADERS
|
||||
|
||||
#ifdef __DECCXX
|
||||
# undef __PURE_CNAME
|
||||
#endif // __DECCXX
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <clocale>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring> // for memset()
|
||||
|
||||
#include "aliases.h"
|
||||
#include "def.h"
|
||||
#include "diagnostic.h"
|
||||
#include "loc_exception.h"
|
||||
#include "path.h"
|
||||
|
||||
|
||||
#define UTF8_MAX_SIZE 6
|
||||
|
||||
|
||||
// convert_to_ext converts a wchar_t value with some encoding into
|
||||
// a narrow character string in the current locale's encoding
|
||||
std::string Def::convert_to_ext (wchar_t val)
|
||||
{
|
||||
rmb_cmap_iter it;
|
||||
if ((it = charmap_.get_rmb_cmap().find(val))
|
||||
!= charmap_.get_rmb_cmap().end()){
|
||||
return it->second;
|
||||
|
||||
}
|
||||
|
||||
issue_diag (E_CVT2EXT, true, 0,
|
||||
"unable to convert character %d to external "
|
||||
"representation\n", val);
|
||||
|
||||
return std::string("");
|
||||
}
|
||||
|
||||
|
||||
// convert the wchar_t value into a utf8 string
|
||||
std::string Def::utf8_encode (wchar_t wc)
|
||||
{
|
||||
unsigned int wc_int = _RWSTD_STATIC_CAST (unsigned int, wc);
|
||||
|
||||
std::string ret;
|
||||
std::size_t size = 0;
|
||||
char buf[UTF8_MAX_SIZE + 1];
|
||||
char* bufp = buf;
|
||||
|
||||
if (wc_int < 0x80)
|
||||
{
|
||||
size = 1;
|
||||
*bufp++ = wc_int;
|
||||
}
|
||||
else
|
||||
{
|
||||
int b;
|
||||
|
||||
for (b = 2; b < UTF8_MAX_SIZE; b++)
|
||||
if ((wc_int & (~(wchar_t)0 << (5 * b + 1))) == 0)
|
||||
break;
|
||||
size = b;
|
||||
|
||||
*bufp = (unsigned char) (~0xff >> b);
|
||||
--b;
|
||||
do
|
||||
{
|
||||
bufp[b] = 0x80 | (wc_int & 0x3f);
|
||||
wc_int >>= 6;
|
||||
}
|
||||
while (--b > 0);
|
||||
*bufp |= wc_int;
|
||||
}
|
||||
buf[size] = (char)0;
|
||||
for (unsigned int i = 0; i < size; i++)
|
||||
ret += buf[i];
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
void Def::copy_file (const std::string& name, const std::string& outname)
|
||||
{
|
||||
assert (name.size() > 0);
|
||||
assert (outname.size() > 0);
|
||||
|
||||
std::ifstream from (name.c_str(), std::ios::binary);
|
||||
if (!from) {
|
||||
issue_diag (E_OPENRD, true,
|
||||
&next, "unable to open locale database %s\n",
|
||||
name.c_str());
|
||||
}
|
||||
from.exceptions (std::ios::badbit);
|
||||
|
||||
std::ofstream to (outname.c_str(), std::ios::binary);
|
||||
if (!to) {
|
||||
issue_diag (E_OPENWR, true,
|
||||
&next, "unable to create locale database %s\n",
|
||||
outname.c_str());
|
||||
}
|
||||
to.exceptions (std::ios::failbit | std::ios::badbit);
|
||||
|
||||
// copy the file
|
||||
to << from.rdbuf ();
|
||||
}
|
||||
|
||||
|
||||
void Def::copy_category(int category, std::string name)
|
||||
{
|
||||
assert (name.size() > 0);
|
||||
|
||||
// create the name of the file to copy to and call copy_file
|
||||
std::string outname (output_name_);
|
||||
|
||||
makedir (outname.c_str ());
|
||||
|
||||
switch (category) {
|
||||
// append the category name to both 'name' and 'outname'
|
||||
// and call the copy_file routine
|
||||
// the xxx_written variable is set to true so that write_xxx
|
||||
// does not overwrite the file that is written here
|
||||
case LC_CTYPE:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_CTYPE";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_CTYPE";
|
||||
copy_file (name, outname);
|
||||
ctype_written_ = true;
|
||||
|
||||
break;
|
||||
case LC_COLLATE:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_COLLATE";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_COLLATE";
|
||||
copy_file(name, outname);
|
||||
collate_written_ = true;
|
||||
|
||||
break;
|
||||
case LC_MONETARY:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_MONETARY";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_MONETARY";
|
||||
copy_file(name, outname);
|
||||
mon_written_ = true;
|
||||
|
||||
break;
|
||||
|
||||
case LC_NUMERIC:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_NUMERIC";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_NUMERIC";
|
||||
copy_file(name, outname);
|
||||
num_written_ = true;
|
||||
|
||||
break;
|
||||
case LC_TIME:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_TIME";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_TIME";
|
||||
copy_file(name, outname);
|
||||
time_written_ = true;
|
||||
|
||||
break;
|
||||
|
||||
#ifdef LC_MESSAGES
|
||||
case LC_MESSAGES:
|
||||
(name += _RWSTD_PATH_SEP) += "LC_MESSAGES";
|
||||
(outname += _RWSTD_PATH_SEP) += "LC_MESSAGES";
|
||||
copy_file(name, outname);
|
||||
messages_written_ = true;
|
||||
break;
|
||||
#endif // LC_MESSAGES
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// strip a pair, which should be in the form '(<sym>,<sym2>)'
|
||||
void Def::strip_pair (const std::string &tok, std::string &sym,
|
||||
std::string &sym2)
|
||||
{
|
||||
std::size_t i = 0;
|
||||
|
||||
if(tok[i] == '(') {
|
||||
if(tok[++i] == '<')
|
||||
while (tok[i] != '>'){
|
||||
if (tok[i] == scanner_.escape_char ())
|
||||
i++;
|
||||
sym.push_back(tok[i++]);
|
||||
}
|
||||
// this push_back is safe because the while loop above ends when
|
||||
// tok[i] == '>'
|
||||
sym.push_back(tok[i++]);
|
||||
if (tok[i++] != ',')
|
||||
issue_diag (E_PAIR, true, &next,
|
||||
"invalid pair %s\n", tok.c_str());
|
||||
if (tok[i] == '<')
|
||||
while (tok[i] != '>'){
|
||||
if (tok[i] == scanner_.escape_char ())
|
||||
sym2.push_back(tok[i++]);
|
||||
if ('\0' != tok[i])
|
||||
sym2.push_back(tok[i++]);
|
||||
else
|
||||
issue_diag (E_PAIR, true, &next,
|
||||
"invalid pair %s\n", tok.c_str());
|
||||
}
|
||||
|
||||
// this push_back is safe because the while loop above ends when
|
||||
// tok[i] == '>'
|
||||
sym2.push_back(tok[i++]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// converts str, which is a string in the following format
|
||||
// "[<sym_name>][char]" including the quotes to a string of characters
|
||||
// str is not a const reference because if the string spans multiple lines
|
||||
// str is modified
|
||||
std::string Def::convert_string (const std::string &str1)
|
||||
{
|
||||
assert (str1[0] == '\"');
|
||||
|
||||
std::string ret;
|
||||
|
||||
std::string sym;
|
||||
// the index starts at 1 so that we ignore the initial '"'
|
||||
int idx = 1;
|
||||
|
||||
const char* str = str1.c_str();
|
||||
while (str[idx] != '\"') {
|
||||
sym.clear();
|
||||
// if we reach the null-terminator before we see an end-quote
|
||||
// then we must have a multi-line string, so get the next token
|
||||
if (str[idx] == '\0') {
|
||||
if((next = scanner_.next_token()).token == Scanner::tok_string)
|
||||
break;
|
||||
str = next.name.c_str();
|
||||
idx = 0;
|
||||
}
|
||||
|
||||
// '<' marks the beginning of a symbolic name
|
||||
// construct the name and look up its value in the cmap
|
||||
if (str[idx] == '<') {
|
||||
while (str [idx] && str [idx] != '>') {
|
||||
if (str[idx] == scanner_.escape_char ())
|
||||
idx++;
|
||||
sym += str[idx++];
|
||||
}
|
||||
|
||||
// this is safe because the while loop ended with *str == '>'
|
||||
if (str [idx])
|
||||
sym += str [idx++];
|
||||
|
||||
w_cmap_iter w_pos = charmap_.get_w_cmap().find (sym);
|
||||
if (w_pos != charmap_.get_w_cmap().end()) {
|
||||
ret += convert_to_ext(w_pos->second);
|
||||
}
|
||||
else {
|
||||
return std::string();
|
||||
}
|
||||
}
|
||||
|
||||
// the definition file contains a sting with non-symbol names.
|
||||
// process each character as it's actual character value.
|
||||
// Locale definitions that use this may not be portable.
|
||||
else {
|
||||
ret += (char)str[idx++];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
|
||||
}
|
||||
|
||||
#ifndef _RWSTD_NO_WCHAR_T
|
||||
// converts a collating element definition to an array of wide characters
|
||||
// (the wide characters the collating element is composed of).
|
||||
|
||||
// this overload deals with collating elements defined through
|
||||
// a sequence of symbolic names, NOT enclosed within quotes.
|
||||
std::wstring
|
||||
Def::convert_wstring (const StringVector& sym_array)
|
||||
{
|
||||
std::wstring ret;
|
||||
StringVector::const_iterator it = sym_array.begin ();
|
||||
while (it != sym_array.end ()) {
|
||||
// lookup the symbol we just constructed
|
||||
w_cmap_iter w_pos = charmap_.get_w_cmap().find (*it);
|
||||
if (w_pos != charmap_.get_w_cmap().end()) {
|
||||
ret += w_pos->second;
|
||||
it++;
|
||||
}
|
||||
else {
|
||||
// we return an empty string if we couldn't find any character
|
||||
// in the character map
|
||||
ret.clear();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// this overload deals with collating elements defined through
|
||||
// a sequence of characters or symbolic names, enclosed within quotes.
|
||||
std::wstring
|
||||
Def::convert_wstring (const token_t& t)
|
||||
{
|
||||
std::wstring ret;
|
||||
std::string sym;
|
||||
|
||||
std::string str1 (t.name);
|
||||
|
||||
int idx = 0;
|
||||
char term = 0;
|
||||
const char* str = str1.c_str();
|
||||
|
||||
// skip first character if quote
|
||||
if (str[idx] == '\"') {
|
||||
term = '\"', idx++;
|
||||
}
|
||||
|
||||
while (str[idx] != term) {
|
||||
sym.clear();
|
||||
|
||||
// '<' marks the beginning of a symbolic name
|
||||
// construct the name and look up its value in the cmap
|
||||
if (str[idx] == '<') {
|
||||
while (str[idx] != '>') {
|
||||
if (str[idx] == scanner_.escape_char ()) {
|
||||
// sym += str[idx++];
|
||||
idx++;
|
||||
}
|
||||
|
||||
if ('\0' != str[idx])
|
||||
sym += str[idx++];
|
||||
else
|
||||
issue_diag (E_SYMEND, true, &t,
|
||||
"end of symbolic name not found\n");
|
||||
}
|
||||
|
||||
// this is safe because the while loop ended with *str == '>'
|
||||
sym += str[idx++];
|
||||
|
||||
// lookup the symbol we just constructed
|
||||
w_cmap_iter w_pos = charmap_.get_w_cmap().find (sym);
|
||||
if (w_pos != charmap_.get_w_cmap().end()) {
|
||||
ret += w_pos->second;
|
||||
}
|
||||
else {
|
||||
// if we can't find a symbol then return an empty string,
|
||||
// most likely this will happen if inside a collating-element
|
||||
// the user uses a character that is not in the current
|
||||
// codeset, in this case the collating element will be ignored
|
||||
ret.clear();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
// the definition file contains a string with non-symbol names.
|
||||
// process each character as it's actual character value.
|
||||
// Locale definitions that use this may not be portable.
|
||||
else
|
||||
ret += (wchar_t)str[idx++];
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#endif // _RWSTD_NO_WCHAR_T
|
||||
|
||||
|
||||
// automatically fill any categories that depend on other categories
|
||||
void Def::auto_fill ()
|
||||
{
|
||||
|
||||
mask_iter mask_pos;
|
||||
|
||||
for (std::size_t i = 0; i <= UCHAR_MAX; i++) {
|
||||
if ( ctype_out_.mask_tab[i] & std::ctype_base::upper
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::lower
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::alpha
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::digit
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::xdigit
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::punct)
|
||||
|
||||
ctype_out_.mask_tab[i] |= std::ctype_base::print;
|
||||
|
||||
if ( ctype_out_.mask_tab[i] & std::ctype_base::upper
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::lower)
|
||||
|
||||
ctype_out_.mask_tab[i] |= std::ctype_base::alpha;
|
||||
|
||||
if ( ctype_out_.mask_tab[i] & std::ctype_base::upper
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::lower
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::alpha
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::digit
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::xdigit
|
||||
|| ctype_out_.mask_tab[i] & std::ctype_base::punct)
|
||||
|
||||
ctype_out_.mask_tab[i] |= std::ctype_base::graph;
|
||||
}
|
||||
|
||||
for (mask_pos = mask_.begin(); mask_pos != mask_.end(); mask_pos++) {
|
||||
// all lower, alpha, digit, xdigit, and punct, and space
|
||||
// characters are automatically print
|
||||
|
||||
if ( mask_pos->second & std::ctype_base::upper
|
||||
|| mask_pos->second & std::ctype_base::lower
|
||||
|| mask_pos->second & std::ctype_base::alpha
|
||||
|| mask_pos->second & std::ctype_base::digit
|
||||
|| mask_pos->second & std::ctype_base::xdigit
|
||||
|| mask_pos->second & std::ctype_base::punct)
|
||||
// || mask_pos->second & std::ctype_base::space)
|
||||
|
||||
mask_pos->second |= std::ctype_base::print;
|
||||
|
||||
// all upper and lower characters are alpha
|
||||
if ( mask_pos->second & std::ctype_base::upper
|
||||
|| mask_pos->second & std::ctype_base::lower)
|
||||
|
||||
mask_pos->second |= std::ctype_base::alpha;
|
||||
|
||||
// all upper, lower, alpha, digit, xdigit, and punct characters
|
||||
// are graph characters
|
||||
if ( mask_pos->second & std::ctype_base::upper
|
||||
|| mask_pos->second & std::ctype_base::lower
|
||||
|| mask_pos->second & std::ctype_base::alpha
|
||||
|| mask_pos->second & std::ctype_base::digit
|
||||
|| mask_pos->second & std::ctype_base::xdigit
|
||||
|| mask_pos->second & std::ctype_base::punct)
|
||||
|
||||
mask_pos->second |= std::ctype_base::graph;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Def::process_input ()
|
||||
{
|
||||
while ((next = scanner_.next_token ()).token != Scanner::tok_end_tokens) {
|
||||
|
||||
switch (next.token) {
|
||||
|
||||
case Scanner::tok_comment:
|
||||
scanner_.ignore_line ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_ctype:
|
||||
process_ctype ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_collate:
|
||||
process_collate ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_monetary:
|
||||
process_monetary ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_numeric:
|
||||
process_numeric ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_time:
|
||||
process_time ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_messages:
|
||||
process_messages ();
|
||||
break;
|
||||
|
||||
case Scanner::tok_nl:
|
||||
break;
|
||||
|
||||
default:
|
||||
scanner_.ignore_line ();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto_fill ();
|
||||
}
|
||||
|
||||
|
||||
// Constructs the locale definition processor.
//
// filename     name of the locale definition file; it is opened with
//              the scanner at the end of construction
// out_name     stored in output_name_
// char_map     character map stored in charmap_
// no_position  stored in no_position_
//
// All "written", "definition found" and warning flags start out false.
Def::Def (const char* filename, const char* out_name, Charmap& char_map,
          bool no_position)
    : warnings_occurred_ (false),
      scan_ahead_ (false),
      next_offset_ (0),
      output_name_ (out_name),
      charmap_ (char_map),
      ctype_written_ (false),
      codecvt_written_ (false),
      collate_written_ (false),
      time_written_ (false),
      num_written_ (false),
      mon_written_ (false),
      messages_written_ (false),
      ctype_def_found_ (false),
      collate_def_found_ (false),
      time_def_found_ (false),
      num_def_found_ (false),
      mon_def_found_ (false),
      messages_def_found_ (false),
      undefined_keyword_found_ (false),
      no_position_ (no_position)
{
    // start with zeroed-out ctype and time output structures
    std::memset (&ctype_out_, 0, sizeof ctype_out_);
    std::memset (&time_out_, 0, sizeof time_out_);

    // invalidate the monetary format characters by setting each of them
    // to CHAR_MAX, as specified by the C function localeconv(); both
    // elements of every array are invalidated (the second one is the
    // int'l format according to the original comments)
    for (int fmt = 0; fmt != 2; ++fmt) {
        mon_out_.frac_digits    [fmt] = CHAR_MAX;
        mon_out_.p_cs_precedes  [fmt] = CHAR_MAX;
        mon_out_.p_sep_by_space [fmt] = CHAR_MAX;
        mon_out_.n_cs_precedes  [fmt] = CHAR_MAX;
        mon_out_.n_sep_by_space [fmt] = CHAR_MAX;
        mon_out_.p_sign_posn    [fmt] = CHAR_MAX;
        mon_out_.n_sign_posn    [fmt] = CHAR_MAX;
    }

    // the monetary and numeric groupings start out with CHAR_MAX
    mon_st_.mon_grouping += CHAR_MAX;
    num_st_.grouping     += CHAR_MAX;

    // collation defaults
    collate_out_.largest_ce     = 1;
    collate_out_.longest_weight = 1;
    collate_out_.num_wchars     = 0;
    std::memset (collate_out_.weight_type, 0,
                 sizeof collate_out_.weight_type);

    // no category has an extension yet
    ctype_out_.ctype_ext_off     = 0;
    num_out_.numeric_ext_off     = 0;
    collate_out_.collate_ext_off = 0;
    mon_out_.monetary_ext_off    = 0;
    time_out_.time_ext_off       = 0;

    // open the locale definition file for scanning
    scanner_.open (filename);
}
|
||||
|
||||
|
||||
// Releases the weight array held by every entry of the collation map.
Def::~Def ()
{
    for (coll_map_iter entry = coll_map_.begin ();
         entry != coll_map_.end (); ++entry)
        delete[] entry->second.weights;
}
|
||||
Reference in New Issue
Block a user