first commit
This commit is contained in:
264
extern/stdcxx/4.2.1/util/charmap.h
vendored
Normal file
264
extern/stdcxx/4.2.1/util/charmap.h
vendored
Normal file
@@ -0,0 +1,264 @@
|
||||
/***************************************************************************
|
||||
*
|
||||
* charmap.h
|
||||
*
|
||||
* $Id: charmap.h 580483 2007-09-28 20:55:52Z sebor $
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed
|
||||
* with this work for additional information regarding copyright
|
||||
* ownership. The ASF licenses this file to you under the Apache
|
||||
* License, Version 2.0 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* Copyright 2001-2007 Rogue Wave Software, Inc.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#ifndef _RWSTD_CHARMAP_H_INCLUDED
|
||||
#define _RWSTD_CHARMAP_H_INCLUDED
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#ifndef _RWSTD_NO_ICONV
|
||||
# include <iconv.h>
|
||||
#endif // _RWSTD_NO_ICONV
|
||||
|
||||
|
||||
#include "scanner.h"
|
||||
|
||||
class Charmap
|
||||
{
|
||||
public:
|
||||
static const char* const portable_charset [];
|
||||
|
||||
Charmap(const char* /*corresponding C library locale*/,
|
||||
const char* /*filename*/,
|
||||
bool /*is utf8 encoding?*/,
|
||||
bool /*create_forward_charmaps*/,
|
||||
bool /*create_reverse_charmaps*/,
|
||||
bool /*use UCS4 internally*/);
|
||||
|
||||
// returns the narrow character map which maps a symbolic character
|
||||
// name to its narrow character value
|
||||
const std::map<std::string, unsigned char>& get_n_cmap() const {
|
||||
return n_cmap_;
|
||||
}
|
||||
|
||||
// returns the reverse narrow character map which maps a narrow
|
||||
// character value to its symbolic name
|
||||
const std::map<unsigned char, std::string>& get_rn_cmap() const {
|
||||
return rn_cmap_;
|
||||
}
|
||||
|
||||
// returns the wide character map which maps a symbolic character
|
||||
// name to its wide character value
|
||||
const std::map<std::string, wchar_t>& get_w_cmap() const {
|
||||
return w_cmap_;
|
||||
}
|
||||
|
||||
// returns the reverse wide character map which maps a wide
|
||||
// character value to its symbolic name
|
||||
const std::map<wchar_t, std::string>& get_rw_cmap() const {
|
||||
return rw_cmap_;
|
||||
}
|
||||
|
||||
// returns the multibyte character map which maps a multibyte
|
||||
// character to its corresponding wide character value
|
||||
const std::map<std::string, wchar_t>& get_mb_cmap() const {
|
||||
return mb_cmap_;
|
||||
}
|
||||
|
||||
// returns the reverse multibyte character map which maps a wide
|
||||
// character value to its corresponding multibyte character
|
||||
const std::map<wchar_t, std::string>& get_rmb_cmap() const {
|
||||
return rmb_cmap_;
|
||||
}
|
||||
|
||||
// get the string value map
|
||||
const std::list<std::string>& get_symnames_list() const {
|
||||
return symnames_list_;
|
||||
}
|
||||
|
||||
const std::map <std::string, wchar_t>& get_ucs4_cmap () const {
|
||||
return ucs4_cmap_;
|
||||
}
|
||||
|
||||
const std::map <wchar_t, std::string>& get_rucs4_cmap () const {
|
||||
return rucs4_cmap_;
|
||||
}
|
||||
|
||||
// return the value of mb_cur_max
|
||||
int get_mb_cur_max() const {
|
||||
return mb_cur_max_;
|
||||
}
|
||||
|
||||
// return the name of the codeset
|
||||
const std::string& get_code_set_name () const {
|
||||
return code_set_name_;
|
||||
}
|
||||
|
||||
// return the name of the character map
|
||||
std::string get_charmap_name () const;
|
||||
|
||||
// return the full path to the charmap
|
||||
std::string get_full_charmap_name () const {
|
||||
return charmap_name_;
|
||||
}
|
||||
|
||||
// convert the externally encoded string to the internal encoding
|
||||
bool convert_to_wc (const std::string&, const std::string&, wchar_t&);
|
||||
|
||||
// convert the externally encoded string to UCS
|
||||
bool convert_to_ucs (const std::string&, const std::string&, wchar_t&);
|
||||
|
||||
// convert the externally encoded string to UCS
|
||||
wchar_t convert_sym_to_ucs (const std::string&) const;
|
||||
|
||||
|
||||
// get the number of bytes in a single multi-byte character
|
||||
std::size_t mbcharlen (const std::string&) const;
|
||||
|
||||
// convert the first byte in the multi-byte character to an unsigned char
|
||||
unsigned char convert_char (const char*, const char** = 0) const;
|
||||
|
||||
unsigned char get_largest_nchar () const;
|
||||
|
||||
// increments the wide character value to the next encoded character
|
||||
// in the current codeset; returns the incremented value or -1 on
|
||||
// error
|
||||
wchar_t increment_wchar (wchar_t) const;
|
||||
|
||||
private:
|
||||
|
||||
// processes characters implicitly defined by an ellipsis denoted
|
||||
// by two explicitly defined characters; returns the number of
|
||||
// characters in the range, -1 on error
|
||||
std::size_t process_ellipsis (const Scanner::token_t&, int);
|
||||
|
||||
// process the charmap file making the necessary mappings in the cmaps
|
||||
void process_chars();
|
||||
|
||||
// increment the encoded multi byte character argument
|
||||
bool increment_encoding (std::string&);
|
||||
|
||||
// verify that all the characters in the portable character set
|
||||
// are defined in the character map
|
||||
void verify_portable_charset () const;
|
||||
|
||||
#ifndef _RWSTD_NO_ICONV
|
||||
// open the iconv descriptor to convert to utf8
|
||||
iconv_t open_iconv_to_utf8 () const;
|
||||
#endif // _RWSTD_NO_ICONV
|
||||
|
||||
// convert a human-readable encoding of a character
|
||||
// to its raw multibyte character representation
|
||||
std::string encoding_to_mbchar (const std::string&) const;
|
||||
|
||||
// convert a multi-byte string to a utf8 multi-byte string
|
||||
char* convert_to_utf8 (const char *inbuf, std::size_t inbuf_s,
|
||||
char *outbuf, std::size_t outbuf_s) const;
|
||||
|
||||
#ifndef _RWSTD_NO_ICONV
|
||||
# ifndef _RWSTD_NO_ISO_10646_WCHAR_T
|
||||
|
||||
// open the iconv descriptor to convert from utf8 to the external encoding
|
||||
iconv_t open_iconv_to_ext ();
|
||||
|
||||
# endif // _RWSTD_NO_ISO_10646_WCHAR_T
|
||||
#endif // _RWSTD_NO_ICONV
|
||||
|
||||
// add the symbolic name of a character and the raw multibyte
|
||||
// character corresponding to it to the character maps
|
||||
void add_to_cmaps (const std::string&,
|
||||
const std::string&,
|
||||
bool = false);
|
||||
|
||||
// the scanner used to process the charmap file
|
||||
Scanner scanner_;
|
||||
|
||||
// the name of the codeset
|
||||
std::string code_set_name_;
|
||||
|
||||
#if defined (_MSC_VER)
|
||||
int codepage_;
|
||||
#endif // _MSC_VER
|
||||
|
||||
// n_cmap maps the symbolic name to a narrow character value
|
||||
// rn_cmap does the opposite
|
||||
std::map <std::string, unsigned char> n_cmap_;
|
||||
std::map <unsigned char, std::string> rn_cmap_;
|
||||
|
||||
// mb_cmap maps a multibyte character representation to its
|
||||
// corresponding wide character value
|
||||
// rmb_cmap does the opposite
|
||||
std::map <std::string, wchar_t> mb_cmap_;
|
||||
std::map <wchar_t, std::string> rmb_cmap_;
|
||||
|
||||
typedef std::map <wchar_t, std::string>::const_iterator rmb_cmap_iter;
|
||||
typedef std::map <std::string, wchar_t>::const_iterator mb_cmap_iter;
|
||||
|
||||
// w_cmap maps the symbolic name to a wide character value
|
||||
// rw_cmap does exactly the opposite
|
||||
std::map <std::string, wchar_t> w_cmap_;
|
||||
std::map <wchar_t, std::string> rw_cmap_;
|
||||
|
||||
// ucs4_cmap maps the symbolic name to the UCS4 value for that name
|
||||
std::map <std::string, wchar_t> ucs4_cmap_;
|
||||
std::map <wchar_t, std::string> rucs4_cmap_;
|
||||
|
||||
// the number of bytes in the largest multi-byte value
|
||||
int mb_cur_max_;
|
||||
|
||||
#ifndef _RWSTD_NO_ICONV
|
||||
// the iconv file descriptor that converts to utf8
|
||||
iconv_t ic_to_utf8_;
|
||||
|
||||
// the iconv file descriptor that converts from utf8 to external
|
||||
iconv_t ic_to_ext_;
|
||||
#endif // _RWSTD_NO_ICONV
|
||||
|
||||
// the name of the character map file
|
||||
std::string charmap_name_;
|
||||
|
||||
// the name of the C library locale with same encoding
|
||||
std::string Clocale_;
|
||||
|
||||
unsigned char largest_nchar_;
|
||||
|
||||
// are we in the utf8 encoding?
|
||||
bool in_utf8_;
|
||||
|
||||
// should we create the forward character maps
|
||||
bool forward_maps;
|
||||
|
||||
// should we create the reverse character maps
|
||||
bool reverse_maps;
|
||||
|
||||
// should we use UCS4 as the internal representation
|
||||
bool UCS4_internal_;
|
||||
|
||||
// list of all known symbolic character names
|
||||
std::list<std::string> symnames_list_;
|
||||
|
||||
Scanner::token_t next;
|
||||
};
|
||||
|
||||
|
||||
#endif // _RWSTD_CHARMAP_H_INCLUDED
|
||||
|
||||
Reference in New Issue
Block a user