/*************************************************************************** * * charmap.h * * $Id: charmap.h 580483 2007-09-28 20:55:52Z sebor $ * *************************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * Copyright 2001-2007 Rogue Wave Software, Inc. * **************************************************************************/ #ifndef _RWSTD_CHARMAP_H_INCLUDED #define _RWSTD_CHARMAP_H_INCLUDED #include #include #include #include #ifndef _RWSTD_NO_ICONV # include #endif // _RWSTD_NO_ICONV #include "scanner.h" class Charmap { public: static const char* const portable_charset []; Charmap(const char* /*corresponding C library locale*/, const char* /*filename*/, bool /*is utf8 encoding?*/, bool /*create_forward_charmaps*/, bool /*create_reverse_charmaps*/, bool /*use UCS4 internally*/); // returns the narrow character map which maps a symbolic character // name to its narrow character value const std::map& get_n_cmap() const { return n_cmap_; } // returns the reverse narrow character map which maps a narrow // character value to its symbolic name const std::map& get_rn_cmap() const { return rn_cmap_; } // returns the wide character map which maps a symbolic character // name to its wide character value const std::map& get_w_cmap() const { return w_cmap_; } // returns the reverse wide character map which maps a wide // character value to its symbolic name const std::map& get_rw_cmap() const { return rw_cmap_; } // returns the multibyte character map which maps a multibyte // character to its corresponding wide character value const std::map& get_mb_cmap() const { return mb_cmap_; } // returns the reverse multibyte character map which maps a wide // character value to its corresponding multibyte character const std::map& get_rmb_cmap() const { return rmb_cmap_; } // get the string value map const std::list& get_symnames_list() const { return symnames_list_; } const std::map & get_ucs4_cmap () const { return ucs4_cmap_; } const std::map & get_rucs4_cmap () const { return rucs4_cmap_; } // return the value of mb_cur_max int get_mb_cur_max() const { return mb_cur_max_; } // return the name of the codeset const std::string& get_code_set_name () const { return code_set_name_; } // return the name of the character map std::string get_charmap_name () const; // return the full path to the charmap std::string get_full_charmap_name () const { return charmap_name_; } // convert the externally encoded string to the internal encoding bool convert_to_wc (const std::string&, const std::string&, wchar_t&); // convert the externally encoded string to UCS bool convert_to_ucs (const std::string&, const std::string&, wchar_t&); // convert the externally encoded string to UCS wchar_t convert_sym_to_ucs (const std::string&) const; // get the number of bytes in a single multi-byte character std::size_t mbcharlen (const std::string&) const; // convert the first byte in the multi-byte character to an unsigned char unsigned char convert_char (const char*, const char** = 0) const; unsigned char get_largest_nchar () const; // increments the wide character value to the next encoded character // in the current codeset; returns the incremented value or -1 on // error wchar_t increment_wchar (wchar_t) const; private: // processes characters implicitly defined by an ellipsis denoted // by two explicitly defined characters; returns the number of // characters in the range, -1 on error std::size_t process_ellipsis (const Scanner::token_t&, int); // process the charmap file making the necessary mappings in the cmaps void process_chars(); // increment the encoded multi byte character argument bool increment_encoding (std::string&); // verify that all the characters in the portable character set // are defined in the character map void verify_portable_charset () const; #ifndef _RWSTD_NO_ICONV // open the iconv descriptor to convert to utf8 iconv_t open_iconv_to_utf8 () const; #endif // _RWSTD_NO_ICONV // convert a human-readable encoding of a character // to its raw multibyte character representation std::string encoding_to_mbchar (const std::string&) const; // convert a multi-byte string to a utf8 multi-byte string char* convert_to_utf8 (const char *inbuf, std::size_t inbuf_s, char *outbuf, std::size_t outbuf_s) const; #ifndef _RWSTD_NO_ICONV # ifndef _RWSTD_NO_ISO_10646_WCHAR_T // open the iconv descriptor to convert from utf8 to the external encoding iconv_t open_iconv_to_ext (); # endif // _RWSTD_NO_ISO_10646_WCHAR_T #endif // _RWSTD_NO_ICONV // add the symbolic name of a character and the raw multibyte // character corresponding to it to the character maps void add_to_cmaps (const std::string&, const std::string&, bool = false); // the scanner used to process the charmap file Scanner scanner_; // the name of the codeset std::string code_set_name_; #if defined (_MSC_VER) int codepage_; #endif // _MSC_VER // n_cmap maps the symbolic name to a narrow character value // rn_cmap does the opposite std::map n_cmap_; std::map rn_cmap_; // mb_cmap maps a multibyte character representation to its // corresponding wide character value // rmb_cmap does the opposite std::map mb_cmap_; std::map rmb_cmap_; typedef std::map ::const_iterator rmb_cmap_iter; typedef std::map ::const_iterator mb_cmap_iter; // w_cmap maps the symbolic name to a wide character value // rw_cmap does exactly the opposite std::map w_cmap_; std::map rw_cmap_; // ucs4_cmap maps the symbolic name to the UCS4 value for that name std::map ucs4_cmap_; std::map rucs4_cmap_; // the number of bytes in the largest multi-byte value int mb_cur_max_; #ifndef _RWSTD_NO_ICONV // the iconv file descriptor that converts to utf8 iconv_t ic_to_utf8_; // the iconv file descriptor that converts from utf8 to external iconv_t ic_to_ext_; #endif // _RWSTD_NO_ICONV // the name of the character map file std::string charmap_name_; // the name of the C library locale with same encoding std::string Clocale_; unsigned char largest_nchar_; // are we in the utf8 encoding? bool in_utf8_; // should we create the forward character maps bool forward_maps; // should we create the reverse character maps bool reverse_maps; // should we use UCS4 as the internal representation bool UCS4_internal_; // list of all known symbolic character names std::list symnames_list_; Scanner::token_t next; }; #endif // _RWSTD_CHARMAP_H_INCLUDED