first commit
This commit is contained in:
658
extern/stdcxx/4.2.1/util/codecvt.cpp
vendored
Normal file
658
extern/stdcxx/4.2.1/util/codecvt.cpp
vendored
Normal file
@@ -0,0 +1,658 @@
|
||||
/***************************************************************************
|
||||
*
|
||||
* codecvt.cpp
|
||||
*
|
||||
* $Id: codecvt.cpp 449092 2006-09-22 21:16:16Z sebor $
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed
|
||||
* with this work for additional information regarding copyright
|
||||
* ownership. The ASF licenses this file to you under the Apache
|
||||
* License, Version 2.0 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
* implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*
|
||||
* Copyright 2001-2006 Rogue Wave Software.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "diagnostic.h" // for issue_diag()
|
||||
#include "def.h" // for Def
|
||||
|
||||
#include "path.h" // for get_pathname()
|
||||
#include "scanner.h" // for scanner
|
||||
|
||||
#include <cassert> // for assert()
|
||||
#include <climits> // for UCHAR_MAX
|
||||
#include <cstring> // for memset()
|
||||
#include <fstream> // for ifstream, ofstream
|
||||
|
||||
|
||||
// read-only iterator over a map keyed by a narrow (multibyte) character
// string with a wchar_t value; used below to walk the map returned by
// charmap_.get_mb_cmap ()
typedef std::map<std::string, wchar_t>::const_iterator n_cmap_citer2;
|
||||
|
||||
|
||||
std::size_t Def::
|
||||
gen_mbchar_tables (codecvt_offsets_map_t &tab,
|
||||
std::map<std::string, unsigned> &off_map,
|
||||
const std::string &charp /* = "" */,
|
||||
unsigned tabno /* = 0 */)
|
||||
{
|
||||
// upon the first call (but not during subsequent recursive calls)
|
||||
// generate a set of multibyte prefixes from the set of all known
|
||||
// multibyte characters
|
||||
static unsigned ntabs = 0;
|
||||
static std::set<std::string>* pfx_set = 0;
|
||||
|
||||
const n_cmap_citer2 mb_map_end = charmap_.get_mb_cmap ().end ();
|
||||
|
||||
if (0 == pfx_set) {
|
||||
pfx_set = new std::set<std::string>;
|
||||
|
||||
// iterate over the range of valid multibyte characters
|
||||
// obtained from the charmap and generate a complete
|
||||
// subset of non-empty multibyte prefixes from each
|
||||
unsigned off = 0;
|
||||
|
||||
const n_cmap_citer2 mb_map_begin = charmap_.get_mb_cmap ().begin ();
|
||||
|
||||
for (n_cmap_citer2 it = mb_map_begin; it != mb_map_end; ++it, ++off) {
|
||||
|
||||
// insert the ordinal number of each multibyte character
|
||||
// into a map for fast lookup later
|
||||
off_map.insert (std::make_pair (it->first, off));
|
||||
|
||||
// generate non-empty prefixes up to one byte less
|
||||
// in length than the complete multibyte character
|
||||
for (std::string prefix = it->first; 1 < prefix.size (); ) {
|
||||
prefix = prefix.substr (0, prefix.size () - 1);
|
||||
pfx_set->insert (prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// number of valid characters inserted into the tables
|
||||
std::size_t nchars = 0;
|
||||
|
||||
// an array of offsets to the multibyte character or to the next
|
||||
// array containing such offsets (defined recursively for up to
|
||||
// MB_CUR_MAX levels of nesting)
|
||||
codecvt_offset_tab_t* const offsets = new codecvt_offset_tab_t;
|
||||
|
||||
std::string mb_char (charp + '\0');
|
||||
|
||||
for (unsigned i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
unsigned char cur_char = (unsigned char)i;
|
||||
|
||||
mb_char [mb_char.size () - 1] = char (cur_char);
|
||||
|
||||
if (mb_map_end == charmap_.get_mb_cmap ().find (mb_char)) {
|
||||
// mb_char is not a complete, valid multibyte character
|
||||
// check to see if it's a prefix of one
|
||||
|
||||
if (pfx_set->find (mb_char) == pfx_set->end ()) {
|
||||
// mb_char is not a prefix of a valid multibyte
|
||||
// character, mark it invalide
|
||||
offsets->off [cur_char] = UINT_MAX;
|
||||
}
|
||||
else {
|
||||
// mb_char is a prefix of a valid multibyte character,
|
||||
// set the MSB to denote that it "continues" in the
|
||||
// table at the next higher offset
|
||||
offsets->off [cur_char] = ++ntabs | 0x80000000;
|
||||
|
||||
// generate that table
|
||||
nchars += gen_mbchar_tables (tab, off_map, mb_char, ntabs);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// mb_char is a complete, valid miltibyte character
|
||||
// insert its ordinal number (offset) into the array
|
||||
offsets->off [cur_char] = off_map.find (mb_char)->second;
|
||||
++nchars;
|
||||
}
|
||||
}
|
||||
|
||||
// insert the completely populated table into the map
|
||||
tab.insert (std::make_pair (tabno, offsets));
|
||||
|
||||
if (0 == ntabs) {
|
||||
// clean up on return from the topmost (non-recursive) call
|
||||
delete pfx_set;
|
||||
pfx_set = 0;
|
||||
}
|
||||
|
||||
return nchars;
|
||||
}
|
||||
|
||||
|
||||
std::size_t Def::
|
||||
gen_wchar_tables (codecvt_offsets_map_t &tab,
|
||||
const std::string &charp /* = "" */,
|
||||
unsigned int tabno /* = 0 */)
|
||||
{
|
||||
// upon the first call (but not during subsequent recursive calls)
|
||||
// generate a set of multibyte prefixes from the set of all known
|
||||
// multibyte characters
|
||||
static unsigned ntabs = 0;
|
||||
static std::set<std::string> *pfx_set = 0;
|
||||
static std::map<std::string, unsigned> *off_map = 0;
|
||||
static std::map<std::string, std::string> *utf_map = 0;
|
||||
|
||||
if (0 == utf_map) {
|
||||
pfx_set = new std::set<std::string>;
|
||||
off_map = new std::map<std::string, unsigned>;
|
||||
utf_map = new std::map<std::string, std::string>;
|
||||
|
||||
const n_cmap_citer2 first = charmap_.get_mb_cmap ().begin ();
|
||||
const n_cmap_citer2 last = charmap_.get_mb_cmap ().end ();
|
||||
|
||||
unsigned off = 0;
|
||||
|
||||
for (n_cmap_citer2 it = first; it != last; ++it) {
|
||||
|
||||
off_map->insert (std::make_pair (it->first, off));
|
||||
|
||||
off += it->first.size () + 1;
|
||||
|
||||
std::string utf = utf8_encode (it->second);
|
||||
|
||||
utf_map->insert (std::make_pair (utf, it->first));
|
||||
|
||||
while (1 < utf.size ()) {
|
||||
utf = utf.substr (0, utf.size () - 1);
|
||||
pfx_set->insert (utf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
codecvt_offset_tab_t* const offsets = new codecvt_offset_tab_t;
|
||||
|
||||
// number of valid characters inserted into the tables
|
||||
std::size_t nchars = 0;
|
||||
|
||||
std::string mb_char (charp + '\0');
|
||||
|
||||
for (unsigned i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
unsigned char cur_char = (unsigned char)i;
|
||||
|
||||
mb_char [mb_char.size () - 1] = char (cur_char);
|
||||
|
||||
const wchar_utf8_iter it = utf_map->find (mb_char);
|
||||
if (it == utf_map->end ()) {
|
||||
if (pfx_set->find (mb_char) == pfx_set->end ()) {
|
||||
offsets->off [cur_char] = UINT_MAX;
|
||||
}
|
||||
else {
|
||||
offsets->off [cur_char] = ++ntabs | 0x80000000;
|
||||
|
||||
nchars += gen_wchar_tables (tab, mb_char, ntabs);
|
||||
}
|
||||
}
|
||||
else {
|
||||
offsets->off [cur_char] = off_map->find (it->second)->second;
|
||||
|
||||
++nchars;
|
||||
}
|
||||
}
|
||||
|
||||
tab.insert (std::make_pair (tabno, offsets));
|
||||
|
||||
if (0 == ntabs) {
|
||||
// clean up
|
||||
delete pfx_set;
|
||||
delete utf_map;
|
||||
|
||||
pfx_set = 0;
|
||||
utf_map = 0;
|
||||
}
|
||||
|
||||
return nchars;
|
||||
}
|
||||
|
||||
|
||||
std::size_t Def::
|
||||
gen_utf8_tables (codecvt_offsets_map_t &tab,
|
||||
std::map<std::string, unsigned> &off_map,
|
||||
const std::string &charp /* = "" */,
|
||||
unsigned tabno /* = 0 */)
|
||||
{
|
||||
static unsigned ntabs = 0;
|
||||
static std::set<std::string> *pfx_set = 0;
|
||||
static std::map<std::string, wchar_t> *utf_map = 0;
|
||||
|
||||
if (0 == pfx_set) {
|
||||
pfx_set = new std::set<std::string>;
|
||||
|
||||
const ucs4_cmap_iter first = charmap_.get_ucs4_cmap ().begin ();
|
||||
const ucs4_cmap_iter last = charmap_.get_ucs4_cmap ().end ();
|
||||
|
||||
for (ucs4_cmap_iter it = first; it != last; ++it) {
|
||||
|
||||
for (std::string prefix = utf8_encode (it->second);
|
||||
1 < prefix.size (); ) {
|
||||
prefix = prefix.substr (0, prefix.size () - 1);
|
||||
pfx_set->insert (prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// the set of complete utf8 strings in the current character map
|
||||
typedef std::map<std::string, wchar_t>::iterator utf8_map_iter;
|
||||
|
||||
if (0 == utf_map) {
|
||||
utf_map = new std::map<std::string, wchar_t>;
|
||||
|
||||
const ucs4_cmap_iter first = charmap_.get_ucs4_cmap ().begin ();
|
||||
const ucs4_cmap_iter last = charmap_.get_ucs4_cmap ().end ();
|
||||
|
||||
for (ucs4_cmap_iter it = first; it != last; ++it) {
|
||||
const std::string utf = utf8_encode (it->second);
|
||||
utf_map->insert (std::make_pair (utf, it->second));
|
||||
}
|
||||
}
|
||||
|
||||
codecvt_offset_tab_t* const offsets = new codecvt_offset_tab_t;
|
||||
|
||||
// number of valid characters inserted into the tables
|
||||
std::size_t nchars = 0;
|
||||
|
||||
std::string mb_char = charp + '\0';
|
||||
|
||||
for (unsigned int i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
unsigned char cur_char = (unsigned char)i;
|
||||
|
||||
mb_char [mb_char.size () - 1] = char (cur_char);
|
||||
|
||||
const utf8_map_iter where = utf_map->find (mb_char);
|
||||
|
||||
if (where == utf_map->end ()) {
|
||||
if (pfx_set->find (mb_char) == pfx_set->end ()) {
|
||||
offsets->off [cur_char] = UINT_MAX;
|
||||
}
|
||||
else {
|
||||
offsets->off [cur_char] = ++ntabs | 0x80000000;
|
||||
nchars += gen_utf8_tables (tab, off_map, mb_char, ntabs);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// first get the symbolic name
|
||||
std::string str
|
||||
= charmap_.get_rucs4_cmap ().find (where->second)->second;
|
||||
|
||||
// then get the internal encoding of the character
|
||||
const wchar_t int_enc = charmap_.get_w_cmap().find (str)->second;
|
||||
|
||||
// then get the external encoding to use in a lookup in
|
||||
// mb_char_off_map
|
||||
str = charmap_.get_rmb_cmap ().find (int_enc)->second;
|
||||
|
||||
offsets->off [cur_char] = off_map.find (str)->second;
|
||||
|
||||
++nchars;
|
||||
}
|
||||
}
|
||||
|
||||
tab.insert (std::make_pair (tabno, offsets));
|
||||
|
||||
if (0 == ntabs) {
|
||||
// clean up
|
||||
delete pfx_set;
|
||||
delete utf_map;
|
||||
|
||||
pfx_set = 0;
|
||||
utf_map = 0;
|
||||
}
|
||||
return nchars;
|
||||
}
|
||||
|
||||
|
||||
void Def::
|
||||
gen_xlit_data ()
|
||||
{
|
||||
// data offset points to the beginning of the data containing
|
||||
// the narrow strings character encodings
|
||||
unsigned int data_offset = 0;
|
||||
|
||||
// traverse the map and construct the map of offsets
|
||||
xlit_map_t::const_iterator it = xlit_map_.begin ();
|
||||
for (; it != xlit_map_.end (); ++it) {
|
||||
// insert pair(wchar_t value, offset of first string in data block)
|
||||
xlit_data_offset_map_.insert (
|
||||
std::make_pair (it->first,data_offset));
|
||||
|
||||
// advance the data_offset value to the next "first" string
|
||||
std::list<std::string>::const_iterator sit =
|
||||
it->second.begin ();
|
||||
for (; sit != it->second.end (); ++sit) {
|
||||
data_offset += sit->size () + 1;
|
||||
}
|
||||
++data_offset;
|
||||
}
|
||||
|
||||
// create a new table (first), populate it with default values
|
||||
// and insert it in the tables map
|
||||
xlit_offset_table_t table0;
|
||||
unsigned int k;
|
||||
for (k = 0; k < UCHAR_MAX + 1; ++k)
|
||||
table0.offset_table [k] = UINT_MAX;
|
||||
|
||||
// insert it into the map
|
||||
xlit_table_map_.insert (std::make_pair(0, table0));
|
||||
|
||||
const xlit_map_t::const_iterator xlit_map_end = xlit_map_.end ();
|
||||
|
||||
// traverse the map again and build the tables
|
||||
for (it = xlit_map_.begin (); it != xlit_map_end; ++it) {
|
||||
|
||||
// encode the wchar_t value to UTF-8
|
||||
const std::string utf8_rep (utf8_encode (it->first));
|
||||
data_offset = xlit_data_offset_map_.find (it->first)->second;
|
||||
|
||||
// traverse the utf8 representation string and create the
|
||||
// necessary tables and populate the indexes
|
||||
unsigned int table_idx = 0;
|
||||
|
||||
const std::string::const_iterator utf8_rep_end = utf8_rep.end ();
|
||||
std::string::const_iterator string_it = utf8_rep.begin ();
|
||||
|
||||
for (; string_it != utf8_rep_end; ++string_it) {
|
||||
// get the table corresponding to the current index and locate
|
||||
// the value at that index
|
||||
const xlit_table_map_t::iterator res =
|
||||
xlit_table_map_.find (table_idx);
|
||||
|
||||
assert (res != xlit_table_map_.end ());
|
||||
|
||||
// offset in table
|
||||
unsigned char off_idx = (unsigned char)*string_it;
|
||||
|
||||
// res is the iterator pointing to the correct table in the map
|
||||
// check the index and if not populated, create a new table
|
||||
if (res->second.offset_table [off_idx] == UINT_MAX) {
|
||||
|
||||
// if this is the last position in the string, then
|
||||
// fill the table position with the offset of the string data
|
||||
if ((string_it + 1) == utf8_rep.end ()) {
|
||||
xlit_data_offset_map_t::const_iterator data_it =
|
||||
xlit_data_offset_map_.find (it->first);
|
||||
assert (data_it != xlit_data_offset_map_.end ());
|
||||
|
||||
// fill the table position with the found offset
|
||||
res->second.offset_table [off_idx] = data_it->second;
|
||||
continue;
|
||||
}
|
||||
|
||||
// create a new table and append it to the map
|
||||
xlit_offset_table_t table;
|
||||
for (unsigned int i = 0; i < UCHAR_MAX + 1; ++i)
|
||||
table.offset_table [i] = UINT_MAX;
|
||||
|
||||
// insert it into the map
|
||||
unsigned int tmp = xlit_table_map_.size ();
|
||||
xlit_table_map_.insert (std::make_pair(tmp, table));
|
||||
|
||||
// store its index at correct position in current table
|
||||
res->second.offset_table [off_idx] = tmp | 0x80000000;
|
||||
table_idx = tmp;
|
||||
} else {
|
||||
table_idx =
|
||||
res->second.offset_table [off_idx] & 0x7FFFFFFF;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Def::
|
||||
write_codecvt (std::string dir_name)
|
||||
{
|
||||
// if it has been already written
|
||||
if (codecvt_written_)
|
||||
return;
|
||||
|
||||
// compose the directory name
|
||||
((dir_name += _RWSTD_PATH_SEP) += "..") += _RWSTD_PATH_SEP;
|
||||
dir_name += charmap_.get_code_set_name ();
|
||||
|
||||
// check to see if the codecvt database already exists and
|
||||
// avoid recreating it if it does (as an optimization)
|
||||
if (std::ifstream (dir_name.c_str ())) {
|
||||
issue_diag (I_OPENWR, false, 0,
|
||||
"%s exists, skipping\n", dir_name.c_str ());
|
||||
return;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// generate multibyte conversion tables
|
||||
issue_diag (I_STAGE, false, 0, "generating multibyte tables\n");
|
||||
|
||||
codecvt_offsets_map_t mbchar_offs;
|
||||
std::map<std::string, unsigned> off_map;
|
||||
const std::size_t n_mbchars = gen_mbchar_tables (mbchar_offs, off_map);
|
||||
|
||||
// generate wchar_t conversion tables
|
||||
issue_diag (I_STAGE, false, 0, "generating wchar_t tables\n");
|
||||
|
||||
codecvt_offsets_map_t wchar_offs;
|
||||
const std::size_t n_wchars = gen_wchar_tables (wchar_offs);
|
||||
|
||||
// generate UTF-8 conversion conversion tables
|
||||
issue_diag (I_STAGE, false, 0, "generating UTF-8 tables\n");
|
||||
|
||||
codecvt_offsets_map_t uchar_offs;
|
||||
const std::size_t n_uchars = gen_utf8_tables (uchar_offs, off_map);
|
||||
|
||||
// not needed beyond this point, clear it out
|
||||
off_map.clear ();
|
||||
|
||||
// generate the transliteration tables and the transliteration data
|
||||
issue_diag (I_STAGE, false, 0, "generating transliteration tables\n");
|
||||
gen_xlit_data ();
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// populate the codecvt structure before writing it out
|
||||
// in binary form to the file (the codecvt database)
|
||||
_RW::__rw_codecvt_t codecvt_out;
|
||||
std::memset (&codecvt_out, 0, sizeof codecvt_out);
|
||||
|
||||
// calculate byte offsets within the structure
|
||||
codecvt_out.n_to_w_tab_off = 0;
|
||||
codecvt_out.w_to_n_tab_off = codecvt_out.n_to_w_tab_off
|
||||
+ mbchar_offs.size () * (UCHAR_MAX + 1) * sizeof (unsigned);
|
||||
|
||||
codecvt_out.utf8_to_ext_tab_off = codecvt_out.w_to_n_tab_off
|
||||
+ wchar_offs.size () * (UCHAR_MAX + 1) * sizeof (unsigned);
|
||||
|
||||
// insert the transliteration tables here
|
||||
codecvt_out.xliteration_off = codecvt_out.utf8_to_ext_tab_off
|
||||
+ uchar_offs.size () * (UCHAR_MAX + 1) * sizeof (unsigned);
|
||||
|
||||
codecvt_out.wchar_off = codecvt_out.xliteration_off
|
||||
+ xlit_table_map_.size () * (UCHAR_MAX + 1) * sizeof (unsigned);
|
||||
|
||||
codecvt_out.codeset_off = codecvt_out.wchar_off
|
||||
+ charmap_.get_mb_cmap ().size () * 2 * sizeof (wchar_t);
|
||||
|
||||
codecvt_out.charmap_off = codecvt_out.codeset_off
|
||||
+ charmap_.get_code_set_name ().size () + 1 /* NUL */;
|
||||
|
||||
const std::size_t mb_offset = codecvt_out.charmap_off
|
||||
+ charmap_.get_charmap_name ().size () + 1 /* NUL */;
|
||||
|
||||
// compute the size of narrow strings map which added to
|
||||
// mb_offset will give the start of the transliteration data
|
||||
std::size_t xlit_data_offset = mb_offset;
|
||||
|
||||
mb_cmap_iter iter;
|
||||
|
||||
for (iter = charmap_.get_mb_cmap ().begin();
|
||||
iter != charmap_.get_mb_cmap().end(); ++iter) {
|
||||
xlit_data_offset += iter->first.size() + 1;
|
||||
}
|
||||
|
||||
// now traverse again the utf8 tables for transliteration data
|
||||
// and recompute the offsets:
|
||||
const xlit_table_map_t::const_iterator xlit_table_map_end =
|
||||
xlit_table_map_.end ();
|
||||
|
||||
xlit_table_map_t::iterator xit = xlit_table_map_.begin ();
|
||||
for (; xit != xlit_table_map_end; ++xit) {
|
||||
for (unsigned int i = 0; i < UCHAR_MAX + 1; ++i) {
|
||||
if (xit->second.offset_table [i] & 0x80000000)
|
||||
continue;
|
||||
// add the offset for xliteration data
|
||||
xit->second.offset_table [i] += xlit_data_offset;
|
||||
}
|
||||
}
|
||||
|
||||
codecvt_out.mb_cur_max = charmap_.get_mb_cur_max();
|
||||
|
||||
issue_diag (I_OPENWR, false, 0, "writing %s\n", dir_name.c_str ());
|
||||
|
||||
// create the stream with exceptions enabled
|
||||
std::ofstream out (dir_name.c_str(), std::ios::binary);
|
||||
out.exceptions (std::ios::failbit | std::ios::badbit);
|
||||
|
||||
// write the codecvt_out structure
|
||||
out.write ((char*)&codecvt_out, sizeof codecvt_out);
|
||||
|
||||
typedef codecvt_offsets_map_t::iterator off_iter_t;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// write out the multibyte to wchar_t tables
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing %lu multibyte tables (%lu characters)\n",
|
||||
mbchar_offs.size (), n_mbchars);
|
||||
|
||||
for (off_iter_t it = mbchar_offs.begin (); it != mbchar_offs.end (); ++it) {
|
||||
for (unsigned i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
const unsigned off = it->second->off [i];
|
||||
|
||||
out.write ((const char*)&off, sizeof off);
|
||||
}
|
||||
|
||||
delete it->second;
|
||||
}
|
||||
|
||||
// not needed beyond this point, clear it out
|
||||
mbchar_offs.clear ();
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// write out the wchar_t to multibyte conversion tables
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing %lu wchar_t tables (%lu characters)\n",
|
||||
wchar_offs.size (), n_wchars);
|
||||
|
||||
for (off_iter_t it = wchar_offs.begin (); it != wchar_offs.end (); ++it) {
|
||||
for (unsigned i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
// adjust offsets to multibyte characters (but not those
|
||||
// to other tables or invalid encodings)
|
||||
unsigned off = it->second->off [i];
|
||||
|
||||
if (!(off & 0x80000000))
|
||||
off += mb_offset;
|
||||
|
||||
out.write ((const char*)&off, sizeof off);
|
||||
}
|
||||
|
||||
delete it->second;
|
||||
}
|
||||
|
||||
// not needed beyond this point, clear it out
|
||||
wchar_offs.clear ();
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// write out the UTF-8 to (libc) multibyte tables
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing %lu UTF-8 tables (%lu characters)\n",
|
||||
uchar_offs.size (), n_uchars);
|
||||
|
||||
for (off_iter_t it = uchar_offs.begin (); it != uchar_offs.end (); ++it) {
|
||||
for (unsigned i = 0; i <= UCHAR_MAX; ++i) {
|
||||
|
||||
// adjust offsets to multibyte characters (but not those
|
||||
// to other tables or invalid encodings)
|
||||
unsigned off = it->second->off [i];
|
||||
|
||||
if (!(off & 0x80000000))
|
||||
off += mb_offset;
|
||||
|
||||
out.write ((const char*)&off, sizeof off);
|
||||
}
|
||||
|
||||
delete it->second;
|
||||
}
|
||||
|
||||
// not needed beyond this point, clear it out
|
||||
uchar_offs.clear ();
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// write out the transliteration UTF-8 lookup tables
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing transliteration table (size %lu)\n",
|
||||
xlit_table_map_.size ());
|
||||
|
||||
xit = xlit_table_map_.begin ();
|
||||
for (; xit != xlit_table_map_end; ++xit) {
|
||||
const unsigned int* ptable = &xit->second.offset_table [0];
|
||||
for (unsigned int i = 0; i < UCHAR_MAX + 1; ++i, ++ptable)
|
||||
out.write ((const char*)ptable, sizeof (unsigned int));
|
||||
}
|
||||
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing the UCS table (%lu characters)\n",
|
||||
charmap_.get_mb_cmap ().size ());
|
||||
|
||||
const mb_cmap_iter n_cmap2_end = charmap_.get_mb_cmap ().end ();
|
||||
|
||||
// write the locale-encoded wchar_t and the UCS4 wchar_t
|
||||
for (iter = charmap_.get_mb_cmap ().begin();
|
||||
iter != n_cmap2_end; ++iter) {
|
||||
out.write ((const char*)&iter->second, sizeof (iter->second));
|
||||
out.write ((const char*)& (charmap_.get_ucs4_cmap().find
|
||||
(charmap_.get_rw_cmap().find
|
||||
(iter->second)->second))->second,
|
||||
sizeof (wchar_t));
|
||||
}
|
||||
|
||||
// write the code_set_name string and charmap string
|
||||
out << charmap_.get_code_set_name() << std::ends
|
||||
<< charmap_.get_charmap_name() << std::ends;
|
||||
|
||||
|
||||
// write out the narrow character strings
|
||||
for (iter = charmap_.get_mb_cmap().begin();
|
||||
iter != n_cmap2_end; ++iter) {
|
||||
out.write (iter->first.c_str(), iter->first.size() + 1);
|
||||
}
|
||||
|
||||
issue_diag (I_WRITE, false, 0,
|
||||
"writing transliteration data (size %lu)\n",
|
||||
xlit_map_.size ());
|
||||
|
||||
// write out the transliteration data
|
||||
xlit_map_t::const_iterator xlit_data_it = xlit_map_.begin ();
|
||||
for (; xlit_data_it != xlit_map_.end (); ++xlit_data_it) {
|
||||
std::list<std::string>::const_iterator sit =
|
||||
xlit_data_it->second.begin ();
|
||||
for (; sit != xlit_data_it->second.end (); ++sit) {
|
||||
out.write (sit->c_str (), sit->size () + 1);
|
||||
}
|
||||
out.write ("\0", 1);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user