/**************************************************************************
 *
 * codecvt1.cpp - Example program of codecvt facet.
 *
 * $Id: codecvt1.cpp 648752 2008-04-16 17:01:56Z faridz $
 *
 ***************************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright
 * ownership. The ASF licenses this file to you under the Apache
 * License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * Copyright 1994-2006 Rogue Wave Software.
 *
 **************************************************************************/

// NOTE(review): the header names of the #include directives were missing
// from this file. The list below is reconstructed from the names the file
// actually uses; confirm it against the original. One further #include
// (possibly a project header) could not be reconstructed and has not been
// re-added here.
#include <algorithm>   // for min()
#include <cstddef>     // for ptrdiff_t, size_t
#include <cstring>     // for memcmp(), strlen()
#include <cwchar>      // for mbstate_t
#include <exception>   // for exception
#include <iostream>    // for cout
#include <locale>      // for locale, codecvt_byname, use_facet

// Number of elements in each of the temporary conversion buffers
// (both the wchar_t and the char one).
#define TMP_BUFFER_SIZE 1024

// Japanese week days (short/abbreviated and long/normal form) in EUC-JP,
// Shift_JIS and UTF-8 encodings. Each name is followed by a newline
// ('\x0a'), and each array is a NUL-terminated string.
const char wd_eucjp [] = {
    "\xc6\xfc\x0a\xc6\xfc\xcd\xcb\xc6\xfc\x0a\xb7\xee\x0a\xb7\xee\xcd"
    "\xcb\xc6\xfc\x0a\xb2\xd0\x0a\xb2\xd0\xcd\xcb\xc6\xfc\x0a\xbf\xe5"
    "\x0a\xbf\xe5\xcd\xcb\xc6\xfc\x0a\xcc\xda\x0a\xcc\xda\xcd\xcb\xc6"
    "\xfc\x0a\xb6\xe2\x0a\xb6\xe2\xcd\xcb\xc6\xfc\x0a\xc5\xda\x0a\xc5"
    "\xda\xcd\xcb\xc6\xfc\x0a"
};

const char wd_sjis [] = {
    "\x93\xfa\x0a\x93\xfa\x97\x6a\x93\xfa\x0a\x8c\x8e\x0a\x8c\x8e\x97"
    "\x6a\x93\xfa\x0a\x89\xce\x0a\x89\xce\x97\x6a\x93\xfa\x0a\x90\x85"
    "\x0a\x90\x85\x97\x6a\x93\xfa\x0a\x96\xd8\x0a\x96\xd8\x97\x6a\x93"
    "\xfa\x0a\x8b\xe0\x0a\x8b\xe0\x97\x6a\x93\xfa\x0a\x93\x79\x0a\x93"
    "\x79\x97\x6a\x93\xfa\x0a"
};

const char wd_utf8 [] = {
    "\xe6\x97\xa5\x0a\xe6\x97\xa5\xe6\x9b\x9c\xe6\x97\xa5\x0a\xe6\x9c"
    "\x88\x0a\xe6\x9c\x88\xe6\x9b\x9c\xe6\x97\xa5\x0a\xe7\x81\xab\x0a"
    "\xe7\x81\xab\xe6\x9b\x9c\xe6\x97\xa5\x0a\xe6\xb0\xb4\x0a\xe6\xb0"
    "\xb4\xe6\x9b\x9c\xe6\x97\xa5\x0a\xe6\x9c\xa8\x0a\xe6\x9c\xa8\xe6"
    "\x9b\x9c\xe6\x97\xa5\x0a\xe9\x87\x91\x0a\xe9\x87\x91\xe6\x9b\x9c"
    "\xe6\x97\xa5\x0a\xe5\x9c\x9f\x0a\xe5\x9c\x9f\xe6\x9b\x9c\xe6\x97"
    "\xa5\x0a"
};

// Locale names paired with the week-day buffer written in the encoding
// that the locale name advertises.
const struct {
    const char* name;     // locale name
    const char* buffer;   // NUL-terminated text paired with that locale
} locales [] = {
    { "ja_JP.UTF-8"    , wd_utf8  },
    { "ja_JP.Shift_JIS", wd_sjis  },
    { "ja_JP.EUC-JP"   , wd_eucjp }
};

// Value-initialized conversion state; copied wherever a fresh state
// object is needed.
std::mbstate_t init = std::mbstate_t ();

// Conversion from external representation of characters to internal
// representation of characters and back to external representation
// and checking of the transformation correctness after round-trip.
void do_roundtrip_conversion (const char* locname, const char* buffer) { typedef std::codecvt_byname wcodecvt_byname_t; wchar_t wtmp [TMP_BUFFER_SIZE]; char tmp [TMP_BUFFER_SIZE]; std::mbstate_t state = init; std::codecvt_base::result res; // create the named facet std::locale loc (locname); const wcodecvt_byname_t& cc = std::use_facet (loc); // set the pointers const char* cpe = buffer; const char* cpen = buffer; const char* cpe_limit = buffer + std::strlen (buffer); wchar_t* pi = wtmp; wchar_t* pin = wtmp; wchar_t* pi_limit = wtmp + TMP_BUFFER_SIZE; // convert characters in external buffer to internal representation res = cc.in (state, cpe, cpe_limit, cpen, pi, pi_limit, pin); // set the pointers and adjust the pi_limit pointer to after // the last successfully converted character const wchar_t* cpi = pi; const wchar_t* cpin = pi; const wchar_t* cpi_limit = pin; char* pe = tmp; char* pen = tmp; char* pe_limit = tmp + TMP_BUFFER_SIZE; // convert the characters in internal representation to external // representation and compare the result with the original buffer res = cc.out (state, cpi, cpi_limit, cpin, pe, pe_limit, pen); std::cout << locname << " -> INT -> " << locname << '\n'; std::cout << " Size comparison of buffers yields " << (std::codecvt_base::ok == res && (cpen - cpe == pen - pe) ? "equal\n" : "not equal\n"); const int cmp = std::memcmp (cpe, pe, std::min ((cpen - cpe), (pen - pe))); std::cout << " Content comparison of buffers yields " << (cmp ? "not equal\n" : "equal\n"); } void do_diff_conversion () { typedef std::codecvt_byname wcodecvt_byname_t; wchar_t wtmp [TMP_BUFFER_SIZE]; char tmp [TMP_BUFFER_SIZE]; std::codecvt_base::result res; // Conversion from external representation of characters to internal // representation of characters using two different facets obtained // from two different locales: // 1. External to internal: codecvt_byname facet from ja_JP.EUC-JP locale // transforms the content of wd_eucjp to internal representation; // 2. 
Internal to external: codecvt_byname facet from ja_JP.UTF-8 locale // transforms the internal representation of wd_eucjp to an external // representation that uses UTF-8 encoding; // 3. Compare the size and content of the final result with size and // content of the wd_utf8 buffer. const std::locale loc1 ("ja_JP.EUC-JP"); const std::locale loc2 ("ja_JP.UTF-8"); const wcodecvt_byname_t& cc1 = std::use_facet (loc1); const wcodecvt_byname_t& cc2 = std::use_facet (loc2); std::mbstate_t state1 = init; std::mbstate_t state2 = init; const std::size_t len = std::strlen (wd_utf8); // set the pointers const char* cpe = wd_eucjp; const char* cpen = wd_eucjp; const char* cpe_limit = wd_eucjp + std::strlen (wd_eucjp); wchar_t* pi = wtmp; wchar_t* pin = wtmp; wchar_t* pi_limit = wtmp + TMP_BUFFER_SIZE; // convert external buffer to internal representation res = cc1.in (state1, cpe, cpe_limit, cpen, pi, pi_limit, pin); // set the pointers and adjust the pi_limit pointer to after // the last converted character const wchar_t* cpi = pi; const wchar_t* cpin = pi; const wchar_t* cpi_limit = pin; char* pe = tmp; char* pen = tmp; char* pe_limit = tmp + TMP_BUFFER_SIZE; res = cc2.out (state2, cpi, cpi_limit, cpin, pe, pe_limit, pen); std::cout << "\nEUC-JP -> INT -> UTF-8 conversion\n"; std::cout << " Size comparison of buffers yields " << (std::codecvt_base::ok == res && std::size_t (pen - pe) == len ? " " : "not ") << "equal\n"; const int cmp = std::memcmp (pe, wd_utf8, std::min ((pen - pe), std::ptrdiff_t (len))); std::cout << " Content comparison of buffers yields " << (cmp ? "not equal\n" : "equal\n"); } int main () { try { for (int i = 0; i != sizeof locales / sizeof *locales; i++) do_roundtrip_conversion (locales [i].name, locales [i].buffer); do_diff_conversion (); } catch (const std::exception &e) { std::cout << "Caught an exception: " << e.what () << std::endl; return 1; // Indicate failure. } catch (...) 
{ std::cout << "Caught an unknown exception" << std::endl; return 1; // Indicate failure. } return 0; }