215 lines
7.1 KiB
C++
215 lines
7.1 KiB
C++
/**************************************************************************
|
|
*
|
|
* insert_wchar.cpp
|
|
*
|
|
* Example program demonstrating an implementation of an inserter
|
|
* operator overloaded for arrays of wchar_t that performs codeset
|
|
 * conversion from wchar_t to multibyte characters.
|
|
*
|
|
* $Id: insert_wchar.cpp 590060 2007-10-30 13:12:23Z faridz $
|
|
*
|
|
***************************************************************************
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* permissions and limitations under the License.
|
|
*
|
|
**************************************************************************/
|
|
|
|
#include <cassert> // for assert()
|
|
#include <cwchar> // for mbstate_t, size_t
|
|
#include <ios> // for hex
|
|
#include <iostream> // for cout
|
|
#include <locale> // for codecvt, isalnum(), locale
|
|
#include <ostream> // for basic_ostream
|
|
#include <sstream> // for ostringstream
|
|
|
|
|
|
// Inserts the NUL-terminated wide character string s into the stream
// buffer of strm, converting it from wchar_t to the stream's character
// type with the std::codecvt<wchar_t, charT, Traits::state_type> facet
// of the locale imbued in strm.
//
// The conversion is performed in chunks through a small buffer on the
// stack so that no dynamic memory is allocated regardless of the length
// of s.
//
// This is an unformatted helper: it does not construct a sentry.
//
// Failures are reported by setting badbit in strm (which may in turn
// throw std::ios_base::failure, depending on strm.exceptions ()):
//   - s is a null pointer
//   - the facet reports a conversion error; whatever was successfully
//     converted ahead of the offending character is still written out
//   - the facet reports partial without consuming or producing anything
//   - the stream buffer accepts fewer characters than it was handed
template <class charT, class Traits>
void
streambuf_insert (std::basic_ostream<charT, Traits> &strm,
                  const wchar_t                     *s)
{
    typedef typename Traits::state_type           StateT;
    typedef std::codecvt<wchar_t, charT, StateT>  Codecvt;

    if (0 == s) {
        // computing the length of a null string is undefined; fail instead
        strm.setstate (strm.badbit);
        return;
    }

    const Codecvt &cvt = std::use_facet<Codecvt>(strm.getloc ());

    const std::size_t slen = std::char_traits<wchar_t>::length (s);

    // perform codeset conversion in chunks to avoid dynamic
    // memory allocation
    const std::size_t xbufsize = 32;

    charT                xbuf [xbufsize];
    charT* const         xbuf_end  = xbuf + xbufsize;
    charT*               to_next   = 0;
    const wchar_t*       from_next = 0;
    const wchar_t* const end       = s + slen;

    StateT state = StateT ();

    // from_next starts out null so that the facet is invoked at least
    // once, even for the empty string
    for (const wchar_t* base = s; from_next != end; base = from_next) {

        const std::codecvt_base::result res =
            cvt.out (state, base, end, from_next,
                     xbuf, xbuf_end, to_next);

        switch (res) {
        case Codecvt::error:
            // write out the sequence successfully converted up
            // to the point of the error in the internal sequence
            // and fail; must not fall through to the next label
            // or the same characters would be written twice
            strm.rdbuf ()->sputn (xbuf, to_next - xbuf);
            strm.setstate (strm.badbit);
            return;

        case Codecvt::noconv:
            // no conversion is necessary: the external sequence is
            // the same as the internal one and nothing has been
            // stored in xbuf, so write the source characters as they
            // are (the facet is only expected to return noconv when
            // the internal and external types are the same, in which
            // case the cast below is a no-op)
            for (const wchar_t* pwc = base; pwc != end; ++pwc) {
                const typename Traits::int_type put =
                    strm.rdbuf ()->sputc (static_cast<charT>(*pwc));

                if (Traits::eq_int_type (put, Traits::eof ())) {
                    strm.setstate (strm.badbit);
                    return;
                }
            }

            from_next = end;   // effectively break
            break;

        default: {
            assert (cvt.ok == res || cvt.partial == res);

            // partial conversion will result if there isn't enough
            // space in the conversion buffer to hold the converted
            // sequence, but we're O.K. since we'll be passing any
            // remaining unconverted characters (starting at
            // from_next) in the next iteration

            const std::streamsize nbytes = to_next - xbuf;

            if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) {
                strm.setstate (strm.badbit);
                return;
            }

            if (from_next == base && from_next != end && 0 == nbytes) {
                // nothing was consumed and nothing produced: trying
                // again with the same arguments would loop forever
                strm.setstate (strm.badbit);
                return;
            }
        }
        }
    }
}
|
|
|
|
|
|
// Inserter for NUL-terminated wide character strings that works with
// streams of any character type.
//
// A sentry is constructed for strm and, provided it tests true, the
// string is handed over to streambuf_insert(). Any exception escaping
// from there causes badbit to be set in strm; the exception is then
// swallowed unless setting badbit itself throws ios_base::failure, in
// which case the exception that started it all is propagated.
//
// Returns strm.
template <class charT, class Traits>
std::basic_ostream<charT, Traits>&
operator<< (std::basic_ostream<charT, Traits> &strm,
            const wchar_t                     *s)
{
    typedef std::basic_ostream<charT, Traits> Stream;

    const typename Stream::sentry guard (strm);

    // nothing to do when the sentry says the stream isn't usable
    if (!guard)
        return strm;

    try {
        // convert the string and write it to the stream buffer
        streambuf_insert (strm, s);
    }
    catch (...) {
        // pessimistically assume setstate() throws and clear
        // the flag only once it has been seen to return
        bool setstate_failed = true;

        try {
            strm.setstate (strm.badbit);
            setstate_failed = false;
        }
        catch (std::ios_base::failure&) {
            // discard the failure object; the exception caught by
            // the enclosing handler is the one to report
        }

        // a bare throw here rethrows the exception caught by the
        // outer handler, not the one from setstate()
        if (setstate_failed)
            throw;
    }

    return strm;
}
|
|
|
|
|
|
// Sample wide character strings to convert; the comment following
// each literal names its characters and gives the UTF-8 encoding
// of the whole string.
static const wchar_t* const wcs [] = {
    L"a",                     // Latin a:                "a"
    L"abc",                   // Latin a b c:            "abc"
    L"\x0391",                // Greek Alpha:            "\xce\x91"
    L"\x0391\x0392",          // Greek Alpha Beta:       "\xce\x91\xce\x92"
    L"\x0391\x0392\x0393",    // Greek Alpha Beta Gamma: "\xce\x91\xce\x92\xce\x93"
    L"\x0f20",                // Tibetan digit 0:        "\xe0\xbc\xa0"
    L"\x0f21\x0f20",          // Tibetan digits 1 0:     "\xe0\xbc\xa1\xe0\xbc\xa0"
    L"\x0f22\x0f21\x0f20"     // Tibetan digits 2 1 0:   "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0"
};
|
|
|
|
|
|
int main ()
|
|
{
|
|
typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> Codecvt;
|
|
|
|
// create a UCS/UTF-8 codecvt facet and install it in a locale
|
|
const std::locale utf (std::cout.getloc (), new Codecvt ("UTF-8@UCS"));
|
|
|
|
for (std::size_t i = 0; i != sizeof wcs / sizeof *wcs; ++i) {
|
|
|
|
std::ostringstream strm;
|
|
|
|
// imbue the UTF-8/UCS capable locale in a stringstream
|
|
strm.imbue (utf);
|
|
|
|
// insert each wide character string into the narrow stream
|
|
// object relying on the inserter to convert each wide string
|
|
// into the corresponding multibyte character string
|
|
strm << wcs [i];
|
|
|
|
// write out the wide character string in Unicode notation
|
|
std::cout << "UCS-2: " << std::hex;
|
|
|
|
for (const wchar_t *pwc = wcs [i]; *pwc != L'\0'; ++pwc)
|
|
std::cout << "U+" << unsigned (*pwc) << ' ';
|
|
|
|
const std::string str = strm.str ();
|
|
|
|
std::cout << " ==> UTF-8: \"";
|
|
|
|
typedef unsigned char UChar;
|
|
|
|
// write out the the multibyte character sequence using
|
|
// ordinary aphanumeric symbols or hex escape sequences
|
|
// where necessary
|
|
for (const char *pc = str.c_str (); *pc != '\0'; ++pc) {
|
|
|
|
// parenthesize isalnum to prevent macro expension
|
|
// in case the function happens to be (illegally)
|
|
// shadowed by a macro
|
|
if ((std::isalnum)(*pc, std::cout.getloc ()))
|
|
std::cout << *pc;
|
|
else
|
|
std::cout << "\\x" << int (UChar (*pc));
|
|
}
|
|
|
|
std::cout << "\"\n";
|
|
}
|
|
}
|