1176 lines
39 KiB
C++
1176 lines
39 KiB
C++
/************************************************************************
|
|
*
|
|
* codecvt.cpp - test exercising file streams and code conversion
|
|
*
|
|
* $Id: 27.filebuf.codecvt.cpp 590052 2007-10-30 12:44:14Z faridz $
|
|
*
|
|
************************************************************************
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* permissions and limitations under the License.
|
|
*
|
|
* Copyright 2001-2006 Rogue Wave Software.
|
|
*
|
|
**************************************************************************/
|
|
|
|
// PROBLEM DESCRIPTION:
|
|
// basic_ofstream<>::overflow() mangles a multibyte sequence when using
|
|
// code conversion. This functionality of the class isn't currently (as
|
|
// of the date of the creation of the test) being exercised by our
|
|
// testsuite due to the lack of a suitable locale (e.g., ja_JP).
|
|
|
|
// TEST DESCRIPTION:
|
|
// test creates a temporary file and fills it with series of pairs
|
|
// <offset, seq>, where `offset' is the offset from the beginning of
|
|
// the file to the first (decimal) digit of offset, and `seq' is
|
|
// a character in the range [1, CHAR_MAX] possibly converted to an
|
|
// escape or trigraph sequence according to the rules described
|
|
// in 2.3 and 2.13.2
|
|
//
|
|
// the program then exercises the stream's (actually, the stream
|
|
// file buffer's) ability to extract and seek within such a file
|
|
// thus testing the stream's ability to correctly interact with
|
|
// the codecvt facet installed in the imbued locale
|
|
|
|
|
|
#include <rw/_defs.h>
|
|
#if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION)
|
|
// disable implicit inclusion to work around a limitation
|
|
// in IBM VisualAge
|
|
# define _RWSTD_NO_IMPLICIT_INCLUSION
|
|
#endif
|
|
|
|
|
|
#include <fstream>
|
|
#include <sstream>
|
|
|
|
#include <climits> // for UCHAR_MAX
|
|
#include <cstdio> // for fclose(), fopen(), fseek(), size_t
|
|
#include <cstdlib> // for abort()
|
|
#include <cstring> // for memset(), strcmp(), strlen()
|
|
#include <cwchar> // for mbstate_t
|
|
|
|
#include <driver.h>
|
|
#include <file.h>
|
|
|
|
/**************************************************************************/
|
|
|
|
// code conversion facet suitable for replacement of the default
|
|
// codecvt<char, char, mbstate_t> facet
|
|
// cformat::do_out() converts printable ASCII characters into themselves,
|
|
// control characters are converted to standard C escape sequences
|
|
// cformat::do_in() reverses the effect of do_out()
|
|
class cformat: public std::codecvt <char, char, std::mbstate_t>
|
|
{
|
|
enum {
|
|
new_line = 0x0001, // convert '\n' to "\n"
|
|
horizontal_tab = 0x0002, // convert '\t' to "\t"
|
|
vertical_tab = 0x0004, // convert '\v' to "\v"
|
|
backspace = 0x0008, // convert '\b' to "\b"
|
|
carriage_return = 0x0010, // convert '\r' to "\r"
|
|
form_feed = 0x0020, // convert '\f' to "\f"
|
|
alert = 0x0040, // convert '\a' to "\a"
|
|
backslash = 0x0080, // convert '\\' to "\\"
|
|
question_mark = 0x0100, // convert '?' to "\?"
|
|
single_quote = 0x0200, // convert '\\'' to "\'"
|
|
double_quote = 0x0400, // convert '"' to "\""
|
|
trigraphs = 0x8000, // convert to/from trigrap sequences
|
|
hex = 0x1000 // hex notation in external representation
|
|
};
|
|
|
|
const int mask; // bitmaps of flags above
|
|
|
|
public:
|
|
explicit
|
|
cformat (std::size_t ref = 0, int m = 0)
|
|
: std::codecvt<char, char, std::mbstate_t> (ref),
|
|
mask (m) { /* empty */ }
|
|
|
|
protected:
|
|
|
|
virtual result
|
|
do_out (state_type&,
|
|
const intern_type*, const intern_type*,
|
|
const intern_type*&,
|
|
extern_type*, extern_type*, extern_type*&) const;
|
|
|
|
virtual result
|
|
do_in (state_type&, const extern_type*,
|
|
const extern_type*, const extern_type*&,
|
|
intern_type*, intern_type*, intern_type*&) const;
|
|
|
|
virtual result
|
|
do_unshift (state_type&, extern_type*,
|
|
extern_type*, extern_type*&) const {
|
|
// stateless encoding, no conversion necessary
|
|
return noconv;
|
|
}
|
|
|
|
virtual int
|
|
do_encoding () const _THROWS (()) {
|
|
return 0; // variable number of external chars per single internal
|
|
}
|
|
|
|
virtual bool
|
|
do_always_noconv () const _THROWS (()) {
|
|
return false; // conversion always necessary
|
|
}
|
|
|
|
// returns the maximum `N' of extern chars in the range [from, from_end)
|
|
// such that N represents max or fewer internal chars
|
|
virtual int
|
|
do_length (state_type&, const extern_type*,
|
|
const extern_type*, std::size_t) const;
|
|
|
|
// returns the max value do_length (s, from, from_end, 1) can return
|
|
// for any valid range [from, from_end) - see LWG issue 74 (a DR)
|
|
virtual int
|
|
do_max_length () const _THROWS (()) {
|
|
// assume that an internal char occupies at most 4 external chars
|
|
// this won't hold for e.g. '\x00001' etc., but will hold for all
|
|
// chars in the hex notation of up to two digits and all chars in
|
|
// octal notation (which are required to fit in 4 by the standard)
|
|
return 4;
|
|
}
|
|
};
|
|
|
|
|
|
std::codecvt_base::result
|
|
cformat::do_out ( state_type& /* unused */,
|
|
const intern_type *from,
|
|
const intern_type *from_end,
|
|
const intern_type *&from_next,
|
|
extern_type *to,
|
|
extern_type *to_end,
|
|
extern_type *&to_next) const
|
|
{
|
|
// assert 22.2.1.5.2, p1 preconditions
|
|
rw_assert (from <= from_end, __FILE__, __LINE__,
|
|
"codecvt::do_out (..., from = %#p, from + %d, %#p, "
|
|
"to = %#p, to + %d, %#p): from <= from_end",
|
|
from, from_end - from, from_next, to, to_end - to, to_next);
|
|
|
|
rw_assert (to <= to_end, __FILE__, __LINE__,
|
|
"codecvt::do_out (..., from = %#p, from + %d, %#p, "
|
|
"to = %#p, to + %d, %#p): to <= to_end",
|
|
from, from_end - from, from_next, to, to_end - to, to_next);
|
|
|
|
// assume no conversion will be performed
|
|
result res = noconv;
|
|
|
|
for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
|
|
|
|
// out of space
|
|
if (to_next == to_end) {
|
|
res = partial;
|
|
break;
|
|
}
|
|
|
|
// convert to unsigned to make sure comparison works
|
|
unsigned char ch = *from_next;
|
|
extern_type esc = extern_type ();
|
|
|
|
if (ch < ' ') {
|
|
|
|
// convert to a C escape sequence
|
|
switch (ch) {
|
|
|
|
case '\a':
|
|
if (!(mask & alert)) {
|
|
ch = 'a';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\b':
|
|
if (!(mask & backspace)) {
|
|
ch = 'b';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\t':
|
|
if (!(mask & horizontal_tab)) {
|
|
ch = 't';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\n':
|
|
if (!(mask & new_line)) {
|
|
ch = 'n';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\v':
|
|
if (!(mask & vertical_tab)) {
|
|
ch = 'v';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\f':
|
|
if (!(mask & form_feed)) {
|
|
ch = 'f';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\r':
|
|
if (!(mask & carriage_return)) {
|
|
ch = 'r';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
case '\\':
|
|
if (!(mask & backslash)) {
|
|
ch = '\\';
|
|
esc = '\\';
|
|
}
|
|
break;
|
|
|
|
default:
|
|
esc = '\\';
|
|
break;
|
|
}
|
|
}
|
|
else if (ch > '~') {
|
|
// convert to a C escape sequence (octal)
|
|
esc = '\\';
|
|
}
|
|
else {
|
|
// escape special characters
|
|
switch (ch) {
|
|
case '?':
|
|
if (!(mask & question_mark))
|
|
esc = '\\';
|
|
break;
|
|
|
|
case '\'':
|
|
if (!(mask & single_quote))
|
|
esc = '\\';
|
|
break;
|
|
|
|
case '"':
|
|
if (!(mask & double_quote))
|
|
esc = '\\';
|
|
break;
|
|
|
|
case '\\':
|
|
if (!(mask & backslash))
|
|
esc = '\\';
|
|
break;
|
|
}
|
|
|
|
if (!(mask & trigraphs)) {
|
|
|
|
// convert to a trigraph sequence
|
|
switch (ch) {
|
|
case '#': ch = '='; esc = '?'; break;
|
|
case '\\': ch = '/'; esc = '?'; break;
|
|
case '^': ch = '\''; esc = '?'; break;
|
|
case '[': ch = '('; esc = '?'; break;
|
|
case ']': ch = ')'; esc = '?'; break;
|
|
case '|': ch = '!'; esc = '?'; break;
|
|
case '{': ch = '<'; esc = '?'; break;
|
|
case '}': ch = '>'; esc = '?'; break;
|
|
case '~': ch = '-'; esc = '?'; break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// process `ch' and `esc'
|
|
|
|
if ('\\' == esc) {
|
|
|
|
// conversion was performed
|
|
res = ok;
|
|
|
|
if (ch < ' ' || ch > '~') {
|
|
|
|
// need room for an escape followed by three ocal digits
|
|
if (4 > to_end - to_next) {
|
|
res = partial;
|
|
break;
|
|
}
|
|
|
|
static const char digits[] = "0123456789abcdef";
|
|
|
|
// add an escape character
|
|
*to_next++ = esc;
|
|
|
|
if (mask & hex) {
|
|
// add hex representation (exactly three chars)
|
|
*to_next++ = 'x';
|
|
*to_next++ = digits [(ch & 0xf0) >> 4];
|
|
*to_next++ = digits [ch & 0xf];
|
|
}
|
|
else {
|
|
// add octal representation (exactly three digits)
|
|
*to_next++ = digits [(ch & (7 << 6)) >> 6];
|
|
*to_next++ = digits [(ch & (7 << 3)) >> 3];
|
|
*to_next++ = digits [ch & 7];
|
|
}
|
|
}
|
|
else {
|
|
// need room for an escape followed by a single char
|
|
if (2 > to_end - to_next) {
|
|
res = partial;
|
|
break;
|
|
}
|
|
|
|
// add an escape char followed by the escaped char
|
|
*to_next++ = esc;
|
|
*to_next++ = ch;
|
|
}
|
|
}
|
|
else if ('?' == esc) {
|
|
// need room for a trigraph sequence
|
|
if (3 > to_end - to_next) {
|
|
res = partial;
|
|
break;
|
|
}
|
|
|
|
// conversion was performed
|
|
res = ok;
|
|
|
|
// add a trigraph sequence
|
|
*to_next++ = '?';
|
|
*to_next++ = '?';
|
|
*to_next++ = ch;
|
|
}
|
|
else {
|
|
// not escaped
|
|
*to_next++ = ch;
|
|
}
|
|
}
|
|
|
|
if (noconv == res) {
|
|
// 22.2.1.5.2, p2, Note: no conversion was necessary
|
|
from_next = from;
|
|
to_next = to;
|
|
}
|
|
|
|
rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
|
|
"user-defined codecvt: internal inconsistency");
|
|
|
|
rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__,
|
|
"user-defined codecvt: internal inconsistency");
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
std::codecvt_base::result
|
|
cformat::do_in ( state_type& /* unused */,
|
|
const extern_type *from,
|
|
const extern_type *from_end,
|
|
const extern_type *&from_next,
|
|
intern_type *to,
|
|
intern_type *to_end,
|
|
intern_type *&to_next) const
|
|
{
|
|
// assert 22.2.1.5.2, p1 preconditions
|
|
rw_assert (from <= from_end, __FILE__, __LINE__,
|
|
"codecvt::do_in (..., from = %#p, from + %d, %#p, "
|
|
"to = %#p, to + %d, %#p): from <= from_end",
|
|
from, from_end - from, from_next, to, to_end - to, to_next);
|
|
|
|
rw_assert (to <= to_end, __FILE__, __LINE__,
|
|
"codecvt::do_in (..., from = %#p, from + %d, %#p, "
|
|
"to = %#p, to + %d, %#p) to <= to_end",
|
|
from, from_end - from, from_next, to, to_end - to, to_next);
|
|
|
|
result res = ok;
|
|
|
|
for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
|
|
|
|
unsigned char ch = *from_next;
|
|
intern_type c = intern_type ();
|
|
|
|
if ('\\' == ch) {
|
|
|
|
if (2 > from_end - from_next) {
|
|
// ok is the correct value to return in this case,
|
|
// but partial should be handled as well for robustness
|
|
res = (from_end - (extern_type*)0) % 2 ? ok : partial;
|
|
break;
|
|
}
|
|
|
|
ch = from_next [1];
|
|
|
|
if ('x' == ch) {
|
|
// interpret a hex escape sequence
|
|
|
|
// advance past '\x'
|
|
const extern_type *next = from_next + 2;
|
|
|
|
// parse hex digits until a non-hex digits is encountered
|
|
for (; ; ++next) {
|
|
|
|
if (next == from_end) {
|
|
// do not advance to the end since there may be
|
|
// more digits following it (e.g., '\x012' with
|
|
// from_end pointing at '1' or '2')
|
|
return partial;
|
|
}
|
|
|
|
ch = *next;
|
|
if (ch >= '0' && ch <= '9')
|
|
c = (c << 4) | (ch - '0');
|
|
else if (ch >= 'a' && ch <= 'f')
|
|
c = (c << 4) | (ch - 'a' + 10);
|
|
else if (ch >= 'A' && ch <= 'F')
|
|
c = (c << 4) | (ch - 'A' + 10);
|
|
else if (next - from_next > 2)
|
|
break;
|
|
else {
|
|
return error; // non-hex digit immediately after '\x'
|
|
}
|
|
}
|
|
|
|
// advance to the end of parsed number
|
|
from_next = next - 1;
|
|
}
|
|
else if ('0' <= ch && '7' >= ch) {
|
|
// interpret a oct escape sequence
|
|
|
|
// (tentatively) advance past '\'
|
|
const extern_type *next = from_next + 1;
|
|
|
|
// parse at most three oct digits
|
|
for (; next - from_next < 4; ++next) {
|
|
|
|
if (next == from_end) {
|
|
// do not advance to the end since there may be
|
|
// more digits following it (e.g., '\x012' with
|
|
// from_end pointing at '1' or '2')
|
|
return partial;
|
|
}
|
|
|
|
ch = *next;
|
|
if (ch >= '0' && ch <= '7')
|
|
c = (c << 3) | (ch - '0');
|
|
else if (next - from_next)
|
|
break;
|
|
else {
|
|
// advance to the offending char
|
|
from_next = next;
|
|
return error; // non-oct digit immediately after '\'
|
|
}
|
|
}
|
|
|
|
// advance to the end of parsed number
|
|
from_next = next - 1;
|
|
}
|
|
else {
|
|
// interpret standard C escape sequence
|
|
switch (ch) {
|
|
case 'a': c = '\a'; break;
|
|
case 'b': c = '\b'; break;
|
|
case 't': c = '\t'; break;
|
|
case 'n': c = '\n'; break;
|
|
case 'v': c = '\v'; break;
|
|
case 'f': c = '\f'; break;
|
|
case 'r': c = '\r'; break;
|
|
|
|
// optional but allowed and escaped backslash
|
|
case '?': case '"': case '\'': case '\\': c = ch ; break;
|
|
|
|
// bad escape sequence
|
|
default: return error;
|
|
}
|
|
|
|
// advance past the initial '\'
|
|
++from_next;
|
|
}
|
|
}
|
|
else if ('?' == ch && !(mask & trigraphs)) {
|
|
// (try to) convert a trigraph sequence
|
|
if ( 2 > from_end - from_next
|
|
|| '?' == from_next [1] && 3 > from_end - from_next) {
|
|
res = partial;
|
|
break;
|
|
}
|
|
|
|
if ('?' == from_next [1]) {
|
|
|
|
// "??" (potentilly) introduces a trigraph sequence
|
|
switch (from_next [2]) {
|
|
|
|
case '=': c = '#'; break;
|
|
case '/': c = '\\'; break;
|
|
case '\'': c = '^'; break;
|
|
case '(': c = '['; break;
|
|
case ')': c = ']'; break;
|
|
case '!': c = '|'; break;
|
|
case '<': c = '{'; break;
|
|
case '>': c = '}'; break;
|
|
case '-': c = '~'; break;
|
|
|
|
default:
|
|
// no a trigraph sequence, won't convert
|
|
c = from_next [0]; // i.e., '?'
|
|
}
|
|
|
|
// skip the leading "??" of a trigraph sequence
|
|
if (c != from_next [0])
|
|
from_next += 2;
|
|
}
|
|
else
|
|
// ordinary (not escaped) character
|
|
c = ch;
|
|
}
|
|
else
|
|
// ordinary (not escaped) character
|
|
c = ch;
|
|
|
|
// to_next may be 0 (when called from do_length())
|
|
// doing pointer math on invalid pointers (null) has undefined behavior
|
|
// but will probably work in most cases
|
|
if (to_next)
|
|
*to_next = c;
|
|
|
|
++to_next;
|
|
|
|
// in case of of the inner loops has reached end
|
|
if (from_next == from_end)
|
|
break;
|
|
}
|
|
|
|
rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
|
|
"user-defined codecvt: internal inconsistency");
|
|
|
|
rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__,
|
|
"user-defined codecvt: internal inconsistency");
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
int
|
|
cformat::do_length (state_type&,
|
|
const extern_type *from,
|
|
const extern_type *from_end,
|
|
std::size_t max) const
|
|
{
|
|
const extern_type *from_next;
|
|
intern_type *to_next = 0;
|
|
|
|
std::mbstate_t st;
|
|
std::memset (&st, 0, sizeof st);
|
|
|
|
// use do_in() with `to' of 0 to do the computation
|
|
// doing pointer math on invalid pointers (null) has undefined behavior
|
|
// but will probably work in most cases
|
|
do_in (st, from, from_end, from_next,
|
|
to_next, to_next + max, to_next);
|
|
|
|
return to_next - (intern_type*)0;
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
// determines the size of the named file in bytes by seeking to its end
// returns -1 if the file cannot be opened, the seek fails, or
// ftell() reports an error
static std::streamsize
fsize (const char *fname)
{
    std::FILE* const f = std::fopen (fname, "r");

    if (!f)
        return -1;

    std::streamsize size = -1;

    if (0 == std::fseek (f, 0, SEEK_END))
        size = std::ftell (f);

    // close the file on every path, including a failed seek
    std::fclose (f);

    return size;
}
|
|
|
|
/***********************************************************************/
|
|
|
|
static void
|
|
self_test ()
|
|
{
|
|
rw_info (0, __FILE__, __LINE__,
|
|
"user-defined codecvt facet -- self test");
|
|
|
|
static const char* const result[] = {
|
|
"ok", "partial", "error", "noconv"
|
|
};
|
|
|
|
// user-defined code conversion facet
|
|
cformat fmt (1);
|
|
|
|
// original array of internal characters and one to which to convert
|
|
// an external representation back to (for comparison)
|
|
cformat::intern_type intrn [2][256] = { { '\0' } };
|
|
|
|
// array of external chars large enough to hold the internal array
|
|
// each internal char converts to at most 4 external chars
|
|
cformat::extern_type extrn [1024] = { '\0' };
|
|
|
|
// fill internal array with chars from '\1' to '\377'
|
|
for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i)
|
|
intrn [0][i] = cformat::intern_type (i + 1);
|
|
|
|
const cformat::intern_type *intrn_next_0 = 0;
|
|
cformat::intern_type *intrn_next_1 = 0;
|
|
|
|
cformat::extern_type *extrn_next = 0;
|
|
|
|
|
|
// dummy (state not used, conversion is stateless)
|
|
std::mbstate_t st;
|
|
std::memset (&st, 0, sizeof st);
|
|
|
|
// convert internal to external representation, substituting
|
|
// escape sequences for non-printable characters
|
|
std::codecvt_base::result res;
|
|
|
|
// convert array in internal representation to external representation
|
|
res = fmt.out (st,
|
|
intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0,
|
|
extrn, extrn + sizeof extrn, extrn_next);
|
|
|
|
rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
|
|
"codecvt::out (); result == codecvt_base::ok, "
|
|
"got codecvt_base::%s", result [res]);
|
|
|
|
// assert that the external sequence is longer than the internal one
|
|
rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__,
|
|
"codecvt::out (); converted size %d, expected > %d",
|
|
extrn_next - extrn, intrn_next_0 - intrn [0]);
|
|
|
|
// convert external to internal representation, parsing
|
|
// multi-char escape sequences into single chars
|
|
|
|
const cformat::intern_type *next = extrn;
|
|
|
|
intrn_next_1 = intrn [1];
|
|
|
|
for (; next != extrn_next; ) {
|
|
|
|
// allow only a small buffer space to exercise partial conversion
|
|
std::size_t step = std::size_t (extrn_next - next);
|
|
if (step > 12)
|
|
step = 5 + step % 8;
|
|
|
|
res = fmt.in (st,
|
|
next, next + step, next,
|
|
intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1);
|
|
|
|
if (std::codecvt_base::error == res)
|
|
break;
|
|
}
|
|
|
|
// assert that entrire sequence converted ok
|
|
rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
|
|
"codecvt::in (); result == codecvt_base::ok, got "
|
|
"codecvt_base::%s at offset %d", result [res], next - extrn);
|
|
|
|
rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__,
|
|
"codecvt::in (); to_next == %#p, got %#p",
|
|
intrn [1] + sizeof intrn [1], intrn_next_1);
|
|
|
|
rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__,
|
|
"codecvt<>::out/in ()");
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
// asserts that the state of `stream' is exactly `expected',
// reporting both values otherwise
#define RW_ASSERT_STATE(stream, expected)                           \
    rw_assert ((stream).rdstate () == (expected), 0, __LINE__,      \
               "rdstate () == %{Is}, got %{Is}",                    \
               (expected), (stream).rdstate ())
|
|
|
|
static void
|
|
test_noconv (const char *fname)
|
|
{
|
|
rw_info (0, 0, __LINE__, "ifstream extraction without conversion");
|
|
|
|
std::ifstream f (fname);
|
|
|
|
// make sure file stream has been successfully opened
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// gain public access to protected members
|
|
struct pubbuf: std::streambuf {
|
|
|
|
// working around an MSVC 6.0 bug (PR #26330)
|
|
typedef std::streambuf Base;
|
|
|
|
virtual std::streamsize showmanyc () {
|
|
return Base::showmanyc ();
|
|
}
|
|
char* pubgptr () {
|
|
return Base::gptr ();
|
|
}
|
|
char* pubegptr () {
|
|
return Base::egptr ();
|
|
}
|
|
};
|
|
|
|
// use static cast through void* to avoid using reinterpret_cast
|
|
pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ());
|
|
|
|
std::streamsize filesize = rdbuf->showmanyc ();
|
|
|
|
// exercise 27.5.2.4.3, p1: showmanyc() returns the "estimated"
|
|
// size of the sequence (i.e., the file size in this case)
|
|
rw_assert (filesize == fsize (fname), 0, __LINE__,
|
|
"streambuf::showmanyc () == %ld, got %ld",
|
|
fsize (fname), filesize);
|
|
|
|
// exercise 27.5.2.2.3, p1
|
|
filesize = f.rdbuf ()->in_avail ();
|
|
|
|
const char *gptr = rdbuf->pubgptr ();
|
|
const char *egptr = rdbuf->pubgptr ();
|
|
|
|
rw_assert (filesize == (gptr < egptr ? egptr - gptr : fsize (fname)),
|
|
0, __LINE__,
|
|
"streambuf::in_avail () == %ld, got %ld",
|
|
(gptr < egptr ? egptr - gptr : fsize (fname)), filesize);
|
|
|
|
// allocate buffer large enough to accomodate the converted
|
|
// (i.e. internal) sequence
|
|
const std::size_t bufsize = 0x10000; // 64k should do it
|
|
char *tmpbuf = new char [bufsize];
|
|
|
|
// fill with non-0 value to check for writes past the end
|
|
// (see also Onyx incident 14033)
|
|
std::memset (tmpbuf, '\x7f', bufsize);
|
|
|
|
// ecercise putback area
|
|
std::streamsize i;
|
|
|
|
#ifndef _RWSTD_PBACK_SIZE
|
|
# define _RWSTD_PBACK_SIZE 1
|
|
#endif // _RWSTD_PBACK_SIZE
|
|
|
|
// _RWSTD_PBACK_SIZE is the size of the putback area the library
|
|
// was configured with; the macro expands to streamsize (N)
|
|
for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) {
|
|
// read a few characters, read must not append a '\0'
|
|
std::streamsize n = f.read (tmpbuf, i).gcount ();
|
|
|
|
// assert that read exactly `i' chars, buffer not null-terminared
|
|
rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__,
|
|
"ifstream::read (%#p, %ld) read %ld,"
|
|
"buffer terminated with '\\%03o'",
|
|
tmpbuf, i, n, tmpbuf [i]);
|
|
|
|
// put back read characters; assert that they are the same
|
|
// as those in the corresponding positions in the buffer
|
|
for (std::streamsize j = 0; j != i; ++j) {
|
|
std::ifstream::int_type c = f.rdbuf ()->sungetc ();
|
|
|
|
typedef std::ifstream::traits_type Traits;
|
|
|
|
rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c,
|
|
0, __LINE__,
|
|
"filebuf::sungetc() == '\\%03o', got '\\%03o",
|
|
tmpbuf [n - j - 1], c);
|
|
}
|
|
|
|
// re-read characters just put back
|
|
char buf [_RWSTD_PBACK_SIZE + 1];
|
|
std::memset (buf, '\x7f', sizeof buf);
|
|
|
|
std::streamsize n2 = f.read (buf, i).gcount ();
|
|
|
|
// assert that the requested number of chars were read in
|
|
rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__,
|
|
"ifstream::read (%#p, %ld) read %ld,"
|
|
"buffer terminated with '\\%03o'",
|
|
buf, i, n2, buf [i]);
|
|
|
|
// assert that the read chars are those that were put back
|
|
for (std::streamsize k = 0; k != i; ++k) {
|
|
rw_assert (buf [k] == tmpbuf [k], 0, __LINE__,
|
|
"buffer mismatch at offset %ld: got '\\%03o', "
|
|
"expected '\\%03o'", k, buf [k], tmpbuf [k]);
|
|
// put character back again so that it can be read back in
|
|
f.rdbuf ()->sungetc ();
|
|
}
|
|
}
|
|
|
|
// read file contents into buffer (apply no conversion)
|
|
const std::streamsize n = f.read (tmpbuf, bufsize).gcount ();
|
|
|
|
// 27.6.1.3, p28 - read() sets eofbit | failbit
|
|
// if end-of-file occurs on the input sequence
|
|
RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
|
|
|
|
// assert that the entiire file has been read
|
|
rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__,
|
|
"ifstream::read (%#p, %ld) read %ld, expected %ld; "
|
|
"buffer terminated with '\\%03o' [%s]",
|
|
tmpbuf, bufsize, n, filesize, tmpbuf [n], fname);
|
|
|
|
tmpbuf [n] = '\0';
|
|
|
|
// assert that file contains no control characters
|
|
bool b = true;
|
|
for (i = 0; b && i != UCHAR_MAX + 1; ++i) {
|
|
if (i >= ' ' || i <= '~')
|
|
continue;
|
|
b = 0 == std::strchr (tmpbuf, char (i));
|
|
}
|
|
|
|
rw_assert (b, 0, __LINE__,
|
|
"unescaped non-printable character '\\#03o' at offset %ld",
|
|
tmpbuf [i], i);
|
|
|
|
delete[] tmpbuf;
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
static void
|
|
test_error (const char *fname)
|
|
{
|
|
rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error");
|
|
|
|
const char outbuf[] = {
|
|
"abcdefghijklmnopqrstuvwxyz\\x20\\xzzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
// ^
|
|
// |
|
|
// error (invalid hex sequence) ---+
|
|
};
|
|
|
|
// write out a text file containing a conversion error
|
|
std::ofstream ostrm (fname);
|
|
|
|
ostrm << outbuf;
|
|
|
|
ostrm.close ();
|
|
|
|
// read the file back in using the conversion facet
|
|
std::ifstream istrm (fname);
|
|
|
|
// user-defined code conversion facet
|
|
const cformat fmt (1 /* prevent locale from deleting */);
|
|
|
|
// create a locale by combining the classic locale and our UD facet
|
|
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
|
|
std::locale l (std::locale::classic (), &fmt);
|
|
|
|
// imbue locale with formatting facet into streams and save previous
|
|
l = istrm.imbue (l);
|
|
|
|
char inbuf [sizeof outbuf * 4] = { 0 };
|
|
|
|
// try to read partial contents of the file
|
|
// including the conversion error into the buffer
|
|
istrm.read (inbuf, 26L + 4L /* "a..z" <space> <error> "AB" */);
|
|
|
|
// verify that the operation failed, eofbit is set since
|
|
// less than the requested number of characters have been read
|
|
RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
|
|
|
|
// verify that the 26 characters 'a' through 'z' plus
|
|
// the space (i.e., 27 chars) have been extracted
|
|
rw_assert (27 == istrm.gcount (), 0, __LINE__,
|
|
"ifstream::read () extracted %d, expected 27",
|
|
istrm.gcount ());
|
|
|
|
rw_assert ( 0 == std::ifstream::traits_type::compare (inbuf, outbuf, 26)
|
|
&& ' ' == inbuf [26], 0, __LINE__,
|
|
"ifstream::read () got \"%s\", expected \"%.26s \"",
|
|
inbuf, outbuf);
|
|
|
|
istrm.clear ();
|
|
|
|
// try to read again, and verify that the operation fails
|
|
istrm.read (inbuf, sizeof inbuf);
|
|
|
|
RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
|
|
|
|
rw_assert (0 == istrm.gcount (), 0, __LINE__,
|
|
"ifstream::read () extracted %d, expected 0",
|
|
istrm.gcount ());
|
|
|
|
// replace the imbued locale
|
|
istrm.imbue (l);
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
static void
|
|
test_seek (const char *fname,
|
|
std::size_t bufsize = std::size_t (-1))
|
|
{
|
|
std::ifstream f (fname);
|
|
|
|
// make sure stream has been successfully opened
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// set buffer size if specified
|
|
if (std::size_t (-1) != bufsize) {
|
|
|
|
rw_info (0, 0, __LINE__,
|
|
"ifstream::seekg()/tellg() - %zu byte buffer", bufsize);
|
|
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
f.rdbuf ()->pubsetbuf (0, bufsize);
|
|
}
|
|
else {
|
|
rw_info (0, 0, __LINE__,
|
|
"ifstream::seekg()/tellg() - default buffer size");
|
|
}
|
|
|
|
// user-defined code conversion facet
|
|
const cformat fmt (1 /* prevent locale from deleting */);
|
|
|
|
// create a locale by combining the classic locale and our UD facet
|
|
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
|
|
std::locale l (std::locale::classic (), &fmt);
|
|
|
|
// imbue locale with formatting facet into streams and save previous
|
|
l = f.imbue (l);
|
|
|
|
// seek to the beginning of stream (safe)
|
|
f.seekg (0);
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
const unsigned char max = UCHAR_MAX - '~';
|
|
|
|
for (std::size_t n = 0; n != std::size_t (max - 1); ++n) {
|
|
|
|
const char delim = char ('~' + n);
|
|
|
|
// skip over chars until the terminating delim (and extract it)
|
|
f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim));
|
|
rw_assert (f.good (), 0, __LINE__,
|
|
"istream::ignore (0x10000, '\\%03o'); "
|
|
"rdstate() = %{Is}, gcount() = %ld",
|
|
delim, f.rdstate (), f.gcount ());
|
|
|
|
// alternate between exercising seekg() and pubsync()
|
|
if (n % 2) {
|
|
// seek inplace (offset must be 0 for MB encodings)
|
|
f.seekg (0, std::ios::cur);
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
}
|
|
else {
|
|
// filebuf::pubsync() must return 0
|
|
int syn = f.rdbuf ()->pubsync ();
|
|
rw_assert (0 == syn, 0, __LINE__,
|
|
"filebuf::pubsync () == 0, got %d", syn);
|
|
}
|
|
|
|
// skip exactly one char forward (retrieve a space)
|
|
char c = char ();
|
|
f.get (c);
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
rw_assert (' ' == c, 0, __LINE__,
|
|
"istream::get(char_type) got '\\%03o', expected ' '", c);
|
|
|
|
// get current file position
|
|
const std::ifstream::pos_type pos = f.tellg ();
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// extract offset - should be the same as pos
|
|
long offset = 0;
|
|
f >> offset;
|
|
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
rw_assert (long (pos) == offset, 0, __LINE__,
|
|
"ifstream::operator>>() expected %ld, got %ld",
|
|
long (pos), offset);
|
|
|
|
// in_avail() must return a value > 0
|
|
std::streamsize avail = f.rdbuf ()->in_avail ();
|
|
rw_assert (avail > 0, 0, __LINE__,
|
|
"filebuf::in_avail() expected > 0, got %ld", avail);
|
|
|
|
// "rewind" stream to the beginning
|
|
f.seekg (0);
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// try seeking to the previous position
|
|
f.seekg (pos);
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
rw_assert (f.tellg () == pos, 0, __LINE__,
|
|
"istream::seekg (%ld); tellg () returns %ld",
|
|
long (pos), long (f.tellg ()));
|
|
|
|
// re-read offset - should be the same as file pos
|
|
f >> offset;
|
|
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
rw_assert (long (pos) == offset, 0, __LINE__,
|
|
"ifstream::operator>>() expected %ld, got %ld",
|
|
long (pos), offset);
|
|
}
|
|
|
|
// ignore the rest of file, eofbit must be set
|
|
f.ignore (0x10000);
|
|
RW_ASSERT_STATE (f, std::ios::eofbit);
|
|
|
|
// in_avail() must return 0
|
|
const std::streamsize avail = f.rdbuf ()->in_avail ();
|
|
rw_assert (0 == avail, 0, __LINE__,
|
|
"filebuf::in_avail() expected 0, got %ld", avail);
|
|
|
|
// imbue original locale (currently imbued locale
|
|
// will be destroyed prior to the destruction of `f')
|
|
f.imbue (l);
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
static int
|
|
run_test (int, char*[])
|
|
{
|
|
// self-test make sure facet works
|
|
self_test ();
|
|
|
|
// user-defined code conversion facet
|
|
const cformat fmt (1 /* prevent locale from deleting */);
|
|
|
|
// create a locale by combining the classic locale and our UD facet
|
|
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
|
|
std::locale l (std::locale::classic (), &fmt);
|
|
|
|
const char *fname = rw_tmpnam (0);
|
|
|
|
if (!fname)
|
|
return 1;
|
|
|
|
// will be populated with file offsets and escape sequences
|
|
char buffer [4096] = { '\0' };
|
|
|
|
int buflen = 0;
|
|
|
|
// generate file contents using UD conversion
|
|
if (1) {
|
|
rw_info (0, 0, __LINE__,
|
|
"ofstream insertion with multibyte conversion");
|
|
|
|
std::ofstream f (fname);
|
|
|
|
// make sure file stream has been successfully opened
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// imbue locale with formatting facet into stream
|
|
f.imbue (l);
|
|
|
|
for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) {
|
|
|
|
const std::ofstream::pos_type pos = f.tellp ();
|
|
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
buflen = std::strlen (buffer);
|
|
|
|
// append the file offset followed by a (possibly escaped) char
|
|
std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i));
|
|
|
|
// write out the just appended portion of the buffer
|
|
f << (buffer + buflen);
|
|
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
}
|
|
|
|
buflen = std::strlen (buffer);
|
|
|
|
// file contains the contents of buffer with non-printable
|
|
// chars replaced with escape sequences (e.g., tabs with '\t', etc.)
|
|
}
|
|
|
|
|
|
// read contents of file w/o conversion
|
|
test_noconv (fname);
|
|
|
|
// read contents of file, apply conversion
|
|
if (1) {
|
|
rw_info (0, 0, __LINE__,
|
|
"ifstream extraction with multibyte conversion");
|
|
|
|
std::ifstream f (fname);
|
|
|
|
// make sure file stream has been successfully opened
|
|
RW_ASSERT_STATE (f, std::ios::goodbit);
|
|
|
|
// imbue locale with formatting facet into stream
|
|
f.imbue (l);
|
|
|
|
// allocate buffer large enough to accomodate the converted
|
|
// (i.e. internal) sequence
|
|
char tmpbuf [sizeof buffer];
|
|
|
|
// read file contents into buffer, convert escape sequences
|
|
// into the corresponding (perhaps unprintable) characters
|
|
const std::streamsize n = f.read (tmpbuf, sizeof tmpbuf).gcount ();
|
|
|
|
// 27.6.1.3, p28 - read() sets eofbit | failbit
|
|
// if end-of-file occurs on the input sequence
|
|
RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
|
|
|
|
rw_assert (long (n) == buflen, 0, __LINE__,
|
|
"ifstream::read (%#p, %d); read %ld, expected %d",
|
|
tmpbuf, sizeof tmpbuf, long (n), buflen);
|
|
|
|
// assert that converted file contents are the same
|
|
// as the originally generated buffer
|
|
const long len = long (n) < buflen ? long (n) : buflen;
|
|
for (long i = 0; i != len; ++i) {
|
|
if (tmpbuf [i] != buffer [i]) {
|
|
rw_assert (0, 0, __LINE__,
|
|
"'\\%03o' == '\\%03o'; offset %d",
|
|
(unsigned char)buffer [i],
|
|
(unsigned char)tmpbuf [i], i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// test with default buffer
|
|
test_seek (fname);
|
|
|
|
// retest with buffer of user-defined size
|
|
for (std::size_t n = 4096; n != std::size_t (-1);
|
|
n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1)
|
|
test_seek (fname, n);
|
|
|
|
|
|
// test with errors during conversion
|
|
test_error (fname);
|
|
|
|
// remove a temporary file
|
|
std::remove (fname);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**************************************************************************/
|
|
|
|
int main (int argc, char *argv[])
|
|
{
|
|
return rw_test (argc, argv, __FILE__,
|
|
"lib.filebuf",
|
|
0 /* no comment */,
|
|
run_test,
|
|
"", 0);
|
|
}
|