/************************************************************************ * * codecvt.cpp - test exercising file streams and code conversion * * $Id: 27.filebuf.codecvt.cpp 590052 2007-10-30 12:44:14Z faridz $ * ************************************************************************ * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * Copyright 2001-2006 Rogue Wave Software. * **************************************************************************/ // PROBLEM DESCRIPTION: // basic_ofstream<>::overflow() mangles a multibyte sequence when using // code conversion. This functionality of the class isn't currently (as // of the date of the creation of the test) being exercised by our // testsuite due to the lack of a suitable locale (e.g., ja_JP). // TEST DESCRIPTION: // test creates a temporary file and fills it with series of pairs // , where `offset' is the offset from the beginning of // the file to the first (decimal) digit of offset, and `seq' is // a character in the range [1, CHAR_MAX] possibly converted to an // escape or trigraph sequence according to the rules described // in 2.3 and 2.13.2 // // the program then exercises the stream's (actually, the stream // file buffer's) ability to extract and seek within such a file // thus testing the stream's ability to crrectly interact with // the codecvt facet installed in the imbued locale #include #if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION) // disable implicit inclusion to work around a limitation // in IBM VisualAge # define _RWSTD_NO_IMPLICIT_INCLUSION #endif #include #include #include // for UCHAR_MAX #include // for fclose(), fopen(), fseek(), size_t #include // for abort() #include // for memset(), strcmp(), strlen() #include // for mbstate_t #include #include /**************************************************************************/ // code conversion facet suitable for replacement of the default // codecvt facet // cformat::do_out() converts printable ASCII characters into themselves, // control characters are converted to standard C escape sequences // cformat::do_in() reverses the effect of do_out() class cformat: public std::codecvt { enum { new_line = 0x0001, // convert '\n' to "\n" horizontal_tab = 0x0002, // convert '\t' to "\t" vertical_tab = 0x0004, // convert '\v' to "\v" backspace = 0x0008, // convert '\b' to "\b" carriage_return = 0x0010, // convert '\r' to "\r" form_feed = 0x0020, // convert '\f' to "\f" alert = 0x0040, // convert '\a' to "\a" backslash = 0x0080, // convert '\\' to "\\" question_mark = 0x0100, // convert '?' to "\?" single_quote = 0x0200, // convert '\\'' to "\'" double_quote = 0x0400, // convert '"' to "\"" trigraphs = 0x8000, // convert to/from trigrap sequences hex = 0x1000 // hex notation in external representation }; const int mask; // bitmaps of flags above public: explicit cformat (std::size_t ref = 0, int m = 0) : std::codecvt (ref), mask (m) { /* empty */ } protected: virtual result do_out (state_type&, const intern_type*, const intern_type*, const intern_type*&, extern_type*, extern_type*, extern_type*&) const; virtual result do_in (state_type&, const extern_type*, const extern_type*, const extern_type*&, intern_type*, intern_type*, intern_type*&) const; virtual result do_unshift (state_type&, extern_type*, extern_type*, extern_type*&) const { // stateless encoding, no conversion necessary return noconv; } virtual int do_encoding () const _THROWS (()) { return 0; // variable number of external chars per single internal } virtual bool do_always_noconv () const _THROWS (()) { return false; // conversion always necessary } // returns the maximum `N' of extern chars in the range [from, from_end) // such that N represents max or fewer internal chars virtual int do_length (state_type&, const extern_type*, const extern_type*, std::size_t) const; // returns the max value do_length (s, from, from_end, 1) can return // for any valid range [from, from_end) - see LWG issue 74 (a DR) virtual int do_max_length () const _THROWS (()) { // assume that an internal char occupies at most 4 external chars // this won't hold for e.g. '\x00001' etc., but will hold for all // chars in the hex notation of up to two digits and all chars in // octal notation (which are required to fit in 4 by the standard) return 4; } }; std::codecvt_base::result cformat::do_out ( state_type& /* unused */, const intern_type *from, const intern_type *from_end, const intern_type *&from_next, extern_type *to, extern_type *to_end, extern_type *&to_next) const { // assert 22.2.1.5.2, p1 preconditions rw_assert (from <= from_end, __FILE__, __LINE__, "codecvt::do_out (..., from = %#p, from + %d, %#p, " "to = %#p, to + %d, %#p): from <= from_end", from, from_end - from, from_next, to, to_end - to, to_next); rw_assert (to <= to_end, __FILE__, __LINE__, "codecvt::do_out (..., from = %#p, from + %d, %#p, " "to = %#p, to + %d, %#p): to <= to_end", from, from_end - from, from_next, to, to_end - to, to_next); // assume no conversion will be performed result res = noconv; for (from_next = from, to_next = to; from_next != from_end; ++from_next) { // out of space if (to_next == to_end) { res = partial; break; } // convert to unsigned to make sure comparison works unsigned char ch = *from_next; extern_type esc = extern_type (); if (ch < ' ') { // convert to a C escape sequence switch (ch) { case '\a': if (!(mask & alert)) { ch = 'a'; esc = '\\'; } break; case '\b': if (!(mask & backspace)) { ch = 'b'; esc = '\\'; } break; case '\t': if (!(mask & horizontal_tab)) { ch = 't'; esc = '\\'; } break; case '\n': if (!(mask & new_line)) { ch = 'n'; esc = '\\'; } break; case '\v': if (!(mask & vertical_tab)) { ch = 'v'; esc = '\\'; } break; case '\f': if (!(mask & form_feed)) { ch = 'f'; esc = '\\'; } break; case '\r': if (!(mask & carriage_return)) { ch = 'r'; esc = '\\'; } break; case '\\': if (!(mask & backslash)) { ch = '\\'; esc = '\\'; } break; default: esc = '\\'; break; } } else if (ch > '~') { // convert to a C escape sequence (octal) esc = '\\'; } else { // escape special characters switch (ch) { case '?': if (!(mask & question_mark)) esc = '\\'; break; case '\'': if (!(mask & single_quote)) esc = '\\'; break; case '"': if (!(mask & double_quote)) esc = '\\'; break; case '\\': if (!(mask & backslash)) esc = '\\'; break; } if (!(mask & trigraphs)) { // convert to a trigraph sequence switch (ch) { case '#': ch = '='; esc = '?'; break; case '\\': ch = '/'; esc = '?'; break; case '^': ch = '\''; esc = '?'; break; case '[': ch = '('; esc = '?'; break; case ']': ch = ')'; esc = '?'; break; case '|': ch = '!'; esc = '?'; break; case '{': ch = '<'; esc = '?'; break; case '}': ch = '>'; esc = '?'; break; case '~': ch = '-'; esc = '?'; break; } } } // process `ch' and `esc' if ('\\' == esc) { // conversion was performed res = ok; if (ch < ' ' || ch > '~') { // need room for an escape followed by three ocal digits if (4 > to_end - to_next) { res = partial; break; } static const char digits[] = "0123456789abcdef"; // add an escape character *to_next++ = esc; if (mask & hex) { // add hex representation (exactly three chars) *to_next++ = 'x'; *to_next++ = digits [(ch & 0xf0) >> 4]; *to_next++ = digits [ch & 0xf]; } else { // add octal representation (exactly three digits) *to_next++ = digits [(ch & (7 << 6)) >> 6]; *to_next++ = digits [(ch & (7 << 3)) >> 3]; *to_next++ = digits [ch & 7]; } } else { // need room for an escape followed by a single char if (2 > to_end - to_next) { res = partial; break; } // add an escape char followed by the escaped char *to_next++ = esc; *to_next++ = ch; } } else if ('?' == esc) { // need room for a trigraph sequence if (3 > to_end - to_next) { res = partial; break; } // conversion was performed res = ok; // add a trigraph sequence *to_next++ = '?'; *to_next++ = '?'; *to_next++ = ch; } else { // not escaped *to_next++ = ch; } } if (noconv == res) { // 22.2.1.5.2, p2, Note: no conversion was necessary from_next = from; to_next = to; } rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__, "user-defined codecvt: internal inconsistency"); rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__, "user-defined codecvt: internal inconsistency"); return res; } std::codecvt_base::result cformat::do_in ( state_type& /* unused */, const extern_type *from, const extern_type *from_end, const extern_type *&from_next, intern_type *to, intern_type *to_end, intern_type *&to_next) const { // assert 22.2.1.5.2, p1 preconditions rw_assert (from <= from_end, __FILE__, __LINE__, "codecvt::do_in (..., from = %#p, from + %d, %#p, " "to = %#p, to + %d, %#p): from <= from_end", from, from_end - from, from_next, to, to_end - to, to_next); rw_assert (to <= to_end, __FILE__, __LINE__, "codecvt::do_in (..., from = %#p, from + %d, %#p, " "to = %#p, to + %d, %#p) to <= to_end", from, from_end - from, from_next, to, to_end - to, to_next); result res = ok; for (from_next = from, to_next = to; from_next != from_end; ++from_next) { unsigned char ch = *from_next; intern_type c = intern_type (); if ('\\' == ch) { if (2 > from_end - from_next) { // ok is the correct value to return in this case, // but partial should be handled as well for robustness res = (from_end - (extern_type*)0) % 2 ? ok : partial; break; } ch = from_next [1]; if ('x' == ch) { // interpret a hex escape sequence // advance past '\x' const extern_type *next = from_next + 2; // parse hex digits until a non-hex digits is encountered for (; ; ++next) { if (next == from_end) { // do not advance to the end since there may be // more digits following it (e.g., '\x012' with // from_end pointing at '1' or '2') return partial; } ch = *next; if (ch >= '0' && ch <= '9') c = (c << 4) | (ch - '0'); else if (ch >= 'a' && ch <= 'f') c = (c << 4) | (ch - 'a' + 10); else if (ch >= 'A' && ch <= 'F') c = (c << 4) | (ch - 'A' + 10); else if (next - from_next > 2) break; else { return error; // non-hex digit immediately after '\x' } } // advance to the end of parsed number from_next = next - 1; } else if ('0' <= ch && '7' >= ch) { // interpret a oct escape sequence // (tentatively) advance past '\' const extern_type *next = from_next + 1; // parse at most three oct digits for (; next - from_next < 4; ++next) { if (next == from_end) { // do not advance to the end since there may be // more digits following it (e.g., '\x012' with // from_end pointing at '1' or '2') return partial; } ch = *next; if (ch >= '0' && ch <= '7') c = (c << 3) | (ch - '0'); else if (next - from_next) break; else { // advance to the offending char from_next = next; return error; // non-oct digit immediately after '\' } } // advance to the end of parsed number from_next = next - 1; } else { // interpret standard C escape sequence switch (ch) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 't': c = '\t'; break; case 'n': c = '\n'; break; case 'v': c = '\v'; break; case 'f': c = '\f'; break; case 'r': c = '\r'; break; // optional but allowed and escaped backslash case '?': case '"': case '\'': case '\\': c = ch ; break; // bad escape sequence default: return error; } // advance past the initial '\' ++from_next; } } else if ('?' == ch && !(mask & trigraphs)) { // (try to) convert a trigraph sequence if ( 2 > from_end - from_next || '?' == from_next [1] && 3 > from_end - from_next) { res = partial; break; } if ('?' == from_next [1]) { // "??" (potentilly) introduces a trigraph sequence switch (from_next [2]) { case '=': c = '#'; break; case '/': c = '\\'; break; case '\'': c = '^'; break; case '(': c = '['; break; case ')': c = ']'; break; case '!': c = '|'; break; case '<': c = '{'; break; case '>': c = '}'; break; case '-': c = '~'; break; default: // no a trigraph sequence, won't convert c = from_next [0]; // i.e., '?' } // skip the leading "??" of a trigraph sequence if (c != from_next [0]) from_next += 2; } else // ordinary (not escaped) character c = ch; } else // ordinary (not escaped) character c = ch; // to_next may be 0 (when called from do_length()) // doing pointer math on invalid pointers (null) has undefined behavior // but will probably work in most cases if (to_next) *to_next = c; ++to_next; // in case of of the inner loops has reached end if (from_next == from_end) break; } rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__, "user-defined codecvt: internal inconsistency"); rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__, "user-defined codecvt: internal inconsistency"); return res; } int cformat::do_length (state_type&, const extern_type *from, const extern_type *from_end, std::size_t max) const { const extern_type *from_next; intern_type *to_next = 0; std::mbstate_t st; std::memset (&st, 0, sizeof st); // use do_in() with `to' of 0 to do the computation // doing pointer math on invalid pointers (null) has undefined behavior // but will probably work in most cases do_in (st, from, from_end, from_next, to_next, to_next + max, to_next); return to_next - (intern_type*)0; } /***********************************************************************/ // determines file size in bytes static std::streamsize fsize (const char *fname) { std::FILE* const f = std::fopen (fname, "r"); if (!f || std::fseek (f, 0, SEEK_END)) return -1; const std::streamsize size = std::ftell (f); std::fclose (f); return size; } /***********************************************************************/ static void self_test () { rw_info (0, __FILE__, __LINE__, "user-defined codecvt facet -- self test"); static const char* const result[] = { "ok", "partial", "error", "noconv" }; // user-defined code conversion facet cformat fmt (1); // original array of internal characters and one to which to convert // an external representation back to (for comparison) cformat::intern_type intrn [2][256] = { { '\0' } }; // array of external chars large enough to hold the internal array // each internal char converts to at most 4 external chars cformat::extern_type extrn [1024] = { '\0' }; // fill internal array with chars from '\1' to '\377' for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i) intrn [0][i] = cformat::intern_type (i + 1); const cformat::intern_type *intrn_next_0 = 0; cformat::intern_type *intrn_next_1 = 0; cformat::extern_type *extrn_next = 0; // dummy (state not used, conversion is stateless) std::mbstate_t st; std::memset (&st, 0, sizeof st); // convert internal to external representation, substituting // escape sequences for non-printable characters std::codecvt_base::result res; // convert array in internal representation to external representation res = fmt.out (st, intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0, extrn, extrn + sizeof extrn, extrn_next); rw_assert (std::codecvt_base::ok == res, 0, __LINE__, "codecvt::out (); result == codecvt_base::ok, " "got codecvt_base::%s", result [res]); // assert that the external sequence is longer than the internal one rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__, "codecvt::out (); converted size %d, expected > %d", extrn_next - extrn, intrn_next_0 - intrn [0]); // convert external to internal representation, parsing // multi-char escape sequences into single chars const cformat::intern_type *next = extrn; intrn_next_1 = intrn [1]; for (; next != extrn_next; ) { // allow only a small buffer space to exercise partial conversion std::size_t step = std::size_t (extrn_next - next); if (step > 12) step = 5 + step % 8; res = fmt.in (st, next, next + step, next, intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1); if (std::codecvt_base::error == res) break; } // assert that entrire sequence converted ok rw_assert (std::codecvt_base::ok == res, 0, __LINE__, "codecvt::in (); result == codecvt_base::ok, got " "codecvt_base::%s at offset %d", result [res], next - extrn); rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__, "codecvt::in (); to_next == %#p, got %#p", intrn [1] + sizeof intrn [1], intrn_next_1); rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__, "codecvt<>::out/in ()"); } /***********************************************************************/ #define RW_ASSERT_STATE(strm, state) \ rw_assert ((strm).rdstate () == (state), 0, __LINE__, \ "rdstate () == %{Is}, got %{Is}", \ (state), (strm).rdstate ()) static void test_noconv (const char *fname) { rw_info (0, 0, __LINE__, "ifstream extraction without conversion"); std::ifstream f (fname); // make sure file stream has been successfully opened RW_ASSERT_STATE (f, std::ios::goodbit); // gain public access to protected members struct pubbuf: std::streambuf { // working around an MSVC 6.0 bug (PR #26330) typedef std::streambuf Base; virtual std::streamsize showmanyc () { return Base::showmanyc (); } char* pubgptr () { return Base::gptr (); } char* pubegptr () { return Base::egptr (); } }; // use static cast through void* to avoid using reinterpret_cast pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ()); std::streamsize filesize = rdbuf->showmanyc (); // exercise 27.5.2.4.3, p1: showmanyc() returns the "estimated" // size of the sequence (i.e., the file size in this case) rw_assert (filesize == fsize (fname), 0, __LINE__, "streambuf::showmanyc () == %ld, got %ld", fsize (fname), filesize); // exercise 27.5.2.2.3, p1 filesize = f.rdbuf ()->in_avail (); const char *gptr = rdbuf->pubgptr (); const char *egptr = rdbuf->pubgptr (); rw_assert (filesize == (gptr < egptr ? egptr - gptr : fsize (fname)), 0, __LINE__, "streambuf::in_avail () == %ld, got %ld", (gptr < egptr ? egptr - gptr : fsize (fname)), filesize); // allocate buffer large enough to accomodate the converted // (i.e. internal) sequence const std::size_t bufsize = 0x10000; // 64k should do it char *tmpbuf = new char [bufsize]; // fill with non-0 value to check for writes past the end // (see also Onyx incident 14033) std::memset (tmpbuf, '\x7f', bufsize); // ecercise putback area std::streamsize i; #ifndef _RWSTD_PBACK_SIZE # define _RWSTD_PBACK_SIZE 1 #endif // _RWSTD_PBACK_SIZE // _RWSTD_PBACK_SIZE is the size of the putback area the library // was configured with; the macro expands to streamsize (N) for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) { // read a few characters, read must not append a '\0' std::streamsize n = f.read (tmpbuf, i).gcount (); // assert that read exactly `i' chars, buffer not null-terminared rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__, "ifstream::read (%#p, %ld) read %ld," "buffer terminated with '\\%03o'", tmpbuf, i, n, tmpbuf [i]); // put back read characters; assert that they are the same // as those in the corresponding positions in the buffer for (std::streamsize j = 0; j != i; ++j) { std::ifstream::int_type c = f.rdbuf ()->sungetc (); typedef std::ifstream::traits_type Traits; rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c, 0, __LINE__, "filebuf::sungetc() == '\\%03o', got '\\%03o", tmpbuf [n - j - 1], c); } // re-read characters just put back char buf [_RWSTD_PBACK_SIZE + 1]; std::memset (buf, '\x7f', sizeof buf); std::streamsize n2 = f.read (buf, i).gcount (); // assert that the requested number of chars were read in rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__, "ifstream::read (%#p, %ld) read %ld," "buffer terminated with '\\%03o'", buf, i, n2, buf [i]); // assert that the read chars are those that were put back for (std::streamsize k = 0; k != i; ++k) { rw_assert (buf [k] == tmpbuf [k], 0, __LINE__, "buffer mismatch at offset %ld: got '\\%03o', " "expected '\\%03o'", k, buf [k], tmpbuf [k]); // put character back again so that it can be read back in f.rdbuf ()->sungetc (); } } // read file contents into buffer (apply no conversion) const std::streamsize n = f.read (tmpbuf, bufsize).gcount (); // 27.6.1.3, p28 - read() sets eofbit | failbit // if end-of-file occurs on the input sequence RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit); // assert that the entiire file has been read rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__, "ifstream::read (%#p, %ld) read %ld, expected %ld; " "buffer terminated with '\\%03o' [%s]", tmpbuf, bufsize, n, filesize, tmpbuf [n], fname); tmpbuf [n] = '\0'; // assert that file contains no control characters bool b = true; for (i = 0; b && i != UCHAR_MAX + 1; ++i) { if (i >= ' ' || i <= '~') continue; b = 0 == std::strchr (tmpbuf, char (i)); } rw_assert (b, 0, __LINE__, "unescaped non-printable character '\\#03o' at offset %ld", tmpbuf [i], i); delete[] tmpbuf; } /***********************************************************************/ static void test_error (const char *fname) { rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error"); const char outbuf[] = { "abcdefghijklmnopqrstuvwxyz\\x20\\xzzABCDEFGHIJKLMNOPQRSTUVWXYZ" // ^ // | // error (invalid hex sequence) ---+ }; // write out a text file containing a conversion error std::ofstream ostrm (fname); ostrm << outbuf; ostrm.close (); // read the file back in using the conversion facet std::ifstream istrm (fname); // user-defined code conversion facet const cformat fmt (1 /* prevent locale from deleting */); // create a locale by combining the classic locale and our UD facet // cformat; the facet will replace std::codecvt std::locale l (std::locale::classic (), &fmt); // imbue locale with formatting facet into streams and save previous l = istrm.imbue (l); char inbuf [sizeof outbuf * 4] = { 0 }; // try to read partial contents of the file // including the conversion error into the buffer istrm.read (inbuf, 26L + 4L /* "a..z" "AB" */); // verify that the operation failed, eofbit is set since // less than the requested number of characters have been read RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit); // verify that the 26 characters 'a' through 'z' plus // the space (i.e., 27 chars) have been extracted rw_assert (27 == istrm.gcount (), 0, __LINE__, "ifstream::read () extracted %d, expected 27", istrm.gcount ()); rw_assert ( 0 == std::ifstream::traits_type::compare (inbuf, outbuf, 26) && ' ' == inbuf [26], 0, __LINE__, "ifstream::read () got \"%s\", expected \"%.26s \"", inbuf, outbuf); istrm.clear (); // try to read again, and verify that the operation fails istrm.read (inbuf, sizeof inbuf); RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit); rw_assert (0 == istrm.gcount (), 0, __LINE__, "ifstream::read () extracted %d, expected 0", istrm.gcount ()); // replace the imbued locale istrm.imbue (l); } /***********************************************************************/ static void test_seek (const char *fname, std::size_t bufsize = std::size_t (-1)) { std::ifstream f (fname); // make sure stream has been successfully opened RW_ASSERT_STATE (f, std::ios::goodbit); // set buffer size if specified if (std::size_t (-1) != bufsize) { rw_info (0, 0, __LINE__, "ifstream::seekg()/tellg() - %zu byte buffer", bufsize); RW_ASSERT_STATE (f, std::ios::goodbit); f.rdbuf ()->pubsetbuf (0, bufsize); } else { rw_info (0, 0, __LINE__, "ifstream::seekg()/tellg() - default buffer size"); } // user-defined code conversion facet const cformat fmt (1 /* prevent locale from deleting */); // create a locale by combining the classic locale and our UD facet // cformat; the facet will replace std::codecvt std::locale l (std::locale::classic (), &fmt); // imbue locale with formatting facet into streams and save previous l = f.imbue (l); // seek to the beginning of stream (safe) f.seekg (0); RW_ASSERT_STATE (f, std::ios::goodbit); const unsigned char max = UCHAR_MAX - '~'; for (std::size_t n = 0; n != std::size_t (max - 1); ++n) { const char delim = char ('~' + n); // skip over chars until the terminating delim (and extract it) f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim)); rw_assert (f.good (), 0, __LINE__, "istream::ignore (0x10000, '\\%03o'); " "rdstate() = %{Is}, gcount() = %ld", delim, f.rdstate (), f.gcount ()); // alternate between exercising seekg() and pubsync() if (n % 2) { // seek inplace (offset must be 0 for MB encodings) f.seekg (0, std::ios::cur); RW_ASSERT_STATE (f, std::ios::goodbit); } else { // filebuf::pubsync() must return 0 int syn = f.rdbuf ()->pubsync (); rw_assert (0 == syn, 0, __LINE__, "filebuf::pubsync () == 0, got %d", syn); } // skip exactly one char forward (retrieve a space) char c = char (); f.get (c); RW_ASSERT_STATE (f, std::ios::goodbit); rw_assert (' ' == c, 0, __LINE__, "istream::get(char_type) got '\\%03o', expected ' '", c); // get current file position const std::ifstream::pos_type pos = f.tellg (); RW_ASSERT_STATE (f, std::ios::goodbit); // extract offset - should be the same as pos long offset = 0; f >> offset; RW_ASSERT_STATE (f, std::ios::goodbit); rw_assert (long (pos) == offset, 0, __LINE__, "ifstream::operator>>() expected %ld, got %ld", long (pos), offset); // in_avail() must return a value > 0 std::streamsize avail = f.rdbuf ()->in_avail (); rw_assert (avail > 0, 0, __LINE__, "filebuf::in_avail() expected > 0, got %ld", avail); // "rewind" stream to the beginning f.seekg (0); RW_ASSERT_STATE (f, std::ios::goodbit); // try seeking to the previous position f.seekg (pos); RW_ASSERT_STATE (f, std::ios::goodbit); rw_assert (f.tellg () == pos, 0, __LINE__, "istream::seekg (%ld); tellg () returns %ld", long (pos), long (f.tellg ())); // re-read offset - should be the same as file pos f >> offset; RW_ASSERT_STATE (f, std::ios::goodbit); rw_assert (long (pos) == offset, 0, __LINE__, "ifstream::operator>>() expected %ld, got %ld", long (pos), offset); } // ignore the rest of file, eofbit must be set f.ignore (0x10000); RW_ASSERT_STATE (f, std::ios::eofbit); // in_avail() must return 0 const std::streamsize avail = f.rdbuf ()->in_avail (); rw_assert (0 == avail, 0, __LINE__, "filebuf::in_avail() expected 0, got %ld", avail); // imbue original locale (currently imbued locale // will be destroyed prior to the destruction of `f') f.imbue (l); } /***********************************************************************/ static int run_test (int, char*[]) { // self-test make sure facet works self_test (); // user-defined code conversion facet const cformat fmt (1 /* prevent locale from deleting */); // create a locale by combining the classic locale and our UD facet // cformat; the facet will replace std::codecvt std::locale l (std::locale::classic (), &fmt); const char *fname = rw_tmpnam (0); if (!fname) return 1; // will be populated with file offsets and escape sequences char buffer [4096] = { '\0' }; int buflen = 0; // generate file contents using UD conversion if (1) { rw_info (0, 0, __LINE__, "ofstream insertion with multibyte conversion"); std::ofstream f (fname); // make sure file stream has been successfully opened RW_ASSERT_STATE (f, std::ios::goodbit); // imbue locale with formatting facet into stream f.imbue (l); for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) { const std::ofstream::pos_type pos = f.tellp (); RW_ASSERT_STATE (f, std::ios::goodbit); buflen = std::strlen (buffer); // append the file offset followed by a (possibly escaped) char std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i)); // write out the just appended portion of the buffer f << (buffer + buflen); RW_ASSERT_STATE (f, std::ios::goodbit); } buflen = std::strlen (buffer); // file contains the contents of buffer with non-printable // chars replaced with escape sequences (e.g., tabs with '\t', etc.) } // read contents of file w/o conversion test_noconv (fname); // read contents of file, apply conversion if (1) { rw_info (0, 0, __LINE__, "ifstream extraction with multibyte conversion"); std::ifstream f (fname); // make sure file stream has been successfully opened RW_ASSERT_STATE (f, std::ios::goodbit); // imbue locale with formatting facet into stream f.imbue (l); // allocate buffer large enough to accomodate the converted // (i.e. internal) sequence char tmpbuf [sizeof buffer]; // read file contents into buffer, convert escape sequences // into the corresponding (perhaps unprintable) characters const std::streamsize n = f.read (tmpbuf, sizeof tmpbuf).gcount (); // 27.6.1.3, p28 - read() sets eofbit | failbit // if end-of-file occurs on the input sequence RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit); rw_assert (long (n) == buflen, 0, __LINE__, "ifstream::read (%#p, %d); read %ld, expected %d", tmpbuf, sizeof tmpbuf, long (n), buflen); // assert that converted file contents are the same // as the originally generated buffer const long len = long (n) < buflen ? long (n) : buflen; for (long i = 0; i != len; ++i) { if (tmpbuf [i] != buffer [i]) { rw_assert (0, 0, __LINE__, "'\\%03o' == '\\%03o'; offset %d", (unsigned char)buffer [i], (unsigned char)tmpbuf [i], i); break; } } } // test with default buffer test_seek (fname); // retest with buffer of user-defined size for (std::size_t n = 4096; n != std::size_t (-1); n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1) test_seek (fname, n); // test with errors during conversion test_error (fname); // remove a temporary file std::remove (fname); return 0; } /**************************************************************************/ int main (int argc, char *argv[]) { return rw_test (argc, argv, __FILE__, "lib.filebuf", 0 /* no comment */, run_test, "", 0); }