How to access data from `std::ostringstream` without copying it [duplicate] - c++

If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?

std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// Thin std::stringbuf wrapper that publishes the start of the put area, which
// is otherwise reachable only through the protected streambuf interface.
struct my_stringbuf : std::stringbuf {
    // Returns pbase(), the beginning of the output sequence. Nothing is
    // appended here, so the caller must write its own terminator (e.g.
    // std::ends) before treating the result as a C string.
    // pptr() would give the current write position instead.
    const char* my_str() const
    {
        const char* const start = this->pbase();
        return start;
    }
};
// Demo: stream a few floats through my_stringbuf and print the buffer in place.
int main()
{
    std::vector<float> v = {1.1, -3.4, 1/7.0};
    my_stringbuf buf;
    std::ostream ss(&buf);
    for (const float value : v)
        ss << value << ' ';
    // Terminate the text so that my_str() can be printed as a C string.
    ss << std::ends;
    std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Demo: build the text in a std::ostrstream and read its buffer without copying.
int main()
{
    std::vector<float> v = {1.1, -3.4, 1/7.0};
    std::ostrstream ss;
    for (const float value : v)
        ss << value << ' ';
    // Terminate the text so the raw buffer can be printed as a C string.
    ss << std::ends;
    const char* buffer = ss.str(); // direct access!
    std::cout << buffer << '\n';
    // Undo the freeze applied by str() (see the std::ostrstream reference).
    ss.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Demo: format a vector of floats into a std::string with Boost.Spirit Karma.
int main()
{
std::vector<float> v = {1.1, -3.4, 1/7.0};
std::string s;
// The output iterator is a back-inserter on s, so generated text is appended to s directly.
// NOTE(review): `karma::double_ % ' '` presumably emits the elements separated by a space -- confirm against the Karma docs.
karma::generate(back_inserter(s), karma::double_ % ' ', v);
std::cout << s << '\n'; // here's your string
}

This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.

+1 for the Boost Karma answer by @Cubbi and for the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Sample consumer of the generic std::ostream interface: writes a fixed
// greeting, the number 42 in hex with its base prefix, and a boolean as text.
// The hex/showbase/boolalpha flags stay set on os after the call.
void foo(std::ostream& os) {
    os << "Hello world ";
    os << std::hex << std::showbase << 42;
    os << " ";
    os << std::boolalpha << true << "\n";
}
#include <iostream>
// Demo: run foo() against an ostream whose stream buffer appends to `output`.
int main() {
std::string output;
output.reserve(100); // optional: pre-size when the approximate (or minimum) output length is known
// buf and os live only inside this scope, so both are destroyed before `output` is read below.
// NOTE(review): presumably the destruction is what flushes pending text into `output` -- confirm against the Boost.Iostreams docs.
{
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, std::vector<char>, etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
// Demo: run foo() against an ostream backed by a fixed 100-byte char array.
int main() {
// Zero-initialised, so the array can be printed as a C string as long as the text written is shorter than the array.
char output[100] = {0};
// buf and os live only inside this scope, so both are destroyed before `output` is printed.
// NOTE(review): what happens when more than 100 bytes are written is not visible here -- confirm against the Boost.Iostreams array device docs.
{
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true

I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <ostream>
#include <streambuf>
#include <string>
#include <utility>

/// Output-only stream that writes directly into an owned std::basic_string,
/// so the finished text can be moved out with take_str() instead of copied.
///
/// The object is its own stream buffer (private base). No put area is
/// installed: every write reaches overflow() or xsputn(), which append to
/// m_str. That keeps m_str exactly equal to the text written at all times,
/// so str() never exposes padding and there is no pointer into the string
/// that could dangle after a reallocation.
template <typename char_type>
class basic_outstringstream : private std::basic_streambuf<char_type, std::char_traits<char_type>>,
                              public std::basic_ostream<char_type, std::char_traits<char_type>>
{
    using traits_type = std::char_traits<char_type>;
    using base_buf_type = std::basic_streambuf<char_type, traits_type>;
    using base_stream_type = std::basic_ostream<char_type, traits_type>;
    using int_type = typename base_buf_type::int_type;

    std::basic_string<char_type> m_str;

    /// Single-character write. eof is a no-op that still reports success.
    int_type overflow(int_type ch) override
    {
        if (!traits_type::eq_int_type(ch, traits_type::eof()))
            m_str.push_back(traits_type::to_char_type(ch));
        return traits_type::not_eof(ch);
    }

    /// Bulk write: appends count characters from s and reports them all written.
    std::streamsize xsputn(const char_type* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Creates an empty stream with room reserved for reserveSize characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Takes over str; later writes are appended after its contents.
    explicit basic_outstringstream(std::basic_string<char_type>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Starts from a copy of str; later writes are appended after it.
    explicit basic_outstringstream(const std::basic_string<char_type>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// Read-only view of everything written so far.
    const std::basic_string<char_type>& str() const
    {
        return m_str;
    }

    /// Moves the accumulated text out and leaves the stream empty and reusable.
    /// Returned by value so the caller owns the text whichever way the result
    /// is bound; `std::string s = x.take_str();` still moves.
    std::basic_string<char_type> take_str()
    {
        std::basic_string<char_type> taken = std::move(m_str);
        m_str.clear(); // a moved-from string is valid but unspecified
        return taken;
    }

    /// Discards the accumulated text. Stream state flags are left untouched.
    void clear()
    {
        m_str.clear();
    }
};
using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;

Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.

I adapted @Kuba's very good answer to fix some issues (unfortunately he's currently unresponsive). In particular:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of string::front() and string::back(), which can invoke undefined behavior, as pointed out by @LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
/// Output-only stream that formats into an owned, growable std::basic_string
/// and lets the caller move the finished text out with take_str().
///
/// The string's own storage is used as the stream put area. m_str.size() is
/// therefore the buffer capacity, not the text length; the text length is
/// size(), i.e. pptr() - pbase().
template <typename CharT>
class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;

    class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
    {
        using base_buf_type = std::basic_streambuf<CharT, traits_type>;
        using int_type = typename base_buf_type::int_type;

    private:
        /// pbump() takes an int; advance in int-sized steps so that offsets
        /// beyond INT_MAX are handled
        /// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921).
        void safe_pbump(std::streamsize off)
        {
            const int maxStep = std::numeric_limits<int>::max();
            while (off > maxStep)
            {
                this->pbump(maxStep);
                off -= maxStep;
            }
            while (off < -static_cast<std::streamsize>(maxStep))
            {
                this->pbump(-maxStep);
                off += maxStep;
            }
            this->pbump(static_cast<int>(off));
        }

        /// Points the put area at the whole of m_str and positions the write
        /// cursor at its end, so existing contents are kept and appended to.
        void init()
        {
            CharT* const first = &m_str[0]; // valid even when m_str is empty
            this->setp(first, first + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(m_str.size()));
        }

    protected:
        /// Called when the put area is full: doubles the string, re-anchors
        /// the put area and stores ch.
        int_type overflow(int_type ch) override
        {
            if (traits_type::eq_int_type(ch, traits_type::eof()))
                return traits_type::not_eof(ch);

            // Measure the text length BEFORE resizing: resize() may
            // reallocate, after which pbase()/pptr() refer to freed storage.
            const std::size_t used = this->size();

            m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);

            CharT* const first = &m_str[0];
            this->setp(first, first + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(used));
            *this->pptr() = traits_type::to_char_type(ch);
            this->pbump(1);
            return ch;
        }

    public:
        explicit buffer(std::size_t reserveSize)
        {
            m_str.reserve(reserveSize);
            init();
        }

        explicit buffer(std::basic_string<CharT>&& str)
            : m_str(std::move(str))
        {
            init();
        }

        explicit buffer(const std::basic_string<CharT>& str)
            : m_str(str)
        {
            init();
        }

    public:
        /// Number of characters written so far.
        std::size_t size() const
        {
            return static_cast<std::size_t>(this->pptr() - this->pbase());
        }

#if __cplusplus >= 201703L
        /// View of the written text; invalidated by any later write,
        /// take_str() or clear().
        std::basic_string_view<CharT> str() const
        {
            return std::basic_string_view<CharT>(m_str.data(), size());
        }
#endif

        /// Moves the written text out and resets the buffer to empty.
        std::basic_string<CharT> take_str()
        {
            // Drop the unused tail so the result holds exactly the written text.
            m_str.resize(size());
            // basic_string<CharT>, not std::string, so wchar_t streams compile.
            std::basic_string<CharT> ret = std::move(m_str);
            m_str.clear(); // a moved-from string is valid but unspecified
            init();
            return ret;
        }

        /// Discards the written text and resets the put area.
        void clear()
        {
            m_str.clear();
            init();
        }

        /// Start of the underlying storage; only the first size() characters
        /// are meaningful.
        const CharT * data() const
        {
            return m_str.data();
        }

    private:
        std::basic_string<CharT> m_str;
    };

public:
    /// Creates an empty stream with room reserved for reserveSize characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(nullptr), m_buffer(reserveSize)
    {
        // m_buffer is constructed after the ostream base, so attach it here.
        this->rdbuf(&m_buffer);
    }

    /// Takes over str (moved, not copied); later writes are appended to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(nullptr), m_buffer(std::move(str))
    {
        this->rdbuf(&m_buffer);
    }

    /// Starts from a copy of str; later writes are appended to it.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(nullptr), m_buffer(str)
    {
        this->rdbuf(&m_buffer);
    }

#if __cplusplus >= 201703L
    /// View of the text written so far; invalidated by the next write,
    /// take_str() or clear().
    std::basic_string_view<CharT> str() const
    {
        return m_buffer.str();
    }
#endif

    /// Moves the text out and leaves the stream empty and reusable.
    std::basic_string<CharT> take_str()
    {
        return m_buffer.take_str();
    }

    /// Start of the underlying storage; pair it with size().
    const CharT * data() const
    {
        return m_buffer.data();
    }

    /// Number of characters written so far.
    std::size_t size() const
    {
        return m_buffer.size();
    }

    /// Discards the text written so far. Stream state flags are untouched.
    void clear()
    {
        m_buffer.clear();
    }

private:
    buffer m_buffer;
};
using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
}

Related

Safely convert std::string_view to int (like stoi or atoi)

Is there a safe standard way to convert std::string_view to int?
Since C++11 std::string lets us use stoi to convert to int:
std::string str = "12345";
int i1 = stoi(str); // Works, have i1 = 12345
int i2 = stoi(str.substr(1,2)); // Works, have i2 = 23
try {
int i3 = stoi(std::string("abc"));
}
catch(const std::exception& e) {
std::cout << e.what() << std::endl; // Correctly throws 'invalid stoi argument'
}
But stoi does not support std::string_view. So alternatively, we could use atoi, but one has to be very careful, e.g.:
std::string_view sv = "12345";
int i1 = atoi(sv.data()); // Works, have i1 = 12345
int i2 = atoi(sv.substr(1,2).data()); // Works, but wrong, have i2 = 2345, not 23
So atoi does not work either, since it is based off the null-terminator '\0' (and e.g. sv.substr cannot simply insert/add one).
Now, since C++17 there is also from_chars, but it does not seem to throw when providing poor inputs:
try {
int i3;
std::string_view sv = "abc";
std::from_chars(sv.data(), sv.data() + sv.size(), i3);
}
catch (const std::exception& e) {
std::cout << e.what() << std::endl; // Does not get called
}
The std::from_chars function does not throw, it only returns a value of type from_chars_result which is a struct with two fields:
struct from_chars_result {
const char* ptr;
std::errc ec;
};
You should inspect the values of ptr and ec when the function returns:
#include <iostream>
#include <string>
#include <charconv>
// Demo: std::from_chars reports failure through its return value, not by throwing.
int main()
{
    const std::string_view sv = "abc";
    int i3;
    const char* const first = sv.data();
    const std::from_chars_result result = std::from_chars(first, first + sv.size(), i3);
    // invalid_argument is the code tested for here; "abc" contains no digits.
    const bool rejected = (result.ec == std::errc::invalid_argument);
    if (rejected)
        std::cout << "Could not convert.";
}
Unfortunately, there is no standard way that would throw an exception for you but std::from_chars has a return value code that you may use:
#include <charconv>
#include <stdexcept>
/// Throwing wrapper around std::from_chars.
/// Parses [first, last) into t, forwarding any extra arguments (e.g. a base).
/// @throws std::invalid_argument when from_chars reports errc::invalid_argument
/// @throws std::out_of_range     when from_chars reports errc::result_out_of_range
/// These two exceptions reflect the behavior of std::stoi.
template <class T, class... Args>
void from_chars_throws(const char* first, const char* last, T &t, Args... args) {
    const std::from_chars_result outcome = std::from_chars(first, last, t, args...);
    switch (outcome.ec) {
    case std::errc::invalid_argument:
        throw std::invalid_argument{"invalid_argument"};
    case std::errc::result_out_of_range:
        throw std::out_of_range{"out_of_range"};
    default:
        return;
    }
}
Obviously you can create svtoi, svtol from this, but the advantage of "extending" from_chars is that you only need a single templated function.
Building on @Ron's and @Holt's excellent answers, here's a small wrapper around std::from_chars() that returns an optional (std::nullopt when the input fails to parse).
#include <charconv>
#include <optional>
#include <string_view>
/// Parses a base-10 int from the start of input with std::from_chars.
/// @return the parsed value, or std::nullopt when from_chars reports
///         invalid_argument or result_out_of_range.
/// Characters after the parsed number are not inspected.
std::optional<int> to_int(const std::string_view & input)
{
    int parsed{};
    const char* const first = input.data();
    const char* const last = first + input.size();
    const std::from_chars_result result = std::from_chars(first, last, parsed);

    const bool failed = result.ec == std::errc::invalid_argument
                     || result.ec == std::errc::result_out_of_range;
    if (failed)
        return std::nullopt;
    return parsed;
}

std::num_put issue with nan-boxing due to auto-cast from float to double

I'm using this post to extend nan values with some extra info and this post to modify std::cout behaviour and display this extra info.
Here is the code defining the functions and NumPut class:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
/// Prints val, a label, and the bit pattern of val to std::cout as
/// "<val> (<what>): <bits>".
///
/// The object representation is copied byte-by-byte into a zeroed integer.
/// Reading the inactive member of a union (the previous approach) is
/// undefined behaviour in C++, and for a 4-byte T it also read 4 bytes that
/// were never written.
/// NOTE(review): like the original, the low-order-bits extraction assumes a
/// little-endian layout when sizeof(T) < sizeof(unsigned long long).
template <typename T>
void showValue( T val, const std::string& what )
{
    static_assert(sizeof(T) <= sizeof(unsigned long long),
                  "showValue: T must fit in unsigned long long");

    unsigned long long raw = 0;
    const unsigned char* const src = reinterpret_cast<const unsigned char*>(&val);
    unsigned char* const dst = reinterpret_cast<unsigned char*>(&raw);
    for (std::size_t i = 0; i < sizeof(T); ++i)
        dst[i] = src[i];

    std::bitset<sizeof(T) * 8> b(raw);
    std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
/// Returns a copy of value whose first byte (lowest address) has mask OR-ed in.
/// Asserts that this byte is zero beforehand.
template <typename T>
T customizeNaN( T value, char mask )
{
    T tagged = value;
    char* const firstByte = reinterpret_cast<char*>(&tagged);
    assert( *firstByte == 0 );
    *firstByte |= mask;
    return tagged;
}
/// True when the first byte (lowest address) of value equals mask.
/// No check is made that value is actually a NaN.
template <typename T>
bool isCustomNaN( T value, char mask )
{
    const char firstByte = *reinterpret_cast<const char*>(&value);
    return firstByte == mask;
}
/// Returns the first byte (lowest address) of value's object representation.
template <typename T>
char getCustomNaNMask( T value )
{
    const char* const bytes = reinterpret_cast<const char*>(&value);
    return *bytes;
}
/// std::num_put facet that replaces the textual form of NaN doubles.
/// - NaN whose mask byte (getCustomNaNMask) is 0: "Not a Number"
/// - any other NaN: "Custom Not a Number(0x<mask in hex>)"
/// - non-NaN values are delegated to the base facet.
/// Only the double overload of do_put is overridden here.
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
    using base_type = std::num_put<char, Iterator>;

public:
    using char_type = typename base_type::char_type;
    using iter_type = typename base_type::iter_type;

    NumPut(std::size_t refs = 0)
        : base_type(refs)
    {}

protected:
    virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
        if (!std::isnan(v))
            return base_type::do_put(out, str, fill, v);

        const char mask = getCustomNaNMask(v);
        if ( mask == 0x00 )
            return std::copy(std::begin(NotANumber), std::end(NotANumber), out);

        std::stringstream maskStr;
        // Go through unsigned char first: where char is signed, a mask of
        // 0x80 or above would otherwise sign-extend and print as 0xffffff80.
        maskStr << "(0x" << std::hex
                << static_cast<unsigned>(static_cast<unsigned char>(mask)) << ")";
        const std::string temp = maskStr.str();
        out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
        return std::copy(std::begin(temp), std::end(temp), out);
    }

private:
    static const std::string NotANumber;
    static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
/// Imbues str with a copy of its current locale in which the num_put facet is
/// replaced by NumPut.
inline void fixNaNToStream( std::ostream& str )
{
    auto* const facet = new NumPut<std::ostreambuf_iterator<char>>();
    // The facet is created with refs == 0 and handed to the locale.
    const std::locale patched(str.getloc(), facet);
    str.imbue(patched);
}
A simple test function:
/// Builds a plain quiet NaN of type T plus two tagged variants (masks 0x01 and
/// 0x02) and prints all three with showValue.
template<typename T>
void doTest()
{
    const T plain = std::numeric_limits<T>::quiet_NaN();
    const T taggedOne = customizeNaN( plain, 0x01 );
    const T taggedTwo = customizeNaN( plain, 0x02 );

    showValue( plain, "regular" );
    showValue( taggedOne, "custom 1" );
    showValue( taggedTwo, "custom 2" );
}
My main program:
// Installs the NaN-aware facet on std::cout, then runs the demo for double and float.
int main(int argc, char *argv[])
{
    fixNaNToStream( std::cout );

    doTest<double>();
    doTest<float>();

    return 0;
}
doTest<double> outputs:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000000000000000000000000000000001
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000000000000000000000000000000000010
doTest<float> outputs:
Not a Number (regular): 01111111110000000000000000000000
Not a Number (custom 1): 01111111110000000000000000000001
Not a Number (custom 2): 01111111110000000000000000000010
While I would expect for float:
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
The problem is that num_put only has a virtual do_put for double, not for float. So my float is silently casted to a double, losing my extended information.
I know there are some alternatives, like using FloatFormat from the second post, or simply writing a smart float2double function and calling it prior to sending my NaN value to the output stream, but they require the developer to take care of this situation...and he may forget to.
Is there no way to implement that within NumPut class or anything else that would simply make things work when a float is send to the imbued stream as nicely as it works for a double?
My requirement is to be able to simply call a function like fixNaNToStream for any output stream (std::cout, local std::stringstream, ...) and then send float and double to it and get them identified as my custom NaNs and displayed accordingly.
The problem is that num_put only has a virtual do_put for double, not for float. So my float is silently casted to a double, losing my extended information.
The information is lost because the positions of the bits carrying it are different when the number is converted from float to double:
// Assuming an IEE-754 floating-point representation of float and double
0 11111111 10000000000000000000010
0 11111111111 1000000000000000000001000000000000000000000000000000
Note that the mantissa bits are "shifted" by 3 positions, because the exponent requires 3 more bits.
Also, it's worth noting what it's stated in this page: https://en.cppreference.com/w/cpp/numeric/math/isnan
Copying a NaN is not required, by IEEE-754, to preserve its bit representation (sign and payload), though most implementations do.
I assume the same holds for casting such values, so that, even ignoring other causes of undefined behavior in OP's code, whether a method of NaN-boxing could work or not is actually implementation defined.
In my earlier attempts at answering this question, I used explicit bit shifting by different offsets to achieve the result, but as jpo38 also found out, the easiest way is to always generate a float NaN and then cast it correctly.
The Standard Library function std::nanf could be used to generate a "customized" float NaN, but in the following demo snippet I won't use it.
#include <cstdint>
#include <limits>
#include <cstring>
#include <cassert>
#include <type_traits>
#include <iostream>
#include <bitset>
#include <array>
#include <climits>
namespace my {

// Stand-in for C++20 std::bit_cast
// (source: https://en.cppreference.com/w/cpp/numeric/bit_cast).
// Copies the object representation of src into a new To. Only enabled when
// both types have the same size, From is trivially copyable and To is trivial
// (this implementation default-constructs To). Not constexpr: that would need
// compiler support.
template <class To, class From>
typename std::enable_if<
    (sizeof(To) == sizeof(From)) &&
    std::is_trivially_copyable<From>::value &&
    std::is_trivial<To>::value,
    To>::type
bit_cast(const From &src) noexcept
{
    To result;
    std::memcpy(&result, &src, sizeof(To));
    return result;
}

// Prints the bytes of x to std::cout as binary digits, starting with the byte
// at the highest address, followed by a newline.
template <typename T, std::size_t Size = sizeof(T)>
void print_bits(T x)
{
    std::array<unsigned char, Size> bytes;
    std::memcpy(bytes.data(), &x, Size);
    for (std::size_t i = Size; i-- > 0; )
    {
        const std::bitset<CHAR_BIT> byteBits{bytes[i]};
        std::cout << byteBits.to_string();
    }
    std::cout << '\n';
}

// The two helpers below assume that float and double both keep the mantissa in
// the low bits, and that converting a NaN (float->double or double->float)
// leaves the most significant mantissa bits unchanged.

// Returns a quiet NaN of type T that starts out as a float quiet NaN with
// `data` OR-ed into its low 8 bits, then converted to T.
template <typename T>
auto boxed_nan(uint8_t data = 0) -> typename std::enable_if<std::numeric_limits<T>::has_quiet_NaN, T>::type
{
    const uint32_t quietBits = bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN());
    const uint32_t taggedBits = quietBits | static_cast<uint32_t>(data);
    return bit_cast<float>(taggedBits);
}

// Converts num to float and returns the low 8 bits of its representation.
template <typename T>
uint8_t unbox_nan(T num)
{
    const float asFloat = static_cast<float>(num);
    return static_cast<uint8_t>(bit_cast<uint32_t>(asFloat));
}

} // End of namespace 'my'
// Round-trip checks: a payload boxed into a NaN must survive conversions
// between float, double and long double.
int main()
{
    // Boxed as float, read back directly and after widening to double.
    const auto my_nan = my::boxed_nan<float>(42);
    my::print_bits(my_nan);
    my::print_bits(static_cast<double>(my_nan));
    assert(my::unbox_nan(my_nan) == 42);
    assert(my::unbox_nan(static_cast<double>(my_nan)) == 42);

    // Boxed as double, read back directly and after narrowing to float.
    const auto my_d_nan = my::boxed_nan<double>(17);
    my::print_bits(my_d_nan);
    my::print_bits(static_cast<float>(my_d_nan));
    assert(my::unbox_nan(my_d_nan) == 17);
    assert(my::unbox_nan(static_cast<float>(my_d_nan)) == 17);

    // Boxed as long double, read back directly and after narrowing to double.
    const auto my_ld_nan = my::boxed_nan<long double>(9);
    assert(my::unbox_nan(my_ld_nan) == 9);
    assert(my::unbox_nan(static_cast<double>(my_ld_nan)) == 9);
}
As Bob pointed out, the extra bit in the double must sit at the same position relative to the biased exponent as it does in the float if you want the cast to work both ways (from float to double and from double to float).
Considering that, a very simple approach is to use the rightmost bits of the float. For double, instead of trying to determine manually which bits should be used, simply use cast operations and let the system work out the right place...
Then code becomes:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
/// Prints val, a label, and the bit pattern of val to std::cout as
/// "<val> (<what>): <bits>".
///
/// The object representation is copied byte-by-byte into a zeroed integer.
/// Reading the inactive member of a union (the previous approach) is
/// undefined behaviour in C++, and for a 4-byte T it also read 4 bytes that
/// were never written.
/// NOTE(review): like the original, the low-order-bits extraction assumes a
/// little-endian layout when sizeof(T) < sizeof(unsigned long long).
template <typename T>
void showValue( T val, const std::string& what )
{
    static_assert(sizeof(T) <= sizeof(unsigned long long),
                  "showValue: T must fit in unsigned long long");

    unsigned long long raw = 0;
    const unsigned char* const src = reinterpret_cast<const unsigned char*>(&val);
    unsigned char* const dst = reinterpret_cast<unsigned char*>(&raw);
    for (std::size_t i = 0; i < sizeof(T); ++i)
        dst[i] = src[i];

    std::bitset<sizeof(T) * 8> b(raw);
    std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
/// Returns a writable reference to the first byte (lowest address) of value.
char& getCustomNaNMask( float& value )
{
    return *reinterpret_cast<char*>(&value);
}
/** Builds a float quiet NaN and ORs mask into the byte exposed by getCustomNaNMask.
 *  The temp parameter exists only to select this overload (two functions cannot
 *  differ by return type alone); its incoming value is ignored. */
float getCustomizedNaN( char mask, float temp )
{
    // temp doubles as the local working variable
    temp = std::numeric_limits<float>::quiet_NaN();
    char& firstByte = getCustomNaNMask(temp);
    firstByte |= mask;
    return temp;
}
/** Builds the tagged NaN as a float first and then widens it to double, so the
 *  float->double conversion decides where the tag lands in the double.
 *  The temp parameter exists only to select this overload (two functions cannot
 *  differ by return type alone); it is not read. */
double getCustomizedNaN( char mask, double temp )
{
    const float tagged = getCustomizedNaN( mask, float() );
    // Let the system correctly cast from float to double, that's it!
    return static_cast<double>( tagged );
}
/// True when value, converted to float, carries mask in its first byte
/// (lowest address) -- the same byte getCustomNaNMask(float&) exposes.
///
/// The value is narrowed to float first. Passing a T straight to a float&
/// parameter (as before) cannot compile for T = double, and the tag is only
/// defined on the float representation.
template <typename T>
bool isCustomNaN( T value, char mask )
{
    const float asFloat = static_cast<float>( value );
    return *reinterpret_cast<const char*>(&asFloat) == mask;
}
/// std::num_put facet that replaces the textual form of NaN doubles.
/// The NaN is narrowed to float and its mask byte read with getCustomNaNMask:
/// - mask 0: "Not a Number"
/// - otherwise: "Custom Not a Number(0x<mask in hex>)"
/// Non-NaN values are delegated to the base facet.
/// Only the double overload of do_put is overridden here.
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
    using base_type = std::num_put<char, Iterator>;

public:
    using char_type = typename base_type::char_type;
    using iter_type = typename base_type::iter_type;

    NumPut(std::size_t refs = 0)
        : base_type(refs)
    {}

protected:
    virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
        if (!std::isnan(v))
            return base_type::do_put(out, str, fill, v);

        // The tag is defined on the float representation, so narrow first.
        float asFloat = static_cast<float>( v );
        const char mask = getCustomNaNMask(asFloat);
        if ( mask == 0x00 )
            return std::copy(std::begin(NotANumber), std::end(NotANumber), out);

        std::stringstream maskStr;
        // Go through unsigned char first: where char is signed, a mask of
        // 0x80 or above would otherwise sign-extend and print as 0xffffff80.
        maskStr << "(0x" << std::hex
                << static_cast<unsigned>(static_cast<unsigned char>(mask)) << ")";
        const std::string temp = maskStr.str();
        out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
        return std::copy(std::begin(temp), std::end(temp), out);
    }

private:
    static const std::string NotANumber;
    static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
/// Imbues str with a copy of its current locale in which the num_put facet is
/// replaced by NumPut.
inline void fixNaNToStream( std::ostream& str )
{
    auto* const facet = new NumPut<std::ostreambuf_iterator<char>>();
    // The facet is created with refs == 0 and handed to the locale.
    const std::locale patched(str.getloc(), facet);
    str.imbue(patched);
}
And test program:
/// Builds a plain quiet NaN of type T plus two tagged variants (masks 0x01 and
/// 0x02, via the getCustomizedNaN overload selected by T()) and prints all
/// three with showValue.
template<typename T>
void doTest()
{
    const T plain = std::numeric_limits<T>::quiet_NaN();
    const T taggedOne = getCustomizedNaN( 0x01, T() );
    const T taggedTwo = getCustomizedNaN( 0x02, T() );

    showValue( plain, "regular" );
    showValue( taggedOne, "custom 1" );
    showValue( taggedTwo, "custom 2" );
}
// Installs the NaN-aware facet on std::cout, then runs the demo for double and float.
int main(int argc, char *argv[])
{
    fixNaNToStream( std::cout );

    doTest<double>();
    doTest<float>();

    return 0;
}
Outputs:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000100000000000000000000000000000
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000001000000000000000000000000000000
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
Thanks Bob!

Stringstream peculiarity must copy entire string twice? [duplicate]

If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?
std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// Thin std::stringbuf wrapper that publishes the start of the put area, which
// is otherwise reachable only through the protected streambuf interface.
struct my_stringbuf : std::stringbuf {
    // Returns pbase(), the beginning of the output sequence. Nothing is
    // appended here, so the caller must write its own terminator (e.g.
    // std::ends) before treating the result as a C string.
    // pptr() would give the current write position instead.
    const char* my_str() const
    {
        const char* const start = this->pbase();
        return start;
    }
};
// Demo: stream a few floats through my_stringbuf and print the buffer in place.
int main()
{
    std::vector<float> v = {1.1, -3.4, 1/7.0};
    my_stringbuf buf;
    std::ostream ss(&buf);
    for (const float value : v)
        ss << value << ' ';
    // Terminate the text so that my_str() can be printed as a C string.
    ss << std::ends;
    std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Demo: build the text in a std::ostrstream and read its buffer without copying.
int main()
{
    std::vector<float> v = {1.1, -3.4, 1/7.0};
    std::ostrstream ss;
    for (const float value : v)
        ss << value << ' ';
    // Terminate the text so the raw buffer can be printed as a C string.
    ss << std::ends;
    const char* buffer = ss.str(); // direct access!
    std::cout << buffer << '\n';
    // Undo the freeze applied by str() (see the std::ostrstream reference).
    ss.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Demo: format a vector of floats into a std::string with Boost.Spirit Karma.
int main()
{
std::vector<float> v = {1.1, -3.4, 1/7.0};
std::string s;
// The output iterator is a back-inserter on s, so generated text is appended to s directly.
// NOTE(review): `karma::double_ % ' '` presumably emits the elements separated by a space -- confirm against the Karma docs.
karma::generate(back_inserter(s), karma::double_ % ' ', v);
std::cout << s << '\n'; // here's your string
}
This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.
+1 for the Boost Karma answer by @Cubbi and for the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Sample consumer of the generic std::ostream interface: writes a fixed
// greeting, the number 42 in hex with its base prefix, and a boolean as text.
// The hex/showbase/boolalpha flags stay set on os after the call.
void foo(std::ostream& os) {
    os << "Hello world ";
    os << std::hex << std::showbase << 42;
    os << " ";
    os << std::boolalpha << true << "\n";
}
#include <iostream>
// Demo: run foo() against an ostream whose stream buffer appends to `output`.
int main() {
std::string output;
output.reserve(100); // optional: pre-size when the approximate (or minimum) output length is known
// buf and os live only inside this scope, so both are destroyed before `output` is read below.
// NOTE(review): presumably the destruction is what flushes pending text into `output` -- confirm against the Boost.Iostreams docs.
{
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, std::vector<char>, etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
// Demo: run foo() against an ostream backed by a fixed 100-byte char array.
int main() {
// Zero-initialised, so the array can be printed as a C string as long as the text written is shorter than the array.
char output[100] = {0};
// buf and os live only inside this scope, so both are destroyed before `output` is printed.
// NOTE(review): what happens when more than 100 bytes are written is not visible here -- confirm against the Boost.Iostreams array device docs.
{
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true
I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <ostream>
#include <streambuf>
#include <string>
#include <utility>

/// Output-only stream that writes directly into an owned std::basic_string,
/// so the finished text can be moved out with take_str() instead of copied.
///
/// The object is its own stream buffer (private base). No put area is
/// installed: every write reaches overflow() or xsputn(), which append to
/// m_str. That keeps m_str exactly equal to the text written at all times,
/// so str() never exposes padding and there is no pointer into the string
/// that could dangle after a reallocation.
template <typename char_type>
class basic_outstringstream : private std::basic_streambuf<char_type, std::char_traits<char_type>>,
                              public std::basic_ostream<char_type, std::char_traits<char_type>>
{
    using traits_type = std::char_traits<char_type>;
    using base_buf_type = std::basic_streambuf<char_type, traits_type>;
    using base_stream_type = std::basic_ostream<char_type, traits_type>;
    using int_type = typename base_buf_type::int_type;

    std::basic_string<char_type> m_str;

    /// Single-character write. eof is a no-op that still reports success.
    int_type overflow(int_type ch) override
    {
        if (!traits_type::eq_int_type(ch, traits_type::eof()))
            m_str.push_back(traits_type::to_char_type(ch));
        return traits_type::not_eof(ch);
    }

    /// Bulk write: appends count characters from s and reports them all written.
    std::streamsize xsputn(const char_type* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Creates an empty stream with room reserved for reserveSize characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Takes over str; later writes are appended after its contents.
    explicit basic_outstringstream(std::basic_string<char_type>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Starts from a copy of str; later writes are appended after it.
    explicit basic_outstringstream(const std::basic_string<char_type>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// Read-only view of everything written so far.
    const std::basic_string<char_type>& str() const
    {
        return m_str;
    }

    /// Moves the accumulated text out and leaves the stream empty and reusable.
    /// Returned by value so the caller owns the text whichever way the result
    /// is bound; `std::string s = x.take_str();` still moves.
    std::basic_string<char_type> take_str()
    {
        std::basic_string<char_type> taken = std::move(m_str);
        m_str.clear(); // a moved-from string is valid but unspecified
        return taken;
    }

    /// Discards the accumulated text. Stream state flags are left untouched.
    void clear()
    {
        m_str.clear();
    }
};
using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.
I adapted the very good answer by @Kuba to fix some issues (unfortunately he's currently unresponsive). In particular:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of possible undefined behavior using string::front() and string::back(), as pointed out by @LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
    /// Output-only stream that collects its output in a std::basic_string and
    /// lets the caller move that string out without a copy (take_str()).
    ///
    /// The stream owns a private stream buffer whose put area is the string's
    /// own storage. The string is grown by doubling, so its size() is the
    /// buffer capacity; the number of characters actually written is reported
    /// by size() of this class.
    template <typename CharT>
    class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
    {
        using traits_type = std::char_traits<CharT>;
        using base_stream_type = std::basic_ostream<CharT, traits_type>;
        using string_type = std::basic_string<CharT>;

        /// Stream buffer writing straight into `m_str`.
        class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
        {
            using base_buf_type = std::basic_streambuf<CharT, traits_type>;
            using int_type = typename base_buf_type::int_type;

        private:
            /// pbump() only accepts an int, so larger offsets are applied in
            /// int-sized steps.
            /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
            void safe_pbump(std::streamsize off)
            {
                constexpr std::streamsize int_max = std::numeric_limits<int>::max();
                constexpr std::streamsize int_min = std::numeric_limits<int>::min();
                while (off > int_max)
                {
                    this->pbump(std::numeric_limits<int>::max());
                    off -= int_max;
                }
                while (off < int_min)
                {
                    this->pbump(std::numeric_limits<int>::min());
                    off -= int_min;
                }
                this->pbump(static_cast<int>(off));
            }

            /// Points the put area at the string's storage and places the
            /// write position `used` characters in.
            void reset_put_area(std::size_t used)
            {
                // &m_str[0] is valid even for an empty string and avoids
                // casting away the const of data().
                CharT* const first = &m_str[0];
                this->setp(first, first + m_str.size());
                safe_pbump(static_cast<std::streamsize>(used));
            }

            /// Makes the whole current string the already-written part, so
            /// that the next character is appended after it.
            void init()
            {
                reset_put_area(m_str.size());
            }

        protected:
            /// Called when the put area is full: doubles the string, re-seats
            /// the put area and stores `ch`.
            int_type overflow(int_type ch) override
            {
                if (traits_type::eq_int_type(ch, traits_type::eof()))
                    return traits_type::not_eof(ch);

                // Measure BEFORE resizing: resize() may reallocate and leave
                // pbase()/pptr() dangling.
                const std::size_t used = size();
                m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);
                reset_put_area(used);

                *this->pptr() = traits_type::to_char_type(ch);
                this->pbump(1);
                return ch;
            }

        public:
            /// Empty buffer with room reserved for `reserveSize` characters.
            explicit buffer(std::size_t reserveSize)
            {
                m_str.reserve(reserveSize);
                init();
            }

            /// Takes over `str`; output is appended after its contents.
            explicit buffer(std::basic_string<CharT>&& str)
                : m_str(std::move(str))
            {
                init();
            }

            /// Starts from a copy of `str`; output is appended after it.
            explicit buffer(const std::basic_string<CharT>& str)
                : m_str(str)
            {
                init();
            }

        public:
            /// Number of characters written so far.
            size_t size() const
            {
                return static_cast<size_t>(this->pptr() - this->pbase());
            }

#if __cplusplus >= 201703L
            /// View of the written characters; invalidated by further output,
            /// take_str() and clear().
            std::basic_string_view<CharT> str() const
            {
                return std::basic_string_view<CharT>(m_str.data(), size());
            }
#endif

            /// Moves the written text out and resets the buffer to empty.
            std::basic_string<CharT> take_str()
            {
                // Trim the growth padding so only written characters remain.
                m_str.resize(size());
                // Must be basic_string<CharT>, not std::string, to work for
                // wchar_t as well.
                string_type taken = std::move(m_str);
                m_str.clear(); // moved-from state is otherwise unspecified
                init();
                return taken;
            }

            /// Discards the written text.
            void clear()
            {
                m_str.clear();
                init();
            }

            /// Start of the internal storage. Only the first size() characters
            /// are meaningful; no terminator is guaranteed at that position.
            const CharT * data() const
            {
                return m_str.data();
            }

        private:
            std::basic_string<CharT> m_str;
        };

    public:
        /// Creates an empty stream with room reserved for `reserveSize`
        /// characters.
        explicit basic_outstringstream(std::size_t reserveSize = 8)
            : base_stream_type(nullptr), m_buffer(reserveSize)
        {
            // m_buffer is constructed after the base class, so it is attached
            // here rather than in the base initialiser.
            this->rdbuf(&m_buffer);
        }

        /// Creates a stream that takes over `str` (moved, not copied).
        explicit basic_outstringstream(std::basic_string<CharT>&& str)
            : base_stream_type(nullptr), m_buffer(std::move(str))
        {
            this->rdbuf(&m_buffer);
        }

        /// Creates a stream seeded with a copy of `str`.
        explicit basic_outstringstream(const std::basic_string<CharT>& str)
            : base_stream_type(nullptr), m_buffer(str)
        {
            this->rdbuf(&m_buffer);
        }

#if __cplusplus >= 201703L
        /// View of the text written so far (C++17 and later only).
        std::basic_string_view<CharT> str() const
        {
            return m_buffer.str();
        }
#endif

        /// Moves the written text out; the stream is empty and reusable
        /// afterwards.
        std::basic_string<CharT> take_str()
        {
            return m_buffer.take_str();
        }

        /// Start of the internal storage; see size() for the valid length.
        const CharT * data() const
        {
            return m_buffer.data();
        }

        /// Number of characters written so far.
        size_t size() const
        {
            return m_buffer.size();
        }

        /// Discards the written text. Stream state flags are not touched.
        void clear()
        {
            m_buffer.clear();
        }

    private:
        buffer m_buffer;
    };

    using outstringstream = basic_outstringstream<char>;
    using woutstringstream = basic_outstringstream<wchar_t>;
}

boost::udp socket.recieve_from() appends data to the end of the buffer

I have implemented a udp_client using boost_asio. The udp_client::recieve_from() is given below.
// Reads one message from m_socket into the buffer manager's receive array,
// prints its length and the buffer manager's message string, then sends an
// "ACK" packet to m_endpoint.
void udp_client::recieve_from()
{
// Zero the whole receive array before reading into it.
for(unsigned int i = 0; i < m_buffer_manager.get_recieve_array().size(); ++i)
m_buffer_manager.get_recieve_array()[i] = 0;
/*Initialize our endpoint*/
// len is the value returned by receive_from; it is used below as the number
// of valid bytes in the receive array.
size_t len = m_socket.receive_from(
boost::asio::buffer(m_buffer_manager.get_recieve_array()), m_sender_endpoint);
// Hand the first len bytes to the buffer manager for conversion into its
// message string.
m_buffer_manager.message_buffer(m_buffer_manager.get_recieve_array(),len);
std::cout << "Length of recieved message " << len << std::endl;
/*dumps the message into std::cout for debugging.*/
std::cout << m_buffer_manager.get_message_string() << std::endl;
//std::cout.write((const char*)&m_buffer_manager.get_recieve_array()[0], len);
// Acknowledge: the reply goes to m_endpoint, not to m_sender_endpoint.
packet_t ack_packet = { "ACK", {} };
auto buffer = ack_packet.serialize();
m_socket.send_to(boost::asio::buffer(buffer), m_endpoint);
}
The udp_client.hpp file is shown below.
// UDP client built on boost::asio: owns a socket, two endpoints and a
// buffer_manager for received data.
class udp_client
{
public:
// host and port are passed as strings; how they are resolved is not shown here.
udp_client(boost::asio::io_service& io_service,const std::string& host,const std::string& port);
~udp_client();
void subscribe();
void publish(const std::string& message);
// Receives a message and replies with an "ACK" packet (see the definition).
void recieve_from();
private:
boost::asio::io_service& m_io_service;
boost::asio::ip::udp::udp::socket m_socket;
// Destination used by recieve_from() for the ACK it sends.
boost::asio::ip::udp::udp::endpoint m_endpoint;
// Passed to receive_from() as the sender endpoint argument.
boost::asio::ip::udp::endpoint m_sender_endpoint;
// Holds the raw receive array and the message string derived from it.
buffer_manager m_buffer_manager;
};
The buffer_manager object that is used to manage the recieve buffer is shown below.
// Owns the fixed-size receive array and a string built from received bytes.
class buffer_manager
{
public:
// Receive storage: 4096 unsigned chars.
typedef boost::array<unsigned char, 4096> m_array_type;
buffer_manager();
~buffer_manager();
// Converts the first `size` elements of recv_buf into the message string.
void message_buffer(m_array_type &recv_buf,size_t size);
// Mutable access to the receive array.
buffer_manager::m_array_type & get_recieve_array();
// Mutable access to the message string.
std::string & get_message_string();
private:
std::string m_message;
m_array_type m_recv_buf;
};
My problem with the udp_client::recieve_from() code is that
size_t len = m_socket.receive_from(boost::asio::buffer(m_buffer_manager.get_recieve_array()), m_sender_endpoint);
returns one packet after receiving one packet. When it receives two packets, it returns the entire two packets (i.e. the contents of the second packet are appended to the contents of the first packet).
This is in spite of
for(unsigned int i = 0; i < m_buffer_manager.get_recieve_array().size(); ++i)
m_buffer_manager.get_recieve_array()[i] = 0;
where I explicitly clear the buffer. What is the reason for this? How do I get around this issue?
Please find the implementation for buffer_manager.cpp below.
#include <iostream>
#include <boost/array.hpp>
#include <boost/algorithm/hex.hpp>
#include <algorithm>
#include "buffer_manager.hpp"
// Nothing to set up: both members are default-initialised.
buffer_manager::buffer_manager()
{
}
// Nothing to release.
buffer_manager::~buffer_manager()
{
}
// Hex-encodes the first `size` elements of recv_buf and appends the result to
// m_message. m_message is not cleared here, so the output of successive calls
// accumulates in it.
void buffer_manager::message_buffer(m_array_type &recv_buf,size_t size)
{
// `it` marks the end of the valid data: begin() advanced by `size`.
auto it = recv_buf.begin();
std::advance(it,size);
boost::algorithm::hex(recv_buf.begin(), it, back_inserter(m_message));
}
// Returns the receive array by mutable reference.
buffer_manager::m_array_type& buffer_manager::get_recieve_array()
{
return m_recv_buf;
}
// Returns the message string by mutable reference.
std::string & buffer_manager::get_message_string()
{
return m_message;
}
The receive_from() operation is functioning properly and not appending data to the end of the buffer. On the other hand, buffer_manager::message_buffer() is appending to m_message on each invocation, as it uses a back_insert_iterator and never clears the string.
// Excerpt of the question's function (parameter list elided) annotated to show
// where the appending happens.
void buffer_manager::message_buffer(...)
{
auto it = recv_buf.begin();
std::advance(it, size);
boost::algorithm::hex(recv_buf.begin(), it, back_inserter(m_message));
// ^~~ invokes m_message.push_back() for the
// range [recv_buf.begin(), it).
}
To resolve this, consider clearing the string beforehand.
// Corrected excerpt (parameter list elided): the message string is emptied
// before the hex output of the current buffer is appended to it.
void buffer_manager::message_buffer(...)
{
auto it = recv_buf.begin();
std::advance(it, size);
// Drop the previous message so successive calls do not accumulate.
m_message.clear();
boost::algorithm::hex(recv_buf.begin(), it, back_inserter(m_message));
}
Here is a minimal example demonstrating std::back_inserter:
#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
// Demonstrates that assigning through a back_insert_iterator appends to the
// container it was created from.
int main()
{
std::string message = "abc";
auto inserter = back_inserter(message);
// Each assignment below adds one character to the end of `message`.
inserter = 'd';
inserter = 'e';
assert("abcde" == message);
}
It is not clear to me what value the buffer_manager provides. However, if you want to print the hex-value of a buffer, consider writing to the ostream using an ostream_iterator without the overhead of constructing a string. For example, the following utility function writes the hex values of an iterator range to the provided ostream:
// Writes the hex encoding of the range [first, last) to `out`, character by
// character, without building an intermediate string.
template <typename Iterator>
void write_hex(Iterator first, Iterator last, std::ostream& out)
{
    std::ostream_iterator<char> sink(out);
    boost::algorithm::hex(first, last, sink);
}
and its usage:
unsigned char data[3] = { 0, 10, 255 };
write_hex(std::begin(data), std::end(data), std::cout); // writes 000AFF to stdout.
Here is a complete example demonstrating printing the hex-value of various buffer types to stdout using the write_hex function and with a custom type to streamline writing hex to an ostream:
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/hex.hpp>
// Utility function: writes the hex encoding of the range [first, last) to
// `out`, character by character, without building an intermediate string.
template <typename Iterator>
void write_hex(Iterator first, Iterator last, std::ostream& out)
{
    std::ostream_iterator<char> sink(out);
    boost::algorithm::hex(first, last, sink);
}
namespace detail {
// Utility type to write an iterable as hex to a stream via the insertion
// operator.
template <typename Iterable>
struct hex_writer
{
// Held by reference: the wrapped object must outlive the hex_writer.
const Iterable& iterable;
// Streams the wrapped iterable as hex by forwarding its begin/end to
// write_hex; returns the stream so insertions can be chained.
friend std::ostream& operator<<(std::ostream& stream, const hex_writer& object)
{
write_hex(std::begin(object.iterable), std::end(object.iterable), stream);
return stream;
}
};
} // namespace detail
// Factory for detail::hex_writer. Wraps `iterable` by reference so it can be
// chained into an ostream expression, e.g. `out << as_hex(data)`.
template <typename Iterable>
detail::hex_writer<Iterable> as_hex(const Iterable& iterable)
{
    detail::hex_writer<Iterable> writer{iterable};
    return writer;
}
// Prints the same three bytes (0, 10, 255) as hex twice per container type:
// once through write_hex directly and once through as_hex with operator<<.
int main()
{
// Using c-array.
{
unsigned char data[3] = { 0, 10, 255 };
write_hex(std::begin(data), std::end(data), std::cout);
std::cout << " " << as_hex(data) << std::endl;
}
// Using c++-array.
{
std::array<unsigned char, 3> data = {{ 0, 10, 255 }};
write_hex(begin(data), end(data), std::cout);
std::cout << " " << as_hex(data) << std::endl;
}
// Using vector.
{
std::vector<unsigned char> data = { 0, 10, 255 };
write_hex(begin(data), end(data), std::cout);
std::cout << " " << as_hex(data) << std::endl;
}
}
Output:
000AFF 000AFF
000AFF 000AFF
000AFF 000AFF
The fact that you're clearing the buffer beforehand proves conclusively that the problem isn't in boost::asio, unless you're suggesting that it keeps a memory for some unknown purpose.
Either:
The sender is sending datagrams with duplicated data, or
The problem lies somewhere in your buffer manager class, probably the string thing.
I don't see the point of this class. I suggest you rewrite the code using a char array like everybody else.

Move the string out of a std::ostringstream

If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?
std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// String buffer that exposes the start of its output sequence through the
// protected std::streambuf interface, so the text can be read without a copy.
struct my_stringbuf : std::stringbuf {
    // Returns pbase(), the beginning of the put area. No terminator is added
    // here; pptr() (the current write position) might be useful too.
    const char* my_str() const {
        const char* const put_area_begin = this->pbase();
        return put_area_begin;
    }
};
// Formats the vector as space-separated values into a my_stringbuf and prints
// the buffer's contents directly, without calling str().
int main()
{
std::vector<float> v = {1.1, -3.4, 1/7.0};
my_stringbuf buf;
std::ostream ss(&buf);
for(unsigned int i=0; i < v.size(); ++i)
ss << v[i] << ' ';
// Terminate the text so the raw buffer can be printed as a C string.
ss << std::ends;
std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Same formatting loop using the deprecated std::ostrstream, whose str()
// returns a pointer to the stream's character buffer.
int main()
{
std::vector<float> v = {1.1, -3.4, 1/7.0};
std::ostrstream ss;
for(unsigned int i=0; i < v.size(); ++i)
ss << v[i] << ' ';
// Terminate the text so the buffer can be printed as a C string.
ss << std::ends;
const char* buffer = ss.str(); // direct access!
std::cout << buffer << '\n';
// NOTE(review): per the ostrstream documentation str() freezes the buffer and
// freeze(false) hands ownership back so the stream can free it - confirm.
ss.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Builds the string with Boost.Karma, generating straight into `s` through a
// back_inserter instead of going through a stream.
int main()
{
std::vector<float> v = {1.1, -3.4, 1/7.0};
std::string s;
// `karma::double_ % ' '` is presumably Karma's list syntax (doubles separated
// by a space) - confirm against the Boost.Spirit Karma docs.
karma::generate(back_inserter(s), karma::double_ % ' ', v);
std::cout << s << '\n'; // here's your string
}
This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.
+1 for the Boost Karma by @Cubbi and the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Stand-in for "any code that writes to an ostream": emits a fixed greeting,
// the number 42 in prefixed hexadecimal and a boolean spelled as text.
// The hex, showbase and boolalpha flags remain set on `os` afterwards.
void foo(std::ostream& os) {
    os << "Hello world ";
    os << std::hex;
    os << std::showbase;
    os << 42;
    os << " ";
    os << std::boolalpha;
    const bool always_true = (1 == 1);
    os << always_true;
    os << "\n";
}
#include <iostream>
// Demo entry point: routes everything foo() writes into `output` through a
// Boost.Iostreams stream buffer, then prints the collected text.
int main() {
std::string output;
output.reserve(100); // optional: pre-size if you know roughly how large the output is going to be, or the minimal size it will require
{
// Inner scope: `buf` and `os` are destroyed at the closing brace, before
// `output` is read below (presumably so buffered characters are flushed
// into `output` first - confirm against the Boost.Iostreams docs).
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, or std::vector<char> etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
// Demo entry point: same as the std::string version, but foo() writes into a
// fixed-size, caller-provided char array instead of a growing string.
int main() {
// Zero-initialised so the array can be printed as a C string below.
// NOTE(review): that relies on the output being shorter than 100 characters.
char output[100] = {0};
{
// Inner scope: `buf` and `os` are destroyed before `output` is printed.
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true
I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <cstddef>
#include <ios>
#include <ostream>
#include <streambuf>
#include <string>
#include <utility>

/// Output-only stream that accumulates everything written to it in a
/// std::basic_string which can later be moved out without copying
/// (see take_str()).
///
/// The object acts as its own stream buffer. No put area is installed, so
/// every character reaches overflow() or xsputn() and is appended directly to
/// the string. The string therefore always holds exactly the text written so
/// far, and no pointer into its storage is ever retained (the earlier
/// setp()/front()/back() scheme was undefined for an empty string and exposed
/// the zero-filled growth padding through str()).
template <typename CharT>
class basic_outstringstream : private std::basic_streambuf<CharT, std::char_traits<CharT>>,
                              public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_buf_type = std::basic_streambuf<CharT, traits_type>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;
    using int_type = typename base_buf_type::int_type;
    using string_type = std::basic_string<CharT>;

    string_type m_str;

    /// Single-character sink: appends `ch` unless it is the eof marker.
    /// Returns a non-eof value to signal success in both cases.
    int_type overflow(int_type ch) override
    {
        if (!traits_type::eq_int_type(ch, traits_type::eof()))
            m_str.push_back(traits_type::to_char_type(ch));
        return traits_type::not_eof(ch);
    }

    /// Bulk sink: appends `count` characters starting at `s` in one call.
    /// Returns the number of characters consumed.
    std::streamsize xsputn(const CharT* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Creates an empty stream, pre-allocating room for `reserveSize`
    /// characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Creates a stream that takes over `str`; later output is appended to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Creates a stream seeded with a copy of `str`; later output is appended.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// Read-only view of the text accumulated so far. The reference stays
    /// valid for the lifetime of the stream, but its contents change with
    /// further output, take_str() and clear().
    const std::basic_string<CharT>& str() const
    {
        return m_str;
    }

    /// Moves the accumulated text out of the stream without copying it.
    /// The stream is left empty and can be written to again.
    /// Returned by value (previously an rvalue reference) so that the stream
    /// is in a defined state regardless of what the caller does with it.
    std::basic_string<CharT> take_str()
    {
        string_type taken = std::move(m_str);
        m_str.clear(); // a moved-from string is only "valid but unspecified"
        return taken;
    }

    /// Discards the accumulated text. Stream state flags are not touched.
    void clear()
    {
        m_str.clear();
    }
};

using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.
I adapted the very good answer by @Kuba to fix some issues (unfortunately he's currently unresponsive). In particular:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of possible undefined behavior using string::front() and string::back(), as pointed out by @LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
    /// Output-only stream that collects its output in a std::basic_string and
    /// lets the caller move that string out without a copy (take_str()).
    ///
    /// The stream owns a private stream buffer whose put area is the string's
    /// own storage. The string is grown by doubling, so its size() is the
    /// buffer capacity; the number of characters actually written is reported
    /// by size() of this class.
    template <typename CharT>
    class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
    {
        using traits_type = std::char_traits<CharT>;
        using base_stream_type = std::basic_ostream<CharT, traits_type>;
        using string_type = std::basic_string<CharT>;

        /// Stream buffer writing straight into `m_str`.
        class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
        {
            using base_buf_type = std::basic_streambuf<CharT, traits_type>;
            using int_type = typename base_buf_type::int_type;

        private:
            /// pbump() only accepts an int, so larger offsets are applied in
            /// int-sized steps.
            /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
            void safe_pbump(std::streamsize off)
            {
                constexpr std::streamsize int_max = std::numeric_limits<int>::max();
                constexpr std::streamsize int_min = std::numeric_limits<int>::min();
                while (off > int_max)
                {
                    this->pbump(std::numeric_limits<int>::max());
                    off -= int_max;
                }
                while (off < int_min)
                {
                    this->pbump(std::numeric_limits<int>::min());
                    off -= int_min;
                }
                this->pbump(static_cast<int>(off));
            }

            /// Points the put area at the string's storage and places the
            /// write position `used` characters in.
            void reset_put_area(std::size_t used)
            {
                // &m_str[0] is valid even for an empty string and avoids
                // casting away the const of data().
                CharT* const first = &m_str[0];
                this->setp(first, first + m_str.size());
                safe_pbump(static_cast<std::streamsize>(used));
            }

            /// Makes the whole current string the already-written part, so
            /// that the next character is appended after it.
            void init()
            {
                reset_put_area(m_str.size());
            }

        protected:
            /// Called when the put area is full: doubles the string, re-seats
            /// the put area and stores `ch`.
            int_type overflow(int_type ch) override
            {
                if (traits_type::eq_int_type(ch, traits_type::eof()))
                    return traits_type::not_eof(ch);

                // Measure BEFORE resizing: resize() may reallocate and leave
                // pbase()/pptr() dangling.
                const std::size_t used = size();
                m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);
                reset_put_area(used);

                *this->pptr() = traits_type::to_char_type(ch);
                this->pbump(1);
                return ch;
            }

        public:
            /// Empty buffer with room reserved for `reserveSize` characters.
            explicit buffer(std::size_t reserveSize)
            {
                m_str.reserve(reserveSize);
                init();
            }

            /// Takes over `str`; output is appended after its contents.
            explicit buffer(std::basic_string<CharT>&& str)
                : m_str(std::move(str))
            {
                init();
            }

            /// Starts from a copy of `str`; output is appended after it.
            explicit buffer(const std::basic_string<CharT>& str)
                : m_str(str)
            {
                init();
            }

        public:
            /// Number of characters written so far.
            size_t size() const
            {
                return static_cast<size_t>(this->pptr() - this->pbase());
            }

#if __cplusplus >= 201703L
            /// View of the written characters; invalidated by further output,
            /// take_str() and clear().
            std::basic_string_view<CharT> str() const
            {
                return std::basic_string_view<CharT>(m_str.data(), size());
            }
#endif

            /// Moves the written text out and resets the buffer to empty.
            std::basic_string<CharT> take_str()
            {
                // Trim the growth padding so only written characters remain.
                m_str.resize(size());
                // Must be basic_string<CharT>, not std::string, to work for
                // wchar_t as well.
                string_type taken = std::move(m_str);
                m_str.clear(); // moved-from state is otherwise unspecified
                init();
                return taken;
            }

            /// Discards the written text.
            void clear()
            {
                m_str.clear();
                init();
            }

            /// Start of the internal storage. Only the first size() characters
            /// are meaningful; no terminator is guaranteed at that position.
            const CharT * data() const
            {
                return m_str.data();
            }

        private:
            std::basic_string<CharT> m_str;
        };

    public:
        /// Creates an empty stream with room reserved for `reserveSize`
        /// characters.
        explicit basic_outstringstream(std::size_t reserveSize = 8)
            : base_stream_type(nullptr), m_buffer(reserveSize)
        {
            // m_buffer is constructed after the base class, so it is attached
            // here rather than in the base initialiser.
            this->rdbuf(&m_buffer);
        }

        /// Creates a stream that takes over `str` (moved, not copied).
        explicit basic_outstringstream(std::basic_string<CharT>&& str)
            : base_stream_type(nullptr), m_buffer(std::move(str))
        {
            this->rdbuf(&m_buffer);
        }

        /// Creates a stream seeded with a copy of `str`.
        explicit basic_outstringstream(const std::basic_string<CharT>& str)
            : base_stream_type(nullptr), m_buffer(str)
        {
            this->rdbuf(&m_buffer);
        }

#if __cplusplus >= 201703L
        /// View of the text written so far (C++17 and later only).
        std::basic_string_view<CharT> str() const
        {
            return m_buffer.str();
        }
#endif

        /// Moves the written text out; the stream is empty and reusable
        /// afterwards.
        std::basic_string<CharT> take_str()
        {
            return m_buffer.take_str();
        }

        /// Start of the internal storage; see size() for the valid length.
        const CharT * data() const
        {
            return m_buffer.data();
        }

        /// Number of characters written so far.
        size_t size() const
        {
            return m_buffer.size();
        }

        /// Discards the written text. Stream state flags are not touched.
        void clear()
        {
            m_buffer.clear();
        }

    private:
        buffer m_buffer;
    };

    using outstringstream = basic_outstringstream<char>;
    using woutstringstream = basic_outstringstream<wchar_t>;
}