Is there a safe standard way to convert std::string_view to int?
Since C++11 std::string lets us use stoi to convert to int:
// Demonstrates std::stoi on std::string: two successful conversions, then a
// non-numeric input whose failure is reported through an exception.
std::string str = "12345";
int i1 = stoi(str); // whole string converted: i1 = 12345
int i2 = stoi(str.substr(1,2)); // substr returns a new std::string "23": i2 = 23
try {
int i3 = stoi(std::string("abc")); // not a number, so stoi throws here
}
catch(const std::exception& e) {
std::cout << e.what() << std::endl; // prints the exception text (reported here as 'invalid stoi argument')
}
But stoi does not support std::string_view. So alternatively, we could use atoi, but one has to be very careful, e.g.:
// Demonstrates why atoi is unsafe with std::string_view: atoi only receives a
// pointer and keeps reading until the '\0' of the underlying literal.
std::string_view sv = "12345";
int i1 = atoi(sv.data()); // Works, have i1 = 12345
int i2 = atoi(sv.substr(1,2).data()); // Compiles, but the view's length is ignored: i2 = 2345, not 23
So atoi does not work either, since it is based off the null-terminator '\0' (and e.g. sv.substr cannot simply insert/add one).
Now, since C++17 there is also from_chars, but it does not seem to throw when providing poor inputs:
// Demonstrates that std::from_chars does not signal bad input by throwing:
// the returned result is discarded here, so the failure goes unnoticed.
try {
int i3;
std::string_view sv = "abc";
std::from_chars(sv.data(), sv.data() + sv.size(), i3); // return value (which carries the error code) is ignored
}
catch (const std::exception& e) {
std::cout << e.what() << std::endl; // Does not get called
}
The std::from_chars function does not throw, it only returns a value of type from_chars_result which is a struct with two fields:
// Shape of the value returned by std::from_chars (reproduced for illustration).
struct from_chars_result {
const char* ptr; // pointer into the parsed character range; see the std::from_chars reference for its exact meaning
std::errc ec; // error code; compared against std::errc::invalid_argument and std::errc::result_out_of_range elsewhere in this text
};
You should inspect the values of ptr and ec when the function returns:
#include <iostream>
#include <string>
#include <charconv>
// Parses the non-numeric text "abc" with std::from_chars and inspects the
// returned error code (instead of expecting an exception) to report failure.
int main()
{
    const std::string_view text = "abc";
    const char* const first = text.data();
    const char* const last = first + text.size();

    int parsed; // only meaningful when the error code reports success
    const std::from_chars_result outcome = std::from_chars(first, last, parsed);

    const bool not_a_number = (outcome.ec == std::errc::invalid_argument);
    if (not_a_number) {
        std::cout << "Could not convert.";
    }
}
Unfortunately, there is no standard way that would throw an exception for you but std::from_chars has a return value code that you may use:
#include <charconv>
#include <stdexcept>
/// Throwing wrapper around std::from_chars.
///
/// @param first  start of the character range to parse
/// @param last   one past the end of the character range
/// @param t      receives the parsed value
/// @param args   extra arguments forwarded to std::from_chars (e.g. a base)
/// @throws std::invalid_argument when from_chars reports std::errc::invalid_argument
/// @throws std::out_of_range     when from_chars reports std::errc::result_out_of_range
template <class T, class... Args>
void from_chars_throws(const char* first, const char* last, T &t, Args... args) {
    const std::from_chars_result outcome = std::from_chars(first, last, t, args...);

    // The two error codes are mapped to the exception types std::stoi uses.
    switch (outcome.ec) {
        case std::errc::invalid_argument:
            throw std::invalid_argument{"invalid_argument"};
        case std::errc::result_out_of_range:
            throw std::out_of_range{"out_of_range"};
        default:
            break;
    }
}
Obviously you can create svtoi, svtol from this, but the advantage of "extending" from_chars is that you only need a single templated function.
Building on @Ron's and @Holt's excellent answers, here's a small wrapper around std::from_chars() that returns an optional (std::nullopt when the input fails to parse).
#include <charconv>
#include <optional>
#include <string_view>
/// Parses the leading decimal integer of `input` with std::from_chars.
///
/// @param input  characters to parse
/// @return the parsed value, or std::nullopt when from_chars reports
///         std::errc::invalid_argument or std::errc::result_out_of_range
std::optional<int> to_int(const std::string_view & input)
{
    const char* const begin = input.data();
    const char* const end = begin + input.size();

    int value; // read only on the success path below
    const std::errc status = std::from_chars(begin, end, value).ec;

    const bool failed = status == std::errc::invalid_argument
                     || status == std::errc::result_out_of_range;
    return failed ? std::optional<int>{} : std::optional<int>{value};
}
Related
I am looking for a sample how to pack ext types with msgpack in C++ as I am not sure about how to do this.
The only information I found is located in this section https://github.com/msgpack/msgpack-c/wiki/v2_0_cpp_packer#pack-manually.
Assume I want to pack an object of type Foo as a msgpack ext type with an adaptor class template. How do I use pack_ext and pack_ext_body? Do I have to create a "sub packer" within the template, pack my Foo data manually, and then pass the size of the binary data and the data itself to pack_ext and pack_ext_body? It would be great if some C++ expert could give me a minimal example.
// Sketch from the question (not compilable as written): an adaptor
// specialization for Foo whose body only lists the two packer members the
// asker wants to use, shown with their parameter lists.
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
template<>
struct pack<Foo> {
template <typename Stream>
packer<Stream>& operator()(msgpack::packer<Stream>& o, Foo const& v) const {
// how to use ?
o.pack_ext(size_t l, int8_t type); // placeholder: parameters are spelled as declarations, not arguments
o.pack_ext_body(const char* b, size_t l); // placeholder, as above
}
}
}
}
Thanks in advance!
I got it to work with my "sub packer" idea. I do not know if it is a good and elegant solution but at least it is working:
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {

// Adaptor that packs a Foo as a msgpack ext value (ext type 1).
// The ext body is itself msgpack data: a two-entry map {"name": v.name, "bar": v.bar}
// produced by a temporary packer writing into an sbuffer.
template<>
struct pack<Foo> {
    template <typename Stream>
    packer<Stream>& operator()(msgpack::packer<Stream>& o, Foo const& v) const {
        // Serialize the fields into a scratch buffer first, because pack_ext
        // needs the body length before the body is written.
        msgpack::sbuffer sbuf;
        msgpack::packer<msgpack::sbuffer> sub_packer(sbuf);
        sub_packer.pack_map(2);
        sub_packer.pack("name");
        sub_packer.pack(v.name);
        sub_packer.pack("bar");
        sub_packer.pack(v.bar);

        // get binary data from sub_packer's sbuffer
        size_t l = sbuf.size();
        const char* b = sbuf.data();

        // pass ext type and binary data to the original packer
        o.pack_ext(l, 1);
        o.pack_ext_body(b, l);
        return o;
    }
}; // the class definition must end with ';' (it was missing)

} // namespace adaptor
} // MSGPACK_API_VERSION_NAMESPACE
You can use msgpack::type::ext or msgpack::type::ext_ref.
They are defined at https://github.com/msgpack/msgpack-c/blob/master/include/msgpack/v1/adaptor/ext.hpp
Here is an example for msgpack::type::ext:
#include <sstream>
#include <cassert>
#include <msgpack.hpp>
int main() {
std::string val = "ABC";
msgpack::type::ext e1(42, val.data(), val.size());
assert(e1.type() == 42);
assert(e1.size() == 3);
assert(e1.data()[0] == 'A');
assert(e1.data()[1] == 'B');
assert(e1.data()[2] == 'C');
std::stringstream ss;
msgpack::pack(ss, e1);
auto oh = msgpack::unpack(ss.str().data(), ss.str().size());
auto e2 = oh.get().as<msgpack::type::ext>();
assert(e1 == e2);
}
Live demo:
https://wandbox.org/permlink/ESmreWNBqDdXbKSf
You can also use msgpack::type::ext_ref.
It can avoid copy operation but you need to keep the original buffer, in this case val and oh.
#include <sstream>
#include <cassert>
#include <msgpack.hpp>
int main() {
std::string val = "\x2a"; // type 42
val += "ABC";
msgpack::type::ext_ref e1(val.data(), val.size());
assert(e1.type() == 42);
assert(e1.size() == 3);
assert(e1.data()[0] == 'A');
assert(e1.data()[1] == 'B');
assert(e1.data()[2] == 'C');
std::stringstream ss;
msgpack::pack(ss, e1);
auto oh = msgpack::unpack(ss.str().data(), ss.str().size());
auto e2 = oh.get().as<msgpack::type::ext_ref>();
assert(e1 == e2);
}
Live demo:
https://wandbox.org/permlink/uYr5MFjLJqPHQgj6
I have following function template to return specific type of data from a VARIANT, according to the given typename.
// Tries to choose which VARIANT member to return by comparing typeid(T).name()
// with hard-coded type-name strings at run time.
// Both return statements are compiled for every T, so instantiating with
// T = unsigned int is rejected: Variant.bstrVal cannot be returned as unsigned int.
// NOTE(review): the strings compared against look compiler-specific — confirm before relying on them.
template <typename T>
T VariantGetValue(VARIANT Variant) {
std::string S(typeid(T).name());
if (S == "wchar_t* __ptr64") { return Variant.bstrVal; }
if (S == "unsigned int") { return Variant.uintVal; }
// no return statement is reached when neither name matches
}
So, as I needed to return an unsigned int type from a VARIANT, I tried using above function like:
return VariantGetValue<unsigned int>(CV);
But, unfortunately compiler seems to ignore if (S == "....) case here and gives me error:
C2440 - 'return': cannot convert from 'BSTR' to 'unsigned int'
But, if I remove the line if (S == "wchar_t* __ptr64") { return Variant.bstrVal; }, compiler only gives me following warning:
C4715 - 'VariantGetValue': not all control paths return a value
Can I suppress this error and continue? Is it safe or are there any alternate ways to do this without compiler errors?
You cannot have multiple return types based on a branch the code is going to take at runtime. Your best bet here is to work with explicit specializations.
// Primary template is deleted: only the explicit specializations below can be called.
template < typename T >
T VariantGetValue(VARIANT) = delete;
// Returns the unsigned int stored in Variant; throws std::runtime_error("bad get")
// when Variant's type tag differs from the tag InitVariantFromUInt32 produces.
template <>
unsigned int VariantGetValue<unsigned int>(VARIANT Variant)
{
VARIANT var;
InitVariantFromUInt32(unsigned int{}, &var); // build a reference variant only to obtain its vt tag
if (Variant.vt != var.vt)
throw std::runtime_error("bad get");
return Variant.uintVal;
}
// Returns the BSTR stored in Variant; the type check is left as a placeholder
// in this sketch and must be filled in before the code can compile.
template <>
BSTR VariantGetValue<BSTR>(VARIANT Variant)
{
if (/* check that Variant stores wchar_t* __ptr64 */)
throw std::runtime_error("bad get");
return Variant.bstrVal;
}
This, by the way, is what std::get does for std::variant.
#include <iostream>
#include <variant>
using Variant = std::variant<int,std::string>;
int main()
{
Variant v(13);
std::cout << std::get<int>(v) << '\n'; // 13
//std::cout << std::get<std::string>(v) << '\n'; // std::bad_variant_access
}
I have implemented a full example to perhaps clarify some questions raised in the comments.
#include <iostream>
#include <stdlib.h>
#include <string.h>
// Implement a mock VARIANT, don't take this code too seriously
typedef unsigned int VARTYPE;
typedef char* BSTR;

// Type tags stored in VARIANT::vt. VT_EMPTY is appended (not prepended) so that
// VT_UI4 and VT_BSTR keep their original numeric values.
enum { VT_UI4, VT_BSTR, VT_EMPTY };

// Tagged union: `vt` says which union member is currently meaningful.
struct VARIANT
{
    // Start in a well-defined "holds nothing" state so vt can always be read.
    VARIANT() : vt(VT_EMPTY), bstrVal(nullptr) {}
    VARTYPE vt;
    union {
        unsigned int uintVal;
        BSTR bstrVal;
    };
};

// Frees the string owned by *v, if it currently holds one.
// Only a VT_BSTR variant owns memory; for any other tag bstrVal overlaps
// unrelated data and must not be passed to delete[].
static void ReleaseVariantString(VARIANT * v)
{
    if (v->vt == VT_BSTR) {
        delete[] v->bstrVal;
        v->bstrVal = nullptr;
    }
}

// Stores the unsigned value `u` in *v, releasing any string it held before.
void InitVariantFromUInt32(unsigned int u, VARIANT * v)
{
    ReleaseVariantString(v);
    v->vt = VT_UI4;
    v->uintVal = u;
}

// Stores a freshly allocated copy of the NUL-terminated string `s` in *v,
// releasing any string it held before.
void InitVariantFromString(char const * s, VARIANT * v)
{
    // +1 for the terminating NUL that strcpy writes.
    char * copy = new char[strlen(s) + 1];
    strcpy(copy, s);
    // Release only after copying, so `s` may alias the string *v already owns.
    ReleaseVariantString(v);
    v->vt = VT_BSTR;
    v->bstrVal = copy;
}
// VARIANT get value functions

// Primary template: deleted, so requesting an unsupported type is a compile error.
template < typename T >
T VariantGetValue(VARIANT) = delete;

// Returns the stored unsigned int; throws std::runtime_error("bad get")
// when the variant's tag is not VT_UI4.
template <>
unsigned int VariantGetValue<unsigned int>(VARIANT Variant)
{
    const bool holds_uint = (Variant.vt == VT_UI4);
    if (holds_uint) {
        return Variant.uintVal;
    }
    throw std::runtime_error("bad get");
}

// Returns the stored string pointer; throws std::runtime_error("bad get")
// when the variant's tag is not VT_BSTR.
template <>
BSTR VariantGetValue<BSTR>(VARIANT Variant)
{
    const bool holds_string = (Variant.vt == VT_BSTR);
    if (holds_string) {
        return Variant.bstrVal;
    }
    throw std::runtime_error("bad get");
}
int main()
{
VARIANT v;
InitVariantFromUInt32(14, &v);
std::cout << VariantGetValue<unsigned int>(v) << '\n';
try {
std::cout << VariantGetValue<BSTR>(v) << '\n';
} catch (std::exception const& e) {
std::cout << "Get failed!" << '\n';
}
VARIANT w;
InitVariantFromString("Hello World!", &w);
std::cout << VariantGetValue<BSTR>(w) << '\n';
//std::cout << VariantGetValue<bool>(w) << '\n'; // error: call to deleted function 'VariantGetValue'
}
If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?
std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// std::stringbuf that exposes the start of its put area through the
// protected pbase() accessor, avoiding the copy made by str().
struct my_stringbuf : std::stringbuf {
    const char* my_str() const // pptr might be useful too
    {
        const char* const put_area_start = this->pbase();
        return put_area_start;
    }
};
// Writes three floats, space separated, through an ostream backed by
// my_stringbuf, then prints the buffer contents without copying them.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    my_stringbuf buf;
    std::ostream out(&buf);
    for (const float value : values) {
        out << value << ' ';
    }
    out << std::ends; // NUL-terminate so the raw pointer can be printed as a C string
    std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Same output as the stringbuf demo, using std::ostrstream, whose str()
// hands out a pointer to the internal buffer.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::ostrstream out;
    for (const float value : values) {
        out << value << ' ';
    }
    out << std::ends; // NUL-terminate before treating the buffer as a C string
    const char* text = out.str(); // direct access!
    std::cout << text << '\n';
    out.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Generates the space-separated list directly into a std::string with
// Boost.Karma, without any stream in between.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::string text;
    auto sink = back_inserter(text);
    karma::generate(sink, karma::double_ % ' ', values);
    std::cout << text << '\n'; // here's your string
}
This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.
+1 for the Boost Karma answer by @Cubbi and for the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Writes "Hello world ", then 42 in hexadecimal with its base prefix, a space,
// the boolean `true` as text, and a newline. The formatting flags it sets
// (hex, showbase, boolalpha) remain set on `os` afterwards.
void foo(std::ostream& os) {
    os << "Hello world ";
    os << std::hex << std::showbase;
    os << 42;
    os << " ";
    os << std::boolalpha;
    os << (1==1);
    os << "\n";
}
#include <iostream>
int main() {
std::string output;
output.reserve(100); // optionally optimize if you know roughly how large output is gonna, or know what minimal size it will require
{
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, std::vector<char>, etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
int main() {
char output[100] = {0};
{
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true
I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <ostream>
/// Output-only string stream whose accumulated text can be moved out
/// (take_str) instead of copied.
///
/// The class is its own stream buffer. It keeps no put area: every write
/// reaches overflow() (single character) or xsputn() (character run) and is
/// appended straight to m_str, so m_str always holds exactly the text written
/// so far and no pointer into it is ever cached.
///
/// Note: clear() below hides std::basic_ios::clear(); it empties the text and
/// does not touch the stream's error state.
template <typename char_type>
class basic_outstringstream : private std::basic_streambuf<char_type, std::char_traits<char_type>>,
                              public std::basic_ostream<char_type, std::char_traits<char_type>>
{
    using traits_type = std::char_traits<char_type>;
    using base_buf_type = std::basic_streambuf<char_type, traits_type>;
    using base_stream_type = std::basic_ostream<char_type, traits_type>;
    using int_type = typename base_buf_type::int_type;

    std::basic_string<char_type> m_str;

    /// Appends one character; eof is acknowledged without storing anything.
    int_type overflow(int_type ch) override
    {
        if (traits_type::eq_int_type(ch, traits_type::eof()))
            return traits_type::not_eof(ch);
        m_str.push_back(traits_type::to_char_type(ch));
        return ch;
    }

    /// Appends a run of `count` characters in one step.
    std::streamsize xsputn(const char_type* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Creates an empty stream with capacity reserved for `reserveSize` characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Creates a stream whose initial text is taken over from `str`.
    explicit basic_outstringstream(std::basic_string<char_type>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Creates a stream whose initial text is a copy of `str`.
    explicit basic_outstringstream(const std::basic_string<char_type>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// Read-only access to the text written so far.
    const std::basic_string<char_type>& str() const
    {
        return m_str;
    }

    /// Moves the accumulated text out and leaves the stream empty and reusable.
    /// Returned by value (previously an rvalue reference to the member) so the
    /// stream is in a defined empty state even if the caller ignores the result.
    std::basic_string<char_type> take_str()
    {
        std::basic_string<char_type> taken = std::move(m_str);
        m_str.clear(); // a moved-from string is only "valid but unspecified"
        return taken;
    }

    /// Discards the accumulated text.
    void clear()
    {
        m_str.clear();
    }
};
using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> (extraction) operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.
I adapted the very good #Kuba answer to fix some issues (unfortunately he's currently unresponsive). In particular:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of possible undefined behavior using string::front() and string::back() as pointed by #LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
    /// Output-only string stream that owns its stream buffer as a member and
    /// lets the caller move the accumulated text out with take_str().
    ///
    /// Note: clear() hides std::basic_ios::clear(); it empties the text and
    /// does not touch the stream's error state.
    template <typename CharT>
    class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
    {
        using traits_type = std::char_traits<CharT>;
        using base_stream_type = std::basic_ostream<CharT, traits_type>;

        /// Stream buffer whose put area is the storage of m_str.
        /// m_str.size() is the size of the put area; the number of characters
        /// actually written is size() (pptr - pbase).
        class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
        {
            using base_buf_type = std::basic_streambuf<CharT, traits_type>;
            using int_type = typename base_buf_type::int_type;

        private:
            /// Advances the put pointer by `off`, in int-sized steps.
            void safe_pbump(std::streamsize off)
            {
                // pbump doesn't support 64 bit offsets
                // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
                int maxbump;
                if (off > 0)
                    maxbump = std::numeric_limits<int>::max();
                else if (off < 0)
                    maxbump = std::numeric_limits<int>::min();
                else // == 0
                    return;
                while (std::abs(off) > std::numeric_limits<int>::max())
                {
                    this->pbump(maxbump);
                    off -= maxbump;
                }
                this->pbump(static_cast<int>(off));
            }

            /// Points the put area at m_str's storage with `used` characters
            /// already counted as written.
            void reset_put_area(std::size_t used)
            {
                CharT * const begin = const_cast<CharT *>(m_str.data());
                this->setp(begin, begin + m_str.size());
                this->safe_pbump(static_cast<std::streamsize>(used));
            }

            /// Treats the whole current content of m_str as already written.
            void init()
            {
                reset_put_area(m_str.size());
            }

        protected:
            /// Called when the put area is full: grows m_str and stores `ch`.
            int_type overflow(int_type ch) override
            {
                if (traits_type::eq_int_type(ch, traits_type::eof()))
                    return traits_type::not_eof(ch);

                // Measure the written length BEFORE resizing: resize may
                // reallocate and invalidate pbase()/pptr().
                const std::size_t used = this->size();
                m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);
                reset_put_area(used);

                *this->pptr() = traits_type::to_char_type(ch);
                this->pbump(1);
                return ch;
            }

        public:
            /// Empty buffer with capacity reserved for `reserveSize` characters.
            buffer(std::size_t reserveSize)
            {
                m_str.reserve(reserveSize);
                init();
            }

            /// Buffer whose initial content is taken over from `str`.
            buffer(std::basic_string<CharT>&& str)
                : m_str(std::move(str))
            {
                init();
            }

            /// Buffer whose initial content is a copy of `str`.
            buffer(const std::basic_string<CharT>& str)
                : m_str(str)
            {
                init();
            }

        public:
            /// Number of characters written so far.
            size_t size() const
            {
                return static_cast<size_t>(this->pptr() - this->pbase());
            }

#if __cplusplus >= 201703L
            /// View of the characters written so far (valid until the next write).
            std::basic_string_view<CharT> str() const
            {
                return std::basic_string_view<CharT>(m_str.data(), size());
            }
#endif

            /// Moves the written text out and leaves the buffer empty and reusable.
            std::basic_string<CharT> take_str()
            {
                // Resize the string to actual used buffer size
                m_str.resize(size());
                // Must be basic_string<CharT>, not std::string, so that the
                // wchar_t instantiation compiles.
                std::basic_string<CharT> ret = std::move(m_str);
                m_str.clear(); // a moved-from string is only "valid but unspecified"
                init();
                return ret;
            }

            /// Discards everything written so far.
            void clear()
            {
                m_str.clear();
                init();
            }

            /// Pointer to the first written character; only size() characters are meaningful.
            const CharT * data() const
            {
                return m_str.data();
            }

        private:
            std::basic_string<CharT> m_str;
        };

    public:
        /// Empty stream with capacity reserved for `reserveSize` characters.
        explicit basic_outstringstream(std::size_t reserveSize = 8)
            : base_stream_type(nullptr), m_buffer(reserveSize)
        {
            // m_buffer is constructed after the base class, so attach it here.
            this->rdbuf(&m_buffer);
        }

        /// Stream whose initial text is taken over from `str` (moved, not copied).
        explicit basic_outstringstream(std::basic_string<CharT>&& str)
            : base_stream_type(nullptr), m_buffer(std::move(str))
        {
            this->rdbuf(&m_buffer);
        }

        /// Stream whose initial text is a copy of `str`.
        explicit basic_outstringstream(const std::basic_string<CharT>& str)
            : base_stream_type(nullptr), m_buffer(str)
        {
            this->rdbuf(&m_buffer);
        }

#if __cplusplus >= 201703L
        /// View of the text written so far (valid until the next write).
        std::basic_string_view<CharT> str() const
        {
            return m_buffer.str();
        }
#endif

        /// Moves the written text out and leaves the stream empty and reusable.
        std::basic_string<CharT> take_str()
        {
            return m_buffer.take_str();
        }

        /// Pointer to the first written character; only size() characters are meaningful.
        const CharT * data() const
        {
            return m_buffer.data();
        }

        /// Number of characters written so far.
        size_t size() const
        {
            return m_buffer.size();
        }

        /// Discards everything written so far.
        void clear()
        {
            m_buffer.clear();
        }

    private:
        buffer m_buffer;
    };

    using outstringstream = basic_outstringstream<char>;
    using woutstringstream = basic_outstringstream<wchar_t>;
}
If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?
std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// std::stringbuf that exposes the start of its put area through the
// protected pbase() accessor, avoiding the copy made by str().
struct my_stringbuf : std::stringbuf {
    const char* my_str() const // pptr might be useful too
    {
        const char* const put_area_start = this->pbase();
        return put_area_start;
    }
};
// Writes three floats, space separated, through an ostream backed by
// my_stringbuf, then prints the buffer contents without copying them.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    my_stringbuf buf;
    std::ostream out(&buf);
    for (const float value : values) {
        out << value << ' ';
    }
    out << std::ends; // NUL-terminate so the raw pointer can be printed as a C string
    std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Same output as the stringbuf demo, using std::ostrstream, whose str()
// hands out a pointer to the internal buffer.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::ostrstream out;
    for (const float value : values) {
        out << value << ' ';
    }
    out << std::ends; // NUL-terminate before treating the buffer as a C string
    const char* text = out.str(); // direct access!
    std::cout << text << '\n';
    out.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Generates the space-separated list directly into a std::string with
// Boost.Karma, without any stream in between.
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::string text;
    auto sink = back_inserter(text);
    karma::generate(sink, karma::double_ % ' ', values);
    std::cout << text << '\n'; // here's your string
}
This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.
+1 for the Boost Karma answer by @Cubbi and for the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Writes "Hello world ", then 42 in hexadecimal with its base prefix, a space,
// the boolean `true` as text, and a newline. The formatting flags it sets
// (hex, showbase, boolalpha) remain set on `os` afterwards.
void foo(std::ostream& os) {
    os << "Hello world ";
    os << std::hex << std::showbase;
    os << 42;
    os << " ";
    os << std::boolalpha;
    os << (1==1);
    os << "\n";
}
#include <iostream>
int main() {
std::string output;
output.reserve(100); // optionally optimize if you know roughly how large output is gonna, or know what minimal size it will require
{
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, std::vector<char>, etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
int main() {
char output[100] = {0};
{
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true
I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <ostream>
/// Output-only string stream whose accumulated text can be moved out
/// (take_str) instead of copied.
///
/// The class is its own stream buffer. It keeps no put area: every write
/// reaches overflow() (single character) or xsputn() (character run) and is
/// appended straight to m_str, so m_str always holds exactly the text written
/// so far and no pointer into it is ever cached.
///
/// Note: clear() below hides std::basic_ios::clear(); it empties the text and
/// does not touch the stream's error state.
template <typename char_type>
class basic_outstringstream : private std::basic_streambuf<char_type, std::char_traits<char_type>>,
                              public std::basic_ostream<char_type, std::char_traits<char_type>>
{
    using traits_type = std::char_traits<char_type>;
    using base_buf_type = std::basic_streambuf<char_type, traits_type>;
    using base_stream_type = std::basic_ostream<char_type, traits_type>;
    using int_type = typename base_buf_type::int_type;

    std::basic_string<char_type> m_str;

    /// Appends one character; eof is acknowledged without storing anything.
    int_type overflow(int_type ch) override
    {
        if (traits_type::eq_int_type(ch, traits_type::eof()))
            return traits_type::not_eof(ch);
        m_str.push_back(traits_type::to_char_type(ch));
        return ch;
    }

    /// Appends a run of `count` characters in one step.
    std::streamsize xsputn(const char_type* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Creates an empty stream with capacity reserved for `reserveSize` characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Creates a stream whose initial text is taken over from `str`.
    explicit basic_outstringstream(std::basic_string<char_type>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Creates a stream whose initial text is a copy of `str`.
    explicit basic_outstringstream(const std::basic_string<char_type>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// Read-only access to the text written so far.
    const std::basic_string<char_type>& str() const
    {
        return m_str;
    }

    /// Moves the accumulated text out and leaves the stream empty and reusable.
    /// Returned by value (previously an rvalue reference to the member) so the
    /// stream is in a defined empty state even if the caller ignores the result.
    std::basic_string<char_type> take_str()
    {
        std::basic_string<char_type> taken = std::move(m_str);
        m_str.clear(); // a moved-from string is only "valid but unspecified"
        return taken;
    }

    /// Discards the accumulated text.
    void clear()
    {
        m_str.clear();
    }
};
using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> (extraction) operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.
I adapted the very good #Kuba answer to fix some issues (unfortunately he's currently unresponsive). In particular:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of possible undefined behavior using string::front() and string::back() as pointed by #LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
/// Output-only stream that formats into an internal string which can be
/// handed to the caller without copying (see take_str()).
///
/// The stream owns a private stream buffer whose put area is the storage of a
/// std::basic_string. The string is grown geometrically on overflow, so its
/// size() is usually larger than the number of characters written; the
/// logical length is always `pptr() - pbase()`.
template <typename CharT>
class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;

    /// Stream buffer writing directly into the storage of m_str.
    class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
    {
        using base_buf_type = std::basic_streambuf<CharT, traits_type>;
        using int_type = typename base_buf_type::int_type;

    private:
        /// Advance (or rewind) the put pointer by `off`, in int-sized steps.
        /// pbump() only takes an int, so larger offsets must be split:
        /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
        void safe_pbump(std::streamsize off)
        {
            const int int_max = std::numeric_limits<int>::max();
            while (off > int_max)
            {
                this->pbump(int_max);
                off -= int_max;
            }
            while (off < -int_max)
            {
                this->pbump(-int_max);
                off += int_max;
            }
            // The remainder now fits in an int (possibly zero).
            this->pbump(static_cast<int>(off));
        }

        /// Point the put area at the whole of m_str and mark all of its
        /// current characters as already written.
        void init()
        {
            CharT* const base = const_cast<CharT*>(m_str.data());
            this->setp(base, base + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(m_str.size()));
        }

    protected:
        /// Grow the string (1, then doubling) and store `ch` in the new room.
        /// @return `ch` on success; a non-eof value when `ch` is eof.
        int_type overflow(int_type ch) override
        {
            if (traits_type::eq_int_type(ch, traits_type::eof()))
                return traits_type::not_eof(ch);

            // Measure the written length BEFORE resizing: resize() may
            // reallocate and leave pbase()/pptr() dangling.
            const std::size_t used = this->size();
            m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);

            CharT* const base = const_cast<CharT*>(m_str.data());
            this->setp(base, base + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(used));

            *this->pptr() = traits_type::to_char_type(ch);
            this->pbump(1);
            return ch;
        }

    public:
        /// Start empty with capacity for at least `reserveSize` characters.
        explicit buffer(std::size_t reserveSize)
        {
            m_str.reserve(reserveSize);
            init();
        }

        /// Adopt `str`; later output is appended after its contents.
        explicit buffer(std::basic_string<CharT>&& str)
            : m_str(std::move(str))
        {
            init();
        }

        /// Copy `str`; later output is appended after its contents.
        explicit buffer(const std::basic_string<CharT>& str)
            : m_str(str)
        {
            init();
        }

    public:
        /// Number of characters written so far (not m_str.size()).
        std::size_t size() const
        {
            return static_cast<std::size_t>(this->pptr() - this->pbase());
        }

#if __cplusplus >= 201703L
        /// View of the written characters; invalidated by any later write,
        /// take_str() or clear().
        std::basic_string_view<CharT> str() const
        {
            return std::basic_string_view<CharT>(m_str.data(), size());
        }
#endif

        /// Move the written text out and reset the buffer to empty.
        std::basic_string<CharT> take_str()
        {
            // Trim the growth padding so the result has the exact length.
            m_str.resize(size());
            // Must be basic_string<CharT>, not std::string, or the wide
            // instantiation does not compile.
            std::basic_string<CharT> ret = std::move(m_str);
            // A moved-from string is only "valid but unspecified".
            m_str.clear();
            init();
            return ret;
        }

        /// Discard everything written so far.
        void clear()
        {
            m_str.clear();
            init();
        }

        /// Pointer to the first written character; NOT guaranteed to be
        /// NUL-terminated at size().
        const CharT* data() const
        {
            return m_str.data();
        }

    private:
        std::basic_string<CharT> m_str;
    };

public:
    /// Create an empty stream with room for `reserveSize` characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(nullptr), m_buffer(reserveSize)
    {
        // m_buffer is constructed after the base, so attach it here.
        this->rdbuf(&m_buffer);
    }

    /// Create a stream that takes over `str` and appends to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(nullptr), m_buffer(std::move(str)) // move, don't copy
    {
        this->rdbuf(&m_buffer);
    }

    /// Create a stream that starts with a copy of `str` and appends to it.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(nullptr), m_buffer(str)
    {
        this->rdbuf(&m_buffer);
    }

#if __cplusplus >= 201703L
    /// Non-owning view of the text written so far.
    std::basic_string_view<CharT> str() const
    {
        return m_buffer.str();
    }
#endif

    /// Move the text out of the stream, leaving the stream empty and reusable.
    std::basic_string<CharT> take_str()
    {
        return m_buffer.take_str();
    }

    /// Pointer to the written characters (length is size()).
    const CharT* data() const
    {
        return m_buffer.data();
    }

    /// Number of characters written so far.
    std::size_t size() const
    {
        return m_buffer.size();
    }

    /// Discard the text written so far. Note: this hides
    /// std::basic_ios::clear(), exactly as the original interface did.
    void clear()
    {
        m_buffer.clear();
    }

private:
    buffer m_buffer;
};

using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
}
If I construct a string made of a list of space separated floating point values using std::ostringstream:
std::ostringstream ss;
unsigned int s = floatData.size();
for(unsigned int i=0;i<s;i++)
{
ss << floatData[i] << " ";
}
Then I get the result in a std::string:
std::string textValues(ss.str());
However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.
Is there any way to construct the string without copying the entire content?
std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)
If you want to torture some string streams, you could access the buffer using the protected interface:
#include <iostream>
#include <sstream>
#include <vector>
// std::stringbuf with a public window onto its protected put area.
struct my_stringbuf : std::stringbuf {
    // Returns the start of the output sequence (pbase()). The data is only a
    // C string if the writer appended a terminator, e.g. with std::ends.
    const char* my_str() const
    {
        const char* const begin = this->pbase(); // pptr() would give the end
        return begin;
    }
};
// Demo: format floats through a plain std::ostream attached to my_stringbuf,
// then read the characters straight out of the buffer without copying.
int main()
{
    const std::vector<float> values = {1.1, -3.4, 1/7.0};

    my_stringbuf buf;
    std::ostream ss(&buf);
    for (const float value : values)
        ss << value << ' ';

    // Terminate the sequence so the raw buffer can be printed as a C string.
    ss << std::ends;
    std::cout << buf.my_str() << '\n';
}
The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.
#include <iostream>
#include <strstream>
#include <vector>
// Demo: std::ostrstream (deprecated) exposes its growing buffer directly.
int main()
{
    const std::vector<float> samples = {1.1, -3.4, 1/7.0};

    std::ostrstream out;
    for (const float sample : samples)
        out << sample << ' ';
    out << std::ends; // ostrstream does not NUL-terminate on its own

    const char* text = out.str(); // direct access!
    std::cout << text << '\n';

    // Undo the freeze that str() put on the buffer.
    out.freeze(false); // abomination
}
However, I think the cleanest (and the fastest) solution is boost.karma
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
// Demo: Boost.Karma generates the space-separated list directly into a
// std::string through a back-insert iterator.
int main()
{
    const std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::string text;

    // The generator expression is kept inline in the call on purpose rather
    // than being stored in a local variable.
    karma::generate(back_inserter(text), karma::double_ % ' ', values);

    std::cout << text << '\n'; // here's your string
}
This is now possible with C++20, with syntax like:
const std::string s = std::move(ss).str();
This is possible because the std::ostringstream class now has a str() overload that is rvalue-ref qualified:
basic_string<charT, traits, Allocator> str() &&; // since C++20
This was added in P0408, revision 7, which was adopted into C++20.
+1 for the Boost Karma answer by @Cubbi and for the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".
A more generic answer, though, is missing, and sits between these two.
It uses Boost Iostreams:
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
Here's a demo program:
Live On Coliru
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
namespace bio = boost::iostreams;
using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;
// any code that uses ostream
// Stand-in for "any code that uses ostream": writes a greeting, 42 in hex
// with a base prefix, and a textual boolean, followed by a newline.
// The hex, showbase and boolalpha flags are left set on `os`.
void foo(std::ostream& os) {
    os << "Hello world ";

    os << std::hex << std::showbase;
    os << 42;

    os << " ";

    os << std::boolalpha;
    os << (1==1);

    os << "\n";
}
#include <iostream>
int main() {
std::string output;
output.reserve(100); // optionally optimize if you know roughly how large output is gonna, or know what minimal size it will require
{
string_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Note that you can trivially replace the std::string with std::wstring, or std::vector<char> etc.
Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!
Live On Coliru
#include <boost/iostreams/device/array.hpp>
using array_buf = bio::stream_buffer<bio::basic_array<char>>;
// ...
int main() {
char output[100] = {0};
{
array_buf buf(output);
std::ostream os(&buf);
foo(os);
}
std::cout << "Output contains: " << output;
}
Both programs print:
Output contains: Hello world 0x2a true
I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?
#include <ostream>
/// Output-only stream that builds a std::basic_string which can be moved out
/// with take_str() instead of being copied.
///
/// The class is its own stream buffer (private base). It deliberately keeps
/// NO put area: every character goes through overflow()/xsputn() and is
/// appended to m_str. That keeps m_str exactly as long as the text written,
/// so str() can return it by const reference, and avoids taking the address
/// of front()/back() on an empty string.
template <typename CharT>
class basic_outstringstream : private std::basic_streambuf<CharT, std::char_traits<CharT>>,
                              public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_buf_type = std::basic_streambuf<CharT, traits_type>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;
    using int_type = typename base_buf_type::int_type;

    std::basic_string<CharT> m_str;

    /// Single-character write: append `ch` to the string.
    /// @return `ch` on success; a non-eof value when `ch` is eof.
    int_type overflow(int_type ch) override
    {
        if (traits_type::eq_int_type(ch, traits_type::eof()))
            return traits_type::not_eof(ch);
        m_str.push_back(traits_type::to_char_type(ch));
        return ch;
    }

    /// Bulk write: append `count` characters starting at `s` in one call.
    /// @return the number of characters consumed.
    std::streamsize xsputn(const CharT* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;
        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:
    /// Create an empty stream with room for `reserveSize` characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    /// Create a stream that takes over `str`; output is appended to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    /// Create a stream that starts with a copy of `str`; output is appended.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    /// The text written so far. The reference stays valid until the next
    /// write, take_str() or clear().
    const std::basic_string<CharT>& str() const
    {
        return m_str;
    }

    /// Move the text out of the stream and leave the stream empty and
    /// reusable. Returned by value so the stream never depends on what the
    /// caller does with a reference into its internals.
    std::basic_string<CharT> take_str()
    {
        std::basic_string<CharT> taken = std::move(m_str);
        // A moved-from string is only "valid but unspecified".
        m_str.clear();
        return taken;
    }

    /// Discard the text written so far. Note: this hides
    /// std::basic_ios::clear(), exactly as the original interface did.
    void clear()
    {
        m_str.clear();
    }
};

using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.
No, there is no way to avoid a string copy (stringbuf has the same interface)
It will never matter. It's actually more efficient that way. (I will try to explain this)
Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).
Adding characters is easy - we simply use push_back on the underlying string.
OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.
However, we have a problem - the contract we're keeping that says we'll always have a std::string available.
So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every << operation on the stream.
Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.
The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.
You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.
For this tiny improvement in performance, we must pay the cost of:
developing a (quite complex, when you factor in locale management) software component.
suffering the loss of flexibility of having a streambuf which only supports output operations.
Laying landmines for future developers to step on.
I adapted the very good answer by @Kuba to fix some issues (unfortunately he is currently unresponsive). In particular, I:
added a safe_pbump to handle 64 bit offsets;
return a string_view instead of string (internal string doesn't have the right size of the buffer);
resize the string to current buffer size on the move semantics take_str method;
fixed take_str method move semantics with init before return;
removed a useless memcpy on init method;
renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
used string::data() and pointer arithmetic instead of string::front() and string::back(), which are possible undefined behavior on an empty string, as pointed out by @LightnessRacesinOrbit;
Implementation with streambuf composition.
#pragma once
#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif
namespace usr
{
/// Output-only stream that formats into an internal string which can be
/// handed to the caller without copying (see take_str()).
///
/// The stream owns a private stream buffer whose put area is the storage of a
/// std::basic_string. The string is grown geometrically on overflow, so its
/// size() is usually larger than the number of characters written; the
/// logical length is always `pptr() - pbase()`.
template <typename CharT>
class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;

    /// Stream buffer writing directly into the storage of m_str.
    class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
    {
        using base_buf_type = std::basic_streambuf<CharT, traits_type>;
        using int_type = typename base_buf_type::int_type;

    private:
        /// Advance (or rewind) the put pointer by `off`, in int-sized steps.
        /// pbump() only takes an int, so larger offsets must be split:
        /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
        void safe_pbump(std::streamsize off)
        {
            const int int_max = std::numeric_limits<int>::max();
            while (off > int_max)
            {
                this->pbump(int_max);
                off -= int_max;
            }
            while (off < -int_max)
            {
                this->pbump(-int_max);
                off += int_max;
            }
            // The remainder now fits in an int (possibly zero).
            this->pbump(static_cast<int>(off));
        }

        /// Point the put area at the whole of m_str and mark all of its
        /// current characters as already written.
        void init()
        {
            CharT* const base = const_cast<CharT*>(m_str.data());
            this->setp(base, base + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(m_str.size()));
        }

    protected:
        /// Grow the string (1, then doubling) and store `ch` in the new room.
        /// @return `ch` on success; a non-eof value when `ch` is eof.
        int_type overflow(int_type ch) override
        {
            if (traits_type::eq_int_type(ch, traits_type::eof()))
                return traits_type::not_eof(ch);

            // Measure the written length BEFORE resizing: resize() may
            // reallocate and leave pbase()/pptr() dangling.
            const std::size_t used = this->size();
            m_str.resize(m_str.empty() ? 1 : m_str.size() * 2);

            CharT* const base = const_cast<CharT*>(m_str.data());
            this->setp(base, base + m_str.size());
            this->safe_pbump(static_cast<std::streamsize>(used));

            *this->pptr() = traits_type::to_char_type(ch);
            this->pbump(1);
            return ch;
        }

    public:
        /// Start empty with capacity for at least `reserveSize` characters.
        explicit buffer(std::size_t reserveSize)
        {
            m_str.reserve(reserveSize);
            init();
        }

        /// Adopt `str`; later output is appended after its contents.
        explicit buffer(std::basic_string<CharT>&& str)
            : m_str(std::move(str))
        {
            init();
        }

        /// Copy `str`; later output is appended after its contents.
        explicit buffer(const std::basic_string<CharT>& str)
            : m_str(str)
        {
            init();
        }

    public:
        /// Number of characters written so far (not m_str.size()).
        std::size_t size() const
        {
            return static_cast<std::size_t>(this->pptr() - this->pbase());
        }

#if __cplusplus >= 201703L
        /// View of the written characters; invalidated by any later write,
        /// take_str() or clear().
        std::basic_string_view<CharT> str() const
        {
            return std::basic_string_view<CharT>(m_str.data(), size());
        }
#endif

        /// Move the written text out and reset the buffer to empty.
        std::basic_string<CharT> take_str()
        {
            // Trim the growth padding so the result has the exact length.
            m_str.resize(size());
            // Must be basic_string<CharT>, not std::string, or the wide
            // instantiation does not compile.
            std::basic_string<CharT> ret = std::move(m_str);
            // A moved-from string is only "valid but unspecified".
            m_str.clear();
            init();
            return ret;
        }

        /// Discard everything written so far.
        void clear()
        {
            m_str.clear();
            init();
        }

        /// Pointer to the first written character; NOT guaranteed to be
        /// NUL-terminated at size().
        const CharT* data() const
        {
            return m_str.data();
        }

    private:
        std::basic_string<CharT> m_str;
    };

public:
    /// Create an empty stream with room for `reserveSize` characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(nullptr), m_buffer(reserveSize)
    {
        // m_buffer is constructed after the base, so attach it here.
        this->rdbuf(&m_buffer);
    }

    /// Create a stream that takes over `str` and appends to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(nullptr), m_buffer(std::move(str)) // move, don't copy
    {
        this->rdbuf(&m_buffer);
    }

    /// Create a stream that starts with a copy of `str` and appends to it.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(nullptr), m_buffer(str)
    {
        this->rdbuf(&m_buffer);
    }

#if __cplusplus >= 201703L
    /// Non-owning view of the text written so far.
    std::basic_string_view<CharT> str() const
    {
        return m_buffer.str();
    }
#endif

    /// Move the text out of the stream, leaving the stream empty and reusable.
    std::basic_string<CharT> take_str()
    {
        return m_buffer.take_str();
    }

    /// Pointer to the written characters (length is size()).
    const CharT* data() const
    {
        return m_buffer.data();
    }

    /// Number of characters written so far.
    std::size_t size() const
    {
        return m_buffer.size();
    }

    /// Discard the text written so far. Note: this hides
    /// std::basic_ios::clear(), exactly as the original interface did.
    void clear()
    {
        m_buffer.clear();
    }

private:
    buffer m_buffer;
};

using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;
}