I have this simple code:
#include <boost/unordered_set.hpp>
// Minimal reproducer: inserts the integers 0..9999 into a boost::unordered_set<int>.
// argc and argv are not used.
int main (int argc, char* argv[])
{
boost::unordered_set<int> bset;
// One insert() per iteration, 10000 in total; the Purify report quoted below
// lists insert() called from main as the origin of each UMR.
for (int i=0; i<10000; i++) {
bset.insert(i);
}
return 0;
}
When I create a purify build and run, I am getting the following UMR (uninitialized memory read) error. I could not figure out how to get rid of this.
**** Purify instrumented app_pure (pid 21943) ****
UMR: Uninitialized memory read (10000 times):
This is occurring while in thread 21943:
std::pair>, bool > boost::unordered::detail::table_impl, int, boost::hash, std::equal_to >>::emplace_impl>(int const&, boost::unordered::detail::set, int, boost::hash, std::equal_to > const&) [unique.hpp:414]
std::pair>, bool > boost::unordered::detail::table_impl, int, boost::hash, std::equal_to >>::emplace(boost::unordered::detail::emplace_args1, int, boost::hash, std::equal_to >> const&) [unique.hpp:393]
std::pair, boost::unordered::detail::ptr_node const* >, bool > boost::unordered::unordered_set, std::equal_to, std::allocator >::emplace(int const&) [unordered_set.hpp:277]
boost::unordered::unordered_set, std::equal_to, std::allocator >::insert(int const&) [unordered_set.hpp:380]
main [app.cxx:8]
__libc_start_main [libc.so.6]
_start [crt1.o]
Reading 8 bytes from 0x7fffffffd668 on the stack of thread 21943 (7 bytes at 0x7fffffffd669 uninit).
Address 0x7fffffffd668 is 56 bytes past start of local variable "pos" in function std::pair>, bool > boost::unordered::detail::table_impl, int, boost::hash, std::equal_to >>::emplace_impl>(int const&, boost::unordered::detail::set, int, boost::hash, std::equal_to > const&).
My boost version is 1.54.0
My GCC version is 4.8.3
Purify version is 7.3 (Purify 7.3-mod1 150930 Linux (64-bit))
Can anyone please give any hint on how to solve this?
Thanks
Related
With reference to the following code
#include <cassert>
#include <vector>
#include <dlfcn.h>
#include <limits>
#include <map>
#include <algorithm>
#include <iostream>
using std::cout;
using std::endl;
using std::vector;
/*
* Overload the malloc call
*/
// Largest total of tracked allocation sizes seen so far; updated by track_max_usage().
int max_heap_usage = 0;
// Bookkeeping table: allocated address (as uintptr_t) -> size recorded for it.
// NOTE(review): declared as a reference but given no initializer.
std::map<uintptr_t, int>& heap_memory_map;
// Forward declaration so the allocation hooks below can call it.
// NOTE(review): the map is taken by value here, while the definition further
// down takes it by reference.
void track_max_usage(std::map<uintptr_t, int> heap_memory_map,
int& max_heap_usage);
/*
* Replacement for malloc. Forwards the request to the next "malloc" symbol
* found through dlsym(RTLD_NEXT, ...), records the returned address and the
* requested size in heap_memory_map, refreshes max_heap_usage and returns the
* pointer.
*
* NOTE(review): inserting into heap_memory_map may itself allocate; if that
* allocation is routed back to this function it re-enters itself without
* bound -- confirm for the target toolchain.
*/
void* malloc(size_t size) {
// get the original malloc function call
// (function-local static: the lookup runs once, on the first call)
static auto original_malloc = (decltype(&malloc)) dlsym(RTLD_NEXT, "malloc");
// Get the pointer from malloc
void* pointer = original_malloc(size);
// the map is keyed by the integer value of the address
uintptr_t pointer_handle = reinterpret_cast<uintptr_t>(pointer);
// assert that the pointer does not already exist in the memory map
assert("memory should not exist in memory map before allocation" &&
heap_memory_map.find(pointer_handle) == heap_memory_map.end());
// add to bookkeeping (size_t is converted to the map's int value type here)
heap_memory_map[pointer_handle] = size;
track_max_usage(heap_memory_map, max_heap_usage);
return pointer;
}
/*
* Replacement for calloc. Forwards the request to the next "calloc" symbol
* found through dlsym(RTLD_NEXT, ...), records the returned address with a
* size of size * count in heap_memory_map, refreshes max_heap_usage and
* returns the pointer.
*
* NOTE(review): inserting into heap_memory_map may itself allocate -- confirm
* that this cannot re-enter the allocation hooks on the target toolchain.
*/
void* calloc(size_t count, size_t size) {
// get the original calloc
// (function-local static: the lookup runs once, on the first call)
static auto original_calloc = (decltype(&calloc)) dlsym(RTLD_NEXT, "calloc");
// get the pointer returned by calloc
void* pointer = original_calloc(count, size);
// the map is keyed by the integer value of the address
uintptr_t pointer_handle = reinterpret_cast<uintptr_t>(pointer);
// assert that the memory has not been allocated before
assert("memory should not exist in the memory map before allocation" &&
heap_memory_map.find(pointer_handle) == heap_memory_map.end());
// add to bookkeeping (the size_t product is converted to the map's int value type)
heap_memory_map[pointer_handle] = size * count;
track_max_usage(heap_memory_map, max_heap_usage);
return pointer;
}
/*
* Replacement for free. Removes the pointer's entry from heap_memory_map and
* then forwards to the next "free" symbol found through dlsym(RTLD_NEXT, ...).
* The assert requires every pointer passed in to already be present in the map.
*/
void free(void* ptr) {
// get the original free function
// (function-local static: the lookup runs once, on the first call)
static auto original_free = (decltype(&free)) dlsym(RTLD_NEXT, "free");
uintptr_t pointer_handle = reinterpret_cast<uintptr_t>(ptr);
// assert that the heap memory map already has the pointer
assert("memory to be freed does not exist in the heap memory map" &&
heap_memory_map.find(pointer_handle) != heap_memory_map.end());
// remove from bookkeeping
heap_memory_map.erase(pointer_handle);
// free the memory
original_free(ptr);
}
/*
* Raises the recorded peak of tracked heap usage if the current total exceeds it.
*
* Inputs:
*   heap_memory_map - map from pointer value to the size recorded for that
*                     allocation
*   max_heap_usage  - in/out: the highest total observed so far
*
* The sizes of all entries currently in the map are added up. If that total is
* greater than max_heap_usage, max_heap_usage is set to it; otherwise it is
* left unchanged. The map itself is only read.
*/
void track_max_usage(std::map<uintptr_t, int>& heap_memory_map,
int& max_heap_usage) {
    // Total of every size currently recorded. Entries are bound by const
    // reference so no pair is copied while iterating.
    int sum {0};
    for (const auto& ele : heap_memory_map) { sum += ele.second; }
    // Keep whichever is larger: the previous peak or the current total.
    max_heap_usage = std::max(max_heap_usage, sum);
}
// Demo driver: builds a four-element vector, prints each element on its own
// line, then prints the value of max_heap_usage.
int main() {
vector<int> vec {1, 2, 3, 4};
for (auto ele : vec) {
cout << ele << endl;
}
// max_heap_usage is only ever written by track_max_usage(), which the
// malloc/calloc replacements above call.
cout << "Total heap usage " << max_heap_usage << endl;
return 0;
}
I am trying to override the malloc, calloc and free calls so that anytime there is a heap allocation I can keep track of it. Somehow the vector class does not seem to allocate any memory on the heap. Could someone explain what exactly is going on here? Also how can I go about achieving the desired result?
Thanks!
Your program as posted - call it main.cpp - does not quite compile, so it can't be quite
the program whose disappointing behaviour you would like explained:
error: 'heap_memory_map' declared as reference but not initialized
std::map<uintptr_t, int>& heap_memory_map;
^
And if we fix that by declaring instead:
std::map<uintptr_t, int> heap_memory_map;
we then have a linkage error:
undefined reference to `track_max_usage(std::map<unsigned long, int, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, int> > >, int&)'
because the declaration:
void track_max_usage(std::map<uintptr_t, int> heap_memory_map,
int& max_heap_usage);
does not match the definition:
void track_max_usage(std::map<uintptr_t, int>& heap_memory_map,
int& max_heap_usage) {
...
}
If we fix that also by declaring:
void track_max_usage(std::map<uintptr_t, int>& heap_memory_map,
int& max_heap_usage);
then we successfully compile and link, at least if we're not fussy
about standard conformance:
$ g++ -o prog -std=c++11 -Wall main.cpp -ldl
If we are fussy about Standard conformance:
$ g++ -o prog -std=c++11 -Wall -pedantic main.cpp -ldl
then there remain compilation errors:
main.cpp:20:25: error: declaration of ‘void* malloc(size_t)’ has a different exception specifier
void* malloc(size_t size) {
^
...
/usr/include/stdlib.h:466:14: error: from previous declaration ‘void* malloc(size_t) throw ()’
extern void *malloc (size_t __size) __THROW __attribute_malloc__ __wur;
^
main.cpp: In function ‘void* calloc(size_t, size_t)’:
main.cpp:40:39: error: declaration of ‘void* calloc(size_t, size_t)’ has a different exception specifier
void* calloc(size_t count, size_t size) {
^
...
/usr/include/stdlib.h:468:14: error: from previous declaration ‘void* calloc(size_t, size_t) throw ()’
extern void *calloc (size_t __nmemb, size_t __size)
^
main.cpp: In function ‘void free(void*)’:
main.cpp:60:20: error: declaration of ‘void free(void*)’ has a different exception specifier
void free(void* ptr) {
^
...
/usr/include/stdlib.h:483:13: error: from previous declaration ‘void free(void*) throw ()’
extern void free (void *__ptr) __THROW;
Another couple of passing carps:
int isn't guaranteed to be able to hold the size of a heap block. That is why the Standard library declares:
void* malloc(size_t size);
void* calloc(size_t num, size_t size);
and not:
void* malloc(int size);
void* calloc(int num, int size);
So by rights you'd have:
size_t max_heap_usage = 0;
std::map<uintptr_t, size_t> heap_memory_map;
Further, what you actually want is a map of void *-values to sizes,
and there's no reason at all not to have such a map:
std::map<void *, size_t> heap_memory_map;
Then the refrain:
uintptr_t pointer_handle = reinterpret_cast<uintptr_t>(pointer);
could be dispensed with.
Going with what we've got however (and remembering that we don't
know exactly what you have got) running prog does not simply
fail to tally any heap allocations; it crashes:
$ ./prog
Segmentation fault (core dumped)
If you debug this and peruse the backtrace to the segfault, you'll
see the circular call sequence:
operator new(unsigned long)
__gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<unsigned long const, int> > >::allocate /usr/include/c++/5/ext/new_allocator.h 104
std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<unsigned long const, int> > > >::allocate /usr/include/c++/5/bits/alloc_traits.h 360
std::_Rb_tree<unsigned long, std::pair<unsigned long const, int>, std::_Select1st<std::pair<unsigned long const, int> >, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, int> > >::_M_get_node /usr/include/c++/5/bits/stl_tree.h 491
std::_Rb_tree<unsigned long, std::pair<unsigned long const, int>, std::_Select1st<std::pair<unsigned long const, int> >, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, int> > >::_M_create_node<std::piecewise_construct_t const&, std::tuple<unsigned long const&>, std::tuple<> >(std::piecewise_construct_t const&, std::tuple<unsigned long const&>&&, std::tuple<>&&) /usr/include/c++/5/bits/stl_tree.h 545
std::_Rb_tree<unsigned long, std::pair<unsigned long const, int>, std::_Select1st<std::pair<unsigned long const, int> >, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, int> > >::_M_emplace_hint_unique<std::piecewise_construct_t const&, std::tuple<unsigned long const&>, std::tuple<> >(std::_Rb_tree_const_iterator<std::pair<unsigned long const, int> >, std::piecewise_construct_t const&, std::tuple<unsigned long const&>&&, std::tuple<>&&) /usr/include/c++/5/bits/stl_tree.h 2170
std::map<unsigned long, int, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, int> > >::operator[] /usr/include/c++/5/bits/stl_map.h 483
malloc /home/imk/develop/so/heap_track_orig/main.cpp 34
operator new(unsigned long)
repeated ad nauseam. So the program is looping until it runs out of stack.
This is due to a fatal logical flaw. You're proceeding on the assumption that all the C++ dynamic memory management
operations in the program will be delegated to the Standard C library facilities
malloc, calloc and free.
Well, at least some of them are, and in particular the calls to operator new
that originate in
heap_memory_map[pointer_handle] = size;
when you allocate a new element of your heap map are delegated to malloc.
Which is your malloc. Which again calls:
heap_memory_map[pointer_handle] = size;
then operator new, then back to malloc, and so on to stack-exhaustion.
That is the fatal logical flaw, but the motivating assumption is also flaky.
The C++ Standard does not require even the default implementations of operator new and
operator delete to delegate respectively to malloc and free. It doesn't
specify any relationship between dynamic memory management in C++ and that of C.
The C++ compiler I'm using here (Linux, GCC) does in fact so delegate, and
probably so does yours, but an implementor might choose to delegate both
malloc/free and new/delete directly to OS APIs.
Don't attempt to roll your own heap-profiling. Use a proper heap-profiler.
For linux, the go-to heap profiler is Valgrind's massif.
Your distro will almost certainly provide a Valgrind package, including massif.
Here's a program that I'm going to profile with massif to check its max heap usage:
main.cpp
#include <vector>
#include <iostream>
using namespace std;
// Sample program for heap profiling: grows a vector to 1000 ints one
// push_back at a time, then empties it again one pop_back at a time.
int main() {
vector<int> vec;
for (int i = 0; i < 1000; ++i) {
vec.push_back(i);
}
// Empty loop body: the pop_back() in the increment clause does the work and
// the loop ends once size() reaches 0.
for ( ;vec.size(); vec.pop_back()) {}
return 0;
}
Compile and link:
$ g++ -g -o prog -Wall main.cpp
Run valgrind with massif:
$ valgrind --tool=massif ./prog
==6479== Massif, a heap profiler
==6479== Copyright (C) 2003-2015, and GNU GPL'd, by Nicholas Nethercote
==6479== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==6479== Command: ./prog
==6479==
==6479==
The heap-profile is output by default in massif.out.NNNN. I find massif.out.6479
and run:
$ ms_print massif.out.6479 > heap_prof.txt
I look in heap_prof.txt and at line 32 I read:
Number of snapshots: 29
Detailed snapshots: [4, 14, 17, 20, 23, 26 (peak)]
which tells me that heap snapshot #26 shows the peak usage. I scroll to
snapshot #26 and see:
--------------------------------------------------------------------------------
n time(i) total(B) useful-heap(B) extra-heap(B) stacks(B)
--------------------------------------------------------------------------------
24 2,049,029 74,768 74,752 16 0
25 2,069,629 78,872 78,848 24 0
26 2,070,679 78,872 78,848 24 0
99.97% (78,848B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
->92.18% (72,704B) 0x4EB91FE: ??? (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21)
| ->92.18% (72,704B) 0x4010608: call_init.part.0 (dl-init.c:72)
| ->92.18% (72,704B) 0x4010719: _dl_init (dl-init.c:30)
| ->92.18% (72,704B) 0x4000D08: ??? (in /lib/x86_64-linux-gnu/ld-2.21.so)
|
->07.79% (6,144B) 0x401788: __gnu_cxx::new_allocator<int>::allocate(unsigned long, void const*) (new_allocator.h:104)
->07.79% (6,144B) 0x401665: __gnu_cxx::__alloc_traits<std::allocator<int> >::allocate(std::allocator<int>&, unsigned long) (alloc_traits.h:182)
->07.79% (6,144B) 0x4014B0: std::_Vector_base<int, std::allocator<int> >::_M_allocate(unsigned long) (stl_vector.h:170)
->07.79% (6,144B) 0x400F59: std::vector<int, std::allocator<int> >::_M_insert_aux(__gnu_cxx::__normal_iterator<int*, std::vector<int, std::allocator<int> > >, int const&) (vector.tcc:353)
->07.79% (6,144B) 0x400CC4: std::vector<int, std::allocator<int> >::push_back(int const&) (stl_vector.h:925)
->07.79% (6,144B) 0x400AEC: main (main.cpp:9)
So the program's top recorded heap consumption was 78,872 bytes, of
which (a mere) 6,144 bytes were allocated for my std::vector.
The C++ standard library containers don't use malloc and such directly, they use "allocator" objects, which are generally provided as a template parameter. You can look at providing a custom allocator, or, providing a custom operator new function, if you want to hook into things like this for measurement purposes.
When trying to access a member of std::unordered_map using [], I get an error:
Attempt to take address of value not located in memory.
There is a nice gdb-stl-views, except it does not support unordered_map.
Is there a similarly nice way to retrieve by key a member of unordered_map?
I think you can view a member of a std::unordered_map in gdb with one trivial additional step:
This is my test code:
#include <iostream>
#include <unordered_map>
// Helper intended to be called from gdb: builds the map's std::string key type
// from a plain C string, so that an expression such as map.at(make_key("bar"))
// can be evaluated in the debugger.
std::string make_key(const char *input)
{
    std::string key(input);
    return key;
}
// Test program for the gdb session: creates a one-entry unordered_map
// ("bar" -> 100) and prints the value stored under "bar".
// argc and argv are not used.
int main(int argc, char **argv) {
std::unordered_map<std::string, int> map = {
{"bar", 100}
};
// at() is used for the lookup; the gdb session calls the same member.
std::cout << map.at("bar");
}
And I am using gdb 11.2 in Archlinux:
g++ -std=gnu++11 -O0 -g unordered_map_test.cpp -o unordered_map_test
gdb unordered_map_test
(gdb) p map
$1 = std::unordered_map with 1 element = {["bar"] = 100}
// Perhaps it's useless to print all key-value pairs in map if you have a large map.
// Then you could print value of specific key
(gdb) p map.at(make_key("bar"))
$2 = (std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, int> > >::mapped_type &) @0x55555556eed8: 100 // 100 is the value of `bar`
// If you think it's annoying that there is too much type information above, you could just print the value after you know the address of value.
(gdb) p *0x55555556eed8
$3 = 100
I have a large code base that can use boost::any or boost::spirit::hold_any (depending on a macro definition).
hold_any seems to be compatible with boost::any (e.g. How to print boost::any to a stream? or Type erasure - Part IV) and faster (Why you shouldn’t use boost::any) but I'm experiencing several segmentation fault errors using hold_any (Boost v1.55 / 1.54 / 1.53).
This is a minimal working example that exhibits the same problem as the original code:
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/home/support/detail/hold_any.hpp>
typedef boost::spirit::hold_any any;
typedef std::vector<any> vany;
// Reproducer: fills data0 with 1000 hold_any objects that each wrap a
// std::string, then moves them one by one into data1 by copying the front
// element and erasing it from data0.
int main()
{
vany data0, data1;
for (unsigned i(0); i < 1000; ++i)
{
std::string s("test_test_test");
data0.push_back(any(s));
}
// Element count is captured once, before the loop starts erasing from data0.
const unsigned n(data0.size());
vany::iterator iter(data0.begin());
for (unsigned i(0); i < n; ++i)
{
std::cout << "Moving " << i << std::endl;
// Copy the current element into data1 ...
data1.push_back(*iter);
// ... then remove it from data0; erase() returns the iterator to continue from.
// NOTE(review): erasing from the front presumably shifts the remaining
// elements down by assignment, which would exercise hold_any's
// assignment operator -- confirm.
iter = data0.erase(iter);
}
return 0;
}
The program appears to work correctly:
changing from boost::spirit::hold_any to boost::any;
changing the content of the hold_any to a data type small enough to perform small buffer optimization (e.g. from std::string to int).
It seems strange that there could be some major bug in a widely used library such as Boost Spirit, but
I'm having a hard time finding a bug in the example;
I've tried g++ / clang++ without success.
What's wrong with the example?
You should not be using hold_any as it is in detail/hold_any.hpp for a reason.
That said, hold_any's copy-assignment appears to be broken. I've created a pull request on github with a proposed fix.
Without the fix, the following program demonstrates UB (because the compiler generates a shallow assignment operator which is preferred):
#include <iostream>
#include <string>
#include <boost/spirit/home/support/detail/hold_any.hpp>
typedef boost::spirit::hold_any any;
// Demonstration: assigns one hold_any to another, lets the source go out of
// scope, then streams the destination.
int main()
{
any b;
{
any a;
a = std::string("test_test_test");
// Copy-assignment from another hold_any -- the operation under test.
b = a;
}
// 'a' has been destroyed by this point; only 'b' remains.
std::cout << "b: " << b << '\n';
}
When run under valgrind:
==11827== Invalid read of size 8
==11827== at 0x5E9D793: std::basic_ostream<char, std::char_traits<char> >& std::operator<< <char, std::char_traits<char>, std::allocator<char> >(std::basic_ostream<char, std::char_traits<char> >&, std::basic_string<char, std
==11827== by 0x4012FC: boost::spirit::detail::fxns<mpl_::bool_<true> >::type<std::string, char>::stream_out(std::ostream&, void* const*) (hold_any.hpp:113)
==11827== by 0x4010F5: std::basic_ostream<char, std::char_traits<char> >& boost::spirit::operator<< <char>(std::basic_ostream<char, std::char_traits<char> >&, boost::spirit::basic_hold_any<char> const&) (hold_any.hpp:368)
==11827== by 0x400FC9: main (test.cpp:17)
==11827== Address 0x8ac1650 is 0 bytes inside a block of size 39 free'd
==11827== at 0x4C2BADC: operator delete(void*) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==11827== by 0x5EC405E: std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string() (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.18)
==11827== by 0x401204: boost::spirit::detail::fxns<mpl_::bool_<true> >::type<std::string, char>::static_delete(void**) (hold_any.hpp:89)
==11827== by 0x401328: boost::spirit::basic_hold_any<char>::~basic_hold_any() (hold_any.hpp:246)
==11827== by 0x4010B4: boost::spirit::basic_hold_any<char>::~basic_hold_any() (hold_any.hpp:245)
==11827== by 0x400FA0: main (test.cpp:15)
I am writing an application that uses the pion library and I see a crash when I try to send a request. The crash is a segmentation fault. This is on Linux and I am linking with PION_FULL.
Code:
// Fragment of a larger function (IO_service and error are declared elsewhere).
// Builds a GET request for /sm_login/trusted.asp, connects to 127.0.0.1:80 and
// sends the request over that connection.
pion::net::HTTPRequest httpRequest("/sm_login/trusted.asp?app-id=test");
int port = 80;
httpRequest.setMethod("GET");
httpRequest.addHeader("Host", "127.0.0.1");
pion::net::TCPConnection connPtr(IO_service);
error = connPtr.connect(boost::asio::ip::address::from_string("127.0.0.1"), port);
// Give up if connect() reported an error or the connection is not open.
if (error || !connPtr.is_open())
{
dprintf(("Unable to establish connection"));
return false;
}
// The backtrace in the dump below shows the crash inside this call.
httpRequest.send(connPtr, error);
Dump:
#0 0x0eec4bb4 in void pion::net::HTTPMessage::changeValue<std::tr1::unordered_multimap<std::string, std::string, CaseInsensitiveHash, CaseInsensitiveEqual, std::allocator<std::pair<std::string const, std::string> > >
>(std::tr1::unordered_multimap<std::string, std::string, CaseInsensitiveHash, CaseInsensitiveEqual, std::allocator<std::pair<std::string const, std::string> > >&, std::string const&, std::string const&) () from /sw/lib/libpion-net-2.2.12.so
#1 0x0eec11f0 in pion::net::HTTPMessage::send(pion::net::TCPConnection&, boost::system::error_code&) () from /sw/lib/libpion-net-2.2.12.so
#2 0x0f1e6c24 in CHeapInterface::SendRequest(std::string const&, std::string&, CHeapInterface::EHeapMsgType) () from /sw/lib/libMgmt.so
Thanks,
The problem was due to a mismatch in the cross-compiler version used to build the pion libs. The application is not crashing anymore.
Does anyone know if it's kosher to pass a boost::unordered_set as the first parameter to boost::split? Under libboost1.42-dev, this seems to cause problems. Here's a small example program that causes the problem, call it test-split.cc:
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/unordered_set.hpp>
#include <string>
// Reproducer: splits the literal "a^b^c^" on '^' directly into a
// boost::unordered_set<std::string>. argc and argv are not used.
int main(int argc, char **argv) {
boost::unordered_set<std::string> tags_set;
// The separator predicate is built from a one-character std::string holding '^'.
boost::split(tags_set, "a^b^c^",
boost::is_any_of(std::string(1, '^')));
return 0;
}
Then, if I run the following commands:
g++ -o test-split test-split.cc; valgrind ./test-split
I get a bunch of complaints in valgrind like the one that follows (I also sometimes see coredumps without valgrind, though it seems to vary based on timing):
==16843== Invalid read of size 8
==16843== at 0x4ED07D3: std::string::end() const (in /usr/lib/libstdc++.so.6.0.13)
==16843== by 0x401EE2: unsigned long boost::hash_value<char, std::allocator<char> >(std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) (in /tmp/test-split)
...
==16843== by 0x402248: boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >& boost::algorithm::split<boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >, char const [26], boost::algorithm::detail::is_any_ofF<char> >(boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >&, char const (&) [26], boost::algorithm::detail::is_any_ofF<char>, boost::algorithm::token_compress_mode_type) (in /tmp/test-split)
==16843== by 0x40192A: main (in /tmp/test-split)
==16843== Address 0x5936610 is 0 bytes inside a block of size 32 free'd
==16843== at 0x4C23E0F: operator delete(void*) (vg_replace_malloc.c:387)
==16843== by 0x4ED1EE8: std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string() (in /usr/lib/libstdc++.so.6.0.13)
==16843== by 0x404A8B: void boost::unordered_detail::hash_unique_table<boost::unordered_detail::set<boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> > >::insert_range_impl<boost::transform_iterator<boost::algorithm::detail::copy_iterator_rangeF<std::string, char const*>, boost::algorithm::split_iterator<char const*>, boost::use_default, boost::use_default> >(std::string const&, boost::transform_iterator<boost::algorithm::detail::copy_iterator_rangeF<std::string, char const*>, boost::algorithm::split_iterator<char const*>, boost::use_default, boost::use_default>, boost::transform_iterator<boost::algorithm::detail::copy_iterator_rangeF<std::string, char const*>, boost::algorithm::split_iterator<char const*>, boost::use_default, boost::use_default>) (in /tmp/test-split)
...
==16843== by 0x402248: boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >& boost::algorithm::split<boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >, char const [26], boost::algorithm::detail::is_any_ofF<char> >(boost::unordered_set<std::string, boost::hash<std::string>, std::equal_to<std::string>, std::allocator<std::string> >&, char const (&) [26], boost::algorithm::detail::is_any_ofF<char>, boost::algorithm::token_compress_mode_type) (in /tmp/test-split)
==16843== by 0x40192A: main (in /tmp/test-split)
This is a Debian Squeeze box; here's my relevant system info:
$ g++ --version
g++ (Debian 4.4.5-2) 4.4.5
Copyright (C) 2010 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ dpkg -l | grep boost
ii libboost-iostreams1.42.0 1.42.0-4 Boost.Iostreams Library
ii libboost1.42-dev 1.42.0-4 Boost C++ Libraries development files
$ uname -a
Linux gcc44-buildvm 2.6.32-5-amd64 #1 SMP Fri Sep 17 21:50:19 UTC 2010 x86_64 GNU/Linux
However, the code seems to work fine if I downgrade libboost1.42-dev to libboost1.40-dev. So is this a bug in boost 1.42, or am I misusing boost::split by passing in a container that can't handle sequences? Thanks!
This was confirmed on the boost-users mailing list to be a bug in the boost::unordered_set implementation. There is a patch available on the mailing list, and a fix will be checked in soon, hopefully in time for boost 1.45.
Boost-users: patch
Boost-users: confirmation
Thanks everyone for looking into this!
I think the answer should be yes.
Reading the headers (split.hpp and iter_find.hpp) split takes a SequenceSequenceT& Result as its first argument, which it passes to iter_split which range-constructs it from two boost::transform_iterators:
SequenceSequenceT Tmp(itBegin, itEnd);
Result.swap(Tmp);
return Result;
So all it requires of this type is a constructor that takes a pair of iterators which dereference to std::string (or, technically, to BOOST_STRING_TYPENAME), a .swap() member, and a SequenceSequenceT::iterator type whose value type is std::string.
proof:
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <string>
#include <iterator>
#include <algorithm>
#include <iostream>
// Minimal stand-in for a container, used to show which members boost::split
// needs from its result type: a nested iterator type, a default constructor,
// a constructor taking an iterator pair, and swap().
struct X
{
// Nested iterator type: forward category, value type std::string.
typedef std::iterator<std::forward_iterator_tag,
std::string, ptrdiff_t, std::string*, std::string&>
iterator;
X() {}
// Range constructor: stores nothing, just prints every element of [i1, i2)
// separated by spaces so the tokens handed over can be seen.
template<typename Iter> X(Iter i1, Iter i2)
{
std::cout << "Constructed X: ";
copy(i1, i2, std::ostream_iterator<std::string>(std::cout, " " ));
std::cout << "\n";
}
// No-op: X holds no state to exchange.
void swap(X&) {}
};
// Driver for the proof: passes an X as the result argument of boost::split,
// splitting "a^b^c^" on '^'.
int main()
{
X x;
boost::split(x, "a^b^c^", boost::is_any_of(std::string(1, '^')));
}
I think that unordered_set<std::string> should satisfy these requirements as well.
Apparently, the answer is ~~no~~ yes.
Using the following code, I get compile-time warnings and a runtime assert (Visual C++ v10) on the unordered_set while the vector works fine (apart from an empty string in the last element, due to the trailing '^').
boost::unordered_set<std::string> tags_set;
vector<string> SplitVec; // #2: Search for tokens
boost::split( SplitVec, "a^b^c^", boost::is_any_of("^") );
boost::split( tags_set, "a^b^c^", boost::is_any_of("^") );
Iterator compatibility between source (string) and the target container is the issue. I would post the warning error, but it's one of those "War and Peace" template warnings.
EDIT:
This looks like a bug in Boost unordered_set? When I use the following, it works as you would expect:
std::unordered_set<std::string> tags_set_std;
boost::split( tags_set_std, string("a^b^c^"), boost::is_any_of(string("^")) );