How to put a file with an arbitrary name and arbitrary size into a boost::interprocess::managed_shared_memory?
Note, I do not mean boost::interprocess::managed_mapped_file or
boost::interprocess::file_mapping.
I chose managed_shared_memory because other options require a fixed file name
to be specified but I need to transfer files with different names.
I need to use boost, not Win32 API.
I rummaged through a huge amount of information on the Internet, but did not
find anything suitable.
Therefore, I am asking you for help. I would be very grateful to you.
UPDATE
Added bonus versions at the end. Now this answer presents three complete versions of the code:
Using managed_shared_memory as requested
Using message_queue as a more natural approach for upload/transfer
Using TCP sockets (Asio) to demonstrate the flexibility of that approach
All of these use Boost only.
Shared memory managed segments contain arbitrary objects. So you define an object like
struct MyFile {
std::string _filename;
std::vector<char> _contents;
};
And store it there. But wait, not so quick, because these can only be stored safely with interprocess allocators. So we add some magic sauce (a.k.a. lots of interesting typedefs to get the allocators declared, and some constructors):
namespace Shared {
using Mem = bip::managed_shared_memory;
using Mgr = Mem::segment_manager;
template <typename T>
using Alloc = bc::scoped_allocator_adaptor<bip::allocator<T, Mgr>>;
template <typename T> using Vector = bc::vector<T, Alloc<T>>;
using String =
bc::basic_string<char, std::char_traits<char>, Alloc<char>>;
struct MyFile {
using allocator_type = Alloc<char>;
template <typename It>
explicit MyFile(std::string_view name, It b, It e, allocator_type alloc)
: _filename(name.data(), name.size(), alloc),
_contents(b, e, alloc) {}
String _filename;
Vector<char> _contents;
};
}
Now you can store your files like:
Shared::Mem shm(bip::open_or_create, "shared_mem", 10ull << 30);
std::string name = "file_name.txt";
std::ifstream ifs(name, std::ios::binary);
std::istreambuf_iterator<char> data_begin{ifs}, data_end{};
auto loaded = shm.find_or_construct<Shared::MyFile>("file1")(
name, data_begin, data_end,
shm.get_segment_manager());
Note that the shared memory won't actually take 10 GiB right away, even though
that's what 10ull << 30 specifies. On most operating systems it will be
sparsely allocated and only the pages that contain data will be committed.
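If you want to observe this from inside the program, the managed segment can report its nominal size and how much of it the segment's allocator has not yet handed out (note this is the segment's own bookkeeping, not OS page residency). A minimal sketch using the get_size()/get_free_memory() accessors:
Shared::Mem shm(bip::open_or_create, "shared_mem", 10ull << 30);
std::cout << "segment size: " << shm.get_size() << "\n"         // the full 10 GiB reservation
          << "still free:   " << shm.get_free_memory() << "\n"; // unallocated bytes within the segment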
Improving
You might have wondered what the scoped_allocator_adaptor was for. It doesn't seem like we use it?
Well, the idea was to not use find_or_construct directly per file, but to
store a Vector<MyFile> so you can harness the full power of BIP allocators.
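As a quick illustration of what that buys you (the full demo below does exactly this): with a scoped_allocator_adaptor on the outer container, emplacing an element propagates the segment allocator into the nested String and Vector<char> automatically, so you never pass it by hand:
auto& files = *shm.find_or_construct<Shared::Vector<Shared::MyFile>>("FileList")(
    shm.get_segment_manager());

// no allocator argument needed - the scoped allocator forwards it to MyFile's members
files.emplace_back("file_name.txt", data_begin, data_end);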
The following full demo can be invoked
with filename arguments, which will all be loaded (if they exist as
regular files)
without arguments, which will list previously loaded files
Live On Coliru
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/managed_mapped_file.hpp> // for COLIRU
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/container/scoped_allocator.hpp>
#include <fstream>
#include <filesystem>
#include <iostream>
#include <iomanip>
namespace bip = boost::interprocess;
namespace bc = boost::container;
namespace fs = std::filesystem;
namespace Shared {
#ifdef COLIRU
using Mem = bip::managed_mapped_file; // Coliru does not allow shared memory
#else
using Mem = bip::managed_shared_memory;
#endif
using Mgr = Mem::segment_manager;
template <typename T>
using Alloc = bc::scoped_allocator_adaptor<bip::allocator<T, Mgr>>;
template <typename T> using Vector = bc::vector<T, Alloc<T>>;
using String = bc::basic_string<char, std::char_traits<char>, Alloc<char>>;
struct MyFile {
using allocator_type = Alloc<char>;
MyFile(MyFile&&) = default;
MyFile(MyFile const& rhs, allocator_type alloc)
: _filename(rhs._filename.begin(), rhs._filename.end(), alloc),
_contents(rhs._contents.begin(), rhs._contents.end(), alloc) {}
MyFile& operator=(MyFile const& rhs) {
_filename.assign(rhs._filename.begin(), rhs._filename.end());
_contents.assign(rhs._contents.begin(), rhs._contents.end());
return *this;
}
template <typename It>
explicit MyFile(std::string_view name, It b, It e, allocator_type alloc)
: _filename(name.data(), name.size(), alloc),
_contents(b, e, alloc) {}
String _filename;
Vector<char> _contents;
friend std::ostream& operator<<(std::ostream& os, MyFile const& mf) {
return os << "Name: " << std::quoted(mf._filename.c_str())
<< " content size: " << mf._contents.size();
}
};
} // namespace Shared
int main(int argc, char** argv) {
Shared::Mem shm(bip::open_or_create, "shared_mem", 512ull << 10);
using FileList = Shared::Vector<Shared::MyFile>;
auto& shared_files =
*shm.find_or_construct<FileList>("FileList")(shm.get_segment_manager());
if (1==argc) {
std::cout << "Displaying previously loaded files: \n";
for (auto& entry : shared_files)
std::cout << entry << std::endl;
} else {
std::cout << "Loading files: \n";
for (auto file : std::vector<fs::path>{argv + 1, argv + argc}) {
if (is_regular_file(file)) {
try {
std::ifstream ifs(file, std::ios::binary);
std::istreambuf_iterator<char> data_begin{ifs}, data_end{};
auto& loaded = shared_files.emplace_back(
file.native(), data_begin, data_end);
std::cout << loaded << std::endl;
} catch (std::system_error const& se) {
std::cerr << "Error: " << se.code().message() << std::endl;
} catch (std::exception const& se) {
std::cerr << "Other: " << se.what() << std::endl;
}
}
}
}
}
When run with
g++ -std=c++17 -O2 -Wall -pedantic -pthread main.cpp -lrt -DCOLIRU
./a.out main.cpp a.out
./a.out
Prints
Loading files:
Name: "main.cpp" content size: 3239
Name: "a.out" content size: 175176
Displaying previously loaded files:
Name: "main.cpp" content size: 3239
Name: "a.out" content size: 175176
BONUS
In response to the comments, I think it's worth actually comparing the alternatives.
Message Queue version
For comparison, here's a message queue implementation.
Live On Coliru
#include <boost/interprocess/ipc/message_queue.hpp>
#include <boost/endian/arithmetic.hpp>
#include <fstream>
#include <filesystem>
#include <iostream>
#include <iomanip>
namespace bip = boost::interprocess;
namespace fs = std::filesystem;
using bip::message_queue;
static constexpr auto MAX_FILENAME_LENGH = 512; // 512 bytes max filename length
static constexpr auto MAX_CONTENT_SIZE = 512ull << 10; // 512 KiB max payload size
struct Message {
std::vector<char> _buffer;
using Uint32 = boost::endian::big_uint32_t;
struct header_t {
Uint32 filename_length;
Uint32 content_size;
};
static_assert(std::is_standard_layout_v<header_t> and
std::is_trivial_v<header_t>);
Message() = default;
Message(fs::path file) {
std::string const name = file.native();
std::ifstream ifs(file, std::ios::binary);
std::istreambuf_iterator<char> data_begin{ifs}, data_end{};
_buffer.resize(header_len + name.length());
std::copy(begin(name), end(name), _buffer.data() + header_len);
_buffer.insert(_buffer.end(), data_begin, data_end);
header().filename_length = name.length();
header().content_size = size() - header_len - name.length();
}
Message(char const* buf, size_t size)
: _buffer(buf, buf+size) {}
static constexpr auto header_len = sizeof(header_t);
static constexpr auto max_size =
header_len + MAX_FILENAME_LENGH + MAX_CONTENT_SIZE;
char const* data() const { return _buffer.data(); }
size_t size() const { return _buffer.size(); }
header_t& header() {
assert(_buffer.size() >= header_len);
return *reinterpret_cast<header_t*>(_buffer.data());
}
header_t const& header() const {
assert(_buffer.size() >= header_len);
return *reinterpret_cast<header_t const*>(_buffer.data());
}
std::string_view filename() const {
assert(_buffer.size() >= header_len + header().filename_length);
return { _buffer.data() + header_len, header().filename_length };
}
std::string_view contents() const {
assert(_buffer.size() >=
header_len + header().filename_length + header().content_size);
return {_buffer.data() + header_len + header().filename_length,
header().content_size};
}
friend std::ostream& operator<<(std::ostream& os, Message const& mf) {
return os << "Name: " << std::quoted(mf.filename())
<< " content size: " << mf.contents().size();
}
};
int main(int argc, char** argv) {
message_queue mq(bip::open_or_create, "file_transport", 10, Message::max_size);
if (1==argc) {
std::cout << "Receiving uploaded files: \n";
char rawbuf [Message::max_size];
while (true) {
size_t n;
unsigned prio;
mq.receive(rawbuf, sizeof(rawbuf), n, prio);
Message decoded(rawbuf, n);
std::cout << "Received: " << decoded << std::endl;
}
} else {
std::cout << "Loading files: \n";
for (auto file : std::vector<fs::path>{argv + 1, argv + argc}) {
if (is_regular_file(file)) {
try {
Message encoded(file);
std::cout << "Sending: " << encoded << std::endl;
mq.send(encoded.data(), encoded.size(), 0);
} catch (std::system_error const& se) {
std::cerr << "Error: " << se.code().message() << std::endl;
} catch (std::exception const& se) {
std::cerr << "Other: " << se.what() << std::endl;
}
}
}
}
}
A demo:
Note that there is a filesize limit in this approach because messages have a maximum length
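If you want to fail a bit more gracefully, a small guard could reject files that cannot fit before even building the Message. This is only a sketch; fits_in_one_message is a hypothetical helper reusing the MAX_* constants from the listing above, and it would be called in the sending loop before Message encoded(file);:
// Hypothetical helper: true if the file fits into a single message,
// based on the MAX_FILENAME_LENGH / MAX_CONTENT_SIZE constants above.
static bool fits_in_one_message(fs::path const& file) {
    return fs::file_size(file) <= MAX_CONTENT_SIZE &&
           file.native().size() <= size_t(MAX_FILENAME_LENGH);
}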
TCP Socket Version
Here's a TCP socket implementation.
Live On Coliru
#include <boost/asio.hpp>
#include <boost/endian/arithmetic.hpp>
#include <vector>
#include <fstream>
#include <filesystem>
#include <iostream>
#include <iomanip>
namespace fs = std::filesystem;
using boost::asio::ip::tcp;
using boost::system::error_code;
static constexpr auto MAX_FILENAME_LENGH = 512; // 512 bytes max filename length
static constexpr auto MAX_CONTENT_SIZE = 512ull << 10; // 512 KiB max payload size
struct Message {
std::vector<char> _buffer;
using Uint32 = boost::endian::big_uint32_t;
struct header_t {
Uint32 filename_length;
Uint32 content_size;
};
static_assert(std::is_standard_layout_v<header_t> and
std::is_trivial_v<header_t>);
Message() = default;
Message(fs::path file) {
std::string const name = file.native();
std::ifstream ifs(file, std::ios::binary);
std::istreambuf_iterator<char> data_begin{ifs}, data_end{};
_buffer.resize(header_len + name.length());
std::copy(begin(name), end(name), _buffer.data() + header_len);
_buffer.insert(_buffer.end(), data_begin, data_end);
header().filename_length = name.length();
header().content_size = actual_size() - header_len - name.length();
}
Message(char const* buf, size_t size)
: _buffer(buf, buf+size) {}
static constexpr auto header_len = sizeof(header_t);
static constexpr auto max_size =
header_len + MAX_FILENAME_LENGH + MAX_CONTENT_SIZE;
char const* data() const { return _buffer.data(); }
size_t actual_size() const { return _buffer.size(); }
size_t decoded_size() const {
return header().filename_length + header().content_size;
}
bool is_complete() const {
return actual_size() >= header_len && actual_size() >= decoded_size();
}
header_t& header() {
assert(actual_size() >= header_len);
return *reinterpret_cast<header_t*>(_buffer.data());
}
header_t const& header() const {
assert(actual_size() >= header_len);
return *reinterpret_cast<header_t const*>(_buffer.data());
}
std::string_view filename() const {
assert(actual_size() >= header_len + header().filename_length);
return std::string_view(_buffer.data() + header_len,
header().filename_length);
}
std::string_view contents() const {
assert(actual_size() >= decoded_size());
return std::string_view(_buffer.data() + header_len +
header().filename_length,
header().content_size);
}
friend std::ostream& operator<<(std::ostream& os, Message const& mf) {
return os << "Name: " << std::quoted(mf.filename())
<< " content size: " << mf.contents().size();
}
};
int main(int argc, char** argv) {
boost::asio::io_context ctx;
uint16_t port = 8989;
if (1==argc) {
std::cout << "Receiving uploaded files: " << std::endl;
tcp::acceptor acc(ctx, tcp::endpoint{{}, port});
while (true) {
auto s = acc.accept();
std::cout << "Connection accepted from " << s.remote_endpoint() << std::endl;
Message msg;
auto buf = boost::asio::dynamic_buffer(msg._buffer);
error_code ec;
while (auto n = read(s, buf, ec)) {
std::cout << "(read " << n << " bytes, " << ec.message() << ")" << std::endl;
while (msg.is_complete()) {
std::cout << "Received: " << msg << std::endl;
buf.consume(msg.decoded_size() + Message::header_len);
}
}
std::cout << "Connection closed" << std::endl;
}
} else {
std::cout << "Loading files: " << std::endl;
tcp::socket s(ctx);
s.connect(tcp::endpoint{{}, port});
for (auto file : std::vector<fs::path>{argv + 1, argv + argc}) {
if (is_regular_file(file)) {
try {
Message encoded(file);
std::cout << "Sending: " << encoded << std::endl;
write(s, boost::asio::buffer(encoded._buffer));
} catch (std::system_error const& se) {
std::cerr << "Error: " << se.code().message() << std::endl;
} catch (std::exception const& se) {
std::cerr << "Other: " << se.what() << std::endl;
}
}
}
}
}
Demo:
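A possible invocation (a sketch, mirroring the shared-memory demo above; port 8989 is hard-coded in the listing): start the receiver without arguments first, then run the sender with filenames:
g++ -std=c++17 -O2 -Wall -pedantic -pthread main.cpp
./a.out &              # receiver: listens and prints what it receives
./a.out main.cpp a.out # sender: uploads the given files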
Note how this easily scales to larger files, multiple files in a single connection, and even multiple connections simultaneously if you need that. It also doesn't do double buffering, which improves performance.
This is why this kind of approach is much more common than either of the other approaches.
Related
I have 2-dimensional data (row x col) which I was reading into a boost::multi_array container. I also need to know whether I can read this data into a ublas::vector, e.g., if the data has three rows, read them into three vectors v1, v2, v3. I am not very familiar with the interface of ublas::vector.
The data is stored in a .h5 file, and in order to read it I'm using this library. Can anyone show me how to replace boost::multi_array with ublas::vector?
A suggestion with some other example is also appreciated. Thanks!
#include <boost/multi_array.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
#include <boost/numeric/ublas/vector.hpp>
using array_2d_t = boost::multi_array<float, 2>;
template <typename T>
void print_array(T const& array)
{
for (auto const& row : array)
{ for (auto v : row)
printf("%10f ", v);
printf("\n");
}
std::cout << "\n End of file " << std::endl;
}
array_2d_t read_frame(std::string const& filename) {
h5xx::file xaa(filename, h5xx::file::mode::in);
h5xx::group g(xaa, "particles/lipids/box/edges");
h5xx::dataset ds(g, "box_size");
auto ds_shape = h5xx::dataspace(ds).extents<2>();
array_2d_t arr(boost::extents[ds_shape[0]][ds_shape[1]]);
h5xx::read_dataset(ds, arr);
return arr;
}
int main(int argc, char const* argv[]) {
if ( argc < 2) {
std::cout << "Usage: " << argv[0] << " input.h5 " << std::endl;
return -1;
}
std::string filename(argv[1]);
auto count = read_frame(filename);
std::cout << "Frames in file: " << count[1][1] << "\n";
print_array(count);
return 0;
}
The question is simple: I want to find a file path inside a directory, but I have only part of the filename, so here is a function for this task:
void getfiles(const fs::path& root, const string& ext, vector<fs::path>& ret)
{
if(!fs::exists(root) || !fs::is_directory(root)) return;
fs::recursive_directory_iterator it(root);
fs::recursive_directory_iterator endit;
while(it != endit)
{
if(fs::is_regular_file(*it)&&it->path().extension()==ext) ret.push_back(it->path());//
++it;
}
}
bool find_file(const filesystem::path& dir_path, const filesystem::path file_name, filesystem::path& path_found) {
const fs::recursive_directory_iterator end;
const auto it = find_if(fs::recursive_directory_iterator(dir_path), end,
[file_name](fs::path e) {
cerr<<boost::algorithm::icontains(e.filename().native() ,file_name.native())<<endl;
return boost::algorithm::icontains(e.filename().native() ,file_name.native());//
});
if (it == end) {
return false;
} else {
path_found = it->path();
return true;
}
}
int main (int argc, char* argv[])
{
vector<fs::path> inputClass ;
fs::path textFiles,datasetPath,imgpath;
textFiles=argv[1];
datasetPath=argv[2];
getfiles(textFiles,".txt",inputClass);
for (int i=0;i<inputClass.size();i++)
{
ifstream lblFile(inputClass[i].string().c_str());
string line;
fs::path classname=inputClass[i].parent_path()/inputClass[i].stem().string();
cerr<<classname.stem()<<endl;
while (getline(lblFile,line))
{
bool find=find_file(datasetPath,line,imgpath);
if (find)
{
while(!fs::exists(classname))
fs::create_directories (classname);
fs::copy(imgpath,classname/imgpath.filename());
cerr<<"Found\n";
}
else
cerr<<"Not Found \n";
}
lblFile.close();
}
}
Console out:
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
0
Not Found
But when I set the search string manually, it works fine! I tried other methods for searching the string, like std::find, but all of them fail to find the substring. It seems there is a problem with the input string (line); I printed all the chars, but there were no special characters or anything.
If I set the search string manually, it works as desired:
string search="E9408000EC0";
cerr<<e.filename().native()<<"||"<<search<<endl;
cerr<<boost::algorithm::icontains(e.filename().native() ,search)<<endl;
The results for the above change look like this:
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
1
Found
I cannot reproduce this.
The only hunch I have is that on your platform, perhaps the string() accessor is not returning the plain string, but e.g. the quoted path. That would break the search. Consider using the native() accessor instead.
(In fact, since file_name is NOT a path but a string pattern, I suggest passing the argument as std::string_view or similar instead.)
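To illustrate that hunch with a tiny self-contained sketch (using std::filesystem on a POSIX build; boost::filesystem behaves the same way here): streaming a path with operator<< adds quotes, while native() gives the raw characters, and mixing the two representations would indeed break a substring search:
#include <filesystem>
#include <iostream>

int main() {
    std::filesystem::path p = "1200E9408000EC0.jpeg";
    std::cout << p << "\n";          // "1200E9408000EC0.jpeg"  (quoted by operator<<)
    std::cout << p.native() << "\n"; // 1200E9408000EC0.jpeg    (no quotes)
}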
Live On Coliru
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
namespace fs = boost::filesystem;
template <typename Out>
void find_file(const fs::path& dir_path, const fs::path file_name, Out out) {
fs::recursive_directory_iterator it(dir_path), end;
std::copy_if(it, end, out, [file_name](fs::path e) {
return boost::algorithm::icontains(e.filename().native(),
file_name.native());
});
}
int main() {
fs::path d = "a/b/c/e";
fs::create_directories(d);
{
std::ofstream ofs(d / "1200E9408000EC0.jpeg");
}
std::cout << fs::path("000EC0").native() << "\n";
std::vector<fs::path> found;
find_file(".", "000EC0", back_inserter(found));
for (auto &f : found)
{
std::cout << "Found: " << f << "\n";
}
}
Prints
000EC0
Found: "./a/b/c/e/1200E9408000EC0.jpeg"
UPDATE: Code Review
For the updated question, I came up with a somewhat improved tester that works with boost::filesystem and with std::filesystem just the same.
There are many small improvements (removing repetition, explicit conversions, using optional to return optional matches, etc.).
I also added a whitespace trim to avoid choking on extraneous whitespace on the input lines:
Live On Coliru (-DUSE_BOOST_FS)
Live On Coliru (std library)
#include <boost/algorithm/string.hpp>
#include <fstream>
#include <iostream>
using boost::algorithm::icontains;
using boost::algorithm::trim;
#if defined(USE_BOOST_FS)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
using boost::system::error_code;
#else
#include <filesystem>
namespace fs = std::filesystem;
using std::error_code;
#endif
void getfiles(
const fs::path& root, const std::string& ext, std::vector<fs::path>& ret)
{
if (!exists(root) || !is_directory(root))
return;
for (fs::recursive_directory_iterator it(root), endit; it != endit; ++it) {
if (is_regular_file(*it) && it->path().extension() == ext)
ret.push_back(it->path()); //
}
}
std::optional<fs::path> find_file(const fs::path& dir_path, fs::path partial)
{
fs::recursive_directory_iterator end,
it = fs::recursive_directory_iterator(dir_path);
it = std::find_if(it, end, [partial](fs::path e) {
auto search = partial.native();
//std::cerr << e.filename().native() << "||" << search << std::endl;
auto matches = icontains(e.filename().native(), search);
std::cerr << e << " Matches: " << std::boolalpha << matches
<< std::endl;
return matches;
});
return (it != end)
? std::make_optional(it->path())
: std::nullopt;
}
auto readInputClass(fs::path const& textFiles)
{
std::vector<fs::path> found;
getfiles(textFiles, ".txt", found);
return found;
}
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
auto const textFiles = readInputClass(args.at(1));
std::string const datasetPath = args.at(2);
for (fs::path classname : textFiles) {
// open the text file
std::ifstream lblFile(classname);
// use base without extension as output directory
classname.replace_extension();
if (!fs::exists(classname)) {
if (fs::create_directories(classname))
std::cerr << classname << " created" << std::endl;
}
for (std::string line; getline(lblFile, line);) {
trim(line);
if (auto found = find_file(datasetPath, line)) {
auto dest = classname / found->filename();
error_code ec;
copy(*found, dest, ec);
std::cerr << dest << " (" << ec.message() << ")\n";
} else {
std::cerr << "Not Found \n";
}
}
}
}
Testing from scratch with
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo 'E9408000EC0 ' > textfiles/490.txt
Running
./a.out textfiles/ dataset/
Prints
"textfiles/490" created
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (Success)
Or on subsequent run
"dataset/fsdfdsfdfsf.jpeg" Matches: false
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (File exists)
BONUS
Doing some more diagnostics and avoiding repeatedly traversing the filesystem for each pattern. The main program is now:
Live On Coliru
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
Paths const classes = getfiles(args.at(1), ".txt");
Mappings map = readClassMappings(classes);
std::cout << "Procesing " << map.size() << " patterns from "
<< classes.size() << " classes" << std::endl;
processDatasetDir(args.at(2), map);
}
And the remaining functions are implemented as:
// be smart about case-insensitive patterns
struct Pattern : std::string {
using std::string::string;
using std::string::operator=;
#ifdef __cpp_lib_three_way_comparison
std::weak_ordering operator<=>(Pattern const& other) const {
if (boost::ilexicographical_compare(*this, other)) {
return std::weak_ordering::less;
} else if (boost::ilexicographical_compare(other, *this)) {
return std::weak_ordering::greater;
}
return std::weak_ordering::equivalent;
}
#else
bool operator<(Pattern const& other) const {
return boost::ilexicographical_compare(*this, other);
}
#endif
};
using Paths = std::vector<fs::path>;
using Mapping = std::pair<Pattern, fs::path>;
using Patterns = std::set<Pattern>;
using Mappings = std::set<Mapping>;
Mappings readClassMappings(Paths const& classes)
{
Mappings mappings;
for (fs::path classname : classes) {
std::ifstream lblFile(classname);
classname.replace_extension();
for (Pattern pattern; getline(lblFile, pattern);) {
trim(pattern);
if (auto [it, ok] = mappings.emplace(pattern, classname); !ok) {
std::cerr << "WARNING: " << std::quoted(pattern)
<< " duplicates " << std::quoted(it->first)
<< std::endl;
}
}
}
return mappings;
}
size_t processDatasetDir(const fs::path& datasetPath, Mappings const& patterns)
{
size_t copied = 0, failed = 0;
Patterns found;
using It = fs::recursive_directory_iterator;
for (It it = It(datasetPath), end; it != end; ++it) {
if (!it->is_regular_file())
continue;
fs::path const& entry = *it;
for (auto& [pattern, location]: patterns) {
if (icontains(it->path().filename().native(), pattern)) {
found.emplace(pattern);
if (!exists(location) && fs::create_directories(location))
std::cerr << location << " created" << std::endl;
auto dest = location / entry.filename();
error_code ec;
copy(entry, dest, ec);
std::cerr << dest << " (" << ec.message() << ") from "
<< std::quoted(pattern) << "\n";
(ec? failed : copied) += 1;
}
}
}
std::cout << "Copied:" << copied
<< ", missing:" << patterns.size() - found.size()
<< ", failed: " << failed << std::endl;
return copied;
}
With some more "random" test data:
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo .jPeg > textfiles/all_of_them.txt
echo $'E9408000EC0 \n e9408000ec0\nE9408\nbOgUs' > textfiles/490.txt
Running as
./a.out textfiles/ dataset/
Prints:
WARNING: "e9408000ec0" duplicates "E9408000EC0"
Processing 4 patterns from 2 classes
"textfiles/all_of_them" created
"textfiles/all_of_them/1200E9408000EC0.jpeg" (Success) from ".jPeg"
"textfiles/490" created
"textfiles/490/1200E9408000EC0.jpeg" (Success) from "E9408"
"textfiles/490/1200E9408000EC0.jpeg" (File exists) from "E9408000EC0"
"textfiles/all_of_them/vfv343434.jpeg" (Success) from ".jPeg"
"textfiles/all_of_them/fsdfdsfdfsf.jpeg" (Success) from ".jPeg"
Copied:4, missing:1, failed: 1
I am coding an ADTF recording file reader in C++. I have already read the header using the structure specified here
https://support.digitalwerk.net/adtf_libraries/adtf-streaming-library/v2/DATFileFormatSpecification.pdf
typedef struct tagFileHeader {
int ui32FileId;
int ui32VersionId;
int ui32Flags;
int ui32ExtensionCount;
long long ui64ExtensionOffset;
long long ui64DataOffset;
long long ui64DataSize;
long long ui64ChunkCount;
long long ui64MaxChunkSize;
long long ui64Duration;
long long ui64FileTime;
char ui8HeaderByteOrder;
long long ui64TimeOffset;
char ui8PatchNumber;
char _reserved[54];
char strDescription[1912];
} tFileHeader; // size is 2048 Bytes
I read the header:
ifstream file("myfile.dat", std::ifstream::binary);
char buffer[2048];
file.read(buffer, 2048);
const tagFileHeader* header = reinterpret_cast<const tagFileHeader*>(buffer);
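An aside on this reinterpret_cast approach (not part of the original question): with default alignment the compiler may lay the struct out differently from the 2048-byte on-disk format (for example, padding after ui8HeaderByteOrder before ui64TimeOffset), and fixed-width types from <cstdint> are safer than int/long long. A reduced, hypothetical sketch of the usual pattern of packing the struct and asserting its size:
#include <cstdint>

// Reduced illustration (not the full ADTF header): pack to 1-byte alignment so the
// in-memory layout matches the on-disk byte layout, then verify the total size.
#pragma pack(push, 1)
struct DiskHeader {
    std::uint32_t id;
    std::uint8_t  byte_order;
    std::uint64_t time_offset; // without packing, padding would normally precede this field
};
#pragma pack(pop)

static_assert(sizeof(DiskHeader) == 13, "layout must match the on-disk format exactly");

int main() {}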
And now I need to read the chunks. This is the chunks header, extracted from the same document
typedef struct tagChunkHeader {
long long ui64TimeStamp;
int ui32RefMasterTableIndex;
int ui32OffsetToLast;
int ui32Size;
short ui16StreamId;
short ui16Flags;
long long ui64StreamIndex;
} tChunkHeader; // size is 32 Bytes
Reading the chunks
for (int c = 0; c < header->ui64ChunkCount; ++c)
{
char chunkHeaderBuffer[32];
file.read(chunkHeaderBuffer, 32);
const tChunkHeader* chunk = reinterpret_cast<const tChunkHeader*>(chunkHeaderBuffer);
// Skip the chunk data
file.seekg(chunk->ui32Size, ios_base::cur);
}
I don't know how to interpret the chunk data. Is this specified in another document that I am missing?
Thanks
For the sake of completeness:
The chunk data layout depends on the original sample data and the serialization that was used, so there is no single data layout. You have to deserialize the chunk data with the correct deserialization implementation and can then interpret the deserialized data with the correct struct definition. The information about the used serialization is stored within the index extension of a stream.
As C-3PFLO has already stated, the adtf_file library does all this for you, but you need all the required deserializer plugins.
Here is an example (based on the upcoming ADTF File Library 0.5.0) of how to access DAT files and extend the reader with additional adtffileplugins. Use this to read DAT files which contain, e.g., FlexRay data recorded with ADTF 2.x:
/**
* @file
* ADTF File Access example
*
* @copyright
* @verbatim
Copyright © 2017 Audi Electronics Venture GmbH. All rights reserved.
This Source Code Form is subject to the terms of the Mozilla
Public License, v. 2.0. If a copy of the MPL was not distributed
with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file, then
You may include the notice in a location (such as a LICENSE file in a
relevant directory) where a recipient would be likely to look for such a notice.
You may add additional accurate notices of copyright ownership.
@endverbatim
*/
#include <adtf_file/standard_adtf_file_reader.h>
#include <stdio.h>
#include <iostream>
#include <sstream>
#include <map>
// initialize the ADTF File and Plugin mechanism
static adtf_file::Objects oObjects;
static adtf_file::PluginInitializer oInitializer([]
{
adtf_file::add_standard_objects();
});
void query_file_info(adtf_file::Reader& reader)
{
using namespace adtf_file;
//setup file version
uint32_t ifhd_version = reader.getFileVersion();
std::string adtf_version("ADTF 3 and higher");
if (ifhd_version < ifhd::v400::version_id)
{
adtf_version = "below ADTF 3";
}
//begin print
std::cout << std::endl << "File Header" << std::endl;
std::cout << "------------------------------------------------------------------------------" << std::endl;
std::cout << "File version : " << reader.getFileVersion() << " - " << adtf_version << std::endl;
std::cout << "Date : " << reader.getDateTime().format("%d.%m.%y - %H:%M:%S") << std::endl;
std::cout << "Duration : " << reader.getDuration().count() << std::endl;
std::cout << "Short description : " << getShortDescription(reader.getDescription()) << std::endl;
std::cout << "Long description : " << getLongDescription(reader.getDescription()) << std::endl;
std::cout << "Chunk count : " << reader.getItemCount() << std::endl;
std::cout << "Extension count : " << reader.getExtensions().size() << std::endl;
std::cout << "Stream count : " << reader.getStreams().size() << std::endl;
std::cout << std::endl << "Streams" << std::endl;
std::cout << "------------------------------------------------------------------------------" << std::endl;
auto streams = reader.getStreams();
for (const auto& current_stream : streams)
{
auto property_stream_type = std::dynamic_pointer_cast<const PropertyStreamType>(current_stream.initial_type);
if (property_stream_type)
{
std::string stream_meta_type = property_stream_type->getMetaType();
std::cout << "Stream #" << current_stream.stream_id << " : " << current_stream.name << std::endl;
std::cout << " MetaType : " << stream_meta_type << std::endl;
property_stream_type->iterateProperties(
[&](const char* name,
const char* type,
const char* value) -> void
{
std::cout << " " << name << " - " << value << std::endl;
});
}
}
}
class StreamsInfo
{
typedef std::map<uint16_t, std::chrono::microseconds> LastTimesMap;
typedef std::map<uint16_t, std::string> StreamNameMap;
public:
StreamsInfo(adtf_file::Reader& reader)
{
auto streams = reader.getStreams();
for (auto current_stream : streams)
{
_map_stream_name[current_stream.stream_id] = current_stream.name;
UpdateType(current_stream.stream_id, current_stream.initial_type);
}
}
~StreamsInfo() = default;
std::string GetDiffToLastChunkTime(const uint16_t& stream_id, const std::chrono::microseconds& current_time)
{
return GetLastTimeStamp(_map_last_chunk_time, stream_id, current_time);
}
std::string GetDiffToLastSampleStreamTime(const uint16_t& stream_id, const std::chrono::microseconds& current_time)
{
return GetLastTimeStamp(_map_last_stream_time, stream_id, current_time);
}
std::string GetStreamName(const uint16_t& stream_id)
{
return _map_stream_name[stream_id];
}
void UpdateType(const uint16_t& stream_id, const std::shared_ptr<const adtf_file::StreamType>& type)
{
auto property_stream_type = std::dynamic_pointer_cast<const adtf_file::PropertyStreamType>(type);
if (property_stream_type)
{
_map_stream_meta_type[stream_id] = property_stream_type->getMetaType();
}
}
std::string GetLastStreamMetaType(const uint16_t& stream_id)
{
return _map_stream_meta_type[stream_id];
}
private:
std::string GetLastTimeStamp(LastTimesMap& map_last_times,
const uint16_t& stream_id,
const std::chrono::microseconds& current_time)
{
std::chrono::microseconds result(-1);
LastTimesMap::iterator it = map_last_times.find(stream_id);
if (it != map_last_times.end())
{
result = current_time - it->second;
it->second = current_time;
}
else
{
if (current_time.count() != -1)
{
map_last_times[stream_id] = current_time;
}
}
if (result.count() >= 0)
{
return a_util::strings::format("%lld", result.count());
}
else
{
return "";
}
}
LastTimesMap _map_last_chunk_time;
LastTimesMap _map_last_stream_time;
StreamNameMap _map_stream_name;
StreamNameMap _map_stream_meta_type;
};
void access_file_data(adtf_file::Reader& reader, const std::string& csv_file_path)
{
using namespace adtf_file;
//load stream information
StreamsInfo stream_info(reader);
std::cout << std::endl << "File data" << std::endl;
std::cout << "------------------------------------------------------------------------------" << std::endl;
utils5ext::File csv_file;
csv_file.open(csv_file_path, utils5ext::File::om_append | utils5ext::File::om_write);
//set the labels
csv_file.writeLine("stream;stream_name;chunk_type;stream_type;chunk_time;samplestream_time;chunk_time_delta_to_lastofstream;samplestream_time_delta_to_lastofstream");
size_t item_count = 0;
for (;; ++item_count)
{
try
{
auto item = reader.getNextItem();
std::chrono::microseconds chunk_time = item.time_stamp;
std::string chunk_type;
auto type = std::dynamic_pointer_cast<const StreamType>(item.stream_item);
auto data = std::dynamic_pointer_cast<const Sample>(item.stream_item);
auto trigger = std::dynamic_pointer_cast<const Trigger>(item.stream_item);
std::chrono::microseconds sample_time(-1);
std::string sample_time_string("");
if (type)
{
//the type change is part of the
chunk_type = "stream_type";
stream_info.UpdateType(item.stream_id,
type);
}
else if (data)
{
chunk_type = "sample";
auto sample_data = std::dynamic_pointer_cast<const DefaultSample>(data);
if (sample_data)
{
sample_time = sample_data->getTimeStamp();
sample_time_string = a_util::strings::format("%lld", sample_time.count());
}
}
else if (trigger)
{
chunk_type = "trigger";
}
csv_file.writeLine(a_util::strings::format("%d;%s;%s;%s;%lld;%s;%s;%s",
static_cast<int>(item.stream_id),
stream_info.GetStreamName(item.stream_id).c_str(),
chunk_type.c_str(),
stream_info.GetLastStreamMetaType(item.stream_id).c_str(),
chunk_time.count(),
sample_time_string.c_str(),
stream_info.GetDiffToLastChunkTime(item.stream_id, chunk_time).c_str(),
stream_info.GetDiffToLastSampleStreamTime(item.stream_id, sample_time).c_str()
));
}
catch (const exceptions::EndOfFile&)
{
break;
}
}
csv_file.close();
}
adtf_file::Reader create_reader(const a_util::filesystem::Path& adtfdat_file_path)
{
//open file -> create reader from former added settings
adtf_file::Reader reader(adtfdat_file_path,
adtf_file::getFactories<adtf_file::StreamTypeDeserializers,
adtf_file::StreamTypeDeserializer>(),
adtf_file::getFactories<adtf_file::SampleDeserializerFactories,
adtf_file::SampleDeserializerFactory>(),
std::make_shared<adtf_file::sample_factory<adtf_file::DefaultSample>>(),
std::make_shared<adtf_file::stream_type_factory<adtf_file::DefaultStreamType>>());
return reader;
}
int main(int argc, char* argv[])
{
if (argc < 3 || argv[1] == NULL || argv[2] == NULL)
{
std::cerr << "usage: " << argv[0] << " <adtfdat> <csv> [<adtffileplugin> ...]" << std::endl;
return -1;
}
//set path for adtfdat|dat and csv file
a_util::filesystem::Path adtfdat_file = argv[1];
a_util::filesystem::Path csv_file = argv[2];
try
{
//verify adtf|dat file
if (("adtfdat" != adtfdat_file.getExtension())
&& ("dat" != adtfdat_file.getExtension()))
{
throw std::runtime_error(adtfdat_file + " is not valid, please use .adtfdat (ADTF 3.x) or .dat (ADTF 2.x).");
}
//verify csv file
if ("csv" != csv_file.getExtension())
{
throw std::runtime_error(csv_file + " is not valid, please use .csv for sample data export.");
}
//check for additional adtffileplugins
for (int i = 3; i < argc; i++)
{
a_util::filesystem::Path adtffileplugin = argv[i];
if ("adtffileplugin" == adtffileplugin.getExtension())
{
adtf_file::loadPlugin(adtffileplugin);
}
}
//setup reader
auto reader = create_reader(adtfdat_file);
//print information about adtfdat|dat file
query_file_info(reader);
//export sample data
access_file_data(reader, csv_file);
}
catch (const std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return -2;
}
return 0;
}
Is there any reason why you are trying to reimplement an ADTF DAT file reader? One is provided by the ADTF Streaming Library and gives access to any data stored in a DAT file. See the File Access Example (https://support.digitalwerk.net/adtf_libraries/adtf-streaming-library/v2/api/page_fileaccess.html) for how to use the reader, as well as the API itself and all the other examples.
Hint: You can also use the successor, the ADTF File Library, with the same possibilities but with two more benefits: it is completely open source, so you can see how the (adtf)dat file handling works, and it also supports files created with ADTF 3.x. See https://support.digitalwerk.net/adtf_libraries/adtf-file-library/v0/html/index.html
For those interested in downloading the Streaming Library, here is the link:
https://support.digitalwerk.net/projects/download-center/repository/show/adtf-libraries/adtf-streaming-library/release-2.9.0
First of all, I am using MSVC 2017 (latest version).
Here is my code for the nonterminal parser:
player.hpp
namespace parse
{
namespace impl
{
namespace x3 = boost::spirit::x3;
struct _tag;
using player_type = x3::rule<_tag, PlayerIterator>;
using player_vector_type = x3::rule<_tag, std::vector<PlayerIterator>>;
BOOST_SPIRIT_DECLARE(player_type);
BOOST_SPIRIT_DECLARE(player_vector_type);
}; //impl
impl::player_type player();
impl::player_vector_type player_vector();
}; //parse
player.cpp
namespace parse
{
namespace impl
{
const player_type player = "player";
const player_vector_type player_vector = "player_vector";
auto player_find = [](auto &ctx)
{
auto &attr = x3::_attr(ctx);
if(attr.which() == 0)
return x3::_val(ctx) = PlayerManager::find(boost::get<int>(attr));
return x3::_val(ctx) = PlayerManager::find(boost::get<std::string>(attr));
};
auto player_vector_find = [](auto &ctx)
{
return x3::_val(ctx) = PlayerManager::vector_find(x3::_attr(ctx));
};
auto const player_def = (x3::int_ | (+x3::char_))[player_find];
auto const player_vector_def = (((+x3::char_)[player_vector_find]));
BOOST_SPIRIT_DEFINE(player);
BOOST_SPIRIT_DEFINE(player_vector);
BOOST_SPIRIT_INSTANTIATE(player_type, iterator_type, context_type);
BOOST_SPIRIT_INSTANTIATE(player_vector_type, iterator_type, context_type);
} //impl
parse::impl::player_type player() { return impl::player; }
parse::impl::player_vector_type player_vector() { return impl::player_vector; }
}//parse
I get linker LNK2019 errors about "unresolved external symbols referenced":
Pastebin.com link with the errors
Any ideas about them?
Thanks in advance.
EDIT:
That's how I call it in my source file:
void test(std::string &params)
{
std::tuple<PlayerIterator, std::vector<PlayerIterator>, std::string> tuple;
if (!x3::phrase_parse(params.begin(), params.end(), parse::player()>> parse::player_vector() >> (+x3::char_), x3::space,tuple))
{
std::cout << "Error: Parsing failed" << std::endl;
return;
}
std::cout << "Parsing succeded" << std::endl;
std::cout << "Found player, size of player vector: "<< std::get<1>(tuple).size() << ", also parsed string:" << std::get<2>(tuple);
return;
};
I'm willing to bet $10 that you mismatched the context or iterator types on the instantiations.
E.g. in your test function, the argument is std::string&, hence params.begin() will be std::string::iterator. If you had the iterator_type configured as follows:
using iterator_type = std::string::const_iterator; // very sensible!
you would have unresolved externals because the iterator type doesn't match the one actually required.
Same thing for the context. To match your invocation it needs to be exactly:
using context_type = x3::phrase_parse_context<x3::space_type>::type;
Sadly you didn't show the whole code, so you'll have to check on your own.
Notes
re-using the tag type is a recipe for disaster. I don't think it can work. The rule tags are what dispatch the implementation function in the case of separate compilation units. Fix it:
using player_type = x3::rule<struct player_tag, PlayerIterator>;
using player_vector_type = x3::rule<struct player_vector_tag, std::vector<PlayerIterator>>;
copying the rules seems wasteful; consider returning by reference:
impl::player_type const& player();
impl::player_vector_type const& player_vector();
Note: this should be fine w.r.t. static initialization order fiasco
using which() on a variant is an anti-pattern. You can replace
auto player_find = [](auto &ctx) {
auto &attr = x3::_attr(ctx);
if (attr.which() == 0)
return x3::_val(ctx) = PlayerManager::find(boost::get<int>(attr));
return x3::_val(ctx) = PlayerManager::find(boost::get<std::string>(attr));
};
With
auto find = [](auto const& key) { return PlayerManager::find(key); };
auto player_find = [](auto &ctx) {
return x3::_val(ctx) = boost::apply_visitor(find, x3::_attr(ctx));
};
(+x3::char_) always matches all input
(+x3::graph) still matches all input because of the skipper
Instead you wanted a lexeme:
auto const name = x3::lexeme[+x3::graph];
auto const player_def = (x3::int_ | name) [player_find];
auto const player_vector_def = name[ player_vector_find];
May I suggest writing the test function a lot more concisely:
void test(std::string const &params) {
auto comment_ = x3::lexeme[+x3::char_];
PlayerIterator player;
PlayerIterators vec;
std::string comment;
auto tuple = std::tie(player, vec, comment);
if (phrase_parse(params.cbegin(), params.cend(), parse::player() >> parse::player_vector() >> comment_, x3::space, tuple)) {
std::cout << "Parsing succeded" << std::endl;
std::cout << "Found player, size of player vector: " << vec.size() << "\n";
std::cout << "Also parsed string: " << std::quoted(comment);
} else {
std::cout << "Error: Parsing failed" << std::endl;
}
}
Full Demo
See it Live On Wandbox
stuff.h
Contains mockup PlayerManager
#pragma once
#include <string>
#include <vector>
#include <iostream>
struct PlayerIterator { };
using PlayerIterators = std::vector<PlayerIterator>;
struct PlayerManager {
static PlayerIterator find(std::string const&) { std::cout << __PRETTY_FUNCTION__ << "\n"; return {}; }
static PlayerIterator find(int) { std::cout << __PRETTY_FUNCTION__ << "\n"; return {}; }
static PlayerIterators vector_find(std::string const&) { std::cout << __PRETTY_FUNCTION__ << "\n"; return {}; }
};
test.h
#pragma once
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted.hpp>
#include "stuff.h"
namespace x3 = boost::spirit::x3;
namespace parse
{
namespace impl
{
using player_type = x3::rule<struct player_tag, PlayerIterator>;
using player_vector_type = x3::rule<struct player_vector_tag, PlayerIterators>;
BOOST_SPIRIT_DECLARE(player_type)
BOOST_SPIRIT_DECLARE(player_vector_type)
} //impl
impl::player_type const& player();
impl::player_vector_type const& player_vector();
} //parse
test.cpp
#include "stuff.h"
#include "test.h"
using iterator_type = std::string::const_iterator;
using context_type = x3::phrase_parse_context<x3::space_type>::type;
namespace parse {
namespace impl {
const player_type player = "player";
const player_vector_type player_vector = "player_vector";
auto find = [](auto const& key) { return PlayerManager::find(key); } ;
auto player_find = [](auto &ctx) { return x3::_val(ctx) = boost::apply_visitor(find, x3::_attr(ctx)); } ;
auto player_vector_find = [](auto &ctx) { return x3::_val(ctx) = PlayerManager::vector_find(x3::_attr(ctx)); } ;
auto const name = x3::lexeme[+x3::graph];
auto const player_def = (x3::int_ | name) [player_find];
auto const player_vector_def = name[ player_vector_find];
BOOST_SPIRIT_DEFINE(player)
BOOST_SPIRIT_DEFINE(player_vector)
BOOST_SPIRIT_INSTANTIATE(player_type, iterator_type, context_type)
BOOST_SPIRIT_INSTANTIATE(player_vector_type, iterator_type, context_type)
} // namespace impl
parse::impl::player_type const& player() { return impl::player; }
parse::impl::player_vector_type const& player_vector() { return impl::player_vector; }
} // namespace parse
main.cpp
#include "stuff.h"
#include "test.h"
#include <iostream>
#include <iomanip>
void test(std::string const &params) {
auto comment_ = x3::lexeme[+x3::char_];
PlayerIterator player;
PlayerIterators vec;
std::string comment;
auto tuple = std::tie(player, vec, comment);
if (phrase_parse(params.cbegin(), params.cend(), parse::player() >> parse::player_vector() >> comment_, x3::space, tuple)) {
std::cout << "Parsing succeded" << std::endl;
std::cout << "Found player, size of player vector: " << vec.size() << "\n";
std::cout << "Also parsed string: " << std::quoted(comment);
} else {
std::cout << "Error: Parsing failed" << std::endl;
}
}
int main() {
test("42 someword # bogus trailing comment");
}
Prints:
static PlayerIterator PlayerManager::find(int)
static PlayerIterators PlayerManager::vector_find(const std::string &)
Parsing succeded
Found player, size of player vector: 0
Also parsed string: "# bogus trailing comment"
I'm trying to make a log macro with Boost, and my macro is not thread-safe. Here is my sample code:
Initialization
inline boost::shared_ptr<cilog_async_sink_t> init_async_logger(const std::string& app_name,
const std::string& target = "./log", int64_t rotation_size = 1024 * 1024 * 1024,
bool auto_flush = true) {
namespace expr = boost::log::expressions;
namespace attrs = boost::log::attributes;
namespace keywords = boost::log::keywords;
boost::log::add_common_attributes();
boost::shared_ptr<cilog_backend> backend(
new cilog_backend(boost::filesystem::path(target), app_name, rotation_size, auto_flush));
boost::shared_ptr<cilog_async_sink_t> sink(new cilog_async_sink_t(backend));
sink->set_formatter(
expr::stream << expr::format_date_time<boost::posix_time::ptime>("TimeStamp", "[%Y-%m-%d_%H:%M:%S.%f] ")
<< "[" << expr::attr<severity_level, severity_tag>("Severity") << "] "
<< "[" << expr::attr<attrs::current_process_id::value_type>("ProcessID") << "] "
<< "[" << expr::attr<attrs::current_thread_id::value_type>("ThreadID") << "] "
<< expr::smessage);
boost::log::core::get()->add_sink(sink);
cilog_async_sink_t::locked_backend_ptr p = sink->locked_backend();
return sink;
}
Format
class cilog_backend: public boost::log::sinks::basic_formatted_sink_backend<char,
boost::log::sinks::synchronized_feeding> {
private:
bool auto_flush_;
boost::filesystem::ofstream file_;
boost::filesystem::path target_path_;
boost::filesystem::path file_path_;
std::string file_name_suffix_;
uintmax_t rotation_size_;
uintmax_t characters_written_;
boost::gregorian::date current_date_;
public:
explicit cilog_backend(boost::filesystem::path const& target_path,
std::string const& file_name_suffix, uintmax_t rotation_size,
bool auto_flush) :
auto_flush_(auto_flush), target_path_(target_path), file_name_suffix_(file_name_suffix),
rotation_size_(rotation_size), characters_written_(0),
current_date_(boost::gregorian::day_clock::local_day()) {
}
void consume(boost::log::record_view const& /*rec*/,
string_type const& formatted_message) {
if (current_date_ != boost::gregorian::day_clock::local_day())
rotate_file();
if (!file_.is_open()) {
file_path_ = generate_filepath();
boost::filesystem::create_directories(file_path_.parent_path());
file_.open(file_path_, std::ofstream::out | std::ofstream::app);
if (!file_.is_open()) return; // failed to open file
characters_written_ = static_cast<std::streamoff>(file_.tellp());
}
file_.write(formatted_message.data(), static_cast<std::streamsize>(formatted_message.size()));
file_.put('\n');
characters_written_ += formatted_message.size() + 1;
if (auto_flush_)
file_.flush();
if ((file_.is_open() && (characters_written_ >= rotation_size_)) || (!file_.good()))
rotate_file();
}
};
I found some code for thread-safety in Boost:
cilog_async_sink_t::locked_backend_ptr p = sink->locked_backend();
I use it in my init, but it is not working.
Can anyone advise me?