I am trying to create a helper class to execute a system command and get response back with piping support. For the cases where I need to get the response only (no STDIN to consume for the command) it is working as expected, for pipe support, I am getting garbled STDIN and I can not find out the root cause.
The main function which handles this mechanism is (please ignore the minor error check issues)
the minimal working example
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <stdarg.h>
struct exec_cmd_t {
exec_cmd_t(std::vector<std::string> args) : args(args), has_executed(false), cpid(-1) { }
exec_cmd_t(const exec_cmd_t &) = delete;
exec_cmd_t(exec_cmd_t &&) = delete;
exec_cmd_t & operator=(const exec_cmd_t &) = delete;
exec_cmd_t & operator=(exec_cmd_t &&) = delete;
std::string operator()();
std::string pipe_cmd(const std::string & input);
std::string pipe_cmd();
~exec_cmd_t();
private:
std::vector<std::string> args;
bool has_executed;
int cpid;
std::stringstream in_stream;
std::stringstream out_stream;
friend std::string operator | (exec_cmd_t & first, exec_cmd_t & second);
friend std::string operator | (exec_cmd_t && first, exec_cmd_t && second);
friend std::string operator | (std::string, exec_cmd_t & second);
friend std::string operator | (std::string, exec_cmd_t && second);
};
std::string exec_cmd_t::pipe_cmd(const std::string & input) {
this->has_executed = true;
const int read_end = 0;
const int write_end = 1;
int read_pipe[2];
int write_pipe[2];
if (pipe(read_pipe) < 0 || pipe(write_pipe) < 0) {
this->has_executed = false;
return std::string{};
}
this->in_stream << input;
std::string line;
while(getline(this->in_stream, line)) {
if (line.size() == 0) {
continue;
}
int wr_sz = write(write_pipe[write_end], line.c_str(), line.size());
if (wr_sz <= 0) {
break;
}
write(write_pipe[write_end], "\n", 1);
}
close(write_pipe[write_end]);
this->cpid = fork();
if (this->cpid == 0) {
dup2(write_pipe[read_end], STDIN_FILENO);
dup2(read_pipe[write_end], STDOUT_FILENO);
close(read_pipe[read_end]);
close(write_pipe[write_end]);
close(read_pipe[write_end]);
close(write_pipe[read_end]);
prctl(PR_SET_PDEATHSIG, SIGTERM);
char * params[args.size()];
const char * image_path = args[0].c_str();
for(int i = 1; i < args.size(); i++) {
params[i-1] = const_cast<char *>(args[i].c_str());
}
params[args.size()] = nullptr;
execv(image_path, params);
exit(1);
}
close(read_pipe[write_end]);
close(write_pipe[read_end]);
char buff[256];
int rd_sz = -1;
int flags = fcntl(read_pipe[0], F_GETFL, 0);
fcntl(read_pipe[read_end], F_SETFL, flags | O_NONBLOCK);
int status = 0;
waitpid(this->cpid, &status, 0);
this->has_executed = false;
int error_code = 0;
while((rd_sz = read(read_pipe[read_end], buff, sizeof(buff))) > 0) {
buff[rd_sz] = '\0';
this->out_stream << std::string{buff};
}
close(read_pipe[read_end]);
return this->out_stream.str();
}
std::string exec_cmd_t::pipe_cmd() {
static std::string empty_str{};
return pipe_cmd(empty_str);
}
std::string exec_cmd_t::operator()() {
return pipe_cmd();
}
exec_cmd_t::~exec_cmd_t() {
if (this->has_executed) {
int status;
waitpid(this->cpid, &status, WNOHANG);
if (!WIFEXITED(status)) {
kill(this->cpid, SIGKILL);
waitpid(this->cpid, &status, 0);
}
}
}
std::string operator | (exec_cmd_t & first, exec_cmd_t & second) {
return second.pipe_cmd(first());
}
std::string operator | (exec_cmd_t && first, exec_cmd_t && second) {
return second.pipe_cmd(first());
}
std::string operator | (std::string output, exec_cmd_t & second) {
return second.pipe_cmd(output);
}
std::string operator | (std::string output, exec_cmd_t && second) {
return second.pipe_cmd(output);
}
int main() {
auto str = exec_cmd_t{ {"/bin/echo", "echo", "hello\nworld\nor\nnot"} } | exec_cmd_t{ {"/bin/grep", "grep", "world", "-"} };
std::cout << str << std::endl;
return 0;
}
gives me
grep: =V: No such file or directory
(standard input):world
It seems like grep is executing twice, one failing with no such file or directory and another one is succeeding. Any suggestion would be very helpful :-) .
Thanks in advance.
You have at east one cause for Undefined Behaviour that may cause your program to do what it does. You declare and use a VLA out-of-range like this:
char* params[args.size()];
...
params[args.size()] = nullptr;
execv(image_path, params);
This leaves the terminating char* in your params uninitialized so it could point anywhere. grep thinks it points at a filename, tries to open it and fails.
Since VLA:s aren't in the C++ standard, consider changing it to:
std::vector<char*> params(args.size());
...
params[args.size() - 1] = nullptr;
execv(image_path, params.data());
Another cause for concern is that you use ints where you should have used ssize_ts even though it's highly unlikely that you've read or written more than an int could handle.
After I made those changes, it started working and printed the expected world. I even added a third command to check it could handle it. Suggested changes:
14,15c14,15
< exec_cmd_t(std::vector<std::string> args) :
< args(args), has_executed(false), cpid(-1) {}
---
> exec_cmd_t(std::vector<std::string> Args) :
> args(Args), has_executed(false), cpid(-1), in_stream{}, out_stream{} {}
59c59
< int wr_sz = write(write_pipe[write_end], line.c_str(), line.size());
---
> ssize_t wr_sz = write(write_pipe[write_end], line.c_str(), line.size());
76c76
< char* params[args.size()];
---
> std::vector<char*> params(args.size());
78c78
< for(int i = 1; i < args.size(); i++) {
---
> for(decltype(args.size()) i = 1; i < args.size(); i++) {
81,82c81,82
< params[args.size()] = nullptr;
< execv(image_path, params);
---
> params[args.size() - 1] = nullptr;
> execv(image_path, params.data());
90c90
< int rd_sz = -1;
---
> ssize_t rd_sz = -1;
96c96
< int error_code = 0;
---
> // int error_code = 0; // unused
106,107c106
< static std::string empty_str{};
< return pipe_cmd(empty_str);
---
> return pipe_cmd({});
143c142,143
< exec_cmd_t{{"/bin/grep", "grep", "world", "-"}};
---
> exec_cmd_t{{"/bin/grep", "grep", "-A1", "hello"}} |
> exec_cmd_t{{"/bin/grep", "grep", "world"}};
I also realized that your program acts like a proxy between the piped commands, reading everything from one command and writing it to the next.
You could start all programs at the same time and setup the pipes between the started programs in one go. For three commands, you'd need three pipes:
cmd1 cmd2 cmd3
| w--r w--r |
stdin read output into program
or fed by your program
This would make performance and memory consumption less of an issue if you decide to run commands with a lot of output. Internally you'd would only need to store what you'd like to store by reading the output from the last command. I made a small test of this approach and it works like a charm.
Related
I made an exe binder (which can bind multiple exes without any error checking, anyway), it works as expected, only that the antivirus screams instantly :(
Here is the source code:
#undef UNICODE
#include <Windows.h>
#include <fstream>
#include <vector>
#include <string>
#define SEPARATOR "*****"
#define SEPARATOR_SIZE strlen(SEPARATOR)
void FindAllOccurrences(const std::string& data, const std::string& query, std::vector<size_t>& occurancesPoss) {
size_t pos = data.find(query);
while(pos != std::string::npos) {
occurancesPoss.push_back(pos);
pos = data.find(query, pos + query.size());
}
}
inline void FileAsString(const std::string& file, std::string& str, const std::ios_base::openmode iosOM = std::ios::binary) {
std::ifstream ifs(file, iosOM);
str.assign((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
}
void Bind(const std::vector<std::string>& files, const std::string& fileBinded, const std::string& fileOpener) {
std::ofstream ofs(fileBinded, std::ios::binary);
ofs << std::ifstream(fileOpener, std::ios::binary).rdbuf() << SEPARATOR;
size_t index = files.size();
for(auto& file : files) {
ofs << std::ifstream(file, std::ios::binary).rdbuf();
if(--index) {
ofs << SEPARATOR;
}
}
}
void Open(const std::string& file) {
std::string data;
FileAsString(file, data);
std::vector<size_t> occurancesPoss;
FindAllOccurrences(data, SEPARATOR, occurancesPoss);
std::vector<std::string> exes;
for(size_t i = 1; i < occurancesPoss.size() - 1; i++) {
std::string exeName(std::to_string(i) + ".exe");
std::ofstream ofs(exeName, std::ios::binary);
size_t exeStart = occurancesPoss[i] + SEPARATOR_SIZE;
ofs << data.substr(exeStart, occurancesPoss[i + 1] - exeStart);
exes.push_back(exeName);
}
{
std::string exeName(std::to_string(occurancesPoss.size() - 1) + ".exe");
std::ofstream ofs(exeName, std::ios::binary);
ofs << data.substr(occurancesPoss.back() + SEPARATOR_SIZE);
exes.push_back(exeName);
}
for(auto& exe : exes) {
SetFileAttributes(exe.c_str(), FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY);
ShellExecute(nullptr, "open", exe.c_str(), nullptr, nullptr, SW_NORMAL);
}
}
int main(int argc, char** argv) {
if(argc > 1) {
Bind(std::vector<std::string>(&argv[1], argv + argc - 1), argv[argc - 1], argv[0]);
} else {
Open(argv[0]);
}
return 0;
}
My question is, what makes an exe binder undetectable and how to make it.
I think that the opener code should be the one which needs to be changed. Correct me if I am wrong.
If you got any feedback for the code hit me up. (about the error checking.. I didn't add it for the sake of simplicity).
Thank you in advance!
What is the purpose of this application? You're using Windows API functions that are mosst commonly used in bad software. That's why the Anti Virus scanners are reacting.
I would like to create unique files, appending a number to their name if neccessary (similar to how browsers usually name downloaded files).
Here is what I am trying to do, but with directories (using the <filesystem> library) :
#include <filesystem>
namespace fs = std::filesystem;
template <typename... Args> std::string concatenate(Args&&... args) {
std::ostringstream sstr;
(sstr << ... << std::forward<Args>(args));
return sstr.str();
}
fs::path unique_dir(const fs::path &base, unsigned max_tries) {
if(fs::create_directories(base))
return base;
for(unsigned i = 1; i < max_tries; ++i) {
fs::path p = base;
p += concatenate('_', i);
if(fs::create_directory(p))
return p;
}
throw std::runtime_error("unique_dir: gave up");
}
int main() {
unique_dir("my_dir", 3); // creates my_dir
unique_dir("my_dir", 3); // creates my_dir_1
unique_dir("my_dir", 3); // creates my_dir_2
unique_dir("my_dir", 3); // throws
}
How can I do the same with files ?
Some precisions :
It does not need to be highly-performing (it is for a very cold part of the code)
Non-cross-platform is fine, as long as there is a variant for Linux, Windows and Mac
I do not want to use a mkstemp-type function that would require to put a non-user-friendly id in the filename
Thank you in advance.
With directories, you are checking if the creation of a directory worked. With files you can achieve this effect by checking if the particular path is pointing at an existing file, and if not, then creating it:
fs::path unique_file(const fs::path &base, unsigned max_tries) {
if(!fs::exists(base)) {
std::ofstream ofs(base);
return base;
}
for(unsigned i = 1; i < max_tries; ++i) {
fs::path p = base;
p += concat() << '_' << i;
if(!fs::exists(p)) {
std::ofstream ofs(p);
return p;
}
}
throw std::runtime_error("unique_file: gave up");
}
Here is a solution for POSIX systems, using open():
#include <fcntl.h> // open()
#include <unistd.h> // close()
fs::path unique_file(const fs::path &base, unsigned max_tries) {
fs::path p(base);
for(unsigned i = 1; i <= max_tries; ++i) {
// O_CREAT | O_EXCL will create the file if it does not exist, and fail otherwise
// 0666 : Read + Modify permissions for everybody
int fd = open(p.c_str(), O_CREAT | O_EXCL, 0666);
if(fd != -1) {
if(close(fd) == -1) // We immediately close the file, which might be a waste
throw fs::filesystem_error("unique_file: POSIX close() error", p, std::error_code(errno, std::generic_category()));
return p;
}
// EEXIST is set if open() failed because the file already existed
if(errno != EEXIST)
throw fs::filesystem_error("unique_file: POSIX open() error", p, std::error_code(errno, std::generic_category()));
errno = 0;
p = base.parent_path() / base.stem();
p += concatenate('_', i);
p += base.extension();
}
throw std::runtime_error("unique_file: gave up");
}
Windows seems to offer the _sopen_s function which offers the same _O_CREAT | _O_EXCL flag combination.
I`m trying to parallelize reading and processing archives, that are already in memory, but I get data race while calling libarchive function in a thread. Google sanitizer says that problem is in calling archive_read_open_memory function inside get_archive_contens. But I read that all functions in libarchive should be thread-safe. Can anyone tell me what I did wrong?
Here is my thread code.
void indexing_thread(std::mutex &m,
int ¤t_indexing_threads, concurrent_queue<std::pair<std::string, std::string>> &raw_files_q,
concurrent_queue<std::map<std::string, size_t>> &words_q) {
while (true) {
auto raw_file = raw_files_q.front();
std::string file_buffer = raw_file.first;
std::string ext = raw_file.second;
if (file_buffer.empty() && ext.empty()) {
break;
}
raw_files_q.pop();
std::string file_content;
if (ext == ".zip") {
auto archive_contents = get_archive_content(file_buffer);
for (int i = 0; i < archive_contents.size(); ++i) {
auto cur_ext = boost::filesystem::extension(archive_contents[i]);
if (cur_ext == ".txt") {
file_content = get_archive_file_contents(archive_contents[i], archive_contents, file_buffer);
file_content = convert_to_normalized_utf_string(file_content);
}
}
for (int i = 0; i < archive_contents.size(); ++i) {
auto cur_ext = boost::filesystem::extension(archive_contents[i]);
if (cur_ext == ".txt") {
file_content = get_archive_file_contents(archive_contents[i], archive_contents, file_buffer);
file_content = convert_to_normalized_utf_string(file_content);
}
}
}
auto words = word_count_map_nonparallel(file_content);
words_q.push_back(words);
}
m.lock();
current_indexing_threads--;
if (current_indexing_threads == 0) {
words_q.push_back(std::map<std::string, size_t>{});
}
m.unlock();
}
get_archive_content code:
std::string
get_archive_file_contents(const std::string &filename, std::vector<std::string> contents, std::string file_buffer) {
if (std::find(contents.begin(), contents.end(), filename) == contents.end()) {
throw FileDoesNotExistsException(filename);
}
struct archive_entry *entry;
struct archive *archive = archive_read_new();
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
archive_read_support_format_raw(archive);
archive_read_support_format_empty(archive);
int reading_result = archive_read_open_memory(archive, file_buffer.data(), file_buffer.size());
if (reading_result != 0) {
throw std::runtime_error("Error reading archive");
}
void *buf;
int64_t length;
while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
if (archive_entry_filetype(entry) == AE_IFREG) {
length = archive_entry_size(entry);
buf = malloc(length);
if (!buf) {
archive_read_data_skip(archive);
continue;
}
archive_read_data(archive, buf, length);
break;
}
}
std::string result = static_cast<char *>(buf);
return result;
}
UPD: Google thread sanitizer report
I worked it out. The problem was in binary, that gets installed from ubuntu repositories. I solved this problem install libarchive from the sources.
I have coded a program that uses mmap as input to fill a integer 2D vector from a .txt file. The code is part of a larger program and will be submitted to a competition. Is there a way to improve the speed using mmap, or by using a different way all together? Here is the code:
#include <fstream>
#include <vector>
#include <algorithm>
#include <cstring>
#include <iostream>
// for mmap:
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
using namespace std;
const char* map_file(const char* fname, size_t& length);
int main()
{
auto start = std::chrono::high_resolution_clock::now();
size_t length;
auto f = map_file("erasmus.in", length);
auto l = f + length;
int i = 0;
bool flag = false;
string lines;
vector<vector<int> > students(10000); //The number of lines is predefined
const char* temp;
while (f && f!=l) {
string element = "";
temp = static_cast<const char*>(memchr(f, '\n', l-f));
for(f = f; f<=temp; f++)
{
if(!isspace(*f))
{
element += *f;
flag = true;
}
if(isspace(*f) && flag == true)
{
flag = false;
int assigned_element = stoi(element);
students[i].push_back(assigned_element);
element = "";
}
}
i++;
temp++;
}
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed1 = finish - start;
FILE* output = fopen("erasmus.out", "w");
for (int i = 0; i < students.size(); i++)
{
for (int j = 0; j < students[i].size(); j++)
{
fprintf(output, "%d ", students[i][j]);
}
fprintf(output, "\n");
}
std::cout << "Elapsed time: " << elapsed1.count() << " s\n";
return 0;
}
void handle_error(const char* msg) {
perror(msg);
exit(255);
}
const char* map_file(const char* directory, size_t& length)
{
int fileDirectory = open(directory, O_RDONLY);
if (fileDirectory == -1)
handle_error("open");
// obtain file size
struct stat sb;
if (fstat(fileDirectory, &sb) == -1)
handle_error("fstat");
length = sb.st_size;
const char* map = static_cast<const char*>(mmap(NULL, length, PROT_READ, MAP_PRIVATE, fileDirectory, 0u));
if (map == MAP_FAILED)
handle_error("mmap");
return map;
}
The file will be executed on a linux system, if this helps to find the optimal answer. At the end of each line of the .txt
there is a space character (' ') and a newline('\n')
it need a way to call function whose name is stored in a string similar to eval. Can you help?
C++ doesn't have reflection so you must hack it, i. e.:
#include <iostream>
#include <map>
#include <string>
#include <functional>
void foo() { std::cout << "foo()"; }
void boo() { std::cout << "boo()"; }
void too() { std::cout << "too()"; }
void goo() { std::cout << "goo()"; }
int main() {
std::map<std::string, std::function<void()>> functions;
functions["foo"] = foo;
functions["boo"] = boo;
functions["too"] = too;
functions["goo"] = goo;
std::string func;
std::cin >> func;
if (functions.find(func) != functions.end()) {
functions[func]();
}
return 0;
}
There are at least 2 alternatives:
The command pattern.
On windows, you can use GetProcAddress to get a callback by name, and dlopen + dlsym on *nix.
#include <iostream>
#include <fstream>
#include <cstdlib>
using namespace std;
double eval( string expression );
int main( int argc, char *argv[] )
{
string expression = "";
for ( int i = 1; i < argc; i++ )
{
expression = expression + argv[i];
}
cout << "Expression [ " << expression << " ] = " << endl;
eval( expression );
}
double eval( string expression )
{
string program = "";
program = program + "#include <cmath>\n";
program = program + "#include <iostream>\n";
program = program + "using namespace std;\n";
program = program + "int main()\n";
program = program + "{\n";
program = program + " cout << ";
program = program + expression;
program = program + " << endl;\n";
program = program + "}";
ofstream out( "abc.cpp" );
out << program;
out.close();
system( "g++ -o abc.exe abc.cpp" );
system( "abc" );
}
You could try to adopt an existing scripting engine, expose the functions you like to this and then use this to evaluate your statements. One such enging could be the V8 engine: https://developers.google.com/v8/intro but there are many alternatives and different languages to choose from.
Here are some examples:
Boost Python
V8
LUA
AngelScript
Except using the function map in the program and hack it on the Makefile, you can access it through ELF.
I think this method is better as it did not need to write duplicate code and compile it every time on different machine.
Here is my demo C/C++ equivalent of eval(“function(arg1, arg2)”)
#include<stdio.h>
#include<stdlib.h>
#include<elf.h>
#include<libelf.h>
#include<unistd.h>
#include<fcntl.h>
#include<gelf.h>
#include<string.h>
void my_fun()
{
int a = 19;
printf("my_fun is excute, a is %d \n", a);
}
void my_fun2()
{
printf("my_fun2 is excute\n");
return;
}
void my_fun3()
{
return;
}
void excute_fun(char *program_name, char *function_name)
{
int i, count;
Elf32_Ehdr *ehdr;
GElf_Shdr shdr;
Elf *elf;
Elf_Scn *scn = NULL;
Elf_Data *data;
int flag = 0;
int fd = open(program_name, O_RDONLY);
if(fd < 0) {
perror("open\n");
exit(1);
}
if(elf_version(EV_CURRENT) == EV_NONE) {
perror("elf_version == EV_NONE");
exit(1);
}
elf = elf_begin(fd, ELF_C_READ, (Elf *) NULL);
if(!elf) {
perror("elf error\n");
exit(1);
}
/* Elf32_Off e_shoff; */
/* if ((ehdr = elf32_getehdr(elf)) != 0) { */
/* e_shoff = ehdr->e_shoff; */
/* } */
/* scn = elf_getscn(elf, 0); */
/* printf("e_shoff is %u\n", e_shoff); */
/* scn += e_shoff; */
while ((scn = elf_nextscn(elf, scn)) != NULL) {
gelf_getshdr(scn, &shdr);
if (shdr.sh_type == SHT_SYMTAB) {
/* found a symbol table. */
break;
}
}
data = elf_getdata(scn, NULL);
if(!shdr.sh_entsize)
count = 0;
else
count = shdr.sh_size / shdr.sh_entsize;
for (i = 0; i < count; ++i) {
GElf_Sym sym;
gelf_getsym(data, i, &sym);
char *sym_name = elf_strptr(elf, shdr.sh_link, sym.st_name);
if(sym_name != NULL && sym_name[0] != '_' && sym_name[0] != '\0' && sym_name[0] != ' ' && sym.st_value != 0)
{
/* printf("sym_name is %s\n", sym_name); */
/* printf("%s = %X\n", elf_strptr(elf, shdr.sh_link, sym.st_name), sym.st_value); */
if(!strcmp(sym_name, function_name)) {
void (*fun)(void) = (void*)sym.st_value;
(*fun)();
flag = 1;
}
}
}
if(!flag)
printf("can not find this function\n");
elf_end(elf);
close(fd);
}
int main(int argc, char *argv[])
{
char *input = (char*)malloc(100);
for(;;) {
printf("input function_name to excute: ");
scanf("%s", input);
excute_fun(argv[0], input);
memset(input, 0, sizeof(input));
printf("\n");
}
free(input);
return 0;
}
This implementation is based on Example of Printing the ELF Symbol Table