I have a few thousand pcap files that I'm trying to parse as part of a research project. They are all named as xxx.xxx.xxx.xxx_yyy.yyy.yyy.yyy.pcap where the first IP address is the one I'm trying to use as a variable in my C++ program.
Parsing the pcap files themselves is not an issue. I am passing the filename to the function as a pointer and just don't quite know how to grab that first part of the filename.
As requested, here is a bit of code:
//program.cpp//
/// Entry point: hands the capture file named by the first command-line
/// argument to parsePkts().
/// @return 1 when no file name was supplied, otherwise 0.
int main(int argc, char *argv[]){
    // argv[1] is only valid when at least one argument was passed.
    if (argc < 2) {
        return 1;
    }
    char *inFile = argv[1];
    // `result` was never declared in the original snippet; parsePkts returns int.
    int result = parsePkts(inFile);
    (void)result;  // the status is not acted upon here
    return 0;
}
//functions.h//
// Sketch of the packet-parsing routine that receives the capture file name
// in `fn`.
// NOTE(review): this is abbreviated pseudo-code, not compilable C++ -- `data`
// is not declared in the visible snippet, `xxx.xxx.xxx.xxx` and `do stuff`
// are placeholders, the parentheses on the `if` line are unbalanced, and no
// value is returned despite the int return type.
int parsePkts(char *fn){
struct ip *ipHdr = NULL;
// View the bytes that follow an Ethernet header as an IP header
// (presumably `data` points at the start of a captured frame -- confirm).
ipHdr = (struct ip *)(data + sizeof(struct ether_header));
// Placeholder comparison of the destination address against the address that
// should come from the file name.
if((ipHdr -> ip_dst.s_addr)) == xxx.xxx.xxx.xxx) {
do stuff
}
}
Obviously there is a lot more to the program but this is where I need to grab it. Thanks.
If your input filename is a const char*, you can split it in several ways. You wrote that you need the first part of the name; here is a small snippet that splits a string on a separator character, in your case '_':
/// Splits `str` into tokens and appends every non-empty token to `results`.
///
/// @param str        Text to split.
/// @param separator  C string whose characters each act as a separator. A
///                   null pointer is treated as "no separators", so the whole
///                   (non-empty) input becomes a single token.
/// @param results    Receives the tokens in order; existing content is kept.
void stringSplitBy(std::string str, const char *separator, std::vector<std::string> &results)
{
    // find_first_of() on a null pointer is undefined behaviour; degrade to
    // "nothing to split on" instead.
    if (!separator) {
        if (!str.empty()) {
            results.push_back(str);
        }
        return;
    }

    // Walk the string with an index instead of re-copying the remainder after
    // every token, which made the original quadratic in the input length.
    std::string::size_type start = 0;
    while (start < str.size()) {
        const std::string::size_type stop = str.find_first_of(separator, start);
        if (stop == std::string::npos) {
            results.push_back(str.substr(start));  // trailing token
            return;
        }
        if (stop > start) {  // skip empty tokens between adjacent separators
            results.push_back(str.substr(start, stop - start));
        }
        start = stop + 1;
    }
}
Use it in this way:
// Example: split a capture file name at the underscore.
const char* inputfname = "xxx.xxx.xxx.xxx_yyy.yyy.yyy.yyy.pcap";
std::string fname = std::string(inputfname);
std::vector<std::string> results;
// The separator parameter is a C string (const char*), so pass "_" -- the
// character literal '_' does not convert to a pointer and fails to compile.
stringSplitBy(fname, "_", results);
You can print result:
std::vector<std::string>::iterator it = results.begin();
for (; it != results.end(); ++it)
{
std::cout<< (*it).c_str() << std::endl;
}
Related
So I've made a program that reads in various config files. Some of these config files can be small, some can be semi-large (largest one is 3,844 KB).
The read in file is stored in a string (in the program below it's called sample).
I then have the program extract information from the string based on various formatting rules. This works well, the only issue is that when reading larger files it is very slow....
I was wondering if there was anything I could do to speed up the parsing, or if there was an existing library that does what I need (extract a string up to a delimiter, and extract the string between two delimiters on the same nesting level). Any assistance would be great.
Here's my code & a sample of how it should work...
#include "stdafx.h"
#include <string>
#include <vector>
/// Removes and returns the leading part of `original_string` up to a
/// delimiter.
///
/// @param original_string     Text to consume from. The returned prefix and
///                            the delimiter that ends it are erased from it.
/// @param delimiter           Delimiter text; may be longer than one char.
/// @param delimiters_to_skip  Which occurrence ends the prefix (1 = first,
///                            2 = second, ...). When fewer occurrences exist,
///                            the last one found is used. Values <= 0 return
///                            an empty string and consume nothing.
/// @return The extracted prefix with one leading and one trailing double
///         quote stripped. When the delimiter does not occur at all, the
///         whole string is returned and `original_string` is cleared.
std::string ExtractStringUntilDelimiter(
    std::string& original_string,
    const std::string& delimiter,
    const int delimiters_to_skip = 1)
{
    std::string needle;

    if (original_string.find(delimiter) != std::string::npos)
    {
        size_t cut = std::string::npos;
        size_t search_from = 0;
        for (int found = 0; found < delimiters_to_skip; ++found)
        {
            // Resume after the previous hit; restarting at 0 (as the original
            // did) finds the same occurrence over and over.
            const size_t hit = original_string.find(delimiter, search_from);
            if (hit == std::string::npos)
            {
                break;
            }
            cut = hit;
            search_from = hit + delimiter.size();
        }

        if (cut != std::string::npos)
        {
            needle = original_string.substr(0, cut);
            // Drop the prefix plus the whole delimiter, not just one char.
            original_string.erase(0, cut + delimiter.size());
        }
    }
    else
    {
        needle = original_string;
        original_string.clear();
    }

    // Strip one surrounding double quote on each side, if present.
    if (!needle.empty() && needle[0] == '\"')
    {
        needle.erase(0, 1);
    }
    if (!needle.empty() && needle[needle.length() - 1] == '\"')
    {
        needle.erase(needle.length() - 1);
    }
    return needle;
}
/// Removes every leading occurrence of `delimiter` from `original_string`.
///
/// @param original_string  String to trim in place.
/// @param delimiter        Character to strip from the front.
void ExtractInitialDelimiter(
    std::string& original_string,
    const char delimiter)
{
    // One erase of the whole leading run instead of one erase per character,
    // which shifted the rest of the string each time. find_first_not_of()
    // returns npos when nothing else is left, and erase(0, npos) then clears
    // the string.
    original_string.erase(0, original_string.find_first_not_of(delimiter));
}
/// Strips every leading and every trailing occurrence of `delimiter` from
/// `original_string`, in place.
void ExtractInitialAndFinalDelimiters(
    std::string& original_string,
    const char delimiter)
{
    ExtractInitialDelimiter(original_string, delimiter);

    // Position of the last character that must be kept, if there is one.
    const std::string::size_type last_kept = original_string.find_last_not_of(delimiter);
    if (last_kept == std::string::npos)
    {
        original_string.clear();
    }
    else
    {
        original_string.erase(last_kept + 1);
    }
}
/// Extracts the text enclosed by the first `opening_delimiter` and its
/// matching `closing_delimiter`, honouring nested pairs.
///
/// On success the delimited section (delimiters included) is erased from
/// `original_string`, and newline characters are stripped from both ends of
/// the returned text and of what is left in `original_string`.
///
/// @param original_string    Text to search; modified only on success.
/// @param opening_delimiter  Text that opens a section.
/// @param closing_delimiter  Text that closes a section.
/// @return The enclosed text, or an empty string when no opening delimiter or
///         no matching closing delimiter is found.
std::string ExtractStringBetweenDelimiters(
    std::string& original_string,
    const std::string& opening_delimiter,
    const std::string& closing_delimiter)
{
    const size_t first_delimiter = original_string.find(opening_delimiter);
    if (first_delimiter == std::string::npos)
    {
        return "";
    }

    const size_t opening_index = first_delimiter + opening_delimiter.size();
    int total_open = 1;  // nesting depth; the first opening delimiter counts

    for (size_t i = opening_index; i < original_string.size(); i++)
    {
        // compare() clamps to the end of the string, so a delimiter that no
        // longer fits simply does not match. Empty delimiters never match.
        if (!opening_delimiter.empty() &&
            original_string.compare(i, opening_delimiter.size(), opening_delimiter) == 0)
        {
            total_open++;
        }
        if (!closing_delimiter.empty() &&
            original_string.compare(i, closing_delimiter.size(), closing_delimiter) == 0)
        {
            total_open--;
        }

        if (total_open == 0)
        {
            // `i` is where the matching closing delimiter starts.
            std::string needle = original_string.substr(opening_index, i - opening_index);

            // erase() takes (position, count). The original passed the end
            // index as the count, which removed too much whenever the opening
            // delimiter was not at index 0.
            original_string.erase(first_delimiter,
                                  i + closing_delimiter.size() - first_delimiter);

            // Remove new line symbols
            ExtractInitialAndFinalDelimiters(needle, '\n');
            ExtractInitialAndFinalDelimiters(original_string, '\n');
            return needle;
        }
    }
    return "";
}
/// Demo: pulls two lines and a nested two-line block out of a sample document
/// and prints them.
int main()
{
    // Two plain lines followed by a brace-delimited sub-block, all wrapped in
    // an outer pair of braces.
    std::string sample(
        "{\n"
        "Line1\n"
        "Line2\n"
        "{\n"
        "SubLine1\n"
        "SubLine2\n"
        "}\n"
        "}");

    // Each extraction consumes what it returns from the string it is given.
    std::string outer_body = ExtractStringBetweenDelimiters(sample, "{", "}");
    std::string first_line = ExtractStringUntilDelimiter(outer_body, "\n");
    std::string second_line = ExtractStringUntilDelimiter(outer_body, "\n");

    std::string inner_body = ExtractStringBetweenDelimiters(outer_body, "{", "}");
    std::string first_sub_line = ExtractStringUntilDelimiter(inner_body, "\n");
    std::string second_sub_line = ExtractStringUntilDelimiter(inner_body, "\n");

    // Just for testing...
    printf("LineOne: %s\n", first_line.c_str());
    printf("LineTwo: %s\n", second_line.c_str());
    printf("\tSubLineOne: %s\n", first_sub_line.c_str());
    printf("\tSubLineTwo: %s\n", second_sub_line.c_str());

    system("pause");
}
Use string_view or a hand rolled one.
Don't modify the string loaded.
original_string.erase(0, occurance_index + 1);
is code smell and going to be expensive with a large original string.
If you are going to modify something, do it in one pass. Don't repeatedly delete from the front of it -- that is O(n^2). Instead, proceed along it and shove "finished" stuff into an output accumulator.
This will involve changing how your code works.
You're reading your data into a string. "Length of string" should not be a problem. So far, so good...
You're using "string.find().". That's not necessarily a bad choice.
You're using "string.erase()". That's probably the main source of your problem.
SUGGESTIONS:
Treat the original string as "read-only". Don't call erase(), don't modify it.
Personally, I'd consider reading your text into a C string (a text buffer), then parsing the text buffer, using strstr().
Here is a more efficient version of ExtractStringBetweenDelimiters. Note that this version does not mutate the original buffer. You would perform subsequent queries on the returned string.
/// Returns `buffer` without the leading and trailing runs of `what`.
/// A buffer made up only of `what` (or an empty one) yields an empty string.
std::string trim(std::string buffer, char what)
{
    const std::string::size_type head = buffer.find_first_not_of(what);
    if (head == std::string::npos)
    {
        return std::string();
    }
    // A kept character exists, so the backward search cannot fail either.
    const std::string::size_type tail = buffer.find_last_not_of(what);
    return buffer.substr(head, tail - head + 1);
}
/// Returns the text between the first `opening_delimiter` and the last
/// `closing_delimiter` in `buffer`, with newline characters stripped from
/// both ends. `buffer` itself is not modified.
///
/// @return The enclosed text, or an empty string when either delimiter is
///         missing or the closing one does not come after the opening one.
std::string ExtractStringBetweenDelimiters(
    std::string const& buffer,
    const char opening_delimiter,
    const char closing_delimiter)
{
    std::string result;

    auto first = std::find(buffer.begin(), buffer.end(), opening_delimiter);
    if (first != buffer.end())
    {
        // Search backwards, but only through the characters after the
        // opening delimiter.
        const auto content_begin = first + 1;
        const auto search_end = std::make_reverse_iterator(content_begin);
        const auto closing = std::find(buffer.rbegin(), search_end, closing_delimiter);
        if (closing != search_end)
        {
            // base() points one past the element the reverse iterator refers
            // to; step back so the closing delimiter is not copied into the
            // result.
            const auto content_end = closing.base() - 1;
            result.assign(content_begin, content_end);
            result = trim(std::move(result), '\n');
        }
    }
    return result;
}
If you have access to string_view (c++17 for std::string_view or boost::string_view) you could return one of these from both functions for extra efficiency.
It's worth mentioning that this method of parsing a structured file is going to cause you problems down the line if any of the serialised strings contains a delimiter, such as a '{'.
In the end you'll want to write or use someone else's parser.
The boost::spirit library is a little complicated to learn, but creates very efficient parsers for this kind of thing.
I am trying to write my own operating system. I have followed the tutorials on the OSDev Wiki, and I am now working on writing a console mode, with commands. I need to be able to split a char* into a char**, without all the library functionality (hence freestanding). I have tried iterating through until I meet my delimiter etc, but however I do it, I just get garbage stuck on the end of my first result. What am I doing wrong? This is what I have so far:
static char** splitStr (char* string, char delim) {
char returner[VGA_WIDTH][255];
int loc = 0;
int innerLoc = 0;
for (int i = 0; string[i] != 0x00; i++) {
char c = string[i];
if (c != delim) {
returner[loc][innerLoc] = c;
innerLoc++;
} else {
print ("a string was ");
println (returner[loc]);
innerLoc = 0;
loc++;
}
}
print ("the first string was ");
println (returner[0]);
return (char**)returner;
}
I am asking a question about how to write a specific function in C++ freestanding mode.
// Splits `str` at every occurrence of `d` and copies the pieces into the
// caller-provided buffers into[0], into[1], ...
//
// Every piece is NUL-terminated, including the last one when `str` does not
// end with the delimiter, so the buffers need not be zeroed beforehand. The
// caller must supply enough buffers, each large enough for its piece plus the
// terminator; no bounds are checked here. Does nothing when `str` or `into`
// is NULL.
void split(const char* str, const char d, char** into)
{
    if (str == NULL || into == NULL)
    {
        return;
    }

    int n = 0;  // index of the buffer being filled
    int i = 0;  // write position inside that buffer
    for (const char* p = str; *p != '\0'; ++p)
    {
        if (*p == d)
        {
            into[n][i] = '\0';
            ++n;
            i = 0;
        }
        else
        {
            into[n][i] = *p;
            ++i;
        }
    }

    // Terminate a piece that was still open at the end of the input. Nothing
    // is written when the input ended on a delimiter, so no extra buffer is
    // touched in that case.
    if (i > 0)
    {
        into[n][i] = '\0';
    }
}
I'm allocating using calloc to get rid of garbage characters.
EDIT: You should allocate the pointers inside the char** before writing to them.
// Fills pointers[0 .. slots-1] with freshly allocated, zero-initialised
// buffers of `bytes` bytes each.
//
// An entry is NULL when its allocation fails. The caller owns the buffers and
// releases each one with free(). Does nothing when `pointers` is NULL.
void allocarr(char** pointers, int bytes, int slots)
{
    if (pointers == NULL)
    {
        return;
    }
    // `i < slots`, not `<=`: the original wrote slots + 1 entries and so ran
    // one element past a table sized for `slots` pointers.
    for (int i = 0; i < slots; ++i)
    {
        pointers[i] = (char*)calloc(1, bytes);
    }
}
...
// Example: a table of 50 row pointers, 512 bytes per row, then split a sample
// string and print its first piece.
const int slot_count = 50;
const int slot_bytes = 512;
char** sa = (char**)malloc(slot_count * sizeof(char*));
allocarr(sa, slot_bytes, slot_count);
split("Hello;World;", ';', sa);
puts(sa[0]);
I'm working on a text tokenizer. ICU is one of very few C++ libraries that have this feature, and probably the best maintained one, so I'd like to use it.
I've found the docs about BreakIterator, but there's one problem with it: how do I leave the punctuation out?
#include "unicode/brkiter.h"
#include <QFile>
#include <vector>
std::vector<QString> listWordBoundaries(const UnicodeString& s)
{
UErrorCode status = U_ZERO_ERROR;
BreakIterator* bi = BreakIterator::createWordInstance(Locale::getUS(), status);
std::vector<QString> words;
bi->setText(s);
for (int32_t p = bi->first(), prevBoundary = 0; p != BreakIterator::DONE; prevBoundary = p, p = bi->next())
{
const auto word = s.tempSubStringBetween(prevBoundary, p);
char buffer [16384];
word.toUTF8(CheckedArrayByteSink(buffer, 16384));
words.emplace_back(QString::fromUtf8(buffer));
}
delete bi;
return words;
}
/// Reads the text file, splits it with listWordBoundaries() and writes one
/// segment per line.
/// @return 0 on success, 1 when a file cannot be opened.
int main(int /*argc*/, char * /*argv*/ [])
{
    QFile f("E:\\words.TXT");
    if (!f.open(QFile::ReadOnly))
    {
        return 1;
    }
    // Read everything before the output file is opened: opening a file
    // WriteOnly truncates it, and both paths below are the same.
    // NOTE(review): input and output share one path -- confirm this is intended.
    const QByteArray strData = f.readAll();
    f.close();

    QFile result("E:\\words.TXT");
    if (!result.open(QFile::WriteOnly))
    {
        return 1;
    }

    for (const QString& word: listWordBoundaries(UnicodeString::fromUTF8(StringPiece(strData.data(), strData.size()))))
    {
        result.write(word.toUtf8());
        result.write("\n");
    }
    return 0;
}
Naturally, the resulting file looks like this:
“
Come
outside
.
Best
if
we
do
not
wake
him
.
”
What I need is just the words. How can this be done?
The Qt library includes several useful methods for checking a character's properties; see
QChar.
You can create a QString from the buffer
and check whatever properties you need before inserting it into the output vector.
For example:
// Keep the token only when it is non-empty and its first character is not a
// punctuation mark.
auto token = QString::fromUtf8(buffer);
if (token.length() > 0) {
    const QChar lead = token.data()[0];
    if (!lead.isPunct()) {
        words.push_back(std::move(token));
    }
}
With that code I can access the first character of the string and check
whether it is a punctuation mark or not.
For something more robust, here is the check expressed as a function:
bool isInBlackList(const QString& str) {
const auto len = str.lenght();
if (len == 0) return true;
for(int i = 0; i < len; ++i) {
const auto&& c = str.data()[i];
if (c.isPunct() == true || c.isSpace() == true) {
return true;
}
}
return false;
}
If that function returns true, the token must not be inserted into the vector.
I have a text file that contains keys and values like this:
keyOne=1
keyTwo=734
keyThree=22.3
keyFour=5
The keys are just lower-case and upper-case letters like in my example. The values are either integers or floats. Each key and value is separated by an equals sign (=). Now I want to read the values into variables I have in my program.
This is the code I have tried to read the values:
(I omitted the part where I store the values in my program's variables, and just print them out now for demonstration.)
// Read the options file line by line and print the value part (everything
// after the first '=') of each "key=value" line.
std::fstream file(optionsFile, std::fstream::in);
if (file.good()) {
    std::string line;
    while(std::getline(file, line)) {
        // find the position of the value in the line
        const std::string::size_type separator = line.find('=');
        if (separator == std::string::npos) {
            // No '=' on this line: there is no value to extract. The original
            // went on with uninitialised begin/end in this case.
            continue;
        }
        // build the string... it runs from just after '=' to the end of line.
        // An owning std::string is required: `+=` on a `const char*` only
        // moves the pointer, it does not append characters.
        const std::string value = line.substr(separator + 1);
        std::cout << "string=" << value << std::endl;
    }
}
I don't understand why it won't print the value.. instead only weird stuff or even nothing is printed
Please help this broke my spirit so hard :(
You are using C-style strings (char arrays) without properly allocated memory, and you are only doing arithmetic on the pointer, so no characters are ever appended to your string:
// Defective fragment, shown to illustrate the problem.
// build the string... it starts at <begin> and ends at <end>
// `string` points at a string literal; it owns no writable buffer.
const char *string = "";
for (int i = begin; i < end; i++) {
// `+=` on a pointer is pointer arithmetic: this advances `string` by the
// numeric value of the character instead of appending the character.
string += line.at(i);
}
Use std::string instead:
/// build the string... it starts at <begin> and ends at <end>
std::string str;
for (int i = begin; i < end; i++) {
str += line.at(i);
}
Or allocate memory by hand, use the proper indexing, terminate the string with '\0' character and don't forget to delete the string after you don't need it anymore:
char *string = new char[end - begin + 1];
int j = 0;
for (int i = begin; i < end; i++) {
string[j++] = line.at(i);
}
// Don't forget to end the string!
string[j] = '\0';
// Don't forget to delete string afterwards!
delete [] string;
So, just use std::string.
Edit Why did you mix C strings and std::string in the first place?
As was already mentioned, native string types in c/c++ do not support straightforward concatenation since they are essentially pointers to some preallocated memory. You should always use std::string when a string is supposed to be mutable.
Btw, think about the following refactoring:
/// Handles one parsed option; here it only prints "<key> <-- <value>" on its
/// own line to standard output and flushes.
void process_option (const std::string& a_key, const std::string& a_value)
{
    std::ostream& out = std::cout;
    out << a_key;
    out << " <-- ";
    out << a_value;
    out << std::endl;
}
/// Reads "key=value" lines from `a_in` and passes each pair to
/// process_option().
///
/// The key is everything before the first '='. The value is what follows it,
/// up to the first tab, carriage return or newline (or the end of the line).
/// Lines without '=' are skipped.
///
/// @param a_in      Stream to read from.
/// @param a_source  Name of the source; only referenced by the disabled
///                  diagnostics below.
void read_options (std::istream& a_in, const char* a_source)
{
    int line_n = 0;
    std::string line;
    while (std::getline(a_in, line))
    {
        ++ line_n;
        const std::string::size_type p = line.find('=');
        if (p == std::string::npos)
        {
            // invalid_entry(a_source, line_n);
            continue;
        }

        const std::string::size_type value_begin = p + 1;
        const std::string::size_type value_end = line.find_first_of("\t\r\n", value_begin);
        // substr() takes (position, count). The original passed the end
        // position as the count, which let the value run past the terminator.
        const std::string::size_type value_len =
            (value_end == std::string::npos) ? std::string::npos : value_end - value_begin;

        process_option(
            line.substr(0, p),                    // key
            line.substr(value_begin, value_len)   // value
        );
    }
}
/// Opens the file `a_filename` and feeds it to the stream overload of
/// read_options(). Returns silently when the file cannot be opened.
void read_options (const char* a_filename)
{
    std::ifstream file(a_filename);
    if (file)
    {
        read_options(file, a_filename);
        file.close();
    }
    // else: read_error(a_filename);
}
/// Convenience overload: forwards the file name as a C string.
void read_options (const std::string& a_filename)
{
    const char* raw_name = a_filename.c_str();
    read_options(raw_name);
}
#include <iostream>
#include <vector>
using namespace std;
// Helper for RevStr: prints the words of `str` in reverse order, separated
// by single spaces. Returns true when at least one word was printed.
static bool RevStrPrintTail (const char *str)
{
    if (*str == 0)
        return false;

    // Isolate the first word of what is left.
    std::vector<char> word;
    while ((*str != ' ') && (*str != 0))
        word.push_back(*str++);

    // Print everything after this word first (skipping the separating space),
    // so later words come out earlier.
    bool printed = false;
    if (*str != 0)
        printed = RevStrPrintTail(str + 1);

    // Runs of spaces yield empty "words"; they print nothing.
    if (word.empty())
        return printed;

    if (printed)
        std::cout << ' ';
    for (std::vector<char>::size_type i = 0; i < word.size(); ++i)
        std::cout << word[i];
    return true;
}

// Prints the space-separated words of `str` to standard output in reverse
// order, e.g. " how are you" -> "you are how". No newline is written.
//
// The original printed `*str` (a single character) after the recursive call
// and never used the word it had collected. The parameter is now
// `const char*` so that string literals can be passed; the text is only read.
void RevStr (const char *str)
{
    if (str != 0)
        RevStrPrintTail(str);
}
// Demo driver: prints the words of a sample sentence via RevStr().
int main()
{
    // A string literal is const and does not convert to `char*` (ill-formed
    // since C++11), so keep the text in a writable array.
    char text[] = "hello world!";
    RevStr(text);
    cout<<endl;
    return 0;
}
I want to change the order of words in the string for example " how are you" => "you are how"
I am having a problem: it's not printing correctly (it prints only w). Please help me and tell me what I did wrong. I know that I should not call "cout<<*str;"
since I am putting the array of char on the stack (recursion), but I don't know what I need to do instead.
C++ makes it simple:
#include <algorithm>
#include <iterator>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
/// Returns the whitespace-separated words of `text` in reverse order, joined
/// by single spaces.
///
/// Leading, trailing and repeated whitespace in the input is dropped. The
/// result has no trailing space (the original appended one after every word,
/// including the last).
std::string reverse(std::string const& text)
{
    std::stringstream inStream(text);

    // operator>> skips whitespace, so every extracted word is non-empty.
    std::vector<std::string> words;
    std::copy(std::istream_iterator<std::string>(inStream),
              std::istream_iterator<std::string>(),
              std::back_inserter(words));

    std::string joined;
    for (std::vector<std::string>::const_reverse_iterator it = words.rbegin();
         it != words.rend(); ++it)
    {
        if (!joined.empty())
        {
            joined += ' ';
        }
        joined += *it;
    }
    return joined;
}
// Demo driver: prints the sample sentence with its word order reversed.
int main()
{
    const std::string flipped = reverse("Hello World");
    std::cout << flipped << "\n";
}
A common approach to do this is to reverse the entire string first, then for each word, reverse the letters in the word. So no recursion is necessary. You might find it easier to give this a try (yes, I know this isn't exactly an answer to your question :) ).
Use cout << str, not cout << *str to print a string. There's an operator<< overload for char *. But maybe that's not what you're trying to do; I can't quite follow your logic, in any event.
You're losing the "hello" part.
The algorithm you seem to go for does this:
each call to RevStr isolates the first word in the string it is passed as a parameter
calls RevStr with the remaining of the string
prints the word it isolated at step 1 as the stack unwinds
Basically, you should be printing the v1 data.
I would strongly advise using some of the functionality exposed via std::string as a place to start.
One way you might do this would look like this:
// Returns `s` with its space-separated pieces in reverse order.
//
// The text is cut at every single space, so consecutive spaces produce empty
// pieces that are kept. The pieces are pushed on a stack and popped again,
// which reverses them, and are joined with one space between each pair.
std::string ReverseString(std::string s)
{
    std::stack<std::string> pieces;
    const size_t total = s.size();

    // Cut the text at each space and push the pieces in reading order.
    for (size_t start = 0; start < total; )
    {
        size_t stop = s.find(' ', start);
        if (stop == std::string::npos)
        {
            stop = total;  // last piece runs to the end of the text
        }
        pieces.push(s.substr(start, stop - start));
        start = stop + 1;
    }

    // Pop in LIFO order, i.e. last piece first.
    std::string joined;
    bool is_first = true;
    while (!pieces.empty())
    {
        if (!is_first)
        {
            joined.append(" ");
        }
        joined.append(pieces.top());
        pieces.pop();
        is_first = false;
    }
    return joined;
}
It's by no means the best or fastest way to achieve this; there are many other ways you could do it (Jerry Coffin mentions using std::vector with an iterator, for example), but as you have the power of C++ there, to me it would make sense to use it.
I've done it this way so you could use a different delimiter if you wanted to.
In case you're interested, you can now use this with:
// Demo driver: reverses the word order of a sample sentence and prints it.
int main(int argc, char** argv)
{
    const std::string original = "In Soviet Russia String Format You";
    std::cout << ReverseString(original) << std::endl;
}
Given that it's a char*, this reverses it in place (i.e., it doesn't require extra memory proportional to the incoming 'str'). This avoids converting it to a std::string (not that converting is a bad idea; it's just that it's a char* to start with).
// Reverses the order of the space-separated words of `str` in place.
//
// The whole buffer is reversed first, which puts the words in the right
// order but spells each one backwards; each word is then reversed again.
void reverse_words(char* str)
{
    char* const text_end = str + strlen(str);
    std::reverse(str, text_end);

    char* word_begin = str;
    char* cursor = str;
    while (cursor != text_end)
    {
        if (*cursor == ' ')
        {
            std::reverse(word_begin, cursor);
            word_begin = cursor + 1;
        }
        ++cursor;
    }
    // The last word is not followed by a space.
    std::reverse(word_begin, text_end);
}
// Prints the space-separated pieces of `text` in reverse order, each followed
// by one space, and ends the line.
void Reverse(const std::string& text)
{
    std::list<std::string> words;
    std::string current;

    for (char ch : text)
    {
        if (ch != ' ')
        {
            current += ch;
            continue;
        }
        // A space closes the current piece; pushing to the front reverses
        // the order.
        words.push_front(current);
        current.clear();
    }
    if (!current.empty())
    {
        words.push_front(current);
    }

    for (const std::string& word : words)
    {
        std::cout << word << " ";
    }
    std::cout << std::endl;
}
// Exchanges the two characters that `c1` and `c2` point to.
void swap(char* c1, char* c2) {
    const char held = *c2;
    *c2 = *c1;
    *c1 = held;
}
// Reverses the characters in the closed range [s, e] in place; `e` points at
// the last character, not one past it. Does nothing when either pointer is
// NULL.
void reverse(char* s, char* e) {
    if (s == NULL || e == NULL)
        return;
    for (; s < e; ++s, --e)
        swap(s, e);
}
// Reverses the order of the space-separated words of `line` in place.
//
// The whole line is reversed first, then every word is reversed again so
// that its letters read forwards. Does nothing for NULL or an empty string.
void reverse_words(char* line) {
    // The empty string is excluded too: `line + strlen(line) - 1` would
    // point in front of the buffer.
    if (line == NULL || *line == '\0')
        return;

    reverse(line, line + strlen(line) - 1);

    char *s = line;
    while (*s != '\0') {
        // Find the end of the word that starts at s.
        char *e = s;
        while (*e != ' ' && *e != '\0') ++e;

        // reverse() takes an inclusive end; skip empty words.
        if (e != s)
            reverse(s, e - 1);

        // Stop at the terminator. The original always jumped two characters
        // ahead, which stepped past the terminator after the last word and
        // read outside the string.
        if (*e == '\0')
            break;
        s = e + 1;
    }
}