For a school lab I have to take in an email input and output the username before '#' and the site type. The site type is either the last three letters of the address (com for commercial ventures, edu for educational institutions, etc) or the last two letters that are abbreviations of the country (us for united states, etc). When dealing with two letter country code site types, I inFile a text file (countries.txt) that has each country code with its respective country in each line. I have finished my code but when I run my code and inputting an email with a three letter site type (com, org, etc) it works fine. But when I run my code and input an email with a two letter site type at the end that abbreviates for a country code, I get a fatal string error. I have tried looking everywhere in my else if loop that is called upon when dealing with the two letter country code email address, but I cannot find when I am going out of bounds with the string. Any help is appreciated!
#include <iostream>
#include <string>
#include <iomanip>
#include <fstream>
#include <vector>
using namespace std;
void getemail(string &email);
void finduser(string email);
void findsitetype(string email);
int main()
{
string email;
getemail(email);
finduser(email);
findsitetype(email);
system("pause");
return 0;
}
void getemail(string &email)
{
cout << "Please enter your email address: ";
cin >> email;
cout << endl;
}
void finduser(string email)
{
int index = email.find('#');
cout << "Username: ";
for (int i = 0; i < index; i++)
cout << email[i];
cout << endl << endl;
}
void findsitetype(string email)
{
int truesize = size(email) - 1, i = 0;
string lastthree, countrycode, line;
vector<string> countries;
lastthree.resize(3);
for (int i = 0; i < 3; i++)
{
lastthree[i] = email[truesize - (2 - i)];
}
cout << "Site type: ";
if (lastthree == "edu")
cout << "Educational institutions";
else if (lastthree == "org")
cout << "Not-for-profit organizations";
else if (lastthree == "gov")
cout << "Government entities";
else if (lastthree == "mil")
cout << "Military installations";
else if (lastthree == "net")
cout << "Network service providers";
else if (lastthree == "com")
cout << "Commercial ventures";
else if (email[truesize - 2] == '.') //Source of my error seems to be in this else if loop.
{
countrycode[0] = email[truesize - 1];
countrycode[1] = email[truesize];
ifstream inFile("countries.txt");
if (inFile.fail())
{
cout << "File not found.";
exit(1);
}
else
{
while (getline(inFile, line)) //This is the first time I am using getline I suspect I might be misusing it.
{
countries[i] = line;
i = i + 1;
}
inFile.close();
for (int j = 0; j < countries.size(); j++)
{
string line = countries[j];
string country;
if (line.find(countrycode) != -1)
{
int index = line.find(countrycode);
cout << "Country ";
for (int k = 5; k < line.size(); k++)
{
cout << line[k];
}
}
}
}
}
}
The text file I include:
countries.txt (file name)
ac : Ascension Island
ad : Andorra
ae : United Arab Emirates
af : Afghanistan
ag : Antigua and Barbuda
ai : Anguilla
al : Albania
am : Armenia
an : Netherlands Antilles
ao : Angola
aq : Antarctica
ar : Argentina
as : American Samoa
at : Austria
au : Australia
aw : Aruba
ax : Aland Islands
az : Azerbaijan
ba : Bosnia and Herzegovina
bb : Barbados
bd : Bangladesh
be : Belgium
bf : Burkina Faso
bg : Bulgaria
bh : Bahrain
bi : Burundi
bj : Benin
bm : Bermuda
bn : Brunei Darussalam
bo : Bolivia
br : Brazil
bs : Bahamas
bt : Bhutan
bv : Bouvet Island
bw : Botswana
by : Belarus
bz : Belize
ca : Canada
cc : Cocos (Keeling) Islands
cd : Congo, Democratic Republic
cf : Central African Republic
cg : Congo
ch : Switzerland
ci : Cote D'Ivoire (Ivory Coast)
ck : Cook Islands
cl : Chile
cm : Cameroon
cn : China
co : Colombia
cr : Costa Rica
cs : Czechoslovakia (former)
cu : Cuba
cv : Cape Verde
cx : Christmas Island
cy : Cyprus
cz : Czech Republic
de : Germany
dj : Djibouti
dk : Denmark
dm : Dominica
do : Dominican Republic
dz : Algeria
ec : Ecuador
ee : Estonia
eg : Egypt
eh : Western Sahara
er : Eritrea
es : Spain
et : Ethiopia
eu : European Union
fi : Finland
fj : Fiji
fk : Falkland Islands (Malvinas)
fm : Micronesia
fo : Faroe Islands
fr : France
fx : France, Metropolitan
ga : Gabon
gb : Great Britain (UK)
gd : Grenada
ge : Georgia
gf : French Guiana
gg : Guernsey
gh : Ghana
gi : Gibraltar
gl : Greenland
gm : Gambia
gn : Guinea
gp : Guadeloupe
gq : Equatorial Guinea
gr : Greece
gs : S. Georgia and S. Sandwich Isls.
gt : Guatemala
gu : Guam
gw : Guinea-Bissau
gy : Guyana
hk : Hong Kong
hm : Heard and McDonald Islands
hn : Honduras
hr : Croatia (Hrvatska)
ht : Haiti
hu : Hungary
id : Indonesia
ie : Ireland
il : Israel
im : Isle of Man
in : India
io : British Indian Ocean Territory
iq : Iraq
ir : Iran
is : Iceland
it : Italy
je : Jersey
jm : Jamaica
jo : Jordan
jp : Japan
ke : Kenya
kg : Kyrgyzstan
kh : Cambodia
ki : Kiribati
km : Comoros
kn : Saint Kitts and Nevis
kp : Korea (North)
kr : Korea (South)
kw : Kuwait
ky : Cayman Islands
kz : Kazakhstan
la : Laos
lb : Lebanon
lc : Saint Lucia
li : Liechtenstein
lk : Sri Lanka
lr : Liberia
ls : Lesotho
lt : Lithuania
lu : Luxembourg
lv : Latvia
ly : Libya
ma : Morocco
mc : Monaco
md : Moldova
me : Montenegro
mg : Madagascar
mh : Marshall Islands
mk : F.Y.R.O.M. (Macedonia)
ml : Mali
mm : Myanmar
mn : Mongolia
mo : Macau
mp : Northern Mariana Islands
mq : Martinique
mr : Mauritania
ms : Montserrat
mt : Malta
mu : Mauritius
mv : Maldives
mw : Malawi
mx : Mexico
my : Malaysia
mz : Mozambique
na : Namibia
nc : New Caledonia
ne : Niger
nf : Norfolk Island
ng : Nigeria
ni : Nicaragua
nl : Netherlands
no : Norway
np : Nepal
nr : Nauru
nt : Neutral Zone
nu : Niue
nz : New Zealand (Aotearoa)
om : Oman
pa : Panama
pe : Peru
pf : French Polynesia
pg : Papua New Guinea
ph : Philippines
pk : Pakistan
pl : Poland
pm : St. Pierre and Miquelon
pn : Pitcairn
pr : Puerto Rico
ps : Palestinian Territory, Occupied
pt : Portugal
pw : Palau
py : Paraguay
qa : Qatar
re : Reunion
ro : Romania
rs : Serbia
ru : Russian Federation
rw : Rwanda
sa : Saudi Arabia
sb : Solomon Islands
sc : Seychelles
sd : Sudan
se : Sweden
sg : Singapore
sh : St. Helena
si : Slovenia
sj : Svalbard & Jan Mayen Islands
sk : Slovak Republic
sl : Sierra Leone
sm : San Marino
sn : Senegal
so : Somalia
sr : Suriname
st : Sao Tome and Principe
su : USSR (former)
sv : El Salvador
sy : Syria
sz : Swaziland
tc : Turks and Caicos Islands
td : Chad
tf : French Southern Territories
tg : Togo
th : Thailand
tj : Tajikistan
tk : Tokelau
tm : Turkmenistan
tn : Tunisia
to : Tonga
tp : East Timor
tr : Turkey
tt : Trinidad and Tobago
tv : Tuvalu
tw : Taiwan
tz : Tanzania
ua : Ukraine
ug : Uganda
uk : United Kingdom
um : US Minor Outlying Islands
us : United States
uy : Uruguay
uz : Uzbekistan
va : Vatican City State (Holy See)
vc : Saint Vincent & the Grenadines
ve : Venezuela
vg : British Virgin Islands
vi : Virgin Islands (U.S.)
vn : Viet Nam
vu : Vanuatu
wf : Wallis and Futuna Islands
ws : Samoa
xk : Kosovo*
ye : Yemen
yt : Mayotte
yu : Serbia and Montenegro (former)
za : South Africa
zm : Zambia
zw : Zimbabwe
countries is empty vector, use push_back to add new items:
while (getline(inFile, line)) //This is the first time I am using getline I suspect I might be misusing it.
{
//countries[i] = line;
countries.push_back(line);
i = i + 1;
}
Another issue here:
if (line.find(countrycode) != -1)
It should be changed to
if (line.find(countrycode) == 0)
Because you expect countrycode to be at the beginning of the line.
std::string has a number of other functions such as find_last_of and substr, use them instead of going through the characters.
void findsitetype(string email)
{
size_t dot = email.find_last_of('.');
if (dot == string::npos || dot == (email.size() - 1))
return;
string countrycode = email.substr(dot + 1);
if (!countrycode.size())
return;
vector<string> countries;
if (countrycode == "edu") cout << "Educational institutions";
else if (countrycode == "org") cout << "Not-for-profit organizations";
else if (countrycode == "gov") cout << "Government entities";
else if (countrycode == "mil") cout << "Military installations";
else if (countrycode == "net") cout << "Network service providers";
else if (countrycode == "com") cout << "Commercial ventures";
else if (countrycode.size() == 2)
{
ifstream inFile("countries.txt");
if (inFile.fail())
{
cout << "File not found.";
exit(1);
}
else
{
string line;
while (getline(inFile, line))
countries.push_back(line);
inFile.close();
for (size_t j = 0; j < countries.size(); j++)
{
if (countries[j].find(countrycode) == 0)
{
line = countries[j];
if (line.size() > 5)
cout << "country: " << line.substr(5) << "\n";
}
}
}
}
}
Related
I'm trying to print this LIST but it's driving me insane.
So this is the output that I wanted to get :
Veranstaltungen:
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
This is the print function in my code :
I tried first of all this simple alternative here:
void CEvents::print()
{
cout << "Veranstaltungen:" << endl ;
for (int i = 0 ; i < counter;i++)
{
CWeekday Day = Events[i]->WeekDay;
cout<<i+1<<": "<<Events[i]->getDay(Day) << " ";
Events[i]->Block->print();
cout<<" "<<Events[i]->Name<<" "<<Events[i]->Teacher->Name<<" "<<Events[i]->Room->Name<< endl;
}
}
I got from this code this output :
Veranstaltungen:
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
Of course this one won't work correctly because they all don't have the same lengths but atleast the first part here is correct .
Then I tried to switch to printf() instead of cout , got a little closer to the result but still not the same :
extern "C" void CEvents::print()
{
cout << "Veranstaltungen:" << endl ;
string a= " ";
for (int i = 0 ; i < counter;i++)
{
CWeekday Day = Events[i]->WeekDay;
printf("%d: %10s %5s",i+1,Events[i]->getDay(Day).c_str(),a.c_str());
Events[i]->Block->print();
printf("%30s %20s %10s\n",Events[i]->Name.c_str(),Events[i]->Teacher->Name.c_str(),Events[i]->Room->Name.c_str());
}
}
For this code I got this output :
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
Now second part is correct(the time) and the other parts are not.
How do I make this format correctly ?
Using std::left and std::setw should get you the formatting you want.
I've had to improvise a little as you didn't provide a full example but you should be able to adapt this code into yours:
#include <vector>
#include <string>
#include <iostream>
#include <array>
#include <iomanip>
int main()
{
std::vector<std::array<std::string, 5>> data =
{
{ "Freitag", "08:00 - 09:30", "SU Informatik III", "Kevin Kaufmann", "H3" },
{ "Freitag", "10:00 - 11:30", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
{ "Freitag", "12:15 - 13:45", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
{ "Montag", "10:00 - 11:30", "SU Mathe III", "Nathan Neuling", "B 301" },
{ "Mittwoch", "10:00 - 11:30", "Ueb Mathe III", "Nathan Neuling", "D 209" },
{ "Donnerstag", "16:00 - 17:30", "Englisch", "Sabine Sauber", "D 419" },
{ "Dienstag", "16:00 - 17:30", "SU Digitale Systeme", "Willi Witzig", "H5" },
{ "Dienstag", "17:45 - 19:15", "Ueb Digitale Systeme", "Willi Witzig", "D 114" },
};
std::cout << "Veranstaltungen:\n";
for (size_t i = 0; i < data.size(); i++)
{
std::cout << (i+1) << ": " <<
std::left <<
std::setw(12) << data[i][0] <<
std::setw(16) << data[i][1] <<
std::setw(22) << data[i][2] <<
std::setw(16) << data[i][3] <<
data[i][4] << "\n";
}
}
https://godbolt.org/z/EqafWs
To fix your problems you need to left justify the text for some fields:
printf("%-30s %-20s %-10s\n",Events[i]->Name.c_str(),Events[i]->Teacher->Name.c_str(),Events[i]->Room->Name.c_str();
Note the minus in the number before s. Additionally I think you will have to make some of the fields wider than this constants
As a side note here is fmt example (note fmt is part of C++20 and currently is not supported by any compiler, so it has to be used as external library):
#include <iostream>
#include <string>
#include <vector>
#include <fmt/core.h>
int main()
{
std::vector<std::array<std::string, 5>> data =
{
{ "Freitag", "08:00 - 09:30", "SU Informatik III", "Kevin Kaufmann", "H3" },
{ "Freitag", "10:00 - 11:30", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
{ "Freitag", "12:15 - 13:45", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
{ "Montag", "10:00 - 11:30", "SU Mathe III", "Nathan Neuling", "B 301" },
{ "Mittwoch", "10:00 - 11:30", "Ueb Mathe III", "Nathan Neuling", "D 209" },
{ "Donnerstag", "16:00 - 17:30", "Englisch", "Sabine Sauber", "D 419" },
{ "Dienstag", "16:00 - 17:30", "SU Digitale Systeme", "Willi Witzig", "H5" },
{ "Dienstag", "17:45 - 19:15", "Ueb Digitale Systeme", "Willi Witzig", "D 114" },
};
std::cout << "Veranstaltungen:\n";
size_t i = 0;
for (const auto&x : data)
{
fmt::print("{:2}: {:12} {:16} {:22} {:16} {}\n",
++i, x[0], x[1], x[2], x[3], x[4]);
}
}
Live demo
I have sample data like below
class: 9
section: A
stud : Robert
subject: maths
mark : 69
subject:science
mark: 75
stud : Billy
subject: maths
mark : 69
subject:science
mark: 75
stud : Venice
subject: maths
mark : 69
subject:science
mark: 75
stud : Marc
subject: maths
mark : 69
subject:science
mark: 75
class: 10
section: A
stud : Agnes
subject: maths
mark : 69
subject:science
mark: 75
stud : Sarah
subject: maths
mark : 69
subject:science
mark: 75
stud : Scott
subject: maths
mark : 69
subject:science
mark: 75
stud : Alex
subject: maths
mark : 69
subject:science
mark: 75
line1
line2
line3
...
line n
I am trying to extract class 9 student data out of this file. Here is my code
val datafile = sc.textFile("file.txt").collect().mkString(" ")
// to take the data I needed from whole file
val datpattern = """(class: 9).*?(?=\bline\s)
val finaldata = datpattern.findAllIn(datafile)
//student data extract regex
val stupattern = "section: (\S+)\s+ stud : ([\w\S]+)\s+ subject: ([\w\S]+)\s+ mark : (\d+)"""".r
val finalresult = finaldata.flatMap { a => stupattern findAllIn a }
.map {l =
val stupattern(section,stuname,sub,mark) = l
(section,stuname,sub,mark)
}
.foreach(println)
But this gave me only first record in each class that too only the first subject & mark. (Robert maths mark & Agnes Maths mark from class 9 & 10th S A section.
I thought this is because only that matches entire pattern.
I tried to change it like with 0 or more occurences for subject & mark. something like below (Only the lines I have changed given below)
val stupattern = "section: (\S+)\s+ stud : ([\w\S]+)\s+ (subject: ([\w\S]+)\s+ mark : (\d+))*"""".r
val finalresult = finaldata.flatMap { a => stupattern findAllIn a }
.map {l =
val stupattern(section,stuname,{sub,mark}) = l//This doesn't even let me compile
(section,stuname,sub,mark)//This doesn't even let me compiled
}
.foreach(println)
It error out like for those 2 lines "Illegal start of pattern".
Can someone tell me how to extract repeat subset of data from above?
Thanks in Advance.
------------------------------------------------
Artiles for a magazine
------------------------------------------------
There are total 5 articles in the magazine
------------------------------------------------
ID : 3
Description : opis2
Price : 212
Incoming amount : 2
Outgoing amount : 0
Taxes : 0
Total : 424
Date : 20324
------------------------------------------------
ID : 3
Description : 54
Price : 123
Incoming amount : 12
Outgoing amount : 0
Taxes : 0
Total : 1476
Date : 120915
------------------------------------------------
ID : 3
Description : opsi2
Price : 12
Incoming amount : 324
Outgoing amount : 0
Taxes : 0
Total : 3888
Date : 570509
------------------------------------------------
ID : 2
Description : vopi
Price : 2
Incoming amount : 2
Outgoing amount : 0
Taxes : 0
Total : 4
Date : 951230
------------------------------------------------
ID : 1
Description : opis1
Price : 2
Incoming amount : 2
Outgoing amount : 0
Taxes : 0
Total : 4
Date : 101
------------------------------------------------
I have a file called directory.dat with the contents above. What I'm trying to do is the following.
I want to find all articles with the same ID in a given year and do the following : outgoing amount - incoming amount. So, my problem is how can I find all the articles with same ID in a given year (by the user) and do the outgoing amount-incoming amount for them, by working with the file?
I tried something like this:
ifstream directory("directory.dat");
//directory.open("directory.dat");
string line;
string priceLine = "Price : ";
int price;
while(getline(directory, line)){
if(line.find(priceLine) == 0){
cout << atoi(line.substr(priceLine.size()).c_str()) << endl;
}
}
cout << price << endl;
directory.close();
But I am far away from getting on the right track and I need some help to achieve something like this.
You need to define precisely the format of your input (perhaps as a BNF grammar). A single example is not enough. We can't guess if Artiles for a magazine is meaningful or not.
while(getline(directory, line)){
int colonpos = -1;
if (line.find("----")) {
/// check that line has only dashes, then
process_dash_line();
}
else if ((colonpos=line.find(':'))>0) {
std::string name = line.substr(0, colonpos-1);
std::string value = line.substr(colonpos+1);
process_name_value (name, value);
}
}
Also, study (and perhaps adapt) the source code of some free software C++ parsers for JSON (e.g. jsoncpp) and YAML (e.g. yaml-cpp). They will certainly give you some inspiration.
Learn more about C++ standard libraries, e.g. on cppreference.com & cplusplus.com (both sites are easy to read but are imperfect) and of course by reading the C++11 standard, or at least its draft n3337
I would like to add columns to a data.table based on a string in another column. This is my data and the approach that I have tried:
Params
1: { clientID : 459; time : 1386868908703; version : 6}
2: { clientID : 459; id : 52a9ea8b534b2b0b5000575f; time : 1386868824339; user : 459001}
3: { clientID : 988; time : 1388939739771}
4: { clientID : 459; id : 52a9ec00b73cbf0b210057e9; time : 1386868810519; user : 459001}
5: { clientID : 459; time : 1388090530634}
Code to create this table:
DT = data.table(Params=c("{ clientID : 459; time : 1386868908703; version : 6}","{ clientID : 459; id : 52a9ea8b534b2b0b5000575f; time : 1386868824339; user : 459001}","{ clientID : 988; time : 1388939739771}","{ clientID : 459; id : 52a9ec00b73cbf0b210057e9; time : 1386868810519; user : 459001}","{ clientID : 459; time : 1388090530634}"))
I would like to parse the text in the "Params"-column and create new columns based on the text in it. For example I would like to have a new column named "user" that only holds the number after "user:" in the Params string. The added column should look like this:
Params user
1: { clientID : 459; time : 1386868908703; version : 6} NA
2: { clientID : 459; id : 52a9ea8b534b2b0b5000575f; time : 1386868824339; user : 459001} 459001
3: { clientID : 988; time : 1388939739771} NA
4: { clientID : 459; id : 52a9ec00b73cbf0b210057e9; time : 1386868810519; user : 459001} 459001
5: { clientID : 459; time : 1388090530634} 459001
I created the following function to parse (in this case for the "user"):
myparse <- function(searchterm, s) {
s <-gsub("{","",s, fixed = TRUE)
s <-gsub(" ","",s, fixed = TRUE)
s <-gsub("}","",s, fixed = TRUE)
s <-strsplit(s, '[;:]')
s <-unlist(s)
if (length(s[which(s==searchterm)])>0) {s[which(s==searchterm)+1]} else {NA}
}
Then I use the following function to add a column:
DT <- transform(DT, user = myparse("user", Params))
This works in the case of "time" which is included in all the rows but does not work in the case of "user" which is only included in two of the rows. The following error is returned:
Error in data.table(list(Params = c("{ clientID : 459; time : 1386868908703; version : 6}", :
argument 2 (nrow 2) cannot be recycled without remainder to match longest nrow (5)
How can I address this? Thanks!
Here's a way to use regular expressions for this task:
myparse <- function(searchterm, s) {
res <- rep(NA_character_, length(s)) # NA vector
idx <- grepl(searchterm, s) # index for strings including the search term
pattern <- paste0(".*", searchterm, " : ([^;}]+)[;}].*") # regex pattern
res[idx] <- sub(pattern, "\\1", s[idx]) # extract target string
return(res)
}
You can use this function to add new columns, e.g., for user:
DT[, user := myparse("user", Params)]
The new column contains NA for the rows with no user field:
DT[, user]
# [1] NA "459001" NA "459001" NA
I would use some external parser, for example:
library(yaml)
DT = data.frame(
Params=c("{ clientID : 459; time : 1386868908703; version : 6}","{ clientID : 459; id : 52a9ea8b534b2b0b5000575f; time : 1386868824339; user : 459001}","{ clientID : 988; time : 1388939739771}","{ clientID : 459; id : 52a9ec00b73cbf0b210057e9; time : 1386868810519; user : 459001}","{ clientID : 459; time : 1388090530634}"),
stringsAsFactors=F
)
conv.to.yaml <- function(x){
gsub('; ','\n',substr(x, 3, nchar(x)-1))
}
tmp <- lapply( DT$Params, function(x) yaml.load(conv.to.yaml(x)) )
then combine the parsed lists into data frame:
unames <- unique( unlist(sapply( tmp, names) ) )
res <- as.data.frame( do.call(rbind, lapply(tmp, function(x)x[unames]) ) )
colnames( res ) <- unames
res
the result is pretty much close to what you have in mind, but you need to think about better handling for the time values:
> res
clientID time version id user
1 459 -405527905 6 NULL NULL
2 459 -405612269 NULL 52a9ea8b534b2b0b5000575f 459001
3 988 1665303163 NULL NULL NULL
4 459 -405626089 NULL 52a9ec00b73cbf0b210057e9 459001
5 459 816094026 NULL NULL NULL
I want to know programmatic way to get the memory consumed by my user defined class.
Following is the declaration of the class
struct TrieNode {
typedef std::map<char, TrieNode *> ChildType;
std::string m_word;
bool m_visited;
}
I have inserted around 264061 words into this Trie. After this when i do sizeof(trieobject) it just show me 32. How do i know how much exact memory is used by such data structures.
I use
valgrind --tool=massif ./myprogram -opt arg1 arg2
ms_print massif.* | less -SR
for that. Sample output from this page
19.63^ ###
| #
| # ::
| # : :::
| :::::::::# : : ::
| : # : : : ::
| : # : : : : :::
| : # : : : : : ::
| ::::::::::: # : : : : : : :::
| : : # : : : : : : : ::
| ::::: : # : : : : : : : : ::
| ###: : : # : : : : : : : : : #
| ::# : : : # : : : : : : : : : #
| :::: # : : : # : : : : : : : : : #
| ::: : # : : : # : : : : : : : : : #
| ::: : : # : : : # : : : : : : : : : #
| :::: : : : # : : : # : : : : : : : : : #
| ::: : : : : # : : : # : : : : : : : : : #
| :::: : : : : : # : : : # : : : : : : : : : #
| ::: : : : : : : # : : : # : : : : : : : : : #
0 +----------------------------------------------------------------------->KB 0 29.48
Number of snapshots: 25
Detailed snapshots: [9, 14 (peak), 24]
The remainder of the log details the highest percentiles of memory allocations, you can specifically see what type of class takes what % of heap memory (and where the allocations originate in terms of call stack), e.g.:
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B)
--------------------------------------------------------------------------------
10 10,080 10,080 10,000 80 0
11 12,088 12,088 12,000 88 0
12 16,096 16,096 16,000 96 0
13 20,104 20,104 20,000 104 0
14 20,104 20,104 20,000 104 0
99.48% (20,000B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
->49.74% (10,000B) 0x804841A: main (example.c:20)
|
->39.79% (8,000B) 0x80483C2: g (example.c:5)
| ->19.90% (4,000B) 0x80483E2: f (example.c:11)
| | ->19.90% (4,000B) 0x8048431: main (example.c:23)
| |
| ->19.90% (4,000B) 0x8048436: main (example.c:25)
|
->09.95% (2,000B) 0x80483DA: f (example.c:10)
->09.95% (2,000B) 0x8048431: main (example.c:23)
Well, this is not so easy to do. First of all m_word is a string with variable size right? Internally the std::string holds an array of chars among other things. The same stands for std::map. I guess you could get a rough estimation based on the size of the map * TrieNode but this will be just a rough estimate.
I think some code profiling with an external tool would be of more help. Hell you can even use the task manager if you are out of any tools left :).
Your "object size" is sizeof(std::string) + sizeof(bool) + m_word.capacity() + padding bytes or sizeof(trieobject) + m_word.capacity()
Here's a piece of code for GCC that I came up with that you can use in a test program where you only instantiate one object of your class and do some typical work with it. The code replaces the global operator new() and operator delete(); so it will only track allocations through ::new expressions and the standard allocator, provided that the standard allocator itself uses ::operator new() (this is the case for GCC).
Since we need to track the pointers and their allocations, we need a separate map for that, which of course cannot use the standard allocator itself; GCC's malloc-allocator comes to the rescue.
We use a statically initialized global to make the memory tracker print its data after main returns.
#include <unordered_map>
#include <string>
#include <iostream>
#include <ext/malloc_allocator.h>
struct Memtrack
{
typedef std::unordered_map<void*, std::size_t, std::hash<void*>,
std::equal_to<void*>, __gnu_cxx::malloc_allocator<void*>> AllocMap;
static int memtrack;
static int memmax;
static AllocMap allocs;
Memtrack() { std::cout << "starting tracker: cur = " << memtrack << ", max = " << memmax << ".\n"; }
~Memtrack() { std::cout << "ending tracker: cur = " << memtrack << ", max = " << memmax << ".\n"; }
static void track_new(std::size_t n, void * p)
{
memtrack += n;
if (memmax < memtrack) memmax = memtrack;
allocs[p] = n;
std::cout << "... allocating " << n << " bytes...\n";
}
static void track_delete(void * p)
{
const int n = int(allocs[p]);
memtrack -= n;
std::cout << "... freeing " << n << " bytes...\n";
}
} m;
int Memtrack::memtrack = 0;
int Memtrack::memmax = 0;
Memtrack::AllocMap Memtrack::allocs;
void * operator new(std::size_t n) throw(std::bad_alloc)
{
void * const p = std::malloc(n);
Memtrack::track_new(n, p);
return p;
}
void operator delete(void * p) throw()
{
Memtrack::track_delete(p);
std::free(p);
}
int main()
{
std::cout << "Beginning of main.\n";
std::unordered_map<std::string, int> m; // this piece of code
m["hello"] = 4; // is a typical test for working
m["world"] = 7; // with dynamic allocations
std::cout << "End of main.\n";
}
Some typical output:
starting tracker: cur = 0, max = 0.
Beginning of main.
... allocating 48 bytes...
... allocating 12 bytes...
... allocating 12 bytes...
End of main.
... freeing 12 bytes...
... freeing 12 bytes...
... freeing 48 bytes...
ending tracker: cur = 0, max = 72.
Trivial. If you have some time (which might be the case, if you are only interested in the size for debugging/optimising purposes). This approach might be unsuited for production code!
#include <malloc.h>
template <typename T> int objSize(T const* obj) {
// instead of uordblks, you may be interested in 'arena', you decide!
int oldSize = mallinfo().uordblks;
T* dummy = new T(*obj);
int newSize = mallinfo().uordblks;
delete dummy;
return newSize - oldSize;
}