Context first:
My program does some parallel calculations whose results are logged to a file. Threads are grouped by blocks (I'm using CUDA). The log file is formatted this way:
#begin run
({blockIdx,threadIdx}) {thread_info}
({blockIdx,threadIdx}) {thread_info}
...
#end run
I've written a function that should read the log file and sort each run's messages by thread.
//------------------------------------------------------------------------------
// Comparison struct for log file sorting
//------------------------------------------------------------------------------
// Strict-weak-ordering functor used to sort log lines by their
// "(blockIdx,threadIdx)" prefix.
//
// Only the text up to and including the first ')' plus the one character
// that follows it takes part in the comparison, so two lines with the same
// prefix compare as equivalent regardless of the message after it.
// The comparison is lexicographic on characters, not numeric.
struct comp
{
    // Returns true when the prefix of rString1 orders before the prefix of
    // rString2. Declared const so the functor also works where the
    // comparator is held const (e.g. ordered containers).
    bool operator()(const std::string &rString1 , const std::string &rString2) const
    {
        return rString1.compare(0 , prefixLength(rString1) , rString2 , 0 , prefixLength(rString2)) < 0;
    }

private:
    // Length of the comparison key: through the first ')' and the character
    // after it. A line without ')' is compared as a whole instead of relying
    // on npos + 2 wrapping around.
    static std::string::size_type prefixLength(const std::string &rLine)
    {
        const std::string::size_type closeParenthesis = rLine.find_first_of(')');
        return (closeParenthesis == std::string::npos) ? rLine.size() : closeParenthesis + 2;
    }
};
//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//------------------------------------------------------------------------------------
// Reads the log named by m_strInputFile, sorts the lines of every
// "#begin run" ... "#end run" section with the comp functor, and writes the
// sorted sections to the file named by m_strOutputFile.
// NOTE(review): this is the version that produces the oversized output; see
// the note on the substr() call below.
void CudaUnitTest::sortFile()
{
comp comparison;
deque<string> threadsPrintfs;
ifstream inputFile(m_strInputFile);
assert(inputFile.is_open());
//Read the whole input file into memory and close it, so parsing never goes back to disk.
string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
inputFile.close();
ofstream outputFile(m_strOutputFile);
assert(outputFile.is_open());
string strLine;
int iBeginRunIdx = -10; //chosen so the first find() below starts at index 0 (-10 + 10)
int iBeginRunNewLineOffset = 10; //offset from the start of "#begin run\n" to its terminating '\n'
int iEndRunIdx;
int iLastNewLineIdx;
int iNewLineIdx;
//One iteration per "#begin run\n" marker found in the content.
while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
{
iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
assert(iEndRunIdx != string::npos);
iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
//Walk the newlines between the begin marker and the end marker.
while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
{
//NOTE(review): substr(pos, count) takes a LENGTH as second argument, but
//iNewLineIdx is an absolute index, so every extracted "line" runs far past
//its own newline. This is the source of the repeated data in the output.
strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx);
if(verifyPrefix(strLine))
threadsPrintfs.push_back(strLine);
iLastNewLineIdx = iNewLineIdx;
}
//sort the lines collected for this run
sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
//re-add the run markers around the sorted lines
threadsPrintfs.push_front("#begin run\n");
threadsPrintfs.push_back("#end run\n");
//write the run to the output file
for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
{
assert(outputFile.good());
outputFile.write(it->c_str() , it->size());
}
outputFile.flush();
//start the next run with an empty buffer
threadsPrintfs.clear();
}
outputFile.close();
}
The problem is that the resulting file has a lot of trash data. For example an input log file with 6KB generated a output log of 192KB! It appears the output file has a lot of repetitions of the input file. When debugging code the deque showed the right values before and after sort, though. I think there is something wrong with the ofstream write itself.
Edit: The function isn't running in parallel.
Just to show the final code. Note the change to substr: instead of an index, it now receives the length.
//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//------------------------------------------------------------------------------------
void CudaUnitTest::sortFile()
{
comp comparison;
deque<string> threadsPrintfs;
ifstream inputFile(m_strInputFile);
assert(inputFile.is_open());
//Read whole input file and close it. Saves disk accesses.
string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
inputFile.close();
ofstream outputFile(m_strOutputFile);
assert(outputFile.is_open());
string strLine;
int iBeginRunIdx = -10; //value just to addapt on while loop (to start on [0])
int iBeginRunNewLineOffset = 10; //"idx offset to a new line char in string. Starts with the offset of the string "#begin run\n".
int iEndRunIdx;
int iLastNewLineIdx;
int iNewLineIdx;
while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
{
iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
assert(iEndRunIdx != string::npos);
iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
{
strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx - iLastNewLineIdx);
if(verifyPrefix(strLine))
threadsPrintfs.push_back(strLine);
iLastNewLineIdx = iNewLineIdx;
}
//sort last run info
sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
threadsPrintfs.push_front("#begin run\n");
threadsPrintfs.push_back("#end run\n");
//output it
for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
{
assert(outputFile.good());
outputFile.write(it->c_str() , it->size());
}
threadsPrintfs.clear();
}
outputFile.close();
}
Related
Whenever I attempt to output a line, it outputs the data from the file vertically instead of outputting the full line horizontally. My main goal is to output each line individually and remove commas and repeat till no more lines are in the CSV file.
An example when I run the code:
cout << data[1] << "\t";
Output:
Huggenkizz Pinzz White Dwarf Dildock Operknockity DeVille
What I'm trying to get is:
Huggenkizz Amanda 3/18/1997 Sales Associate 2 A A F
My CSV File:
ID,Last Name,First Name,DOB,DtHire,Title,Level,Region,Status,Gender
1,Huggenkizz,Amanda,3/18/1997,,Sales Associate,2,A,A,F
2,Pinzz,Bobby,5/12/1986,,Sales Associate,3,B,A,F
3,White,Snow,12/23/1995,,Sales Associate,2,C,A,F
4,Dwarf,Grumpy,9/8/1977,,Sales Associate,2,C,A,M
5,Dildock,Dopey,4/1/1992,,Sales Associate,1,B,A,M
6,Operknockity,Michael,10/2/1989,,Sales Associate,1,A,S,M
9,DeVille,Cruella,8/23/1960,,Sales Manager,,,A,F
My Code:
// Splits s on every occurrence of delimiter and returns the pieces in order.
//
// Empty fields are preserved ("a,,b" -> {"a", "", "b"}) and the text after
// the last delimiter is always returned, so the result is never empty.
// An empty delimiter cannot split anything: s is returned as a single piece
// (the erase-based loop would otherwise never terminate).
std::vector<std::string> SplitString(const std::string& s, const std::string& delimiter)
{
    std::vector<std::string> annualSalesReport;
    if (delimiter.empty())
    {
        annualSalesReport.push_back(s);
        return annualSalesReport;
    }

    // Advance a start offset instead of erasing the consumed prefix, which
    // would copy the remaining text once per field.
    std::string::size_type start = 0;
    std::string::size_type pos;
    while ((pos = s.find(delimiter, start)) != std::string::npos)
    {
        annualSalesReport.push_back(s.substr(start, pos - start));
        start = pos + delimiter.length();
    }
    annualSalesReport.push_back(s.substr(start));
    return annualSalesReport;
}
// Prints the second column (index 1) of every data row of the CSV file,
// each followed by a tab. The first line of the file is the header and is
// skipped.
int main()
{
    std::ifstream csvStream;
    csvStream.open("SalesAssociateAnnualReport.csv");

    std::vector<std::string> fields;
    std::string row;
    for (int rowNumber = 1; getline(csvStream, row); ++rowNumber)
    {
        if (rowNumber <= 1)
        {
            continue; // header line
        }
        fields = SplitString(row, ",");
        if (fields.size() > 1) // need at least two columns to print index 1
        {
            std::cout << fields[1] << "\t";
        }
    }

    csvStream.close();
    return 0;
}
Please change your main function as follows
// Loads every data row of the CSV file into a matrix of strings, then prints
// the first data row without its first column, tab separated.
// Returns 1 when the file cannot be opened or has no data rows.
int main()
{
    std::ifstream myIFS("SalesAssociateAnnualReport.csv");
    if (!myIFS)
    {
        std::cerr << "Could not open SalesAssociateAnnualReport.csv\n";
        return 1;
    }

    // Input: one vector of fields per data line.
    std::vector<std::vector<std::string> > data;
    std::string readLine;
    int lineCounter = 0;
    while (getline(myIFS, readLine))
    {
        lineCounter++;
        if (lineCounter > 1) // line 1 is the header
        {
            data.push_back(SplitString(readLine, ","));
        }
    }
    myIFS.close();

    // Indexing data[0] on an empty matrix is undefined behaviour.
    if (data.empty())
    {
        std::cerr << "No data rows found\n";
        return 1;
    }

    // Output: the first data line without delimiter and without first column.
    const std::vector<std::string>& firstRow = data[0];
    for (std::size_t i = 1; i < firstRow.size(); i++)
    {
        std::cout << firstRow[i] << '\t';
    }
    return 0;
}
to get your desired output of
Huggenkizz Amanda 3/18/1997 Sales Associate 2 A AF
without having to change your SplitString function.
Please be aware that C++ first array index is always 0 instead of 1.
I've separated the CSV file input processing and the output generation, just to follow the simple programming model IPO:
Input -> Process -> Output
Therefore I've introduced the matrix of strings vector<vector<string>> to store the whole desired CSV file data.
As mentioned in the comments, the SplitString function may be refactored and it should also be fixed to split the last two columns properly.
Hope it helps?
I am currently working on a small assignment using graphs to display characters in an adjacency list. Normally our professor supplies us with examples that he works out in class; however, he has not given us any examples this time. This is just a one-point extra-credit assignment, but the real reason I am seeking help is that we have a 50-point in-class assignment on Friday. Neither I nor my classmates know what is going on at the moment. This is the function that I cannot get to work.
My main question is how would I convert the string * value returned by the
split function to use in a call to the markEdge function?
void loadListFromFile(Graph& g, string filename) {
/* TODO (1):
*
* Open the file and update the matrix with the proper
* edge status, i.e. 1 if there is an edge from source
* to target.
*
* The file format is:
*
* sourceNode adjacentNode1 adjacentNode2 ...adjacentNode3
*
* Each node is a single character in the range A-Z.
*/
ifstream ifs;
ifs.open(filename);
if (ifs.is_open() == false){
cerr << "Error opening file for reading." << endl;
}
string line;
int tokenCount;
getline( ifs, line);
while (!ifs.eof()){
split(line, ' ', tokenCount);
getline(ifs, line);
}
}// end loadListFromFile()
Here is a given function used to split the values
//given
// Splits str on delimiter and returns a heap-allocated array of the tokens.
//
// @param str         text to split; one trailing '\n' is ignored
// @param delimiter   separator character
// @param tokenCount  receives the number of tokens in the returned array
// @return array of tokenCount strings; the caller owns it and must release
//         it with delete[]
//
// Consecutive delimiters produce empty tokens. An empty str yields one empty
// token, so the result is never nullptr.
std::string * split(const std::string& str, const char delimiter, int& tokenCount){
    std::string remaining = str;
    // back() on an empty string is undefined, so check first.
    if (!remaining.empty() && remaining.back() == '\n') {
        remaining.pop_back();
    }
    // Terminate the last token so every token ends at a delimiter.
    remaining.push_back(delimiter);

    // Count first so the array is allocated once instead of being
    // reallocated and copied for every token.
    tokenCount = 0;
    for (std::string::size_type i = 0; i < remaining.size(); ++i) {
        if (remaining[i] == delimiter) {
            ++tokenCount;
        }
    }

    std::string * fields = new std::string[tokenCount];
    std::string::size_type tokenStart = 0;
    for (int i = 0; i < tokenCount; ++i) {
        const std::string::size_type delimiterPos = remaining.find(delimiter, tokenStart);
        fields[i] = remaining.substr(tokenStart, delimiterPos - tokenStart);
        tokenStart = delimiterPos + 1;
    }
    return fields;
}// end split()
Here is a function that I believe is right.
// Records an edge by forwarding to adjList->pushAt(fromIndex, node).
// fromIndex: index of the list to add to; node: the character to add.
// NOTE(review): adjList and pushAt are declared outside this excerpt; this is
// presumably a member function of the graph class. Confirm bounds handling there.
void markEdge(const int fromIndex, const char node) {
/* TODO (1):
* Add the node to the list at index fromIndex.
*/
adjList->pushAt(fromIndex, node);
}// end markEdge()
Thanks for the help.
My code is driving me crazy: sometimes it works fine, but sometimes it dumps core, hits a segmentation fault, or aborts with a "double free or corruption (fasttop)" error.
I think it's because some threads cannot be created, but I couldn't confirm it. What is wrong with this code?
This code is supposed to find \n's in a text file that is stored in path.
Here is the code:
This is Search_inp struct
// Argument bundle passed to a search() worker through a void* pointer.
// It only stores pointers, so the pointed-to string and vector objects must
// outlive the worker that reads them.
typedef struct Search_inp{
string * path; // file that search() opens
string * phrase; // text that search() looks for
int a ; // offset search() seeks to before reading
int b; // upper bound on the read position used by search()
int file_size; // used by search() to clamp a and b
vector<int>* vec; // result vector that search() appends match positions to
}search_inp;
This function should return a void * pointer to the struct that contains my data that I want to pass to thread!
void * make_search_inp(string & path , string & phrase , int a , int b , int file_size , vector<int>&vec){
search_inp * res = (search_inp*)malloc(sizeof(search_inp));
res->path = &path;
res->phrase = & phrase;
res->a = a;
res->b = b;
res -> file_size = file_size;
res->vec = &vec;
return (void *)res;
}
This function will starting the search of \n's in the file
// this function will multi thread the search of \n's and do this through search func
void find_backslash(string path , vector<int> &place_backslash , int file_size){
int counter = 0;
string backslash = "\n";
vector<void*>temp;
vector<pthread_t> tid;
pthread_t t;
while(counter * range <= file_size ){
temp.push_back( make_search_inp(path , backslash , counter*range , (counter+1)*range-1 , file_size , place_backslash ));
pthread_create(&t, NULL , search , temp.back() );
tid.push_back(t);
counter++;
}
for(int i = 0 ; i<tid.size() ;i++) pthread_join(tid[i] , NULL);
//when the erorr happend program can not reach this place...
while(tid.size()) tid.pop_back();
while(temp.size()) temp.pop_back();
sort(place_backslash.begin() , place_backslash.end());
}
This is search function of my code:
void* search(void * temp){
search_inp* Stemp = (search_inp*)temp;
string path = *(Stemp->path);
string phrase = *(Stemp->phrase);
int a = Stemp->a;
int b = Stemp->b;
int file_size = Stemp->file_size;
vector<int>&vec = *(Stemp->vec);
if(path == "" ) return NULL;//check the path correctness
ifstream fin;//1opening the file 2check if the file opening is successful 3put the g in the correct place with seekg
fin.open(path.c_str());
if(a < 0) a=0;
if(b < 0) b=0;
if(a >file_size)
a = b = file_size;
if(b > file_size){
b = file_size;
}
fin.seekg(a , fin.beg);
if(!fin){
cout << "ERROR:File Does Not Exist!" << endl;
return NULL;
}
//opening the output file for
//The search phase
int counter=0 , charNum =a;//this counter hold the number of appearance of the phrase in the file
while(!fin.eof() && charNum < b){
int cnt = 0;char inp;
do{
fin.get(inp);charNum++;
if(phrase[cnt] == inp)
cnt++;
else
break;
}while( cnt<phrase.length() && !fin.eof());
if( cnt == phrase.length()){
counter++;
vec.push_back( ((int)fin.tellg())-1 );
}
}
fin.close();
}
I run this program by calling find_backslash(path_of_my_file, a vector<int>, size_of_file). The error only happens sometimes, not on every run.
I'm just guessing at the problem here, but you pass a (pointer to a) structure to all threads, and all threads have some common pointers that they all share in the structure, for example the std::vector. If more than one thread tries to modify the vector at the same time you have a race condition.
Race conditions are bad and you need to protect against them using some kind of lock, for example using a mutex.
I have seen many posts but didn't find something like i want.
I am getting wrong output :
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ...... // may be this is EOF character
Going into infinite loop.
My algorithm:
Go to end of file.
decrease position of pointer by 1 and read character by
character.
exit if we found our 10 lines or we reach beginning of file.
now i will scan the full file till EOF and print them //not implemented in code.
code:
#include<iostream>
#include<stdio.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
using namespace std;
// Attempt at copying the last 10 lines of input.txt to output.txt by reading
// the file backwards one character at a time.
// NOTE(review): this is the version that prints 'ÿ' and never terminates;
// the comments below mark where it goes wrong. The code itself is unchanged.
int main()
{
// NOTE(review): neither fopen result is checked for NULL.
FILE *f1=fopen("input.txt","r");
FILE *f2=fopen("output.txt","w");
int i,j,pos;
int count=0;
// NOTE(review): fgetc returns int; storing it in a char makes EOF
// indistinguishable from the valid character 0xFF ('ÿ').
char ch;
int begin=ftell(f1);
// go to the end of the file
fseek(f1,0,SEEK_END);
int end = ftell(f1);
pos=ftell(f1);
while(count<10)
{
pos=ftell(f1);
// intended exit for files with fewer than 10 lines
// NOTE(review): ftell does not report positions before the start of the
// file, so this test is not a reliable stop condition.
if(pos<begin)
break;
// The first read happens at end of file, so it yields EOF.
ch=fgetc(f1);
if(ch=='\n')
count++;
fputc(ch,f2);
// NOTE(review): the third argument of fseek must be SEEK_SET, SEEK_CUR or
// SEEK_END; `end` holds a file position, not one of those constants.
fseek(f1,pos-1,end);
}
return 0;
}
UPD 1:
changed code: it has just 1 error now - if input has lines like
3enil
2enil
1enil
it prints 10 lines only
line1
line2
line3ÿine1
line2
line3ÿine1
line2
line3ÿine1
line2
line3ÿine1
line2
PS:
1. working on windows in notepad++
this is not homework
also i want to do it without using any more memory or use of STL.
i am practicing to improve my basic knowledge so please don't post about any functions (like tail -5 tc.)
please help to improve my code.
Comments in the code
#include <stdio.h>
#include <stdlib.h>
/*
 * Copies the last 10 lines of input.txt to output.txt.
 *
 * The file is scanned backwards for the newline that precedes the tenth
 * line from the end, then copied forwards from there. Files with fewer
 * than 10 lines are copied completely. Exits with EXIT_FAILURE when a file
 * cannot be opened or positioned.
 */
int main(void)
{
    FILE *in, *out;
    int count = 0;
    int found = 0;
    long int pos;
    char s[100];

    in = fopen("input.txt", "r");
    /* always check return of fopen */
    if (in == NULL) {
        perror("fopen");
        exit(EXIT_FAILURE);
    }
    out = fopen("output.txt", "w");
    if (out == NULL) {
        perror("fopen");
        exit(EXIT_FAILURE);
    }
    if (fseek(in, 0, SEEK_END) != 0 || (pos = ftell(in)) < 0) {
        perror("fseek");
        exit(EXIT_FAILURE);
    }
    /* A last line without a trailing '\n' still counts as one line. */
    if (pos > 0 && fseek(in, pos - 1, SEEK_SET) == 0 && fgetc(in) != '\n') {
        count = 1;
    }
    /* Don't write each char on output.txt, just search for '\n' */
    while (pos) {
        if (fseek(in, --pos, SEEK_SET) != 0) { /* seek from begin */
            perror("fseek");
            exit(EXIT_FAILURE);
        }
        if (fgetc(in) == '\n') {
            /* The 11th newline from the end precedes the last 10 lines; the
               fgetc above leaves the stream right after it. */
            if (count++ == 10) {
                found = 1;
                break;
            }
        }
    }
    /* Start of file reached: the last fgetc consumed the first character,
       so go back to offset 0 or that character would be lost. */
    if (!found && fseek(in, 0, SEEK_SET) != 0) {
        perror("fseek");
        exit(EXIT_FAILURE);
    }
    /* Write line by line, is faster than fputc for each char */
    while (fgets(s, sizeof(s), in) != NULL) {
        fprintf(out, "%s", s);
    }
    fclose(in);
    fclose(out);
    return 0;
}
There are a number of problems with your code. The most
important one is that you never check that any of the functions
succeeded. And saving the results an ftell in an int isn't
a very good idea either. Then there's the test pos < begin;
this can only occur if there was an error. And the fact that
you're putting the results of fgetc in a char (which results
in a loss of information). And the fact that the first read you
do is at the end of file, so will fail (and once a stream enters
an error state, it stays there). And the fact that you can't
reliably do arithmetic on the values returned by ftell (except
under Unix) if the file was opened in text mode.
Oh, and there is no "EOF character"; 'ÿ' is a perfectly valid
character (0xFF in Latin-1). Once you assign the return value
of fgetc to a char, you've lost any possibility to test for
end of file.
I might add that reading backwards one character at a time is
extremely inefficient. The usual solution would be to allocate
a sufficiently large buffer, then count the '\n' in it.
EDIT:
Just a quick bit of code to give the idea:
// Returns the last lineCount lines of the file as one string, with every
// '\r' removed.
//
// The file is read from its end in growing chunks until the chunk holds the
// newline that precedes the first wanted line, or the whole file.
// A last line without a trailing '\n' counts as a line. Returns an empty
// string when lineCount <= 0, the file is empty, or it cannot be read.
std::string
getLastLines( std::string const& filename, int lineCount )
{
    if ( lineCount <= 0 ) {
        return std::string();
    }
    std::ifstream source( filename.c_str(), std::ios_base::binary );
    source.seekg( 0, std::ios_base::end );
    std::streamoff const endPos = source.tellg();
    if ( !source || endPos <= 0 ) {
        return std::string();
    }
    size_t const size = static_cast<size_t>( endPos );
    size_t const granularity = 100 * static_cast<size_t>( lineCount );

    std::vector<char> buffer;
    std::ptrdiff_t newlineCount = 0;
    std::ptrdiff_t keep = lineCount;    // newlines that belong to the result
    do {
        buffer.resize( std::min( buffer.size() + granularity, size ) );
        source.seekg( -static_cast<std::streamoff>( buffer.size() ),
                      std::ios_base::end );
        source.read( buffer.data(), buffer.size() );
        if ( !source ) {
            return std::string();
        }
        newlineCount = std::count( buffer.begin(), buffer.end(), '\n' );
        // An unterminated last line contributes no newline of its own.
        keep = buffer.back() == '\n' ? lineCount : lineCount - 1;
        // One newline more than `keep` is needed: it marks where the first
        // wanted line starts. Stopping at exactly `keep` would return a
        // first line cut off at the chunk boundary.
    } while ( buffer.size() != size && newlineCount <= keep );

    std::vector<char>::iterator start = buffer.begin();
    while ( newlineCount > keep ) {
        start = std::find( start, buffer.end(), '\n' ) + 1;
        -- newlineCount;
    }
    // Qualified call: an unqualified remove may resolve to ::remove(const char*).
    std::vector<char>::iterator end = std::remove( start, buffer.end(), '\r' );
    return std::string( start, end );
}
This is a bit weak in the error handling; in particular, you
probably want to distinguish the between the inability to open
a file and any other errors. (No other errors should occur,
but you never know.)
Also, this is purely Windows, and it supposes that the actual
file contains pure text, and doesn't contain any '\r' that
aren't part of a CRLF. (For Unix, just drop the next to the
last line.)
This can be done using circular array very efficiently.
No additional buffer is required.
// Prints the last n lines of the file to std::cout, one per line.
//
// The lines are kept in a circular buffer of n strings, so memory use is
// bounded by n lines however long the file is. Nothing is printed when
// n <= 0 or the file cannot be read.
void printlast_n_lines(const char* fileName, int n){
    if (n <= 0) {
        return; // also avoids the modulo by zero below
    }
    const std::size_t capacity = static_cast<std::size_t>(n);
    // std::vector instead of `string l[k]`: a runtime-sized array is not
    // standard C++.
    std::vector<std::string> ring(capacity);

    std::ifstream file(fileName);
    std::size_t size = 0; // number of lines read so far
    std::string line;
    // Testing getline itself avoids counting a phantom empty line at EOF.
    while (std::getline(file, line)) {
        ring[size % capacity].swap(line); // overwrite the oldest slot
        size++;
    }

    // Once the buffer has wrapped, the oldest line sits at size % capacity.
    const std::size_t start = size > capacity ? (size % capacity) : 0;
    const std::size_t count = std::min(capacity, size);
    for (std::size_t i = 0; i < count; i++) {
        std::cout << ring[(start + i) % capacity] << '\n';
    }
}
Please provide feedback.
int end = ftell(f1);
pos=ftell(f1);
Both calls return the position at the end of the file, i.e. EOF.
When you then read, you get EOF, and every successful read also moves the file pointer one position forward.
So I recommend decreasing the current position by one before the first read.
Alternatively, put fseek(f1, -2, SEEK_CUR) at the beginning of the while loop: one step undoes the advance made by the read, the other moves one position back.
I believe you are using fseek incorrectly. Check the fseek man page.
Try this:
fseek(f1, -2, SEEK_CUR);
//1 to neutrialize change from fgect
//and 1 to move backward
Also you should set position at the beginning to the last element:
fseek(f1, -1, SEEK_END).
You don't need end variable.
You should check the return values of all functions (fgetc, fseek and ftell). It is good practice. I don't know whether this code will work with empty files or similar edge cases.
Use fseek(f1, -2, SEEK_CUR); to move backwards.
I wrote this code and it works; you can try it:
#include "stdio.h"
/*
 * Writes the last 10 lines of count.c to stdout and to out11.txt.
 *
 * The file is scanned backwards to find where the last 10 lines start, then
 * copied forwards from that offset so the text comes out in its original
 * order. Files with fewer than 10 lines are copied completely.
 * Returns 1 when a file cannot be opened or positioned.
 */
int main()
{
    int count = 0;
    int ch; /* int, not char: fgetc reports EOF out of band */
    long pos;
    const char * fileName = "count.c";
    const char * outFileName = "out11.txt";
    FILE * fpIn;
    FILE * fpOut;

    if((fpIn = fopen(fileName,"r")) == NULL )
    {
        printf(" file %s open error\n",fileName);
        return 1;
    }
    if((fpOut = fopen(outFileName,"w")) == NULL )
    {
        printf(" file %s open error\n",outFileName);
        fclose(fpIn);
        return 1;
    }
    if(fseek(fpIn,0,SEEK_END) != 0 || (pos = ftell(fpIn)) < 0L)
    {
        printf(" file %s seek error\n",fileName);
        fclose(fpIn);
        fclose(fpOut);
        return 1;
    }

    /* A last line without a trailing '\n' still counts as one line. */
    if(pos > 0L && fseek(fpIn,pos - 1,SEEK_SET) == 0 && fgetc(fpIn) != '\n')
        count = 1;

    /* Walk backwards with absolute offsets. A relative seek of -2 fails at
       the start of the file and leaves the position unchanged, which made
       the loop spin forever on short files. */
    while(pos > 0L)
    {
        if(fseek(fpIn,pos - 1,SEEK_SET) != 0)
            break;
        ch = fgetc(fpIn);
        if(ch == EOF)
            break;
        /* the 11th newline from the end precedes the last 10 lines */
        if(ch == '\n' && count++ == 10)
            break;
        --pos;
    }

    /* pos is now the offset of the first character to output */
    if(fseek(fpIn,pos,SEEK_SET) == 0)
    {
        while((ch = fgetc(fpIn)) != EOF)
        {
            putchar(ch);
            fputc(ch,fpOut);
        }
    }
    fclose(fpIn);
    fclose(fpOut);
    return 0;
}
I would use two streams to print last n lines of the file:
This runs in O(lines) runtime and O(lines) space.
#include<bits/stdc++.h>
using namespace std;
// Prints the last n lines of file.in, with n read from standard input.
//
// Two streams read the same file: f is moved n lines ahead, then both
// advance together until f reaches the end, which leaves g exactly n lines
// before the end. If the file has fewer than n lines, all of it is printed.
// Returns 1 when n cannot be read or is negative.
int main(){
    std::ifstream f("file.in");
    std::ifstream g("file.in");

    int n;
    if(!(std::cin >> n) || n < 0) return 1;

    std::string line;
    // move f stream n lines down (the loop bound was the undeclared `k`).
    for(int i=0; i<n; ++i) std::getline(f,line);

    // move f and g stream at the same pace.
    while(std::getline(f,line)){
        std::getline(g,line);
    }
    // g now has the last n lines left to read.
    while(std::getline(g,line))
        std::cout << line << '\n';
    return 0;
}
A solution with a O(lines) runtime and O(N) space is using a queue:
ifstream fin("file.in");
int k;
cin >> k;
queue<string> Q;
string line;
for(; getline(fin, line); ){
if(Q.size() == k){
Q.pop();
}
Q.push(line);
}
while(!Q.empty()){
cout << Q.front() << endl;
Q.pop();
}
Here is the solution in C++.
#include <iostream>
#include <string>
#include <exception>
#include <cstdlib>
// Prints the last n lines of standard input (default 5, or argv[1]).
//
// Standard input must be seekable, i.e. redirected from a regular file.
// The stream is walked backwards from its end until n + 1 newlines have been
// passed or the start is reached, then printed forwards from there.
// Exits with EXIT_FAILURE on a non-numeric argument or unseekable input.
int main(int argc, char *argv[])
{
    auto& file = std::cin;
    int n = 5;
    if (argc > 1) {
        try {
            n = std::stoi(argv[1]);
        } catch (const std::exception&) {
            std::cout << "Error: argument must be an int" << std::endl;
            std::exit(EXIT_FAILURE);
        }
    }
    if (n <= 0) // nothing to print
        std::exit(EXIT_SUCCESS);

    file.seekg(0, file.end);
    if (!file || std::streamoff(file.tellg()) < 0) {
        // A pipe or terminal cannot be repositioned; without this check the
        // backward loop below would never terminate.
        std::cout << "Error: input must be seekable" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    n = n + 1; // Add one so the loop stops at the newline above
    bool atBoundary = false;
    while (!atBoundary && std::streamoff(file.tellg()) > 0) {
        file.seekg(-1, file.cur);
        if (!file) {
            std::cout << "Error: input must be seekable" << std::endl;
            std::exit(EXIT_FAILURE);
        }
        if (file.peek() == '\n' && --n == 0)
            atBoundary = true;
    }
    // Only step over a newline we stopped on; a file that merely starts
    // with an empty line keeps it.
    if (atBoundary)
        file.seekg(1, file.cur);

    std::string line;
    while (std::getline(file, line))
        std::cout << line << '\n';
    std::cout.flush();

    std::exit(EXIT_SUCCESS);
}
Build:
$ g++ <SOURCE_NAME> -o last_n_lines
Run:
$ ./last_n_lines 10 < <SOME_FILE>
I have a class that takes a html file and formats it. Here is my code.
// Reads inputFile line by line and prints it to std::cout with a line break
// after every tag. An element whose closing tag directly follows its content
// on the same line (e.g. "<b>text</b>") stays on one output line.
//
// @param formattedFile  currently unused; writing to it is commented out
// @param inputFile      path of the HTML file to read
//
// Prints "could not open file" when inputFile cannot be opened. Text after
// the last '>' of an input line is not printed.
void FormatHtml::Format(const std::string &formattedFile, const std::string &inputFile) const
{
    (void)formattedFile;

    std::ifstream inputfileObj(inputFile.c_str());
    //ofstream formattedFileObj(formattedFile.c_str());
    if(!inputfileObj.is_open() /*|| !formattedFileObj.is_open()*/)
    {
        std::cout<<"could not open file";
        return;
    }

    std::string str;
    // Testing getline itself avoids processing a stale line after EOF.
    while(std::getline(inputfileObj,str))
    {
        // Positions are size_type so that npos is never folded into -1 and
        // then used in arithmetic.
        std::string::size_type pos = str.find('>');
        while(pos != std::string::npos)
        {
            const std::string::size_type nextOpen = str.find('<',pos);
            // Is the next tag a closing tag? The bounds check comes first so
            // a '<' at the very end of the line is not indexed past.
            if(nextOpen != std::string::npos
               && nextOpen + 1 < str.length()
               && str[nextOpen + 1] == '/')
            {
                const std::string::size_type closeEnd = str.find('>',nextOpen);
                // Extend the cut only when the closing tag really ends on
                // this line. Taking npos here made pos + 1 wrap to 0, so the
                // line was never consumed and the loop never ended.
                if(closeEnd != std::string::npos)
                {
                    pos = closeEnd;
                }
            }
            std::cout<<str.substr(0,pos+1)<<'\n';
            //formattedFileObj<<str.substr(0,pos+1)<<endl;
            str.erase(0,pos+1);
            pos = str.find('>');
        }
    }
    inputfileObj.close();
    //formattedFileObj.close();
}
}
If I use this function with a small file it works fine, but for a larger HTML file, such as the source of Google's home page, it goes into an infinite loop.
following is call stack.
ntdll.dll!76f99a94()
[Frames below may be incorrect and/or missing, no symbols loaded for ntdll.dll]
ntdll.dll!76f98d94()
ntdll.dll!76fa9522()
kernel32.dll!7588cb6c()
kernel32.dll!7588cbfc()
kernel32.dll!7588c964()
msvcr90d.dll!_write_nolock(int fh=14548992, const void * buf=0x77004cc0, unsigned int cnt=4074376) Line 335 + 0x3c bytes C
ffffffff()
And when i pause the execution it always stops in one file called write.c and at following code:
/* write the lf buf and update total */
if ( WriteFile( (HANDLE)_osfhnd(fh),
lfbuf,
(int)(q - lfbuf),
(LPDWORD)&written,
NULL) )
{
charcount += written;
if (written < q - lfbuf)
break;
}
Any one having clue what could be the reason, why it always happens with large unformatted file.
This line:
pos = str.find(">",pos3);
If pos == string::npos, then you carry on to do this:
str = str.substr(pos+1,string::npos);
pos = str.find(">");
string::npos == -1, so pos+1 == 0, so str.substr returns all of str. You are now in an infinite loop.