C++ find in file a specific string which contains name - c++

So my goal is to create a console app using visual C++ lang which opens a file ( in my case it is a MIME file) and finds a certain string. In my case it sounds - Content-Disposition: attachment; filename="file.smth".
And then the app shows the file.smth
So here is what I have done. It has some problems that I am not able to find.When I run a console app It gets stuck at finding a filename.
#include "stdafx.h"
#include <stdlib.h>
#include <string.h>
bool ShowFile(char * FileName, char * Name)
{
FILE* file;
if (fopen_s(&file, FileName, "rt") != 0) { return false; }
while (!feof(file))
{
char AttName[100];
int a = sscanf_s("Content-Disposition: attachment; filename=\"%[^\"]\"", AttName,_countof(AttName));
Name = AttName;
}
fclose(file);
return true;
}
int main(int argc, char* argv[])
{
char FileName[100];
if (argc == 2) strcpy_s(FileName, _countof(FileName), argv[1]);
else {
printf("Source file name: "); gets_s(FileName, _countof(FileName));
}
char Name[100];
ShowFile(FileName, Name);
printf("%s \n", Name);
system("pause");
return 0;
}
Thank you for your attention!

The function ShowFile can be improved.
Suggestion 1
If you would like the function to return the name of the file (file.smth) by the second argument, change it to std::string&.
//bool ShowFile(char * FileName, char * Name)
bool ShowFile(char * FileName, std::string& Name)
As the function stands right now, you are changing Name in the function to point to a local variable. But that has no impact on the calling function. The line
Name = AttName;
is completely useless.
Suggestion 2
You haven't added any code to read the data from the file. The line
int a = sscanf_s("Content-Disposition: attachment; filename=\"%[^\"]\"", AttName,_countof(AttName));
has the format specification as the first argument, not the string from which to read the data. It is missing the source string.
You need to add code to read lines of text and try to extract AttName from those lines.
Suggestion 3
Don't use while (!feof(file)). See Why is “while ( !feof (file) )” always wrong?.
You need something like:
char line[200];
while ( fgets(line, sizeof(line), file) )
{
char AttName[100];
int a = sscanf_s(line, "Content-Disposition: attachment; filename=\"%[^\"]\"", AttName,_countof(AttName));
...
}
Suggestion 4
Always check the returned value of scanf family of functions to make sure that the function was able to assign data to the variables. Don't assume it succeeded.
int a = sscanf_s(line, "Content-Disposition: attachment; filename=\"%[^\"]\"", AttName,_countof(AttName));
// Name = AttName;
if ( a == 1 )
{
Name = AttName;
}
Suggestion 5
Add a flag to indicate that Name was successfully read.
Revised function
bool ShowFile(char * FileName, std::string& Name)
{
bool status = false;
FILE* file;
if (fopen_s(&file, FileName, "rt") != 0) { return false; }
char line[200];
while (fgets(line, sizeof(line), file))
{
char AttName[100];
int a = sscanf_s(line, "Content-Disposition: attachment; filename=\"%[^\"]\"", AttName,_countof(AttName));
if ( a == 1 )
{
Name = AttName;
// Got what we are looking for.
// Set the status and break out of the loop.
// There is no need to look for Name any more.
status = true;
break;
}
}
fclose(file);
return status;
}

Related

Enabling C code to run as C++ code

I have a C program that finds duplicate files within a directory. The program is executed on the command line and passed 2 arguments. One is the parent directory, and argument two is the file name. It is working code in c, but I have a GUI and other files for "microservices" written in c++.
How would one call this C code from a c++ file?
#include<stdio.h>
#include<dirent.h>
#include<sys/stat.h>
#include<errno.h>
#include<string.h>
#include<stdlib.h>
#include<fcntl.h>
//Compile: gcc dreamduplicatefinder.c -o dreamduplicatefinder.exe
//Run: ./dreamduplicateFinder.exe parent_dir filename...
#define false 0
#define true 1
int duplicateCount = 0;
int FindDuplicates(char* path, char* fileName);
int CompareFiles(char* originalFile, char* currFile);
int main(int argc, char *argv[])
{
//Two additional arguments are expected: Parent dir, file to find duplicates of...
if (argc != 3)
{
printf("Usage: %s 'Base Directory' 'File Name'\n", argv[0]);
return -1;
}
//argv[1] = base dir, argv[2] = file to find duplicates of; e.g argv[1] = /home,
//argv[2] = "file.txt"...
FindDuplicates(argv[1], argv[2]);
printf("\n\nFound %d duplicate(s)\n", duplicateCount);
return 0;
}
int FindDuplicates(char* path, char* fileName)
{
DIR *dir;
struct dirent *dp;
struct dirent *result;
struct stat statp;
char absoluteFilePath[255];
if ((dir = opendir(path)) == NULL)
{
//printf(dir); //error could becuase trying to open shortcut or corrupt folder.
printf("%s\n",path);
perror("Failed to open directory");
return -1;
}
while ((dp = readdir(dir)) != NULL)
{
//readdir returns . and .. which we should ignore...
if (strcmp(dp->d_name, ".") && strcmp(dp->d_name, ".."))
{
//find file full path, relative to base path. e.g, a /home/file.txt...
//copy path to absoluteFilePath...
strcpy(absoluteFilePath, path);
//append / at end...
strcat(absoluteFilePath, "/");
//append filename to path...
strcat(absoluteFilePath, dp->d_name);
//check if the current file is actually file or dir...
stat(absoluteFilePath, &statp);
if (S_ISDIR(statp.st_mode)) //is a directory...
{
//recurse through this dir...
FindDuplicates(absoluteFilePath, fileName);
}
else if (S_ISREG(statp.st_mode)) //is a file...
{
//check for duplicates here...
//compare current file with the file specified by user...
if (strcmp(fileName, absoluteFilePath))
{
if (CompareFiles(fileName, absoluteFilePath))
{
//yes, duplicate; print it...
printf("%s\n", absoluteFilePath);
duplicateCount++;
}
}
} //end else if (S_ISREG(statp.st_mode))...
} //if (strcmp(dp->d_name, ".") && strcmp(dp->d_name,".."))...
} //end while...
closedir(dir);
return 0;
}
int CompareFiles(char* originalFile, char* currFile)
{
//two step comparison: (1) first check size; if not same, return false.
//If equal, (2) compare file content.If equal, return true, false otherwise...
struct stat statOriginal, statCurr;
stat(originalFile, &statOriginal);
stat(currFile, &statCurr);
//Step 1...
if ((int)statOriginal.st_size != (int)statCurr.st_size) //size not same...
return false;
//Step 2...
//size matches, files can be same; confirm it by matching both file contents...
int fdOriginal = open(originalFile, O_RDONLY);
int fdCurr = open(currFile, O_RDONLY);
if (fdOriginal == -1 || fdCurr == -1)
return false; //error occurred, not sure if file is duplicate...
//we will read file in small chunks and compare...
int chunkSize = 1024, bytesRead;
char *bufferOriginal = (char*)malloc(chunkSize * sizeof(char));
char *bufferCurr = (char*)malloc(chunkSize * sizeof(char));
while (true)
{
//read file in chunk...
bytesRead = read(fdOriginal, bufferOriginal, chunkSize);
if (bytesRead <= 0)
break; //end of file...
bytesRead = read(fdCurr, bufferCurr, bytesRead);
//compare buffer...
if (strcmp(bufferOriginal, bufferCurr)) //if content not matching...
return false;
}
return true;
}
My errors include: (from compareFiles function)
2x 'open' identifier not found
2x 'read' identifier not found
The working code for those curious.
Thank you #MarcusMüller & #JesperJuhl
#include "stdafx.h" //there is nothing in this header
#include<stdio.h>
#include<dirent.h>
#include<sys/stat.h>
#include<errno.h>
#include<string.h>
#include<stdlib.h>
#include<fcntl.h>
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
//Compile: gcc <name of this file>.cpp -o <nameOfThisFile>.exe
//Run: <nameOfThisFile> parent_dir filename...
#define false 0
#define true 1
int duplicateCount = 0;
int FindDuplicates(char* path, char* fileName);
int CompareFiles(char* originalFile, char* currFile);
int main(int argc, char *argv[])
{
//Two additional arguments are expected: Parent dir, file to find duplicates of...
if (argc != 3)
{
printf("Usage: %s 'Base Directory' 'File Name'\n", argv[0]);
return -1;
}
//argv[1] = base dir, argv[2] = file to find duplicates of; e.g argv[1] = /home,
//argv[2] = "file.txt"...
FindDuplicates(argv[1], argv[2]);
printf("\n\nFound %d duplicate(s)\n", duplicateCount);
return 0;
}
int FindDuplicates(char* path, char* fileName)
{
DIR *dir;
struct dirent *dp;
struct dirent *result;
struct stat statp;
char absoluteFilePath[255];
if ((dir = opendir(path)) == NULL)
{
//possibly trying to open shortcut or corrupt folder typically.
printf("Failed to open directory %s \n",path);
return -1;
}
while ((dp = readdir(dir)) != NULL)
{
//readdir returns . and .. which we should ignore...
if (strcmp(dp->d_name, ".") && strcmp(dp->d_name, ".."))
{
//find file full path, relative to base path. e.g, a /home/file.txt...
//copy path to absoluteFilePath...
strcpy(absoluteFilePath, path);
//append / at end...
strcat(absoluteFilePath, "/");
//append filename to path...
strcat(absoluteFilePath, dp->d_name);
//check if the current file is actually file or dir...
stat(absoluteFilePath, &statp);
if (S_ISDIR(statp.st_mode)) //is a directory...
{
//recurse through this dir...
FindDuplicates(absoluteFilePath, fileName);
}
else if (S_ISREG(statp.st_mode)) //is a file...
{
//check for duplicates here...
//compare current file with the file specified by user...
if (strcmp(fileName, absoluteFilePath))
{
if (CompareFiles(fileName, absoluteFilePath))
{
//yes, duplicate; print it...
printf("This is a duplicate! %s\n", absoluteFilePath);
duplicateCount++;
}
}
} //end else if (S_ISREG(statp.st_mode))...
} //if (strcmp(dp->d_name, ".") && strcmp(dp->d_name,".."))...
} //end while...
closedir(dir);
return 0;
}
int CompareFiles(char* originalFile, char* currFile)
{
//two step comparison: (1) first check size; if not same, return false.
//If equal, (2) compare file content.If equal, return true, false otherwise...
struct stat statOriginal, statCurr;
stat(originalFile, &statOriginal);
stat(currFile, &statCurr);
//Step 1...
if ((int)statOriginal.st_size != (int)statCurr.st_size) //size not same...
return false;
FILE* fdOriginal;
if (fdOriginal = fopen(originalFile, "r")) {
if (fdOriginal == NULL) { fputs("File error", stderr); return false; }
}
else return false; //error occurred, not sure if duplicate
FILE* fdCurr;
if (fdCurr = fopen(currFile, "r")) {
if (fdCurr == NULL) { fputs("File error", stderr); return false; }
}
else return false;
int chunkSize = 1024, objsRead;
char *bufferOriginal = (char*)malloc(chunkSize * sizeof(char));
if (bufferOriginal == NULL) { fputs("Memory error for buff orig", stderr); exit(2); }
char *bufferCurr = (char*)malloc(chunkSize * sizeof(char));
if (bufferCurr == NULL) { fputs("Memory error for buff curr", stderr); exit(2); }
while (true)
{
//read file in chunk...
//std::size_t fread( void* buffer, std::size_t size, std::size_t count, std::FILE* stream );
objsRead = fread(bufferOriginal, sizeof(char), chunkSize , fdOriginal);
if (objsRead <= 0)
break; //end of file...
objsRead = fread(bufferCurr, sizeof(char), objsRead, fdCurr);
//compare buffer...
if (strcmp(bufferOriginal, bufferCurr)) //if content not matching...
return false;
}
return true;
}
You usually just wouldn't do that. You'd wrap it in a C function, and compile it to an object file.
Then you'd include your C header with extern "C" {…}, and just call that function from C++.
When building your executable, you'd link in the object file containing your C function. Done!
Note: C isn't C++, and albeit your code not being illegal in C++ (as far as I can instantly tell), it does very "ugly" things (like #defineing true and false – ugh, that would already be a bad idea in C, to be honest). So, deal with it like you would deal with code in Fortran, or Java, or any other language that has a calling convention that you can use from C++ (which, usually, is the C calling convention): Just use it as an extern object.
Using ::open and ::read should cause the functions to be found.
You may also want to replace the C headers (like "string.h") with their C++ equivalent versions (like "cstring").
Your defines for true and false should also go. In C++ those are proper bools, not integers. This means the return type of CompareFiles should be changed to bool.
And you should wrap duplicateCount in an anonymous namespace - or return it from the function that updates it (either by returning a small struct with two ints, or by using a std::pair or std::tuple) - global variables are evil.

C++ Parse command line parameters and bool

So, I'm trying to add something that allows me to use argv to allow three command line inputs.
such that:
./program input.dat (string input)
so that (I assume) argv[0] = input.dat and argv[1] = string input, argue[2] = file output
...I'm not sure if I'm explaining it right, but this is my best effort.
what I want to do is have a command line input that allows me to have like, if it says "encrypt" it makes a bool true, and if I type "decrypt" it sets that bool to false.
bool encrypt;
std::string action(argv[2]);
if (action == "encrypt") {
encrypt = true;
} else if (action == "decrypt") {
encrypt = false;
} else {
// Report invalid argument
}
To do what you're describing:
int main(int argc, char** argv) {
if (argc < 3) {
// print usage here and return, since that's what you need.
}
const char* filename = argv[1];
if (!strcmp(argv[2], "encrypt")) {
// encrypt!
}
else if (!strcmp(argv[2], "decrypt")) {
// decrypt!
}
else {
// error!
}
}
Or alternatively with strings:
std::string filename = argv[1];
std::string mode = argv[2];
if (mode == "encrypt") {
// etc.
}
You can have lots of args, There's nothing to stop you from calling:
./program hi everybody this is a little excessive but just an example
At which point, you'd get called with argc == 12 and, e.g., argv[5] pointing to `"a"``.

Weird string result

My program should open a file, the file path is retrieved from the command line using argv[1].
I then try to open the file using fopen but my program crashes because the filepath I use doesn't contain double backslashes so fopen doesn't work.
I've tried to write my own convert function and using print to check the result looked good at first sight.
The problem is that when I use the returned const char* as argument it gives me a weird result.. my code:
#include <stdio.h>
#include <stdlib.h>
#include <string>
const char* ConvertToPath(std::string path)
{
std::string newpath = "";
for(unsigned int i = 0; i < path.length(); ++i)
{
if(path[i] == '\\')
{
newpath += "\\\\";
}
else
{
newpath += path[i];
}
}
printf("%s \n", newpath.c_str());
return newpath.c_str();
}
bool OpenDBC(const char* path)
{
const char* file = ConvertToPath(path);
printf("%s \n", file);
FILE* dbc = fopen(file, "rbw");
if (!dbc)
return false;
return true;
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
printf("Error, expected DBC file.");
getchar();
return -1;
}
if (!OpenDBC(argv[1]))
{
printf("There was an error opening the DBC file.");
getchar();
return -1;
}
getchar();
return 0;
}
Opening a DBC file with my program gives me the following result:
D:\\Achievement.dbc
a
So it looks like const char* file only contains 1 char of the file path, why?
You do not need the ConvertToPath function at all. Double backslashes are only needed in string literals. Never in variables such as a std::string.
I compiled your code on Linux and can not replicate your result
Running ./filereader "D:\\Achievement.dbc" results in
D:\\Achievement.dbc
D:\\Achievement.dbc
Running ./filereader "D:\\\\Achievement.dbc" results in
D:\\\\Achievement.dbc
D:\\\\Achievement.dbc
The later is what you want because command line arguments need to be escaped. Then you can remove the ConvertToPath.

Help Editing Code to Fix "Argument list too long" Error

I am currently doing some testing with a new addition to the ICU dictionary-based break iterator.
I have code that allows me to test the word-breaking on a text document but when the text document is too large it gives the error: bash: ./a.out: Argument list too long
I am not sure how to edit the code to break-up the argument list when it gets too long so that a file of any size can be run through the code. The original code author is quite busy, would someone be willing to help out?
I tried removing the printing of what is being examined to see if that would help, but I still get the error on large files (printing what is being examined isn't necessary - I just need the result).
If the code could be modified to read the source text file line by line and export the results line by line to another text file (ending up with all the lines when it is done), that would be perfect.
The code is as follows:
/*
Written by George Rhoten to test how word segmentation works.
Code inspired by the break ICU sample.
Here is an example to run this code under Cygwin.
PATH=$PATH:icu-test/source/lib ./a.exe "`cat input.txt`" > output.txt
Encode input.txt as UTF-8.
The output text is UTF-8.
*/
#include <stdio.h>
#include <unicode/brkiter.h>
#include <unicode/ucnv.h>
#define ZW_SPACE "\xE2\x80\x8B"
void printUnicodeString(const UnicodeString &s) {
int32_t len = s.length() * U8_MAX_LENGTH + 1;
char *charBuf = new char[len];
len = s.extract(0, s.length(), charBuf, len, NULL);
charBuf[len] = 0;
printf("%s", charBuf);
delete charBuf;
}
/* Creating and using text boundaries */
int main(int argc, char **argv)
{
ucnv_setDefaultName("UTF-8");
UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
printf("Examining: ");
if (argc > 1) {
// Override the default charset.
stringToExamine = UnicodeString(argv[1]);
if (stringToExamine.charAt(0) == 0xFEFF) {
// Remove the BOM
stringToExamine = UnicodeString(stringToExamine, 1);
}
}
printUnicodeString(stringToExamine);
puts("");
//print each sentence in forward and reverse order
UErrorCode status = U_ZERO_ERROR;
BreakIterator* boundary = BreakIterator::createWordInstance(NULL, status);
if (U_FAILURE(status)) {
printf("Failed to create sentence break iterator. status = %s",
u_errorName(status));
exit(1);
}
printf("Result: ");
//print each word in order
boundary->setText(stringToExamine);
int32_t start = boundary->first();
int32_t end = boundary->next();
while (end != BreakIterator::DONE) {
if (start != 0) {
printf(ZW_SPACE);
}
printUnicodeString(UnicodeString(stringToExamine, start, end-start));
start = end;
end = boundary->next();
}
delete boundary;
return 0;
}
Thanks so much!
-Nathan
The Argument list too long error message is coming from the bash shell and is happening before your code even gets started executing.
The only code you can fix to eliminate this problem is the bash source code (or maybe it is in the kernel) and then, you're always going to run into a limit. If you increase from 2048 files on command line to 10,000, then some day you'll need to process 10,001 files ;-)
There are numerous solutions to managing 'too big' argument lists.
The standardized solution is the xargs utility.
find / -print | xargs echo
is a un-helpful, but working example.
See How to use "xargs" properly when argument list is too long for more info.
Even xargs has problems, because file names can contain spaces, new-line chars, and other unfriendly stuff.
I hope this helps.
The code below reads the content of a file whos name is given as the first parameter on the command-line and places it in a str::buffer. Then, instead of calling the function UnicodeString with argv[1], use that buffer instead.
#include<iostream>
#include<fstream>
using namespace std;
int main(int argc, char **argv)
{
std::string buffer;
if(argc > 1) {
std::ifstream t;
t.open(argv[1]);
std::string line;
while(t){
std::getline(t, line);
buffer += line + '\n';
}
}
cout << buffer;
return 0;
}
Update:
Input to UnicodeString should be char*. The function GetFileIntoCharPointer does that.
Note that only the most rudimentary error checking is implemented below!
#include<iostream>
#include<fstream>
using namespace std;
char * GetFileIntoCharPointer(char *pFile, long &lRet)
{
FILE * fp = fopen(pFile,"rb");
if (fp == NULL) return 0;
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *pData = new char[size + 1];
lRet = fread(pData, sizeof(char), size, fp);
fclose(fp);
return pData;
}
int main(int argc, char **argv)
{
long Len;
char * Data = GetFileIntoCharPointer(argv[1], Len);
std::cout << Data << std::endl;
if (Data != NULL)
delete [] Data;
return 0;
}

How can I read keyboard input to character strings? (C++)

getc (stdin) reads keyboard input to integers, but what if I want to read keyboard input to character strings?
#include "stdafx.h"
#include "string.h"
#include "stdio.h"
void CharReadWrite(FILE *fin);
FILE *fptr2;
int _tmain(int argc, _TCHAR* argv[])
{
char alpha= getc(stdin);
char filename=alpha;
if (fopen_s( &fptr2, filename, "r" ) != 0 )
printf( "File stream %s was not opened\n", filename );
else
printf( "The file %s was opened\n", filename );
CharReadWrite(fptr2);
fclose(fptr2);
return 0;
}
void CharReadWrite(FILE *fin){
int c;
while ((c=fgetc(fin)) !=EOF) {
putchar(c);}
}
Continuing with the theme of getc you can use fgets to read a line of input into a character buffer.
E.g.
char buffer[1024];
char *line = fgets(buffer, sizeof(buffer), stdin);
if( !line ) {
if( feof(stdin) ) {
printf("end of file\n");
} else if( ferror(stdin) ) {
printf("An error occurerd\n");
exit(0);
}
} else {
printf("You entered: %s", line);
}
Note that ryansstack's answer is a much better, easier and safer solution given you are using C++.
http://www.cplusplus.com/reference/iostream/istream/getline/
Ta da!
A character (ASCII) is just an unsigned 8 bit integral value, ie. it can have a value between 0-255. If you have a look at an ASCII table you can see how the integer values map to characters. But in general you can just jump between the types, ie:
int chInt = getc(stdin);
char ch = chInt;
// more simple
char ch = getc(stdin);
// to be explicit
char ch = static_cast<char>(getc(stdin));
Edit: If you are set on using getc to read in the file name, you could do the following:
char buf[255];
int c;
int i=0;
while (1)
{
c = getc(stdin);
if ( c=='\n' || c==EOF )
break;
buf[i++] = c;
}
buf[i] = 0;
This is a pretty low level way of reading character inputs, the other responses give higher level/safer methods, but again if you're set on getc...
Since you already are mixing "C" code with "C++" by using printf, why not continue and use scanf scanf("%s", &mystring); in order to read and format it all nicely ?
Or of course what already was said.. getline