I want to get the changed content of a textfile. Detecting file change using inotify in C++ works fine, but is there a way to get the changed content of a text file? The problem is that the event of inotify is consumed before the content is written so ifstream gives the previous output.
I am using inotify as follows (simplified):
fd = inotify_init();
wd = inotify_add_watch(fd, "myDir", IN_MODIFY);
while(1)
{
i = 0;
length = read( fd, buffer, BUF_LEN );
while ( i < length ) {
struct inotify_event *event = ( struct inotify_event * ) &buffer[ i ];
if ( event->len && (event->mask & IN_MODIFY) && !(event->mask & IN_ISDIR)) {
getNewContent();//-->if I use ifstream herre, I get the previous content
cout<<event->name<<", WD"<<event->wd<<endl;
i += EVENT_SIZE + event->len;
}
}
}
void getNewContent(){
string temp;
ifstream in("myDir/test.txt");
while(getline(in, temp,';')){
cout<<temp.c_str()<<endl;
}
in.close();
}
Related
I need to download the file after a pause. But I do not know how to implement it correctly using https://github.com/yhirose/cpp-httplib . But the problem is that when the download resumes, the server starts sending me the file first. Question: how do I tell the server the size of the piece that I have already downloaded, so that the server sends me only the necessary part?
My code:
std::string body;
httplib::Client cli( url, port );
cli.set_follow_location( true );
int file_size = is_part_of_file ? GetFileSize( result_file_name.c_str() ) : 0; // it is downloaded part of the file
int last_percent;
auto res = cli.Get(
file_path.c_str(), httplib::Headers(),
[ & ]( const httplib::Response& response )
{
( void )response;
return *is_download;
},
[ & ]( const char* data, size_t data_length )
{
body.append( data, data_length );
return *is_download;
},
[ & ]( uint64_t len, uint64_t total )
{
int percent = ( int )( len * 100 / total );
if( last_percent != percent )
{
*p_percent = ( ( float )percent / 100 );
}
last_percent = percent;
return *is_download;
} );
if( res )
{
std::ofstream out( result_file_name, std::ios::binary | std::ios::app );
out << body;
out.close();
}
else
{
if( is_part_of_file )
{
std::ofstream out( result_file_name, std::ios::binary | std::ios::app );
out << body;
out.close();
}
return false;
}
I compiled a Linux program on Windows via Mingw but the output is wrong.
Error description:
The output of the program looks different on Windows than on Linux. This is how it looks on Windows:
>tig_2
CAATCTTCAGAGTCCAGAGTGGGAGGCACAGACTACAGAAAATGAGCAGCGGGGCTGGTA
>cluster_1001_conTTGGTGAAGAGAATTTGGACATGGATGAAGGCTTGGGCTTGACCATGCGAAGG
Expected output:
>cluster_1001_contig2
CAATCTTCAGAGTCCAGAGTGGGAGGCACAGACTACAGAAAATGAGCAGCGGGGCTGGTA
>cluster_1001_contig1
TTGGTGAAGAGAATTTGGACATGGATGAAGGCTTGGGCTTGACCATGCGAAGG
(Note: the output is very large to paste it here so the examples above are pseudo-real).
Possible cause:
I have observed that if I convert the enter characters the input file from Linux (LF) to Windows (CRLF) it almost works: the first character (>) in file is missing. The same code works perfectly on Linux without any enter conversion. So, the problem must be in the function that is parsing the input not in the one that writes the output:
seq_db.Read( db_in.c_str(), options );
Source code:
This is the piece that is parsing the input file. Anyway, I might me wrong. The fault might be in other place. In case it is needed, the FULL source code is here :)
void SequenceDB::Read( const char *file, const Options & options )
{
Sequence one;
Sequence dummy;
Sequence des;
Sequence *last = NULL;
FILE *swap = NULL;
FILE *fin = fopen( file, "r" );
char *buffer = NULL;
char *res = NULL;
size_t swap_size = 0;
int option_l = options.min_length;
if( fin == NULL ) bomb_error( "Failed to open the database file" );
if( options.store_disk ) swap = OpenTempFile( temp_dir );
Clear();
dummy.swap = swap;
buffer = new char[ MAX_LINE_SIZE+1 ];
while (not feof( fin ) || one.size) { /* do not break when the last sequence is not handled */
buffer[0] = '>';
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL && one.size == 0) break;
if( buffer[0] == '+' ){
int len = strlen( buffer );
int len2 = len;
while( len2 && buffer[len2-1] != '\n' ){
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL ) break;
len2 = strlen( buffer );
len += len2;
}
one.des_length2 = len;
dummy.des_length2 = len;
fseek( fin, one.size, SEEK_CUR );
}else if (buffer[0] == '>' || buffer[0] == '#' || (res==NULL && one.size)) {
if ( one.size ) { // write previous record
one.dat_length = dummy.dat_length = one.size;
if( one.identifier == NULL || one.Format() ){
printf( "Warning: from file \"%s\",\n", file );
printf( "Discarding invalid sequence or sequence without identifier and description!\n\n" );
if( one.identifier ) printf( "%s\n", one.identifier );
printf( "%s\n", one.data );
one.size = 0;
}
one.index = dummy.index = sequences.size();
if( one.size > option_l ) {
if ( swap ) {
swap_size += one.size;
// so that size of file < MAX_BIN_SWAP about 2GB
if ( swap_size >= MAX_BIN_SWAP) {
dummy.swap = swap = OpenTempFile( temp_dir );
swap_size = one.size;
}
dummy.size = one.size;
dummy.offset = ftell( swap );
dummy.des_length = one.des_length;
sequences.Append( new Sequence( dummy ) );
one.ConvertBases();
fwrite( one.data, 1, one.size, swap );
}else{
//printf( "==================\n" );
sequences.Append( new Sequence( one ) );
//printf( "------------------\n" );
//if( sequences.size() > 10 ) break;
}
//if( sequences.size() >= 10000 ) break;
}
}
one.size = 0;
one.des_length2 = 0;
int len = strlen( buffer );
int len2 = len;
des.size = 0;
des += buffer;
while( len2 && buffer[len2-1] != '\n' ){
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL ) break;
des += buffer;
len2 = strlen( buffer );
len += len2;
}
size_t offset = ftell( fin );
one.des_begin = dummy.des_begin = offset - len;
one.des_length = dummy.des_length = len;
int i = 0;
if( des.data[i] == '>' || des.data[i] == '#' || des.data[i] == '+' ) i += 1;
if( des.data[i] == ' ' or des.data[i] == '\t' ) i += 1;
if( options.des_len and options.des_len < des.size ) des.size = options.des_len;
while( i < des.size and ( des.data[i] != '\n') ) i += 1;
des.data[i] = 0;
one.identifier = dummy.identifier = des.data;
} else {
one += buffer;
}
}
#if 0
int i, n = 0;
for(i=0; i<sequences.size(); i++) n += sequences[i].bufsize + 4;
cout<<n<<"\t"<<sequences.capacity() * sizeof(Sequence)<<endl;
int i;
scanf( "%i", & i );
#endif
one.identifier = dummy.identifier = NULL;
delete[] buffer;
fclose( fin );
}
The format of the input file is like this:
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
etc
The issue is more than likely you need to open the file using the "rb" switch in the call to fopen. The "rb" opens the file in binary mode, as opposed to "r", which opens a file in "text" mode.
Since you're going back and forth between Linux and Windows, the end-of-line characters will be different. If you open the file as "text" in Windows, but the file was formatted for Linux, you're lying to Windows that it is a text file. So the runtime will do CR/LF conversion all wrong.
Therefore you should open the file as binary, "rb" so that the CR/LF translation isn't done.
I took the code below from the many examples on the internet about how to use inotify.
I then tried the following experiment:
1) run the watcher below
2) in a separate shell, cd into '/mypath' create some files to the folder you are watching. For example, 'date > output.txt' one ore more times.
3) you will see notifications from the watcher.
4) type 'ls /mypath' (or even 'watch -n 1 /mypath')
5) try 'date > output.txt' in /mypath. You will no longer see notifications from the watcher. Or at least, this is what happened when I tested with Ubuntu 12/13.
Any ideas about how to fix it?
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/inotify.h>
#include <limits.h>
#include <unistd.h>
#define MAX_EVENTS 1024 /*Max. number of events to process at one go*/
#define LEN_NAME 16 /*Assuming that the length of the filename won't exceed 16 bytes*/
#define EVENT_SIZE ( sizeof (struct inotify_event) ) /*size of one event*/
#define BUF_LEN ( MAX_EVENTS * ( EVENT_SIZE + LEN_NAME )) /*buffer to store the data of events*/
int main()
{
int length, i = 0, wd;
int fd;
char buffer[BUF_LEN];
/* Initialize Inotify*/
fd = inotify_init();
if ( fd < 0 ) {
perror( "Couldn't initialize inotify");
}
/* add watch to starting directory */
wd = inotify_add_watch(fd, "/mypath", IN_CLOSE_WRITE | IN_CLOSE_NOWRITE);
if (wd == -1)
{
printf("Couldn't add watch to %s\n","/mypath");
}
else
{
printf("Watching:: %s\n","/mypath");
}
/* do it forever*/
while(1)
{
i = 0;
length = read( fd, buffer, BUF_LEN );
if ( length < 0 ) {
perror( "read" );
}
while ( i < length ) {
struct inotify_event *event = ( struct inotify_event * ) &buffer[ i ];
if ( event->len ) {
if ( event->mask & IN_CLOSE_WRITE) {
if (event->mask & IN_ISDIR)
printf( "The directory %s was Created.\n", event->name );
else
printf( "The file %s was closed (write) with WD %d\n", event->name, event->wd );
}
if ( event->mask & IN_CLOSE_NOWRITE) {
if (event->mask & IN_ISDIR)
printf( "The directory %s was Created.\n", event->name );
else
printf( "The file %s was closed (nowrite) with WD %d\n", event->name, event->wd );
}
i += EVENT_SIZE + event->len;
}
}
}
/* Clean up*/
inotify_rm_watch( fd, wd );
close( fd );
return 0;
}
You should not put i += EVENT_SIZE + event->len; inside the if ( event->len ) block. If an event has a zero-length name, then the pointer should still be incremented by EVENT_SIZE (which is what will happen if you put that statement outside the block). I think you might be seeing an infinite loop in your inotify program, kicked off by the first event which happens to have a zero-length name. (Which is exactly what happens with the ls: The directory is being opened, not its files, so there's nothing in the name field.)
You get into an ever-ending loop since you do not change i when event->len == 0
Add this:
else
i += EVENT_SIZE ;
in case if ( event->len == 0 )
It turns out that when your program stops working it eats all the CPU. I made a few changes and it seems to work now. Here are the details:
Declare BUF_LEN to handle 16 events (you can increase that value):
#define BUF_LEN (16 * (sizeof(struct inotify_event) + NAME_MAX + 1))
Change your while (i < length) loop that processes the events to the following for loop:
for ( p = buffer; p < buffer + length; ) {
struct inotify_event *event = ( struct inotify_event * ) p;
p += sizeof(struct inotify_event) + event->len;
if ( event->len ) {
/* SNIP */
}
}
The p variable should be declared as char * and you can remove the i which is not used anymore.
I am using Totalview to debug some code, and it is stopping with a Bus Error.
What is this, and how do I fix it? here is the code snip
In my main:
char *infilename = "/home/dcole/Images/lena1024s.jpg";
/* Try opening a jpeg*/
if( read_jpeg_file( infilename ) > 0 )
{
//do some stuff
}
The function:
int read_jpeg_file( const char *filename )
{
/* these are standard libjpeg structures for reading(decompression) */
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
/* libjpeg data structure for storing one row, that is, scanline of an image */
JSAMPROW row_pointer[1];
FILE *infile = fopen( filename, "rb" ); //this line is where the debugger stops with a Bus Error
unsigned long location = 0;
int i = 0;
if ( !infile )
{
printf("Error opening jpeg file %s\n!", filename );
return -1;
}
return 1;
}
The file I am passing in does exist. I can see that it shows up as the right sting to the full path in the debugger.
I think you're having a memory access problem because of the filename variable...
Instead of:
char *infilename = "/home/dcole/Images/lena1024s.jpg";
try using:
char infilename[] = "/home/dcole/Images/lena1024s.jpg";
I have 3 terabyte .gz file and want to read its uncompressed content line-by-line in a C++ program. As the file is quite huge, I want to avoid loading it completely in memory.
Can anyone post a simple example of doing it?
You most probably will have to use ZLib's deflate, example is available from their site
Alternatively you may have a look at BOOST C++ wrapper
The example from BOOST page (decompresses data from a file and writes it to standard output)
#include <fstream>
#include <iostream>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/zlib.hpp>
int main()
{
using namespace std;
ifstream file("hello.z", ios_base::in | ios_base::binary);
filtering_streambuf<input> in;
in.push(zlib_decompressor());
in.push(file);
boost::iostreams::copy(in, cout);
}
For something that is going to be used regularly, you probably want to use one of the previous suggestions. Alternatively, you can do
gzcat file.gz | yourprogram
and have yourprogram read from cin. This will decompress parts of the file in memory as it is needed, and send the uncompressed output to yourprogram.
Using zlib, I'm doing something along these lines:
// return a line in a std::vector< char >
std::vector< char > readline( gzFile f ) {
std::vector< char > v( 256 );
unsigned pos = 0;
for ( ;; ) {
if ( gzgets( f, &v[ pos ], v.size() - pos ) == 0 ) {
// end-of-file or error
int err;
const char *msg = gzerror( f, &err );
if ( err != Z_OK ) {
// handle error
}
break;
}
unsigned read = strlen( &v[ pos ] );
if ( v[ pos + read - 1 ] == '\n' ) {
if ( pos + read >= 2 && v[ pos + read - 2 ] == '\r' ) {
pos = pos + read - 2;
} else {
pos = pos + read - 1;
}
break;
}
if ( read == 0 || pos + read < v.size() - 1 ) {
pos = read + pos;
break;
}
pos = v.size() - 1;
v.resize( v.size() * 2 );
}
v.resize( pos );
return v;
}
EDIT: Removed two mis-copied * in the example above.
EDIT: Corrected out of bounds read on v[pos + read - 2]
The zlib library supports decompressing files in memory in blocks, so you don't have to decompress the entire file in order to process it.
Here is some code with which you can read normal and zipped files line by line:
char line[0x10000];
FILE *infile=open_file(file);
bool gzipped=endsWith(file, ".gz");
if(gzipped)
init_gzip_stream(infile,&line[0]);
while (readLine(infile,line,gzipped)) {
if(line[0]==0)continue;// skip gzip new_block
printf(line);
}
#include <zlib.h>
#define CHUNK 0x100
#define OUT_CHUNK CHUNK*100
unsigned char gzip_in[CHUNK];
unsigned char gzip_out[OUT_CHUNK];
///* These are parameters to inflateInit2. See http://zlib.net/manual.html for the exact meanings. */
#define windowBits 15
#define ENABLE_ZLIB_GZIP 32
z_stream strm = {0};
z_stream init_gzip_stream(FILE* file,char* out){// unsigned
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = gzip_in;
strm.avail_in = 0;
strm.next_out = gzip_out;
inflateInit2 (& strm, windowBits | ENABLE_ZLIB_GZIP);
return strm;
}
bool inflate_gzip(FILE* file, z_stream strm,size_t bytes_read){
strm.avail_in = (int)bytes_read;
do {
strm.avail_out = OUT_CHUNK;
inflate (& strm, Z_NO_FLUSH);
// printf ("%s",gzip_out);
}while (strm.avail_out == 0);
if (feof (file)) {
inflateEnd (& strm);
return false;
}
return true;// all OK
}
char* first_line=(char*)&gzip_out[0];
char* current_line=first_line;
char* next_line=first_line;
char hangover[1000];
bool readLine(FILE* infile,char* line,bool gzipped){
if(!gzipped)
return fgets(line, sizeof(line), infile) != NULL;
else{
bool ok=true;
current_line=next_line;
if(!current_line || strlen(current_line)==0 || next_line-current_line>OUT_CHUNK){
current_line=first_line;
size_t bytes_read = fread (gzip_in, sizeof (char), CHUNK, infile);
ok=inflate_gzip(infile,strm,bytes_read);
strcpy(line,hangover);
}
if(ok){
next_line=strstr(current_line,"\n");
if(next_line){
next_line[0]=0;
next_line++;
strcpy(line+strlen(hangover),current_line);
hangover[0]=0;
}else{
strcpy(hangover,current_line);
line[0]=0;// skip that one!!
}
}
return ok;
}
}
You can't do that, because *.gz doesn't have "lines".
If compressed data has newlines, you'll have to decompress it. You don't have to decompress all data at once, you know, you can do it in chunks, and send strings back to main program when you encounter newline characters. *.gz can be decompressed using zlib.
Chilkat (http://www.chilkatsoft.com/) has libraries to read compressed files from a C++, .Net, VB, ... application.