I'm running into an annoying issue where I cannot access memory obtained from mmap in any way without getting a segmentation fault.
The function I used to obtain the mapped memory looks like this.
/**
* Preconditions: filename must be verified as referencing a valid file.
*/
char *IOUtils::memory_map_file(string const& filename, size_t length, int open_flags){
int fd = open(filename.c_str(), open_flags | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
int prot;
if (open_flags == O_RDONLY)
prot = PROT_READ;
else
prot = PROT_READ | PROT_WRITE;
void *output = mmap(NULL, length, prot, MAP_SHARED, fd, 0);
if (output == (void *) -1){
cerr << filename << ": " << strerror(errno) << '\n';
_exit(2);
}
close(fd);
return (char *)output;
}
My main function looks like this.
int main(int argc, char *argv[]){
size_t input_length = IOUtils::file_size(argv[1]); //This works fine
char *input_buffer = IOUtils::memory_map_file(argv[1], input_length,
O_RDONLY); //This succeeds
char *output_buffer = IOUtils::memory_map_file(argv[2], 2*input_length,
O_RDWR); //This succeeds
DomainParser parser(input_length, input_buffer, output_buffer);
while(!parser.finished()){
parser.write_entry();
}
mremap(output_buffer, 2*input_length, MREMAP_MAYMOVE,
parser.bytes_written());
munmap(output_buffer, parser.bytes_written());
}
The parser's relevant code looks like this:
void DomainParser::write_entry(void){
char const *in = input(); //Gets position in input file
char const *copy_up_to = end(); //Gets position at input EOF
for(char const *it = in; it < copy_up_to; ++it){
cerr << *it; //SIGSEGV!
if(*it == '\n') break;
}
cerr << '\n';
/* Do writes */
}
The program segfaults immediately upon cerr << *it. I have no idea why this would happen, considering all mapped memory is equipped with read permissions and is successfully allocated.
Edit: If anyone suspects the class is broken somewhere, here's the full source code.
using std::stringstream;
using std::string;
class DomainParser{
size_t _input_offset;
const size_t _input_length;
size_t _output_offset;
char const *input_buffer;
char *output_buffer;
char const *input(void){
return input_buffer + _input_offset;
}
char *output(void){
return output_buffer + _output_offset;
}
char const* end(void){
return input_buffer + _input_length;
}
char const *find(char const *begin, char const *max, char c){
while (*begin != c){
cerr << *begin++;
}
cerr << c;
return begin;
}
public:
DomainParser(size_t length, char const *input, char *output) :
_input_length(length), input_buffer(input), output_buffer(output)
{}
bool finished(void){
return _input_offset == _input_length;
}
size_t bytes_written(void){
return _output_offset;
}
size_t write_entry(void){
if (finished()){
return 0;
}
char const *in = input();
char const *copy_up_to = find(in, end(), '\n');
size_t input_entry_length = copy_up_to - in;
string s(in, copy_up_to);
stringstream ss(s);
string name, type, host;
ss >> name >> type >> host;
if (!ss){
cerr << s << '\n';
_input_offset += input_entry_length;
return 0;
}
ss.str(""); ss.clear();
ss << "{\"name\":\"" << name << "\"," <<
"\"host\":\"" << host << "\"," <<
"\"type\":\"" << type << "\"}\n";
string entry = ss.str();
std::memcpy(output(), entry.c_str(), entry.size());
_input_offset += input_entry_length;
_output_offset += entry.size();
return entry.size();
}
};
I don't see any initialization of _input_offset.
If you fix that, you will run into the problem that the output file is empty, so accessing any pages will trigger a SIGBUS signal. You need to resize it using ftruncate to the intended size (probably to match the size of the mapping, but this depends on what you are trying to do).
Also note that munmap can be very expensive (especially on large systems), so memory-mapped I/O is only a win when the file sizes are quite large.
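For illustration, here is a minimal sketch of how the output mapping could be prepared before writing through it, reusing argv[2] and input_length from the question's main (error handling abbreviated, sizes are just placeholders):
int out_fd = open(argv[2], O_RDWR | O_CREAT,
                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (out_fd == -1) { perror(argv[2]); _exit(2); }
// Grow the (possibly empty) file first; without this, touching mapped
// pages beyond the file's real size raises SIGBUS.
if (ftruncate(out_fd, 2 * input_length) == -1) { perror("ftruncate"); _exit(2); }
void *out = mmap(NULL, 2 * input_length, PROT_READ | PROT_WRITE,
                 MAP_SHARED, out_fd, 0);
if (out == MAP_FAILED) { perror("mmap"); _exit(2); }
close(out_fd); // the mapping remains valid after the descriptor is closed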
I am trying to compress and decompress raw PCM (16-bit) audio using Opus.
Below is my code for opus_encoder.c. If I remove my decoder.c, the buffer works just fine: the microphone is able to take in raw PCM data. However, once I add my decoder class, I get a lot of errors such as memory allocation failures, heap corruption and so on. Here are some of my errors:
std::bad_alloc at memory location 0x0031D4BC
Stack overflow (parameters: 0x00000000, 0x05122000)
Access violation reading location 0x04A40000.
Based on my understanding, I think my decoder cannot allocate the memory properly. Can you take a look at my code and see what went wrong?
Opus_encoder.c
#include "opusencoder.h"
#include <QtConcurrent/QtConcurrent>
opusencoder::opusencoder(){
}
opusencoder::~opusencoder(){
}
OpusEncoder *enc;
int error;
unsigned char *compressedbuffer;
opus_uint32 enc_final_range;
short pcm = 0;
unsigned char *opusencoder::encodedata(const char *audiodata, const unsigned int& size) {
if (size == 0)
return false;
enc = (OpusEncoder *)malloc(opus_encoder_get_size(1));
enc = opus_encoder_create(8000, 1, OPUS_APPLICATION_VOIP, &error);
if (enc == NULL)
{
exit;
}
opus_int32 rate;
opus_encoder_ctl(enc, OPUS_GET_BANDWIDTH(&rate));
this->encoded_data_size = rate;
int len;
for (int i = 0; i < size / 2; i++)
{
//combine pairs of bytes in the original data into two-byte number
//convert const char to short
pcm= audiodata[2 * i] << 8 | audiodata[(2 * i) + 1];
}
qDebug() << "audiodata: " << pcm << endl;
compressedbuffer = new (unsigned char[this->encoded_data_size]);
len = opus_encode(enc, &pcm, 320, compressedbuffer, this->encoded_data_size);
len = opus_packet_unpad(compressedbuffer, len);
len++;
if (len < 0)
{
qDebug() << "Failure to compress";
return NULL;
}
qDebug() << "COmpressed buffer:" << compressedbuffer << endl;
qDebug() << "opus_encode() ................................ OK.\n" << endl;
}
Opus_decoder.c
#include "opusdecoder.h"
#include <QtConcurrent/QtConcurrent>
#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
int num_channels = 1;
opusdecoder::opusdecoder(){
}
opusdecoder::~opusdecoder(){
}
opus_int16* opusdecoder::decodedata(int frame_size, const unsigned char *data)
{
dec = opus_decoder_create(8000, 1, &err);
if (dec == NULL)
{
exit;
}
opus_int32 rate;
opus_decoder_ctl(dec, OPUS_GET_BANDWIDTH(&rate));
rate = decoded_data_size;
this->num_channels = num_channels;
int decodedatanotwo;
opus_int16 *decompress = new (opus_int16[frame_size * this->num_channels]);
opus_packet_get_nb_channels(data);
decodedatanotwo= opus_decode(dec, data, this->decoded_data_size, decompress, 320, 0);
if (decodedatanotwo < 0)
{
qDebug() << "Failure to decompress";
return NULL;
}
qDebug() << "opus_decode() ................................ OK.\n" << decodedatanotwo << endl;
if (decodedatanotwo != frame_size)
{
exit;
}
}
Loop isn't making 10 copies and I have no idea how to change file names
#include "iostream"
#include "fstream"
#include "windows.h"
using namespace std;
void main()
{
char str[200];
ifstream myfile("as-1.txt");
if (!myfile)
{
cerr << "file not opening";
exit(1);
}
for (int i = 0; i < 10; i++)
{
ofstream myfile2("as-2.txt");
while (!myfile.eof())
{
myfile.getline(str, 200);
myfile2 << str << endl;
}
}
system("pause");
}
Solution using plain C API from <cstdio>. Easily customizable.
const char* file_name_format = "as-%d.txt"; //Change that if you need different name pattern
const char* original_file_name = "as-1.txt"; //Original file
const size_t max_file_name = 255;
FILE* original_file = fopen(original_file_name, "r+");
if(!original_file)
{
//file not found, handle error
return 1;
}
fseek(original_file, 0, SEEK_END); //(*)
long file_size = ftell(original_file);
fseek(original_file, 0, SEEK_SET);
char* original_content = (char*)malloc(file_size);
fread(original_content, file_size, 1, original_file);
fclose(original_file);
size_t copies_num = 10;
size_t first_copy_number = 2;
char file_name[max_file_name];
for(size_t n = first_copy_number; n < first_copy_number + copies_num; ++n)
{
snprintf(file_name, max_file_name, file_name_format, n);
FILE* file = fopen(file_name, "w");
fwrite(original_content, file_size, 1, file);
fclose(file);
}
free(original_content);
(*) As noted on this page, SEEK_END may not necessarily be supported (i.e. it is not a portable solution). However, most POSIX-compliant systems (including the most popular Linux distros), the Windows family and OS X support it without any problems.
Oh, and one more thing. This line
while (!myfile.eof())
is not quite correct. Read this question - it explains why you shouldn't write such code.
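For comparison, a version of the inner copy loop that tests the read itself avoids the duplicated last line. A small sketch reusing myfile, myfile2 and str from the question (the clear/seekg is only needed if the same input stream is re-read for every copy):
myfile.clear();              // reset eofbit before re-reading the input
myfile.seekg(0, std::ios::beg);
while (myfile.getline(str, 200))
    myfile2 << str << '\n';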
int main()
{
const int copies_of_file = 10;
for (int i = 1; i <= copies_of_file; ++i)
{
std::ostringstream name;
name << "filename as-" << i << ".txt";
std::ofstream ofile(name.str().c_str());
ofile.close();
}
return 0;
}
That will make 10 blank .txt files named "filename as-1.txt", "filename as-2.txt", etc.
Note also the use of int main: main always returns int, never void.
I have following piece of code that is supposed to calculate the SHA256 of a file. I am reading the file chunk by chunk and using EVP_DigestUpdate for the chunk. When I test the code with the file that has content
Test Message
Hello World
in Windows, it gives me a SHA256 value of 97b2bc0cd1c3849436c6532d9c8de85456e1ce926d1e872a1e9b76a33183655f but the value is supposed to be 318b20b83a6730b928c46163a2a1cefee4466132731c95c39613acb547ccb715, which can be verified here too.
Here is the code:
#include <openssl\evp.h>
#include <iostream>
#include <string>
#include <fstream>
#include <cstdio>
const int MAX_BUFFER_SIZE = 1024;
std::string FileChecksum(std::string, std::string);
int main()
{
std::string checksum = FileChecksum("C:\\Users\\Dell\\Downloads\\somefile.txt","sha256");
std::cout << checksum << std::endl;
return 0;
}
std::string FileChecksum(std::string file_path, std::string algorithm)
{
EVP_MD_CTX *mdctx;
const EVP_MD *md;
unsigned char md_value[EVP_MAX_MD_SIZE];
int i;
unsigned int md_len;
OpenSSL_add_all_digests();
md = EVP_get_digestbyname(algorithm.c_str());
if(!md) {
printf("Unknown message digest %s\n",algorithm);
exit(1);
}
mdctx = EVP_MD_CTX_create();
std::ifstream readfile(file_path,std::ifstream::in|std::ifstream::binary);
if(!readfile.is_open())
{
std::cout << "COuldnot open file\n";
return 0;
}
readfile.seekg(0, std::ios::end);
long filelen = readfile.tellg();
std::cout << "LEN IS " << filelen << std::endl;
readfile.seekg(0, std::ios::beg);
if(filelen == -1)
{
std::cout << "Return Null \n";
return 0;
}
EVP_DigestInit_ex(mdctx, md, NULL);
long temp_fil = filelen;
while(!readfile.eof() && readfile.is_open() && temp_fil>0)
{
int bufferS = (temp_fil < MAX_BUFFER_SIZE) ? temp_fil : MAX_BUFFER_SIZE;
char *buffer = new char[bufferS+1];
buffer[bufferS] = 0;
readfile.read(buffer, bufferS);
std::cout << strlen(buffer) << std::endl;
EVP_DigestUpdate(mdctx, buffer, strlen(buffer));
temp_fil -= bufferS;
delete[] buffer;
}
EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_destroy(mdctx);
printf("Digest is: ");
//char *checksum_msg = new char[md_len];
//int cx(0);
for(i = 0; i < md_len; i++)
{
//_snprintf(checksum_msg+cx,md_len-cx,"%02x",md_value[i]);
printf("%02x", md_value[i]);
}
//std::string res(checksum_msg);
//delete[] checksum_msg;
printf("\n");
/* Call this once before exit. */
EVP_cleanup();
return "";
}
I tried to write the hash generated by the program as a string using _snprintf, but it didn't work. How can I generate the correct hash and return the value as a string from the FileChecksum function? The platform is Windows.
EDIT: It seems the problem was caused by a CRLF issue. As Windows saves the file using \r\n, the calculated checksum was different. How do I handle this?
MS-DOS used the CR-LF convention, so when the file is saved in Windows, \r\n is used for carriage return and newline. When testing on the online site (the one you gave), only the \n character is in effect.
Thus you would have to check the checksum of the string Test Message\r\nHello World\r\n, which is equivalent to creating and reading the file in Windows (as given above), which is the case here.
However, the checksum of the files themselves, wherever they are created, will be the same.
Note: your code works fine :)
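If the goal is to reproduce the LF-only checksum for a file that was saved with CRLF, one option is to drop the '\r' bytes from each chunk before hashing it. A rough sketch, reusing buffer, bufferS and mdctx from the read loop in the question:
int kept = 0;
for (int j = 0; j < bufferS; j++)
    if (buffer[j] != '\r')       // skip the carriage returns
        buffer[kept++] = buffer[j];
EVP_DigestUpdate(mdctx, buffer, kept);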
It seems the problem was the length value I passed to EVP_DigestUpdate. I had passed the value from strlen, but replacing it with bufferS fixed the issue.
The code was modified as:
while(!readfile.eof() && readfile.is_open() && temp_fil>0)
{
int bufferS = (temp_fil < MAX_BUFFER_SIZE) ? temp_fil : MAX_BUFFER_SIZE;
char *buffer = new char[bufferS+1];
buffer[bufferS] = 0;
readfile.read(buffer, bufferS);
EVP_DigestUpdate(mdctx, buffer, bufferS);
temp_fil -= bufferS;
delete[] buffer;
}
and to send the checksum string, I modified the code as:
EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_destroy(mdctx);
char str[128] = { 0 };
char *ptr = str;
std::string ret;
for(i = 0; i < md_len; i++)
{
//_snprintf(checksum_msg+cx,md_len-cx,"%02x",md_value[i]);
sprintf(ptr,"%02x", md_value[i]);
ptr += 2;
}
ret = str;
/* Call this once before exit. */
EVP_cleanup();
return ret;
As for the wrong checksum earlier, the problem was in how Windows stores line endings. As suggested by Zangetsu, Windows was saving the text file with CRLF, but Linux and the site I mentioned earlier were using LF, so the checksum values differed. For files other than text, e.g. a DLL, the code now computes the correct checksum as a string.
I'm trying to read a binary file and store it in a buffer. The problem is that the binary file contains multiple null characters, and they are not at the end; they come before other binary text, so the text after a '\0' seems to get lost in the buffer.
Example:
char * a = "this is a\0 test";
cout << a;
This will just output: this is a
here's my real code:
this function reads one character
bool CStream::Read (int * _OutChar)
{
if (!bInitialized)
return false;
int iReturn = 0;
*_OutChar = fgetc (pFile);
if (*_OutChar == EOF)
return false;
return true;
}
And this is how I use it:
char * SendData = new char[4096 + 1];
for (i = 0; i < 4096; i++)
{
if (Stream.Read (&iChar))
SendData[i] = iChar;
else
break;
}
I just want to mention that there is a standard way to read from a binary file into a buffer.
Using <cstdio>:
char buffer[BUFFERSIZE];
FILE * filp = fopen("filename.bin", "rb");
int bytes_read = fread(buffer, sizeof(char), BUFFERSIZE, filp);
Using <fstream>:
std::ifstream fin("filename.bin", ios::in | ios::binary );
fin.read(buffer, BUFFERSIZE);
What you do with the buffer afterwards is all up to you of course.
Edit: Full example using <cstdio>
#include <cstdio>
#include <utility>   // std::swap
const int BUFFERSIZE = 4096;
int main() {
const char * fname = "filename.bin";
FILE* filp = fopen(fname, "rb" );
if (!filp) { printf("Error: could not open file %s\n", fname); return -1; }
char * buffer = new char[BUFFERSIZE];
size_t bytes;
while ( (bytes = fread(buffer, sizeof(char), BUFFERSIZE, filp)) > 0 ) {
// Do something with the bytes, the first elements of buffer.
// For example, reversing the data and forgetting about it afterwards!
for (char *beg = buffer, *end = buffer + bytes - 1; beg < end; beg++, end-- ) {
std::swap(*beg, *end);
}
}
// Done and close.
delete[] buffer;
fclose(filp);
return 0;
}
static std::vector<unsigned char> read_binary_file (const std::string filename)
{
// binary mode is only for switching off newline translation
std::ifstream file(filename, std::ios::binary);
file.unsetf(std::ios::skipws);
std::streampos file_size;
file.seekg(0, std::ios::end);
file_size = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<unsigned char> vec;
vec.reserve(file_size);
vec.insert(vec.begin(),
std::istream_iterator<unsigned char>(file),
std::istream_iterator<unsigned char>());
return (vec);
}
and then
auto vec = read_binary_file(filename);
auto src = (char*) new char[vec.size()];
std::copy(vec.begin(), vec.end(), src);
The problem is definitely the writing of your buffer, because you read a byte at a time.
If you know the length of the data in your buffer, you could force cout to go on:
char *bf = "Hello\0 world";
cout << bf << endl;
cout << string(bf, 12) << endl;
This should give the following output:
Hello
Hello world
However this is a workaround, as cout is meant to output printable data. Be aware that the output of non-printable chars such as '\0' is system dependent.
Alternative solutions:
But if you manipulate binary data, you should define ad-hoc data structures and printing. Here some hints, with a quick draft for the general principles:
struct Mybuff { // special structure to manage buffers of binary data
static const int maxsz = 512;
int size;
char buffer[maxsz];
void set(char *src, int sz) // binary copy of data of a given length
{ size = min(sz, maxsz); memcpy(buffer, src, size); }
} ;
Then you could overload the output operator function:
ostream& operator<< (ostream& os, Mybuff &b)
{
for (int i = 0; i < b.size; i++)
os.put(isprint(b.buffer[i]) ? b.buffer[i]:'*'); // non printables replaced with *
return os;
}
And you could use it like this:
char *bf = "Hello\0 world";
Mybuff my;
my.set(bf, 13); // physical copy of memory
cout << my << endl; // special output
I believe your problem is not in reading the data, but rather in how you try to print it.
char * a = "this is a\0 test";
cout << a;
This example you show us prints a C-string. Since a C-string is a sequence of chars ended by '\0', the printing function stops at the first null char.
This is because you need to know where the string ends, either by using a special terminating character (like '\0' here) or by knowing its length.
So, to print the whole data, you must know its length and use a loop similar to the one you use for reading it.
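For example, std::ostream::write takes an explicit byte count and therefore does not stop at embedded '\0' bytes. A small sketch, assuming SendData was filled as in the question and that i still holds how many bytes the read loop stored:
std::cout.write(SendData, i);   // writes all i bytes, including any '\0'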
Are you on Windows? If so you need to execute _setmode(_fileno(stdout), _O_BINARY);
Include <fcntl.h> and <io.h>
I need to search a (non-text) file for the byte sequence "9µ}Æ" (or "\x39\xb5\x7d\xc6").
After 5 hours of searching online this is the best I could do. It works but I wanted to know if there is a better way:
char buffer;
int pos=in.tellg();
// search file for string
while(!in.eof()){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='9'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='µ'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='}'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='Æ'){
cout << "found";
}
}
}
}
in.seekg((streampos) pos);
Note:
I can't use getline(). It's not a text file so there are probably not many line breaks.
Earlier I tried using a multi-character buffer, copying the buffer to a C++ string, and then using string::find(). This didn't work because there are many '\0' characters throughout the file, so the sequence in the buffer would be cut very short when it was copied to the string.
Similar to what bames53 posted; I used a vector as a buffer:
std::ifstream ifs("file.bin");
ifs.seekg(0, std::ios::end);
std::streamsize f_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<unsigned char> buffer(f_size);
ifs.read(reinterpret_cast<char*>(buffer.data()), f_size);
std::vector<unsigned char> seq = {0x39, 0xb5, 0x7d, 0xc6};
bool found = std::search(buffer.begin(), buffer.end(), seq.begin(), seq.end()) != buffer.end();
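If the offset of the match is needed rather than just a yes/no answer, the iterator returned by std::search can be turned into an index (same buffer and seq as above):
auto it = std::search(buffer.begin(), buffer.end(), seq.begin(), seq.end());
if (it != buffer.end())
    std::cout << "found at offset " << (it - buffer.begin()) << '\n';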
If you don't mind loading the entire file into an in-memory array (or using mmap() to make it look like the file is in memory), you could then search for your character sequence in-memory, which is a bit easier to do:
// Works much like strstr(), except it looks for a binary sub-sequence rather than a string sub-sequence
const char * MemMem(const char * lookIn, int numLookInBytes, const char * lookFor, int numLookForBytes)
{
if (numLookForBytes == 0) return lookIn; // hmm, existential questions here
else if (numLookForBytes == numLookInBytes) return (memcmp(lookIn, lookFor, numLookInBytes) == 0) ? lookIn : NULL;
else if (numLookForBytes < numLookInBytes)
{
const char * startedAt = lookIn;
int matchCount = 0;
for (int i=0; i<numLookInBytes; i++)
{
if (lookIn[i] == lookFor[matchCount])
{
if (matchCount == 0) startedAt = &lookIn[i];
if (++matchCount == numLookForBytes) return startedAt;
}
else
{
// rewind to just after where this partial match started, so an
// overlapping occurrence of the pattern is not skipped
if (matchCount > 0) i = (int)(startedAt - lookIn);
matchCount = 0;
}
}
}
return NULL;
}
.... then you can just call the above function on the in-memory data array:
char * ret = MemMem(theInMemoryArrayContainingFilesBytes, numBytesInFile, myShortSequence, 4);
if (ret != NULL) printf("Found it at offset %d\n", (int)(ret-theInMemoryArrayContainingFilesBytes));
else printf("It's not there.\n");
This program loads the entire file into memory and then uses std::search on it.
int main() {
std::string filedata;
{
std::ifstream fin("file.dat");
std::stringstream ss;
ss << fin.rdbuf();
filedata = ss.str();
}
std::string key = "\x39\xb5\x7d\xc6";
auto result = std::search(std::begin(filedata), std::end(filedata),
std::begin(key), std::end(key));
if (std::end(filedata) != result) {
std::cout << "found\n";
// result is an iterator pointing at '\x39'
}
}
const char delims[] = { 0x39, 0xb5, 0x7d, 0xc6 };
char buffer[4];
const size_t delim_size = 4;
const size_t last_index = delim_size - 1;
for ( size_t i = 0; i < last_index; ++i )
{
if ( ! ( is.get( buffer[i] ) ) )
return false; // stream too short
}
while ( is.get(buffer[last_index]) )
{
if ( memcmp( buffer, delims, delim_size ) == 0 )
break; // you are arrived
memmove( buffer, buffer + 1, last_index );
}
You are looking for 4 bytes:
unsigned int delim = 0xc67db539; // the bytes 0x39 0xb5 0x7d 0xc6 as laid out in memory on a little-endian host
unsigned int uibuffer;
char * buffer = reinterpret_cast<char *>(&uibuffer);
for ( size_t i = 0; i < 3; ++i )
{
if ( ! ( is.get( buffer[i] ) ) )
return false; // stream too short
}
while ( is.get(buffer[3]) )
{
if ( uibuffer == delim )
break; // you are arrived
uibuffer >>= 8;
}
Because you said you cannot search the entire file due to null characters in the string, here's an alternative: it reads the whole file into memory and uses recursion to find the first occurrence of the pattern.
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
string readFile (char *fileName) {
ifstream fi (fileName);
if (!fi)
cerr << "ERROR: Cannot open file" << endl;
else {
string str ((istreambuf_iterator<char>(fi)), istreambuf_iterator<char>());
return str;
}
return "";
}
bool findFirstOccurrenceOf_r (const string& haystack, char *needle, int haystack_pos, int needle_pos, int needle_len) {
if (needle_pos == needle_len)
return true;
if (haystack[haystack_pos] == needle[needle_pos])
return findFirstOccurrenceOf_r (haystack, needle, haystack_pos+1, needle_pos+1, needle_len);
return false;
}
int findFirstOccurrenceOf (const string& haystack, char *needle, int length) {
int pos = -1;
for (int i = 0; i + length <= (int)haystack.length(); i++) {
if (findFirstOccurrenceOf_r (haystack, needle, i, 0, length))
return i;
}
return pos;
}
int main () {
char str_to_find[4] = {'\x39', '\xB5', '\x7D', '\xC6'};
string contents = readFile ("input");
int pos = findFirstOccurrenceOf (contents, str_to_find, 4);
cout << pos << endl;
}
If the file is not too large, your best solution would be to load the whole file into memory, so you don't need to keep reading from the drive. If the file is too large to load in at once, you would want to load in chunks of the file at a time. But if you do load in chunks, make sure you check the edges of the chunks. It's possible that a chunk happens to split right in the middle of the string you're searching for.
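A rough sketch of that chunked approach (the file name and chunk size are placeholders): keep the last pattern-length-minus-one bytes of the previous chunk in front of the next one, so a match that straddles a chunk boundary is still found.
#include <fstream>
#include <vector>
#include <algorithm>
#include <iostream>
int main() {
    const std::vector<char> pattern = { '\x39', '\xb5', '\x7d', '\xc6' };
    const std::size_t chunk_size = 4096;
    std::ifstream in("file.bin", std::ios::binary);
    std::vector<char> window;                 // carry-over bytes + current chunk
    std::vector<char> chunk(chunk_size);
    std::size_t base = 0;                     // file offset of window[0]
    while (in.read(chunk.data(), chunk_size) || in.gcount() > 0) {
        window.insert(window.end(), chunk.begin(), chunk.begin() + in.gcount());
        auto it = std::search(window.begin(), window.end(),
                              pattern.begin(), pattern.end());
        if (it != window.end()) {
            std::cout << "found at offset " << base + (it - window.begin()) << '\n';
            return 0;
        }
        // keep only the last pattern.size()-1 bytes as overlap for the next chunk
        std::size_t keep = std::min(window.size(), pattern.size() - 1);
        base += window.size() - keep;
        window.erase(window.begin(), window.end() - keep);
    }
    std::cout << "not found\n";
    return 0;
}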