Given a C function that populates an unsigned char**, how to populate a std::vector with the data without an intermediate copy - c++

The C functions are:
int i2d_PrivateKey(EVP_PKEY *a, unsigned char **pp);
int i2d_X509(X509 *a, unsigned char **ppout);
And I have written code like this to copy into the std::vector :
// Excerpt: DER-encode a private key straight into a std::vector.
// populate PrivateKey
std::vector<uint8_t> PrivateKey;
// NOTE(review): the key is NULL in this excerpt; the full program further down
// obtains it from PKCS12_parse.
EVP_PKEY *privatekey = NULL;
// First call passes NULL as the output pointer; its return value is used only
// as the size for the vector.
int size = i2d_PrivateKey(privatekey, NULL);
if (size > 0)
{
// reserve() only requests capacity; size() is left unchanged, which is why
// the size printed below is still zero after the encode call.
PrivateKey.reserve(size);
uint8_t* ptr = &PrivateKey[0];
// The address of ptr (not ptr itself) is handed to the encoder.
i2d_PrivateKey(privatekey, &ptr);
std::cout << "PrivateKey size=" << PrivateKey.size() << '\n';
}
PrivateKey.size() returns zero so I know that the vector has not been populated. However, I know that size is a positive integer so the code inside the if (size block is executed.
If ptr is the address of the start of the PrivateKey array then shouldn't this code work?
Although this code uses openssl, it is more a general pointers question I think. If I create a temporary uint8_t array then it works but I would rather copy directly into the vector and save the overhead of the temporary copy.
Here is the code:
#include <openssl/ssl.h>
#include <openssl/pem.h>
#include <openssl/ossl_typ.h>
#include <openssl/x509.h>
#include <openssl/x509v3.h>
#include <openssl/pkcs12.h>
#include <openssl/err.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstdint>
// Parses PKCS#12 data held in `input`, then tries to DER-encode the private
// key and certificate directly into std::vectors. This is the version the
// question is about: both vectors report size() == 0 afterwards.
int main()
{
std::vector<uint8_t> input; // contains pkcs12 data
std::string Password = "password";
std::vector<uint8_t> Certificate;
std::vector<uint8_t> PrivateKey;
OpenSSL_add_all_algorithms();
OpenSSL_add_all_ciphers();
OpenSSL_add_all_digests();
ERR_load_crypto_strings();
PKCS12* p12_cert = NULL;
// NOTE(review): `input` is never filled in this listing, so &input[0] is taken
// on an empty vector.
const uint8_t* p1 = &input[0];
if(d2i_PKCS12(&p12_cert, &p1, input.size()) != NULL) {
EVP_PKEY *privatekey = NULL;
X509 *x509_cert = NULL;
// additional certs, last arg is CA which we don't care about
if (PKCS12_parse(p12_cert, Password.c_str(), &privatekey, &x509_cert, NULL))
{
// populate m_privateKey
// Call with a NULL output pointer first; the result is used as the size.
int size = i2d_PrivateKey(privatekey, NULL);
std::cout << "privatekey size=" << size << '\n';
if (size > 0)
{
// reserve() changes capacity only, so size() stays 0 and the line printed
// below shows 0 even though the encoder is given a pointer.
PrivateKey.reserve(size);
uint8_t* ptr = &PrivateKey[0];
i2d_PrivateKey(privatekey, &ptr);
std::cout << "PrivateKey size=" << PrivateKey.size() << '\n';
}
// populate certificate
size = i2d_X509(x509_cert, NULL);
std::cout << "certificate size=" << size << '\n';
if(size > 0)
{
// Same pattern as above: capacity is reserved but the vector stays empty.
Certificate.reserve(size);
uint8_t* ptr = &Certificate[0];
int ret = i2d_X509(x509_cert, &ptr);
std::cout << "ret=" << ret <<'\n';
std::cout << "cert size=" << Certificate.size() << '\n';
}
}
// NOTE(review): privatekey and x509_cert are not released anywhere in this
// listing; only the PKCS12 object is freed.
PKCS12_free(p12_cert);
}
}
UPDATE, can use code below to incorporate Arron's fix:
#include <openssl/ssl.h>
#include <openssl/pem.h>
#include <openssl/ossl_typ.h>
#include <openssl/x509.h>
#include <openssl/x509v3.h>
#include <openssl/pkcs12.h>
#include <openssl/err.h>
#include <iostream>
#include <fstream>
#include <iterator>
#include <iomanip>
#include <vector>
#include <string>
#include <cstdint>
using namespace std;
/// Reads the whole file at `path` as raw bytes and inserts them at the front
/// of `buffer`.
///
/// @param path   File to read; it is opened in binary mode.
/// @param buffer Destination. Existing contents are kept and end up after the
///               bytes read from the file.
/// @return Reference to `buffer`; it is left untouched when the file cannot
///         be opened.
std::vector<uint8_t>& File2Buffer(const std::string path,
                                  std::vector<uint8_t>& buffer) {
    std::ifstream fs(path, std::ios::in | std::ios::binary);
    if (!fs) {
        return buffer;
    }

    // The file length is only a capacity hint. tellg() reports -1 when the
    // position is unavailable; that value must never reach reserve().
    fs.seekg(0, std::ios::end);
    const std::streamoff length = fs.tellg();
    fs.clear();
    fs.seekg(0, std::ios::beg);
    if (length > 0) {
        buffer.reserve(buffer.size() + static_cast<std::size_t>(length));
    }

    // istreambuf_iterator reads unformatted characters, so whitespace and
    // NUL bytes are preserved without touching the stream's skipws flag.
    buffer.insert(buffer.begin(),
                  std::istreambuf_iterator<char>(fs),
                  std::istreambuf_iterator<char>());
    return buffer;
}
int main(int argc, char* argv[])
{
if (argc != 3) {
cout << "Usage: " << argv[0] << " <pkcs12 file> " << "<password>\n";
exit(0);
}
std::vector<uint8_t> input;
File2Buffer(argv[1], input);
std::string Password = argv[2];
std::vector<uint8_t> Certificate;
std::vector<uint8_t> PrivateKey;
OpenSSL_add_all_algorithms();
OpenSSL_add_all_ciphers();
OpenSSL_add_all_digests();
ERR_load_crypto_strings();
PKCS12* p12_cert = NULL;
const uint8_t* p1 = &input[0];
if(d2i_PKCS12(&p12_cert, &p1, input.size()) != NULL) {
EVP_PKEY *privatekey = NULL;
X509 *x509_cert = NULL;
// additional certs, last arg is CA which we don't care about
if (PKCS12_parse(p12_cert, Password.c_str(), &privatekey, &x509_cert, NULL))
{
// populate m_privateKey
int size = i2d_PrivateKey(privatekey, NULL);
std::cout << "privatekey size=" << size << '\n';
if (size > 0)
{
PrivateKey.resize(size);
uint8_t* ptr = &PrivateKey[0];
i2d_PrivateKey(privatekey, &ptr);
std::cout << "PrivateKey size=" << PrivateKey.size() << '\n';
}
// populate certificate
size = i2d_X509(x509_cert, NULL);
std::cout << "certificate size=" << size << '\n';
if(size > 0)
{
Certificate.resize(size);
uint8_t* ptr = &Certificate[0];
int ret = i2d_X509(x509_cert, &ptr);
std::cout << "ret=" << ret <<'\n';
std::cout << "cert size=" << Certificate.size() << '\n';
}
}
PKCS12_free(p12_cert);
}
// test it out:
if (Certificate.size() > 0) {
cout << "Certificate size=" << Certificate.size() << '\n';
for (auto& ch : Certificate) {
cout << hex << ch << " ";
}
}
}

Use resize instead of reserve. The problem with reserve is that it only allocates capacity: if you then assign to an element (like PrivateKey[5] = 5) and call PrivateKey.size(), the size will still be 0.
(In practice, reserve can be paired with back_inserter in std::copy), but in your case, you should call resize.

Related

BIO_write() is not writing to the file when using BIO_f_base64()

I am trying to learn OPENSSL and following this tutorial. I have written following code to read from a file and write the encoded form in another file.
#include <iostream>
#include <openssl/ssl.h>
#include <openssl/bio.h>
#define MAX_BUFFER_SIZE 512
// Reads argv[1] in chunks of MAX_BUFFER_SIZE and writes each chunk through a
// base64 filter BIO that is chained in front of a file BIO for argv[2].
// Returns 1 on a usage error or when a write is shorter than the read.
int main (int argc, char* argv[])
{
BIO* bio_out = nullptr;
BIO* bio_in = nullptr;
BIO* bio_b64 = nullptr;
int in_byte, out_byte;
char buffer[MAX_BUFFER_SIZE];
// NOTE(review): std::memset is used but <cstring> is not among the includes shown.
std::memset(buffer, '\0', MAX_BUFFER_SIZE);
if (argc != 3)
{
std::cout << "Usage: bio_b64_encode <source-read> <encoded-write>\n";
return 1;
}
// NOTE(review): none of the three BIO creation results is checked for null.
bio_in = BIO_new_file(argv[1], "r");
bio_out = BIO_new_file(argv[2], "wb");
bio_b64 = BIO_new(BIO_f_base64());
// Chain: bio_b64 -> bio_out, so writes to bio_b64 are meant to end up in the file.
BIO_push(bio_b64, bio_out);
in_byte = BIO_read(bio_in, buffer, MAX_BUFFER_SIZE);
while (in_byte > 0)
{
std::cout << "Read " << in_byte << " bytes.\n";
out_byte = BIO_write(bio_b64, buffer, in_byte);
std::cout << "Wrote " << out_byte << " bytes.\n";
if (in_byte != out_byte)
{
std::cout << "In bytes: " << in_byte << "and Out bytes: " << out_byte << " are not equal.\n";
// NOTE(review): this path frees bio_in and bio_out only; bio_b64 is not
// freed here and still refers to bio_out through the chain.
BIO_free(bio_in);
BIO_free(bio_out);
return 1;
}
in_byte = BIO_read(bio_in, buffer, MAX_BUFFER_SIZE);
}
// NOTE(review): the chain is freed without a preceding BIO_flush(bio_b64).
// Presumably the base64 filter still holds the encoded data at this point,
// which would explain the empty output file - confirm against the
// BIO_f_base64 documentation.
BIO_free(bio_in);
BIO_free_all(bio_b64);
return 0;
}
The code reads properly from the input file given in argv[1] and tries to write to argv[2] but when I open the output file, it is empty but it should have the encoded text from the input file.
the output of the above program when I run ./bio_b64_encode in.txt out.txt is
Read 22 bytes.
Wrote 22 bytes.
but the output file is empty. If I remove the encoding constraints and try to simply write whatever I read from input file (the code is below), it works properly.
#include <iostream>
#include <openssl/ssl.h>
#include <openssl/bio.h>
#define MAX_BUFFER_SIZE 512
/// Copies the file named by argv[1] to the file named by argv[2] through
/// OpenSSL file BIOs, MAX_BUFFER_SIZE bytes at a time, logging each step.
///
/// @return 0 on success; 1 on a usage error, when either file cannot be
///         opened, or when a write is shorter than the preceding read.
int main (int argc, char* argv[])
{
    if (argc != 3)
    {
        std::cout << "Usage: bio_write <file-read> <file-write>\n";
        return 1;
    }

    BIO* bio_in = BIO_new_file(argv[1], "r");
    BIO* bio_out = BIO_new_file(argv[2], "w");
    if (bio_in == nullptr || bio_out == nullptr)
    {
        std::cout << "Could not open " << (bio_in == nullptr ? argv[1] : argv[2]) << "\n";
        // BIO_free ignores a null argument, so both can be released unconditionally.
        BIO_free(bio_in);
        BIO_free(bio_out);
        return 1;
    }

    char buffer[MAX_BUFFER_SIZE] = {};  // zero-initialised without memset
    int in_byte = 0;
    while ((in_byte = BIO_read(bio_in, buffer, MAX_BUFFER_SIZE)) > 0)
    {
        std::cout << "Read " << in_byte << " bytes.\n";
        const int out_byte = BIO_write(bio_out, buffer, in_byte);
        std::cout << "Wrote " << out_byte << " bytes.\n";
        if (in_byte != out_byte)
        {
            std::cout << "In bytes: " << in_byte << "and Out bytes: " << out_byte << " are not equal.\n";
            BIO_free(bio_in);
            BIO_free(bio_out);
            return 1;
        }
    }

    BIO_free(bio_in);
    BIO_free_all(bio_out);
    return 0;
}
I am not able to figure out what am I missing here.

C/C++ library to decompress tar.gz file while downloading

I download a tar.gz file with libcurl and need to decompress it while it is downloading; that is, when a chunk of the file has been downloaded, that chunk should be decompressed immediately, instead of decompressing the whole file once the download has finished. Are there any C/C++ libraries that meet my requirements?
I tried to use libarchive to extract the file, but it returned truncated gzip input when extracting the first chunk of the file. It seems that libarchive need the whole file to extract it. Here is my code. I am not sure if i used libarchive correctly as I am new to it.
#include <iostream>
#include <vector>
#include <string>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <atomic>
#include <thread>
// libarchive
#include <archive.h>
#include <archive_entry.h>
#include <curl/curl.h>
// State handed to the libarchive read/close callbacks.
struct mydata {
void *buffer;   // block returned to libarchive by libarchiveRead
ssize_t *size;  // number of valid bytes in `buffer`; main() points this at the same ssize_t as curldata::size
};
// State handed to the libcurl write callback.
struct curldata {
void *buffer;   // destination of the memcpy in curlWriteFunction
ssize_t *size;  // set to the size of the most recently received chunk
CURL *curl;     // handle that curlWriteFunction pauses
};
// Hand-off flag between the two sides: curlWriteFunction stores false after
// filling the buffer, libarchiveRead stores true after handing the buffer out.
std::atomic<bool> rd(true);
// `archive` is the reader used throughout; `archivefd` is only referenced by
// commented-out code in this listing.
struct archive *archive, *archivefd;
// NOTE(review): start_read is not referenced anywhere else in this listing.
std::atomic<bool> start_read(false);
// libarchive read callback. When a chunk is pending (rd == false) it exposes
// the shared buffer through *block, flips rd back to true and returns the
// chunk size; otherwise it returns 0.
// NOTE(review): returning 0 while no chunk is pending presumably tells
// libarchive that the input has ended, which would match the reported
// "truncated gzip input" - confirm against the archive_read_callback docs.
la_ssize_t libarchiveRead(struct archive* a, void* client_data, const void** block)
{
if(!rd) {
mydata *my_data = (mydata*)client_data;
std::cout << "calling custom read(), size " << *(my_data->size) << std::endl;
*block = my_data->buffer;
// rd is set before the size is read for the return value; from this point
// progress() may resume the transfer.
rd=true;
return *(my_data->size);
}
return 0;
}
int libarchiveClose(struct archive* a, void* client_data)
{
std::cout << "calling custom close() for archive" << std::endl;
mydata *my_data = (mydata*)client_data;
delete my_data;
return (ARCHIVE_OK);
}
int libarchiveClosefd(struct archive* a, void* client_data)
{
std::cout << "calling custom close() for archivefd" << std::endl;
mydata *my_data = (mydata*)client_data;
delete my_data;
return (ARCHIVE_OK);
}
// libcurl write callback: records the chunk size, copies the chunk into the
// shared buffer, pauses the transfer and clears rd so the reader side picks
// the chunk up. Returns the number of bytes it accepted.
static size_t curlWriteFunction(void *ptr, size_t size, size_t nmemb, void *write_data) {
//size is always 1
curldata *my_data = (curldata*)(write_data);
*(my_data->size) = nmemb * size;
std::cout << "calling curlWriteFunction(), size: " << size << " , nmemb: " << nmemb
<< " , my_data->size: " << *(my_data->size) << std::endl;
// NOTE(review): the copy is not bounded by the destination size; main()
// allocates 16 KiB for this buffer - verify chunks can never be larger.
memcpy(my_data->buffer, ptr, *(my_data->size));
// NOTE(review): CURL_WRITEFUNC_PAUSE is passed as the bitmask argument of
// curl_easy_pause here rather than being returned - confirm against the
// libcurl documentation that this is the intended way to pause.
curl_easy_pause(my_data->curl, CURL_WRITEFUNC_PAUSE);
// Cleared last so the reader only sees the flag after the buffer is filled.
rd=false;
return (*(my_data->size));
}
static size_t progress(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow) {
CURL *curl = (CURL *)clientp;
(void)ultotal;
(void)ulnow;
if(dltotal == 0) {
return 0;
}
if(rd) {
curl_easy_pause(curl, CURLPAUSE_CONT);
std::cout << "progress: " << dlnow/dltotal * 100 << "%" << std::endl;
}
return 0;
}
/// Thread entry point: waits for the first downloaded chunk, opens the global
/// `archive` on the callback pair (libarchiveRead / libarchiveClose) and
/// extracts every entry it can read.
///
/// @param client_data The mydata object forwarded to the libarchive callbacks.
void readarchive(void *client_data) {
    struct archive_entry *entry = nullptr;
    const int flags = ARCHIVE_EXTRACT_TIME
                    | ARCHIVE_EXTRACT_PERM
                    | ARCHIVE_EXTRACT_ACL
                    | ARCHIVE_EXTRACT_FFLAGS;

    // Spin until curlWriteFunction publishes the first chunk (rd becomes false).
    while(rd);

    // archive_error_string may have nothing to report; never stream a null pointer.
    const auto error_text = []() -> const char* {
        const char *text = archive_error_string(archive);
        return text != nullptr ? text : "(no error message)";
    };

    std::cout << "calling archive_read_open for archive.." << std::endl;
    int res = archive_read_open(archive,
                                client_data,
                                nullptr,
                                (archive_read_callback*)libarchiveRead,
                                (archive_close_callback*)libarchiveClose);
    std::cout << "called archive_read_open for archive.." << std::endl;
    if (res != ARCHIVE_OK) {
        // Reading headers from an archive that failed to open is pointless.
        std::cout << "archive_read_open for archive failed, errcode: " << res << " error: " << error_text() << std::endl;
        return;
    }

    res = archive_read_next_header(archive, &(entry));
    while(res == ARCHIVE_OK ) {
        std::cout << "Extracting for archive " << archive_entry_pathname(entry) << "..." << std::endl;
        // extract current entry
        const int extracted = archive_read_extract(archive, entry, flags);
        if (extracted != ARCHIVE_OK) {
            std::cout << "archive_read_extract for archive failed, errcode: " << extracted << " error: " << error_text() << std::endl;
        }
        // read next if available
        res = archive_read_next_header(archive, &(entry));
    }
    std::cout << "archive_read_next_header for archive failed, errcode: " << res << " error: " << error_text() << std::endl;
}
//size_t curlWriteFunction(void *ptr, size_t size, size_t nmemb,FILE* fptr) {
// //size is always 1
// std::cout << "calling curlWriteFunction().." << std::endl;
// return fwrite(ptr, size, nmemb, fptr);
//}
int main(int argc, char** argv) {
if(argc < 3)
{
std::cout << argv[0] << "{-r | -w} file[s]" << std::endl;
return 1;
}
std::vector<std::string> filenames;
filenames.reserve(argc);
while (*++argv != nullptr)
{
filenames.emplace_back(*argv);
}
bool modeRead = (filenames[0] == "-r");
std::cout << filenames[0] << " " << filenames[1] << std::endl;
// archive related variables
char buff_archive[16 * 1024], buff_archivefd[16 * 1024];
if(modeRead)
{
archive = archive_read_new();
archive_read_support_filter_gzip(archive);
archive_read_support_format_tar(archive);
mydata *client_data = new mydata();
int res;
char *buff1 = new char[16 * 1024];
client_data->size = new ssize_t;
*(client_data->size) = 0;
client_data->buffer = buff1;
curldata *curl_data = new curldata();
curl_data->size=client_data->size;
curl_data->buffer=buff1;
CURL *curl = curl_easy_init();
curl_data->curl = curl;
curl_easy_setopt(curl, CURLOPT_URL, filenames[1].c_str());
curl_easy_setopt(curl, CURLOPT_WRITEDATA, curl_data);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, curl);
curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION,progress);
std::thread t(readarchive, client_data);
CURLcode result = curl_easy_perform(curl);
if(result != CURLE_OK) {
std::cout << "curl perform failed, errcode; " << result << " err: " << curl_easy_strerror(result) << std::endl;
}
//std::cout << "calling archive_read_open for archivefd.." << std::endl;
//res = archive_read_open(archivefd,
// client_datafd,
// nullptr,
// (archive_read_callback*)libarchiveReadfd,
// (archive_close_callback*)libarchiveClosefd);
//std::cout << "called archive_read_open for archivefd.." << std::endl;
//res = archive_read_next_header(archivefd, &(entry));
//if (res != ARCHIVE_OK) {
// std::cout << "archive_read_next_header for archivefd failed, errcode: " << res << " error: " << archive_error_string(archivefd) << std::endl;
//}
//while(res == ARCHIVE_OK) {
// std::cout << "Extracting for archivefd " << archive_entry_pathname(entry) << "..." << std::endl;
// // extract current entry
// archive_read_extract(archivefd, entry, flags);
// // read next if available
// res = archive_read_next_header(archivefd, &(entry));
//}
t.join();
delete client_data->size;
delete []buff1;
archive_read_close(archive);
archive_read_free(archive);
archive_read_free(archive);
curl_easy_cleanup(curl);
}
return 0;
}

Does get_nprocs_conf() always return 1 when using a virtual machine?

I'm supposed to use get_nprocs_conf() to get the number of execution contexts on my machine. I'm doing this because I am coding a server and client to interact with each other, and the server may only host get_nprocs_conf()-1 clients. Before I add code to my server to wait for an opening, I want to figure out this issue.
I'm running this code on a virtual machine because I'm using Linux and my desktop is Windows, and when I use said code above, my maximum number of clients is 0, meaning that get_nprocs_conf() only returns 1. Is this because I'm using a virtual machine and for some reason it only can use one execution context, or am I misunderstanding and my computer only has one execution context?
Provided below are my server and client programs.
Server Code:
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <iostream>
#include <fstream>
#include <sys/sysinfo.h>
#include <vector>
#include <cstring>
#include <arpa/inet.h>
//#define BUFFER_SIZE 32
//May need to replace -1 with EXIT_FAILURE in all the exits
// Writes exactly `len` bytes to `fd`, retrying after short writes.
// Returns false as soon as write() fails or makes no progress.
static bool writeAll(int fd, const void *data, size_t len) {
    const char *cursor = static_cast<const char *>(data);
    while (len > 0) {
        const ssize_t n = write(fd, cursor, len);
        if (n <= 0) {
            return false;
        }
        cursor += n;
        len -= static_cast<size_t>(n);
    }
    return true;
}

// Performs one read() of at most 32 bytes and stores the text up to the first
// NUL (or everything received when there is none) in `out`.
// Returns false when read() fails.
static bool readField(int fd, std::string &out) {
    char buf[32];
    const ssize_t n = read(fd, buf, sizeof(buf));
    if (n < 0) {
        return false;
    }
    // A full 32-byte read carries no terminator, so the length must come
    // from the byte count rather than from scanning for a NUL past the end.
    const size_t received = static_cast<size_t>(n);
    const void *nul = memchr(buf, '\0', received);
    const size_t len = (nul != nullptr)
        ? static_cast<size_t>(static_cast<const char *>(nul) - buf)
        : received;
    out.assign(buf, len);
    return true;
}

//May need to replace -1 with EXIT_FAILURE in all the exits
/// Unix-domain-socket grep server. For each client it reads a file path and a
/// search string, then sends back every line of that file containing the
/// string: first the line count, then each line as <length><bytes>.
///
/// Usage: <program> <socket file>
int main(int argc, char *argv[]) {
    if(argc != 2) {
        std::cout << "Need domain socket file name" << std::endl;
        return EXIT_FAILURE;
    }

    unlink(argv[1]);
    const int serverSock = socket(AF_UNIX, SOCK_STREAM, 0);
    if(serverSock < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }
    std::clog << "SERVER STARTED" << std::endl;
    const int maxClients = get_nprocs_conf()-1;
    std::clog << "\t" << "MAX CLIENTS: " << maxClients << std::endl;

    struct sockaddr_un server;
    memset(&server, 0, sizeof(server));
    server.sun_family = AF_UNIX;
    strncpy(server.sun_path, argv[1], sizeof(server.sun_path)-1);
    if(bind(serverSock, (const struct sockaddr *) &server,
            sizeof(struct sockaddr_un)) < 0) {
        perror("bind");
        return EXIT_FAILURE;
    }
    if(listen(serverSock, maxClients) < 0) {
        perror("listen");
        return EXIT_FAILURE;
    }

    while(true) {
        std::cout << "Waiting for clients" << std::endl;
        const int clientSock = accept(serverSock, nullptr, nullptr);
        if(clientSock < 0) {
            perror("accept");
            return EXIT_FAILURE;
        }
        std::clog << "CLIENT CONNECTED" << std::endl;

        // A faulty request ends that client's session only; the server keeps
        // running for the next one.
        // NOTE(review): the client sends path and search string as two plain
        // writes with no framing; on a stream socket both may arrive in the
        // first read. Fixing that needs a protocol change on both sides.
        std::string path;
        if(!readField(clientSock, path)) {
            perror("read");
            close(clientSock);
            continue;
        }
        std::cout << path << std::endl;
        if(path.empty()) {
            std::cout << "No path to file given" << std::endl;
            close(clientSock);
            continue;
        }

        std::string searchStr;
        if(!readField(clientSock, searchStr)) {
            perror("read");
            close(clientSock);
            continue;
        }
        std::cout << searchStr << std::endl;
        if(searchStr.empty()) {
            std::cout << "No search string given" << std::endl;
            close(clientSock);
            continue;
        }

        // Collect matching lines; a file that cannot be opened yields none.
        std::vector<std::string> allLines;
        std::ifstream inFile(path);
        std::string line;
        while(std::getline(inFile, line)) {
            if(line.find(searchStr, 0)!=std::string::npos) {
                allLines.push_back(line);
            }
        }

        // Wire format is kept exactly as the client expects it: every length
        // is htonl()'d and then sent as a whole `long`.
        long length = htonl(allLines.size());
        bool ok = writeAll(clientSock, &length, sizeof(length));
        size_t bytesSent = 0;
        for(const std::string &match : allLines) {
            if(!ok) {
                break;
            }
            length = htonl(match.length());
            ok = writeAll(clientSock, &length, sizeof(length))
              && writeAll(clientSock, match.data(), match.length());
            if(ok) {
                bytesSent += match.length();
            }
        }
        if(!ok) {
            perror("write");
        }
        std::cout << "BYTES SENT: " << bytesSent << std::endl;
        close(clientSock);
    }
}
Client Code:
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sys/socket.h>
#include <unistd.h>
#include <iostream>
#include <fstream> //Don't think I'll need this
#include <errno.h>
#include <sys/un.h>
#include <sstream>
#include <arpa/inet.h>
#include <vector>
// Reads exactly `len` bytes from `fd` into `data`, retrying after short reads.
// Returns false on a read error or when the peer closes the connection early.
static bool readAll(int fd, void *data, size_t len) {
    char *cursor = static_cast<char *>(data);
    while (len > 0) {
        const ssize_t n = read(fd, cursor, len);
        if (n <= 0) {
            return false;
        }
        cursor += n;
        len -= static_cast<size_t>(n);
    }
    return true;
}

/// Client for the Unix-domain-socket grep server: sends a file path and a
/// search string, then prints every matching line the server returns,
/// numbered, followed by the total number of bytes received.
///
/// Usage: <program> <socket file> <file path> <search string>
int main(int argc, char *argv[]){
    if(argc != 4) {
        std::cout << "Need domain socket file name, file path and name, " <<
            "and search string" << std::endl;
        return EXIT_FAILURE;
    }

    const int clientSock = socket(AF_UNIX, SOCK_STREAM, 0);
    if(clientSock < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }
    std::cout << "socket connected" << std::endl;

    struct sockaddr_un client;
    memset(&client, 0, sizeof(struct sockaddr_un));
    client.sun_family = AF_UNIX;
    strncpy(client.sun_path, argv[1], sizeof(client.sun_path)-1);
    if(connect(clientSock, (const struct sockaddr *)&client,
               sizeof(struct sockaddr_un)) < 0) {
        fprintf(stderr, "The server is not working.\n");
        close(clientSock);
        return EXIT_FAILURE;
    }
    std::cout << "client connected" << std::endl;

    // NOTE(review): the two request fields are sent back to back without any
    // framing, and the server reads each with a single read(); on a stream
    // socket they can be merged. Changing that requires touching the server.
    const std::string path = argv[2];
    std::cout << "Path: " << path << std::endl;
    if(write(clientSock, argv[2], path.length()) < 0) {
        perror("write");
        close(clientSock);
        return EXIT_FAILURE;
    }
    const std::string search = argv[3];
    std::cout << "Search String: " << search << std::endl;
    if(write(clientSock, argv[3], search.length()) < 0) {
        perror("write");
        close(clientSock);
        return EXIT_FAILURE;
    }

    // Every read is checked: a closed or failing connection must end the
    // program instead of looping forever or appending a negative byte count.
    const auto lostConnection = [clientSock]() {
        fprintf(stderr, "Lost connection to the server.\n");
        close(clientSock);
        return EXIT_FAILURE;
    };

    // The server sends each number as a whole `long` holding an htonl() value.
    long size = 0;
    if(!readAll(clientSock, &size, sizeof(size))) {
        return lostConnection();
    }
    size = ntohl(size);

    int lineCount=0;
    int bytes_received=0;
    for(long a=0; a<size; ++a) {
        long length = 0;
        if(!readAll(clientSock, &length, sizeof(length))) {
            return lostConnection();
        }
        length = ntohl(length);

        std::string line;
        while(0 < length) {
            char chunk[1024];
            const ssize_t got = read(clientSock, chunk,
                                     std::min<unsigned long>(sizeof(chunk),length));
            if(got <= 0) {
                return lostConnection();
            }
            line.append(chunk, static_cast<size_t>(got));
            length-=got;
        }
        lineCount++;
        std::cout << lineCount << "\t" << line << std::endl;
        bytes_received += line.length();
    }
    std::cout << "BYTES RECEIVED: " << bytes_received << std::endl;
    close(clientSock);
    return 0;
}
Right now everything in the server and client work as they should. I'm just hesitating to add code that waits for an execution context to open for another client to be read because it seems like it would never accept any clients since the sole execution context is being used by the server. Any clarification on my issue or on if I'm just using get_nprocs_conf() incorrectly would be greatly appreciated.

Understanding ICU ubidi. Direction is always UBIDI_LTR

I have written a piece of sample code, following the ICU reference, that reads a line from a file, gets its base direction and shows the result of running the Unicode Bidi algorithm on it.
in my input file I have written فارسی which is a sequence of right to left characters.
but this line std::cout << ubidi_getBaseDirection(us.getBuffer(), us.length()) << std::endl; prints 0 which is UBIDI_LTR.
And no matter what combination of characters (RTL and LTR combinations) I give in the input file, it will always have one run with direction UBIDI_LTR.
Is there something wrong with my code?
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
#include "unicode/ubidi.h"
#include <unicode/unistr.h>
#include<string>
#include<iostream>
#include <fstream>
#include "unicode/ustream.h"
using namespace icu;
using icu::UnicodeString;
// Reads the first line of in.txt, runs the ICU bidi algorithm on it and
// writes one row per visual run (direction, logical start, length, text) to
// out.txt. Prints "Failed" when ubidi_setPara reports an error.
int main(int argc, char* argv[])
{
std::string input;
std::string output;
std::ifstream MyReadFile("in.txt");
getline(MyReadFile, input);
// NOTE(review): this constructor converts from a plain char* without naming
// an encoding. If in.txt is UTF-8, the text presumably needs
// UnicodeString::fromUTF8 instead; a wrong conversion would explain every
// run coming out as LTR - confirm against the UnicodeString documentation.
UnicodeString us(input.c_str());
UBiDi* bidi = ubidi_open();
UErrorCode errorCode = U_ZERO_ERROR;
// NOTE(review): UBIDI_RTL is passed in the paragraph-level position of
// ubidi_setPara - verify this is the intended level value.
ubidi_setPara(bidi, us.getBuffer(), us.length(), UBIDI_RTL, nullptr, &errorCode);
// Prints the result of the comparison (1 or 0), not the direction value itself.
std::cout << (ubidi_getBaseDirection(us.getBuffer(), us.length()) == UBIDI_LTR) << std::endl;
std::ofstream MyFile;
MyFile.open("out.txt");
if (U_SUCCESS(errorCode))
{
UnicodeString Ustring(ubidi_getText(bidi));
std::string Ustr;
Ustring.toUTF8String(Ustr);
int32_t count = ubidi_countRuns(bidi, &errorCode);
int32_t logicalStart, length;
if (count > 0)
MyFile << "VisualRun \t" << "direction" << "\t" << "s" << '\t' << "l" << '\t' << "output" << std::endl;
for (int32_t i = 0; i < count; i++) {
UBiDiDirection dir = ubidi_getVisualRun(bidi, i, &logicalStart, &length);
std::string dirstr = "UBIDI_LTR";
if (dir == UBIDI_RTL)
dirstr = "UBIDI_RTL";
// temp already holds only this run's text.
UnicodeString temp = Ustring.tempSubString(logicalStart, length);
// NOTE(review): dest is uninitialised when strlen(dest) is evaluated, it is
// sized in UTF-16 units with no room for a terminator, and it is never
// freed. The extract call also applies logicalStart again to the substring.
char* dest = (char*)malloc(temp.length());
temp.extract(logicalStart, length, dest, strlen(dest));
output = std::string(dest);
MyFile << "VisualRun \t" << dirstr << "\t" << logicalStart << '\t' << length << '\t' << output << std::endl;
}
}
else
{
std::cout << "Failed" << std::endl;
}
// NOTE(review): the UBiDi object opened above is not closed in this listing.
MyFile.close();
return 0;
}

Reading in Wav header - Not setting data size

I'm trying to read in the Header information of a .wav file.
If I have a .wav file that has a higher sample rate (22050) it will read all the information in perfectly; however, if I have a lower sample rate (8000) then it fails to read in some information:
"dataSize" is set when using a 22050 .wav file; however, when using an 8000 .wav file it does not get set and just displays some random number, e.g. "1672494080", when the actual size is around 4k-4.5k.
Any suggestions to where I am going wrong?
EDIT:
#include <iostream>
#include <fstream>
#include <vector>
#include <inttypes.h>
#include <stdint.h>
#include <math.h>
using namespace std;
// Header at the start of the file: a 4-character id, a size and a
// 4-character type (readHeader checks them against "RIFF" and "WAVE").
struct riff_hdr
{
char id[4];
uint32_t size;
char type[4];
};
// Header read in front of every chunk: 4-character id plus chunk size.
struct chunk_hdr
{
char id[4];
uint32_t size;
};
// Layout that readHeader copies the contents of the format chunk into.
// NOTE(review): the members add up to 18 bytes, but without packing
// sizeof(wavefmt) is presumably 20, so an 18-byte chunk fails the
// `chunk.size < sizeof(wavefmt)` check in readHeader - confirm with sizeof.
struct wavefmt
{
uint16_t format_tag;
uint16_t channels;
uint32_t sample_rate;
uint32_t avg_bytes_sec;
uint16_t block_align;
uint16_t bits_per_sample;
uint16_t extra_size;
};
// File-scope state filled in by readHeader.
riff_hdr riff;
chunk_hdr chunk;
wavefmt fmt = {0};
uint32_t padded_size;      // chunk size rounded up to an even number
vector<uint8_t> chunk_data; // raw bytes of the format chunk
// Reads the RIFF header from `file`, then a chunk header, printing what it
// finds; the contents of a format chunk are copied into the global `fmt`.
// Always returns true, including when the file is not a RIFF/WAVE file.
bool readHeader(ifstream &file) {
file.read(reinterpret_cast<char*>(&riff), sizeof(riff));
if (memcmp(riff.id, "RIFF", 4) == 0)
{
cout << "size=" << riff.size << endl;
cout << "id=" << string(riff.type, 4) << endl;
if (memcmp(riff.type, "WAVE", 4) == 0)
{
// chunks can be in any order!
// there is no guarantee that "fmt" is the first chunk.
// there is no guarantee that "fmt" is immediately followed by "data".
// There can be other chunks present!
do {
file.read(reinterpret_cast<char*>(&chunk), sizeof(chunk));
// Round the chunk size up to the next even number.
padded_size = ((chunk.size + 2 - 1) & ~1);
cout << "id=" << string(chunk.id, 4) << endl;
cout << "size=" << chunk.size << endl;
cout << "padded size=" << padded_size << endl;
// NOTE(review): the id is compared against "fmt" followed by a NUL byte;
// the answer's version compares against "fmt " with a trailing space.
if (memcmp(chunk.id, "fmt\0", 4) == 0)
{
if (chunk.size < sizeof(wavefmt))
{
// error!
file.ignore(padded_size);
}else{
// THIS block doesn't seem to be executing
chunk_data.resize(padded_size);
file.read(reinterpret_cast<char*>(&chunk_data[0]), padded_size);
fmt = *(reinterpret_cast<wavefmt*>(&chunk_data[0]));
cout << "format_tag=" << fmt.format_tag << endl;
cout << "channels=" << fmt.channels << endl;
cout << "sample_rate=" << fmt.sample_rate << endl;
cout << "avg_bytes_sec=" << fmt.avg_bytes_sec << endl;
cout << "block_align=" << fmt.block_align << endl;
cout << "bits_per_sample=" << fmt.bits_per_sample << endl;
cout << "extra_size=" << fmt.extra_size << endl;
}
// NOTE(review): extra_data is never used, and chunk_data is indexed here
// even when the branch above did not fill it.
if(fmt.format_tag != 1)
{
uint8_t *extra_data = &chunk_data[sizeof(wavefmt)];
}
}else if(memcmp(chunk.id, "data", 4) == 0) {
file.ignore(padded_size);
}else{
file.ignore(padded_size);
}
// NOTE(review): the loop repeats only while the stream is in a failed state
// (and not at end of file), so it stops after the first chunk whenever the
// reads succeed.
}while ((!file) && (!file.eof()));
}
}
return true;
}
int main()
{
ifstream file("example2.wav");
readHeader(file);
return 0;
}
OUTPUT:
size=41398
id=WAVE
id=fmt
size=18
padded size=18
chunk_data size=0
Where am I going wrong?
You have two problems with your code:
There is a 2-byte integer after the bitsPerSample value that you are not reading. It specifies the size of any extra data in that chunk. If the value of format2 indicates a PCM format only, you can ignore the value of the integer (it will usually be 0 anyway, but it may also be garbage), but you still have to account for its presence. The integer cannot be ignored for non-PCM formats: you have to read the value and then read as many bytes as it says. You need to make sure you read the entire chunk before entering your while loop, otherwise you will not be at the correct starting position in the file to read further chunks.
You are not taking into account that chunks are padded to the nearest WORD boundary, but the chunk size does not include any padding. When you call seekg(), you need to round the value up to the next WORD boundary.
Update: based on the new code you posted, it should look more like this instead:
#include <inttypes.h>
#include <math.h>
#include <stdint.h>

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// The structures below mirror the on-disk RIFF layout, so they must be
// byte-aligned. #pragma pack is used instead of the Windows-only
// pshpack1.h / poppack.h pair so the code builds with any common compiler.
#pragma pack(push, 1)
struct s_riff_hdr
{
    char id[4];
    uint32_t size;
    char type[4];
};
struct s_chunk_hdr
{
    char id[4];
    uint32_t size;
};
struct s_wavefmt
{
    uint16_t format_tag;
    uint16_t channels;
    uint32_t sample_rate;
    uint32_t avg_bytes_sec;
    uint16_t block_align;
};
struct s_wavefmtex
{
    s_wavefmt fmt;
    uint16_t bits_per_sample;
    uint16_t extra_size;
};
struct s_pcmwavefmt
{
    s_wavefmt fmt;
    uint16_t bits_per_sample;
};
#pragma pack(pop)

/// Walks the chunks of a RIFF/WAVE stream and prints the header fields and
/// the contents of the "fmt " chunk.
///
/// @param file Stream positioned at the start of the file; open it in binary
///             mode.
/// @return true when the RIFF/WAVE header is valid and every chunk up to the
///         end of the file could be read or skipped; false for a non-WAVE
///         file, a malformed "fmt " chunk, or a file that ends inside a
///         chunk header.
bool readWave(std::ifstream &file)
{
    s_riff_hdr riff_hdr;
    file.read(reinterpret_cast<char*>(&riff_hdr), sizeof(riff_hdr));
    if (!file) return false;
    if (memcmp(riff_hdr.id, "RIFF", 4) != 0) return false;
    std::cout << "size=" << riff_hdr.size << std::endl;
    std::cout << "type=" << std::string(riff_hdr.type, 4) << std::endl;
    if (memcmp(riff_hdr.type, "WAVE", 4) != 0) return false;

    // extra_size is a 16-bit field, so a format chunk can never legitimately
    // be larger than this; the bound keeps a corrupt size from driving a
    // multi-gigabyte allocation.
    const uint32_t max_fmt_size = sizeof(s_wavefmtex) + 0xFFFFu;

    std::vector<uint8_t> fmt_data;
    // chunks can be in any order!
    // there is no guarantee that "fmt" is the first chunk.
    // there is no guarantee that "fmt" is immediately followed by "data".
    // There can be other chunks present!
    do
    {
        s_chunk_hdr chunk_hdr;
        file.read(reinterpret_cast<char*>(&chunk_hdr), sizeof(chunk_hdr));
        if (!file)
        {
            // Running out of data exactly on a chunk boundary is the normal
            // end of the file; a partial header means the file is truncated.
            return file.eof() && file.gcount() == 0;
        }
        // Chunks are padded to an even length; the stored size excludes the pad.
        const uint32_t padded_size = ((chunk_hdr.size + 1) & ~1u);
        std::cout << "id=" << std::string(chunk_hdr.id, 4) << std::endl;
        std::cout << "size=" << chunk_hdr.size << std::endl;
        std::cout << "padded size=" << padded_size << std::endl;

        if (memcmp(chunk_hdr.id, "fmt ", 4) == 0)
        {
            if (chunk_hdr.size < sizeof(s_wavefmt)) return false;
            if (chunk_hdr.size > max_fmt_size) return false;
            fmt_data.resize(padded_size);
            file.read(reinterpret_cast<char*>(fmt_data.data()), padded_size);
            if (!file) return false;

            // Copy the bytes into real objects instead of reinterpreting the
            // byte buffer as a struct.
            s_wavefmt fmt;
            memcpy(&fmt, fmt_data.data(), sizeof(fmt));
            std::cout << "format_tag=" << fmt.format_tag << std::endl;
            std::cout << "channels=" << fmt.channels << std::endl;
            std::cout << "sample_rate=" << fmt.sample_rate << std::endl;
            std::cout << "avg_bytes_sec=" << fmt.avg_bytes_sec << std::endl;
            std::cout << "block_align=" << fmt.block_align << std::endl;

            if (fmt.format_tag == 1) // PCM
            {
                if (chunk_hdr.size < sizeof(s_pcmwavefmt)) return false;
                s_pcmwavefmt pcm_fmt;
                memcpy(&pcm_fmt, fmt_data.data(), sizeof(pcm_fmt));
                std::cout << "bits_per_sample=" << pcm_fmt.bits_per_sample << std::endl;
            }
            else
            {
                if (chunk_hdr.size < sizeof(s_wavefmtex)) return false;
                s_wavefmtex fmt_ex;
                memcpy(&fmt_ex, fmt_data.data(), sizeof(fmt_ex));
                std::cout << "bits_per_sample=" << fmt_ex.bits_per_sample << std::endl;
                std::cout << "extra_size=" << fmt_ex.extra_size << std::endl;
                if (fmt_ex.extra_size != 0)
                {
                    if (chunk_hdr.size < (sizeof(s_wavefmtex) + fmt_ex.extra_size)) return false;
                    // The extra data starts at fmt_data.data() + sizeof(s_wavefmtex);
                    // use up to extra_size bytes of it as needed...
                }
            }
        }
        else
        {
            // "data": process chunk data, according to fmt, as needed...
            // anything else: process other chunks as needed...
            file.ignore(padded_size);
            if (!file) return false;
        }
    }
    while (!file.eof());
    return true;
}
/// Opens example2.wav and prints its header information via readWave.
/// @return 0 when the file was parsed successfully, 1 otherwise.
int main()
{
    // WAV data is binary; text mode would translate bytes on some platforms.
    std::ifstream file("example2.wav", std::ios::binary);
    if (!file)
    {
        std::cerr << "Could not open example2.wav\n";
        return 1;
    }
    return readWave(file) ? 0 : 1;
}