Can one read a remote file as an istream with libcurl? - c++

I'd like to use the libcurl library to open a remote date file and iterate through it with an istream. I've looked through the nice example in this thread but it writes the remote file to a local file. Instead I'd like to have the remote reads be pushed to an istream for subsequent programmatic manipulation. Is this possible? I would greatly appreciate help.
Best,
Aaron

Boost's IO Stream might be a better solution than STL's own stream. At least it is much simpler to create a boost stream. From boost's own docs:
#include <curl/curl.h>
#include <boost/iostreams/stream.hpp>
class CURLDevice
{
private:
CURL* handle;
public:
typedef char char_type;
typedef boost::iostreams::source_tag category;
CURLDevice()
{
handle = curl_easy_init();
}
CURLDevice(const std::string &url)
{
handle = curl_easy_init();
open( url );
}
~CURLDevice()
{
curl_easy_cleanup(handle);
}
void open(const std::string &url)
{
curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
curl_easy_setopt(handle, CURLOPT_CONNECT_ONLY, 1);
curl_easy_perform(handle);
}
std::streamsize read(char* s, std::streamsize n)
{
size_t read;
CURLcode ret = curl_easy_recv(handle, s, n, &read);
if ( ret == CURLE_OK || ret == CURLE_AGAIN )
return read;
else
return -1;
}
};
typedef boost::iostreams::stream<CURLDevice> CURLStream;
int main(int argc, char **argv)
{
curl_global_init(CURL_GLOBAL_ALL);
{
CURLStream stream("http://google.com");
char buffer[256];
int sz;
do
{
sz = 256;
stream.read( buffer, sz );
sz = stream.gcount();
std::cout.write( buffer, sz );
}
while( sz > 0 );
}
curl_global_cleanup();
return 0;
}
Note: when I run the code above I get a segfault in CURL, this appears to be because I don't know exactly how to use curl itself.

Related

VS2013 ERROR 0xC0000005: when using libcurl

I'm trying to use libcurl (http://curl.haxx.se/libcurl/c/) for downloading data from a web, and store these data in a txt file , and here is my code:
// CLASS SinaStk
size_t save_data(char *buffer, size_t size, size_t nmemb, FILE* userdata){
locale loc = std::locale::global(std::locale("")); //TRY TO OPEN FILE WITH CHINESE
userdata = fopen(fpath.c_str(), "w");
if (userdata == NULL)
printf("File not open!\n");
locale::global(loc);
size_t writelen=size * nmemb;
fwrite(buffer, size, nmemb, userdata);
return writelen;
};
virtual void downloadUrl()
{
CURL* stkCURL=NULL;
CURLcode res;
FILE * fp=NULL;
curl_global_init(CURL_GLOBAL_WIN32);
stkCURL = curl_easy_init();
curl_easy_setopt(stkCURL, CURLOPT_URL,"http://hq.sinajs.cn/list=s_sh000001");
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA,fp);
res=curl_easy_perform(stkCURL); //<-STOP!!!!
fclose(fp);
curl_easy_cleanup(stkCURL);
curl_global_cleanup();
return;
};
and when I debug my code, it always stop and then jump to xstring:
size_type size() const _NOEXCEPT
{ // return length of sequence
return (this->_Mysize); // <-STOP!!!
}
0xC0000005: Access violation reading location 0x0000009E
I have no idea about the problem for almost a week. I am upset, I asked people around me and nobody knows why.
Thanks for reading, I am really confused.
=============
Problem is solved! Thanks you guys! now my code is:
//CLASS StkApiInfo
size_t writeData(char* buffer, size_t size, size_t nmemb){
if (stkFile.is_open()){
stkFile.close();
stkFile.clear();
};
fpath = "D:\\Code\\代码\\数据文件\\" + fname + ".txt";
stkFile.open(fpath.c_str(), ios::out);
//if (stkFile.is_open())
cout << buffer<<size<<nmemb;
stkFile << buffer<<endl;
stkFile.close();
stkFile.clear();
return size*nmemb;
};
//CLASS SinaStk : public StkApiInfo
static size_t save_data(char *buffer, size_t size, size_t nmemb, void* userdata){
SinaStk* self = (SinaStk*)userdata;
return self->writeData(buffer, size, nmemb);
};
virtual void downloadUrl()
{
CURL* stkCURL = NULL;
CURLcode res;
curl_global_init(CURL_GLOBAL_WIN32);
stkCURL = curl_easy_init();
if (stkCURL)
{
curl_easy_setopt(stkCURL, CURLOPT_URL, stkUrl.c_str());
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA, this);
res = curl_easy_perform(stkCURL);
//if (res != CURLE_OK)
curl_easy_cleanup(stkCURL);
curl_global_cleanup();
}
return;
};
Callback passed with CURLOPT_WRITEFUNCTION argument should be of type write_callback (with exact that signature) and therefore cannot be non-static class method. Usual workaround is to define callback as non-member or static method and pass this as an argument:
static size_t save_data(char *buffer, size_t size, size_t nmemb, void* userdata)
{
SinaStk* self = (SinaStk*) userdata;
return self->doStuff(buffer, size, nmemb);
}
virtual void downloadUrl()
{
//...
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA, this);
//...
}
If you need to access additional data (like FILE* in your example) you can either store it as class field or introduce temporary structure that would contain this and additional data fields and pass it's address as callback argument.

thread-safety proxy with libcurl

#pragma once
#ifndef __CURL_CURL_H
#include "curl.h"
#endif
#ifndef __CURL_EASY_H
#include "easy.h"
#endif
#include <stdint.h>
#include <memory>
#include <string>
namespace CommUnit
{
enum ERR_PROXY
{
ERR_CURL_INIT_FAILED = 0xA0,
ERR_SET_PROXY_FAILED = 0xA1,
};
class MyProxy
{
public:
static MyProxy & GetInstance() //Meyers' Singlton
{
static MyProxy ProxySigleton;
return ProxySigleton;
}
public:
/*
* #bref:Get request
* #param[in] sUrl:Access URL
* #param[in] sProxyIp:Proxy IP
* #param[in] uProxyPort:Proxy Port
* #param[in] uTimeOut:Time out
* #param[in] isSSL:HTTPS true,else false
* #param[out] sRetContent:Return the URL content
*/
uint32_t Get(const std::string &sUrl,
const std::string& sProxyIp,
uint32_t uProxyPort,
uint32_t uTimeOut,
bool isSSL,
std::string &sRetContent);
private:
MyProxy(); //Constructor hidden
MyProxy(MyProxy const &); //Copy-Constructor hidden
MyProxy & operator= (MyProxy const &); //Assign operator hidden
~MyProxy(); //Destructor hidden
inline void _setCurlopt(CURL *pCurl,
const std::string &sUrl,
std::string &sWriterData,
const uint32_t uTimeOut,
bool isSSL);
//Callback function, write data to writerData
static int Writer(char *data,
uint32_t size,
uint32_t nmemb,
std::string *writerData);
private:
std::string m_sErrMsg;
static char s_ErrBuffer[CURL_ERROR_SIZE];
static const uint32_t m_MAXBUF = 2 * 1024 * 1024 - 128;
};
}
//////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <string>
#include "MyProxy.h"
#include "Log.h"
#include <curl.h>
using namespace CommUnit;
char MyProxy::s_ErrBuffer[CURL_ERROR_SIZE] = { 0 };
MyProxy::MyProxy(void)
{
CURLcode oCURLcode = curl_global_init(CURL_GLOBAL_ALL);
if (oCURLcode != CURLE_OK)
{
Log("ERR: %s curl_init failed!", __func__);
}
}
MyProxy::~MyProxy(void)
{
curl_global_cleanup();
}
uint32_t MyProxy::Get(const std::string &sUrl,
const std::string& sProxyIp,
uint32_t uProxyPort,
uint32_t uTimeOut,
bool isSSL,
std::string &sRetContent)
{
sRetContent.clear();
CURL *pCurl = curl_easy_init();
CURLcode oCURLcode;
if (nullptr == pCurl)
{
Log("ERR: %s curl_easy_init failed!", __func__);
return ERR_CURL_INIT_FAILED;
}
_setCurlopt(pCurl, sUrl, sRetContent, uTimeOut, isSSL);
if (0 == sProxyIp.length()|| 0 == uProxyPort)
{
Log("ERR: %s SetProxy: ProxyIp [%s], ProxyPort[%u] failed",__func__, sProxyIp.c_str(), uProxyPort);
return ERR_SET_PROXY_FAILED;
}
Log("INFO: %s SetProxy: ProxyIp [%s], ProxyPort[%u] failed", __func__, sProxyIp.c_str(), uProxyPort);
curl_easy_setopt(pCurl, CURLOPT_PROXY, sProxyIp.c_str());
curl_easy_setopt(pCurl, CURLOPT_PROXYPORT, uProxyPort);
int iTimes = 0;
while (true)
{
oCURLcode = curl_easy_perform(pCurl);
if (oCURLcode != CURLE_OK && ++iTimes < 3)
usleep(5);
else
break;
}
if (oCURLcode != CURLE_OK)
{
Log("ERR: %s curl_easy_perform failed!", __func__);
}
curl_easy_cleanup(pCurl);
return oCURLcode;
}
void MyProxy::_setCurlopt(CURL *pCurl,
const std::string &sUrl,
std::string &sWriterData,
const uint32_t uTimeOut,
bool isSSL)
{
curl_easy_setopt(pCurl, CURLOPT_ERRORBUFFER, s_ErrBuffer);
curl_easy_setopt(pCurl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(pCurl, CURLOPT_URL, sUrl.c_str());
curl_easy_setopt(pCurl, CURLOPT_TIMEOUT, uTimeOut);
Log("INFO: %s Set Url:[%s],TimeOut:[%d]", __func__, sUrl.c_str(), uTimeOut);
curl_easy_setopt(pCurl, CURLOPT_WRITEFUNCTION, MyProxy::Writer);
curl_easy_setopt(pCurl, CURLOPT_WRITEDATA, &sWriterData);
//Skip peer and hostname verification
if (isSSL)
{
curl_easy_setopt(pCurl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(pCurl, CURLOPT_SSL_VERIFYHOST, 0L);
}
}
int MyProxy::Writer(char *data,
uint32_t size,
uint32_t nmemb,
std::string *writerData)
{
if (writerData == nullptr)
{
Log("ERR: %s writerData is null!", __func__);
return 0;
}
int len = size * nmemb;
if ((writerData->size() + len) > m_MAXBUF)
{
Log("ERR: %s writerData size over MAXBUF!", __func__);
return 0;
}
writerData->append(data, len);
return len;
}
I want to realize a proxy with libcurl, which can get the content of given url(https). Morever, it need to be thread-safety.
But when I created 200 threads with pthreads to test my code, an segment fault occured sometimes.
How can I solve this problem?
Is there a relation with the sRetContent(std::string)?
Thanks!
Errmsg:
double free or corruption (!prev): 0x0ac72840 ***
Segmentation fault
My understanding is that libcurl is not thread-safe if you are using https (and it looks like you are) due to the fact that it is using underlying ssl libraries. See libcurl documentation and OpenSSL documentation for more info.
If your libcurl was compiled with OpenSSL for example then you have to initialize a few callback functions or you could run into issues. This is the sort of thing you need to do (compiles on Windows):
#include <curl/curl.h>
#include <openssl/crypto.h>
void win32_locking_callback(int mode, int type, const char *file, int line)
{
if (mode & CRYPTO_LOCK)
{
WaitForSingleObject(lock_cs[type],INFINITE);
}
else
{
ReleaseMutex(lock_cs[type]);
}
}
void thread_setup(void)
{
int i;
lock_cs=(HANDLE*)OPENSSL_malloc(CRYPTO_num_locks() * sizeof(HANDLE));
for (i=0; i<CRYPTO_num_locks(); i++)
{
lock_cs[i]=CreateMutex(NULL,FALSE,NULL);
}
CRYPTO_set_locking_callback((void (*)(int,int,const char *,int))win32_locking_callback);
}
void thread_cleanup(void)
{
int i;
CRYPTO_set_locking_callback(NULL);
for (i=0; i<CRYPTO_num_locks(); i++)
CloseHandle(lock_cs[i]);
OPENSSL_free(lock_cs);
}
I always call thread_setup() after my call to curl_global_init(CURL_GLOBAL_ALL)
and then thread_cleanup() right before my call to curl_global_cleanup().
I use this sort of code with libcurl often in load test scenarios and have never run into any issues. If you continue to run into problems, it is not libcurl, but something not being done properly in your code.
libcurl is thread-safe as long as you play by the rules

Segfault with multithreaded curl request

I'm having some trouble with a C++ program here. Basically I've written a simple wrapper for http requests, with the ability to do multiple requests at once.
Works absolutely fine, but when I do httpS requests, it crashes randomly in multithreaded mode. I'm using curl and posix threads.
Backtrace looks like this:
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x80996)[0x7fea9046d996]
/lib/x86_64-linux-gnu/libc.so.6(+0x82b80)[0x7fea9046fb80]
/lib/x86_64-linux-gnu/libc.so.6(realloc+0xf2)[0x7fea90470ae2]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(CRYPTO_realloc+0x49)[0x7fea8f9c6169]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(lh_insert+0x101)[0x7fea8fa4bfb1]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(+0xe844e)[0x7fea8fa4e44e]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_get_state+0xde)[0x7fea8fa4eeee]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_clear_error+0x15)[0x7fea8fa4f065]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x24e79)[0x7fea90f10e79]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x39ea0)[0x7fea90f25ea0]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0xf8fd)[0x7fea90efb8fd]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x219f5)[0x7fea90f0d9f5]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x35538)[0x7fea90f21538]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_multi_perform+0x91)[0x7fea90f21d31]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_easy_perform+0x107)[0x7fea90f19457]
./exbot[0x40273a]
/lib/x86_64-linux-gnu/libpthread.so.0(+0x7f6e)[0x7fea90cd6f6e]
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)[0x7fea904e79cd]
Could this be a bug in libcrypto?
Can I somehow tell curl not to use libcrypto? Any alternatives?
It only crahes wenn using httpS requests and works fine with even 10000 simultaneous http queries.
Cheers,
Thomas
Just for completeness my code:
// simple wrapper for http requests
#ifndef _REQUEST_H_
#define _REQUEST_H_
#include <curl/curl.h>
#include <pthread.h>
#include <string>
#include <iostream>
//////////////////////////////////
// MACROS
//////////////////////////////////
#define ERR(_msg) std::cerr << __FUNCTION__ << ": " << _msg << std::endl
//////////////////////////////////
// REQUEST WRAPPER
//////////////////////////////////
typedef unsigned int uint;
class RequestWrapper
{
private: // non copyable
RequestWrapper();
RequestWrapper(const RequestWrapper &that);
RequestWrapper &operator=(const RequestWrapper &that);
public:
struct Response
{
Response() : msg(""), success(false) {}
std::string msg;
bool success;
};
static Response simpleGET(std::string url, uint timeout);
static size_t write(char *content, size_t size, size_t nmemb, void *userp);
};
//////////////////////////////////
// GET
//////////////////////////////////
inline size_t RequestWrapper::write(char *content, size_t size, size_t nmemb, void *userp)
{
std::string *buf = static_cast<std::string *>(userp);
size_t realsize = size * nmemb;
for (uint i = 0; i < realsize; ++i)
{
buf->push_back(content[i]);
}
return realsize;
}
inline RequestWrapper::Response RequestWrapper::simpleGET(std::string url, uint timeout)
{
Response resp;
CURL *curl;
CURLcode res;
std::string buf;
// send request
buf.clear();
curl = curl_easy_init();
if (!curl)
{
//ERR("libcurl init failed");
return resp;
}
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void *>(&buf));
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
res = curl_easy_perform(curl);
if(res != CURLE_OK)
{
//ERR("libcurl request failed, CODE: " << res);
return resp;
}
curl_easy_cleanup(curl);
// done
resp.msg = buf;
resp.success = true;
return resp;
}
//////////////////////////////////
// MULTITHREADED REQUEST
//////////////////////////////////
class RequestList
{
private:
std::vector<std::string> _reqs;
static void *sender(void *payload);
static pthread_mutex_t _mutex;
public:
inline void add(std::string request)
{
_reqs.push_back(request);
}
inline void clear()
{
_reqs.clear();
}
std::vector<std::string> send(uint timeout) const;
struct Payload
{
std::string url;
std::vector<std::string> *out;
uint tout, index;
Payload(std::string url,
std::vector<std::string> *out,
uint tout, uint index) : url(url), out(out), tout(tout), index(index) { }
Payload() : url(""), out(NULL), tout(0), index(0) { }
};
};
//////////////////////////////////
// SEND MT REQUEST
//////////////////////////////////
pthread_mutex_t RequestList::_mutex;
void *RequestList::sender(void *payload)
{
Payload *pl = static_cast<Payload *>(payload);
RequestWrapper::Response resp = RequestWrapper::simpleGET(pl->url, pl->tout);
pthread_mutex_lock(&_mutex);
if (resp.success)
{
pl->out->at(pl->index) = resp.msg;
std::cerr << ".";
}
else
{
std::cerr << "x";
}
pthread_mutex_unlock(&_mutex);
return NULL;
}
inline std::vector<std::string> RequestList::send(uint timeout) const
{
std::vector<std::string> resp;
resp.resize(_reqs.size());
Payload *payloads = new Payload[_reqs.size()];
pthread_t *tids = new pthread_t[_reqs.size()];
// create mutex
pthread_mutex_init(&_mutex, NULL);
// prepare payload and create thread
for (uint i = 0; i < _reqs.size(); ++i)
{
payloads[i] = Payload(_reqs[i], &resp, timeout, i);
pthread_create(&tids[i], NULL, RequestList::sender, static_cast<void *>(&payloads[i]));
}
// wait for threads to finish
for (uint i = 0; i < _reqs.size(); ++i)
{
pthread_join(tids[i], NULL);
}
std::cerr << std::endl;
//destroy mutex
pthread_mutex_destroy(&_mutex);
delete[] payloads;
delete[] tids;
return resp;
}
#endif
Libcrypto is part of OpenSSL, which is not thread-safe unless you provide the necessary callbacks. According to the documentation, on a POSIX-compliant system (which has thread-local errno) the default thread-id implementation is acceptable, so you just need a locking function:
void locking_function(int mode, int n, const char *file, int line);
This function will need to maintain a set of CRYPTO_num_locks() mutexes, and lock or unlocks the n-th mutex depending on the value of mode. You can read the documentation for more details. The libcurl website actually has some sample code showing how to do this.
Alternatively, you can build libcurl with a different SSL library that is thread safe, such as GnuTLS.

Adding to char array isn't working

I'm trying to read a text file line by line, and add each line to a char array. But the lines aren't added, at all.
//This is the default char array that comes with the cURL code.
char *text[]={
"one\n",
"two\n",
"three\n",
" Hello, this is CURL email SMTP\n",
NULL
};
/*Now we're going to replace that char array, with an array that holds the contents of a textfile.
We'll read a textfile out line by line, and add each line to the char array.
*/
void makemailmessage()
{
text[0] = '\0'; //Clear text
text[0] = "testy\n"; //First line in new char array
//Read the text file, add each line to the char array.
string line;
ifstream myfile ("C:\\Users\\admin\\Downloads\\bbb.txt");
int counter;
counter = 1;
if (myfile.is_open())
{
while ( myfile.good() )
{
getline (myfile,line);
//Convert the string variable "line" to a char (a)
char *a=new char[line.size()+1];
a[line.size()]=0;
memcpy(a,line.c_str(),line.size());
//Add \n to the end of "a" (new char will be "str")
char str[80];
strcpy (str,a);
strcat (str,"\n");
//Add "str" to the char array "text"
text[counter] = str;
text[counter+1] = "test\n"; //Also added this for testing purposes
write_data("C:\\Users\\admin\\Downloads\\checkit.txt", str); //Also for testing purposes
//Increase counter by 2 because we added two new items to the char array "text"
counter++;
counter++;
}
myfile.close();
text[counter-1] = "testy2\n"; //Ad another text line
text[counter] = NULL; //End char array
}
Each str is written correctly to checkit.txt but for some reason it is not added to the char array because I end up with the char array looking like this:
testy
test
test
testy2
What am I doing wrong?
UPDATE2:
The reason I am trying to make a char array is because the cURL function I am using needs a char array to form the email body. This is the important part of the cURL code.
static size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp)
{
struct WriteThis *pooh = (struct WriteThis *)userp;
const char *data;
if(size*nmemb < 1)
return 0;
data = text[pooh->counter]; //This part is using the char array.
if(data) {
size_t len = strlen(data);
memcpy(ptr, data, len);
pooh->counter++;
return len;
}
return 0;
}
Here's the full code
Okay, after chatting on this a bit more, here is a fix:
C++ version
Full code file here: https://gist.github.com/1342118#file_test.cpp
Replace the relevant code with:
#include <vector>
#include <fstream>
// ...
std::vector<std::string> text;
static int read_text(char* fname)
{
//Read the text file, add each line to the char array.
std::ifstream myfile (fname);
std::string line;
while (std::getline(myfile, line))
text.push_back(line + '\n');
return 0;
}
static size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp)
{
/* This was already in. */
struct WriteThis *pooh = (struct WriteThis *)userp;
if(size*nmemb < 1)
return 0;
if (pooh->counter < text.size())
{
const std::string& data = text[pooh->counter];
memcpy(ptr, data.data(), data.length());
pooh->counter++; /* advance pointer */
return data.length();
}
return 0; /* no more data left to deliver */
}
Pure C version
Full code file here: https://gist.github.com/1342118#file_test.c
Replace
//This is the default char array that comes with the cURL code.
char *text[]={
"one\n",
"two\n",
"three\n",
" Hello, this is CURL email SMTP\n",
NULL
};
With
char **text = 0;
static int read_text(char* fname)
{
unsigned capacity = 10;
int linecount = 0;
// free_text(); see below
text = realloc(text, capacity*sizeof(*text));
FILE* file = fopen(fname, "r");
if (!file)
{ perror("Opening file"); return 1; }
char buf[2048];
char* line = 0;
while (line = fgets(buf, sizeof(buf), file))
{
if (linecount>=capacity)
{
capacity *= 2;
text = realloc(text, capacity*sizeof(*text));
}
text[linecount++] = strdup(line);
}
fclose(file);
return 0;
}
Hook it up in you main function, e.g. like so
if (argc<2)
{
printf("Usage: %s <email.eml>\n", argv[0]);
exit(255);
} else
{
printf("Reading email body from %s\n", argv[1]);
if (0 != read_text(argv[1]))
exit(254);
}
Or, if you so prefer, just calling read_text("C:\\Users\\admin\\Downloads\\bbb.txt") :)
To really top things off, don't forget to reclaim memory when you're done - properly:
#include "curl/curl.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <string.h>
#define GetCurrentDir getcwd
#define USERNAME "obscured#gmail.com"
#define PASSWORD "obscured"
#define SMTPSERVER "smtp.gmail.com"
#define SMTPPORT ":587"
#define RECIPIENT "<obscured#gmail.com>"
#define MAILFROM "<obscured#gmail.com>"
#define MULTI_PERFORM_HANG_TIMEOUT 60 * 1000
/* Note that you should include the actual meta data headers here as well if
you want the mail to have a Subject, another From:, show a To: or whatever
you think your mail should feature! */
char **text = 0;
void free_text()
{
if (text)
{
char** it;
for (it = text; *it; ++it)
free(*it);
free(text);
text = 0;
}
}
static int read_text(char* fname)
{
unsigned capacity = 10;
int linecount = 0;
free_text();
text = realloc(text, capacity*sizeof(*text));
FILE* file = fopen(fname, "r");
if (!file)
{ perror("Opening file"); return 1; }
char buf[2048];
char* line = 0;
while (line = fgets(buf, sizeof(buf), file))
{
if (linecount>=capacity)
{
capacity *= 2;
text = realloc(text, capacity*sizeof(*text));
}
text[linecount++] = strdup(line);
}
if (linecount>=capacity)
text = realloc(text, (++capacity)*sizeof(*text));
text[linecount] = 0; // terminate
fclose(file);
return 0;
}
struct WriteThis {
int counter;
};
static size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp)
{
/* This was already in. */
struct WriteThis *pooh = (struct WriteThis *)userp;
const char *data;
if(size*nmemb < 1)
return 0;
data = text[pooh->counter];
if(data) {
size_t len = strlen(data);
memcpy(ptr, data, len);
pooh->counter++; /* advance pointer */
return len;
}
return 0; /* no more data left to deliver */
}
static struct timeval tvnow(void)
{
/*
** time() returns the value of time in seconds since the Epoch.
*/
struct timeval now;
now.tv_sec = (long)time(NULL);
now.tv_usec = 0;
return now;
}
static long tvdiff(struct timeval newer, struct timeval older)
{
return (newer.tv_sec-older.tv_sec)*1000+
(newer.tv_usec-older.tv_usec)/1000;
}
int main(int argc, char** argv)
{
if (argc<2)
{
printf("Usage: %s <email.eml>\n", argv[0]);
exit(255);
} else
{
printf("Reading email body from %s\n", argv[1]);
if (0 != read_text(argv[1]))
exit(254);
}
CURL *curl;
CURLM *mcurl;
int still_running = 1;
struct timeval mp_start;
char mp_timedout = 0;
struct WriteThis pooh;
struct curl_slist* rcpt_list = NULL;
pooh.counter = 0;
curl_global_init(CURL_GLOBAL_DEFAULT);
curl = curl_easy_init();
if(!curl)
return 1;
mcurl = curl_multi_init();
if(!mcurl)
return 2;
rcpt_list = curl_slist_append(rcpt_list, RECIPIENT);
/* more addresses can be added here
rcpt_list = curl_slist_append(rcpt_list, "<others#example.com>");
*/
curl_easy_setopt(curl, CURLOPT_URL, "smtp://" SMTPSERVER SMTPPORT);
curl_easy_setopt(curl, CURLOPT_USERNAME, USERNAME);
curl_easy_setopt(curl, CURLOPT_PASSWORD, PASSWORD);
curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback);
curl_easy_setopt(curl, CURLOPT_MAIL_FROM, MAILFROM);
curl_easy_setopt(curl, CURLOPT_MAIL_RCPT, rcpt_list);
curl_easy_setopt(curl, CURLOPT_USE_SSL, CURLUSESSL_ALL);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER,0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
curl_easy_setopt(curl, CURLOPT_READDATA, &pooh);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
curl_easy_setopt(curl, CURLOPT_SSLVERSION, 0);
curl_easy_setopt(curl, CURLOPT_SSL_SESSIONID_CACHE, 0);
curl_multi_add_handle(mcurl, curl);
mp_timedout = 0;
mp_start = tvnow();
/* we start some action by calling perform right away */
curl_multi_perform(mcurl, &still_running);
while(still_running) {
struct timeval timeout;
int rc; /* select() return code */
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
int maxfd = -1;
long curl_timeo = -1;
FD_ZERO(&fdread);
FD_ZERO(&fdwrite);
FD_ZERO(&fdexcep);
/* set a suitable timeout to play around with */
timeout.tv_sec = 1;
timeout.tv_usec = 0;
curl_multi_timeout(mcurl, &curl_timeo);
if(curl_timeo >= 0) {
timeout.tv_sec = curl_timeo / 1000;
if(timeout.tv_sec > 1)
timeout.tv_sec = 1;
else
timeout.tv_usec = (curl_timeo % 1000) * 1000;
}
/* get file descriptors from the transfers */
curl_multi_fdset(mcurl, &fdread, &fdwrite, &fdexcep, &maxfd);
/* In a real-world program you OF COURSE check the return code of the
function calls. On success, the value of maxfd is guaranteed to be
greater or equal than -1. We call select(maxfd + 1, ...), specially in
case of (maxfd == -1), we call select(0, ...), which is basically equal
to sleep. */
//rc = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout);
if (tvdiff(tvnow(), mp_start) > MULTI_PERFORM_HANG_TIMEOUT) {
fprintf(stderr, "ABORTING TEST, since it seems "
"that it would have run forever.\n");
break;
}
switch(rc) {
case -1:
/* select error */
break;
case 0: /* timeout */
default: /* action */
curl_multi_perform(mcurl, &still_running);
break;
}
}
curl_slist_free_all(rcpt_list);
curl_multi_remove_handle(mcurl, curl);
curl_multi_cleanup(mcurl);
curl_easy_cleanup(curl);
curl_global_cleanup();
free_text();
return 0;
}
I'm trying to read a text file line by line, and add each line to a
char array.
Since this is C++, why not use an std::vector<string> and use the std::string version of getline?
The std::string class will look after the memory needed to hold a string of any sort of length, and the std::vector class will worry about the memory needed to hold an "array", so to speak, of strings.
EDIT: Actually looking at your code again, you do use an std::string and then allocate memory to store it as an array of chars, and then store pointers to those strings in some fixed sized array, test. Why go to all that trouble when, as I mentioned above, you can use an std::vector<string> to hold all your std::string objects? Mind = boggled.
EDIT2: Couldn't you also use cURLpp as a C++ wrapper for cURL? I haven't used either so I can't comment on the effectiveness of it.
What am I doing wrong?
For one, this:
char str[80];
strcpy (str,a);
strcat (str,"\n");
//Add "str" to the char array "text"
text[counter] = str;
str is allocated on the stack, with block-wide scope. Then you enter that pointer in an array with a greater scope. This is usually a recipe for disaster - a rather impressive segmentation fault or whatever the equivalent is on your platform.
In this case, due to its use in a loop, your program will either crash, or - if the stars have the proper alignment - you will end up with all pointers in your array pointing to the same out-of-scope string, namely the one that was last read.
Why do you even go into that trouble, when you have already dynamically allocated a in the heap?
By the way, mixing char[] arrays (and the associated standard C library functions) with C++ strings is NOT a good idea. Not even an acceptable one. OK, it a bad idea. Just stick to C++ strings...

Multithreaded curl application has memory allocation problems

I'm working on an application in C++ that threads and hands a bunch of threads URLs for cURL to download in parallel.
I'm employing a method that should be safe to download images and videos, etc. I uses memcpy instead of assuming the data is a string or character array.
I pass each thread a structure, thread_status, for a number of things. The structure lets the parent process know the thread is done downloading. It also stores the data cURL is downloading and keeps track of it's size as cURL returns more buffers for writing.
I pass a (void *) pointer that points to each structure that's allocated at initialization to each thread that does the downloading. The first page is downloaded properly, after that I keep getting errors from realloc().
Here is the simplest example that illustrates my problem. This sample is not multi-threaded but uses a similar structure to keep track of itself.
#include <string>
#include <assert.h>
#include <iostream>
#include <curl/curl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define NOT_READY 1
#define READY 0
using namespace std;
struct thread_status {
int id;
pthread_t *pid;
int readyState;
char *url;
void *data;
size_t bufferlen;
size_t writepos;
int initialized;
} ;
size_t static
writefunction( void *ptr, size_t size,
size_t nmemb, void *userdata)
{
size_t nbytes = size*nmemb;
struct thread_status **this_status;
this_status = (struct thread_status **) userdata;
if (!(*this_status)->initialized){
(*this_status)->data = (void *)malloc(1024);
(*this_status)->bufferlen = 1024;
(*this_status)->writepos = 0;
(*this_status)->initialized = true;
}
if ((*this_status)->bufferlen < ((*this_status)->writepos + nbytes)){
(*this_status)->bufferlen = (*this_status)->bufferlen + nbytes;
(*this_status)->data = realloc((*this_status)->data, (size_t) ((*this_status)->writepos + nbytes));
}
assert((*this_status)->data != NULL);
memcpy((*this_status)->data + (*this_status)->writepos, ptr, nbytes);
(*this_status)->writepos += nbytes;
return nbytes;
}
void *pull_data (void *my_struct){
struct thread_status *this_struct;
this_struct = (struct thread_status *) my_struct;
this_struct->initialized = false;
cout<<(char *)this_struct->url<<"\n";
CURL *curl;
curl = curl_easy_init();
size_t rc = 0;
while(true){
curl_easy_setopt(curl,
CURLOPT_WRITEFUNCTION, writefunction);
curl_easy_setopt(curl,
CURLOPT_WRITEDATA, (void *) &this_struct);
curl_easy_setopt(curl,
CURLOPT_NOSIGNAL, true);
curl_easy_setopt(curl,
CURLOPT_URL, (char *)this_struct->url);
if (curl_easy_perform(curl) != 0){
cout<<"curl did not perform\n";
exit(1);
} else {
if (this_struct->data != NULL){
// Use a binary write.
rc = fwrite(this_struct->data, this_struct->writepos, 1, stdout);
free(this_struct->data);
} else {
cout<<"Data is NULL\n";
}
}
// Tell the babysitter the thread is ready.
this_struct->readyState = READY;
// This would pause the thread until the parent thread has processed the data in it.
// while(this_struct->readyState == READY){;}
// Now get ready for another round!
this_struct->writepos = (size_t) 0;
this_struct->initialized = false;
this_struct->bufferlen = (size_t) 0;
break;
}
curl_easy_cleanup(curl);
return (void *)"a";
}
int main(){
char *urls[] = { "http://www.example.com/", "http://www.google.com", "http://www.touspassagers.com/", "http://www.facebook.com/" };
int i=0;
struct thread_status mystatuses[4];
for (i=0;i<4;i++){
struct thread_status my_status;
char *data;
my_status.id = i;
my_status.readyState = NOT_READY;
my_status.url = urls[i];
my_status.data = data;
my_status.bufferlen = 0;
my_status.writepos = 0;
my_status.initialized = false;
mystatuses[i] = my_status;
}
for (i=0;i<4;i++){
cout<<"pulling #"<<i<<"\n";
pull_data((void *)&mystatuses[i]);
}
}
If anyone can enlighten me as to the source of my error or a remedy for it I would appreciate it.
You might consider using valgrind to help locate the source of the memory problems.
Got it!
Apparently 1KB isn't enough memory to handle the first cURL buffer. I changed 1024 to nbytes and it works!
Before the memory memcpy put in the buffer ran over the allocated memory resulting in corruptions.
I did a post about it if anyone cares to see the full implementation:
http://www.touspassagers.com/2011/01/a-working-curlopt_writefunction-function-for-libcurl/