I'm trying to write C++ code that downloads data from some URLs, but it's throwing a write access violation:
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
#endif
#include <curl/curl.h>
#include <string>
static const char* urls[] = {
"http://www.example.com",
"http://www.example1.com",
};
#define MAX_PARALLEL 10 /* number of simultaneous transfers */
#define NUM_URLS sizeof(urls)/sizeof(char *)
static size_t write_cb(void* ptr, size_t size, size_t nmemb, void* buffer)
{
((std::string*)buffer)->append((char*)ptr, nmemb);
return nmemb;
}
static void add_transfer(CURLM* cm, int i, int* left)
{
CURL* eh = curl_easy_init();
curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(eh, CURLOPT_URL, urls[i]);
curl_easy_setopt(eh, CURLOPT_PRIVATE, urls[i]);
curl_easy_setopt(eh, CURLOPT_WRITEDATA, &write_cb);
curl_easy_setopt(eh, CURLOPT_VERBOSE, 1L);
curl_multi_add_handle(cm, eh);
(*left)++;
}
int main(void)
{
CURLM* cm;
unsigned int transfers = 0;
int msgs_left = -1;
int left = 0;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
/* Limit the amount of simultaneous connections curl should allow: */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX_PARALLEL);
for (transfers = 0; transfers < MAX_PARALLEL && transfers < NUM_URLS;
transfers++)
add_transfer(cm, transfers, &left);
do {
int still_alive = 1;
curl_multi_perform(cm, &still_alive);
CURLMsg* msg;
int queued;
CURLMcode mc = curl_multi_perform(cm, &still_alive);
if (cm)
/* wait for activity, timeout or "nothing" */
mc = curl_multi_poll(cm, NULL, 0, 1000, NULL);
if (mc)
break;
do {
msg = curl_multi_info_read(cm, &queued);
if (msg) {
if (msg->msg == CURLMSG_DONE) {
/* a transfer ended */
fprintf(stderr, "Transfer completed\n");
}
}
} while (msg);
if (left)
curl_multi_wait(cm, NULL, 0, 1000, NULL);
} while (left);
curl_multi_cleanup(cm);
curl_global_cleanup();
return EXIT_SUCCESS;
}
It's crashing on the line:
_Mypair._Myval2._Mysize = _Old_size + _Count;
The full error message is:
Exception thrown: write access violation.
this was 0x7FF7941D39D0.
How can I make this code download the data from each URL without errors?
In your write_cb() callback, you are expecting the buffer parameter to point at a std::string object, but in add_transfer() you are setting CURLOPT_WRITEDATA to point at write_cb itself rather than at a std::string object.
Try something more like this instead:
struct url_info {
const char* url;
std::string data;
};
static url_info urls[] = {
{"http://www.example.com", ""},
{"http://www.example1.com", ""}
};
static const int NUM_URLS = sizeof(urls)/sizeof(urls[0]);
static size_t write_cb(void* ptr, size_t size, size_t nmemb, void* buffer)
{
size_t result = size * nmemb;
static_cast<std::string*>(buffer)->append(static_cast<char*>(ptr), result);
return result;
}
static void add_transfer(CURLM* cm, int i, int* left)
{
...
curl_easy_setopt(eh, CURLOPT_URL, urls[i].url);
curl_easy_setopt(eh, CURLOPT_WRITEDATA, &urls[i].data);
curl_easy_setopt(eh, CURLOPT_PRIVATE, &urls[i]);
...
}
int main(void)
{
...
while ((msg = curl_multi_info_read(cm, &queued)) != NULL) {
if (msg->msg == CURLMSG_DONE) {
/* a transfer ended */
url_info *info;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, reinterpret_cast<char**>(&info));
// use info->url and info->data as needed...
std::cerr << "Transfer completed from " << info->url << ", bytes received: " << info->data.size() << "\n";
}
}
...
}
#pragma once
#ifndef __CURL_CURL_H
#include "curl.h"
#endif
#ifndef __CURL_EASY_H
#include "easy.h"
#endif
#include <stdint.h>
#include <memory>
#include <string>
namespace CommUnit
{
enum ERR_PROXY
{
ERR_CURL_INIT_FAILED = 0xA0,
ERR_SET_PROXY_FAILED = 0xA1,
};
class MyProxy
{
public:
static MyProxy & GetInstance() //Meyers' Singleton
{
static MyProxy ProxySigleton;
return ProxySigleton;
}
public:
/*
* #brief: Get request
* #param[in] sUrl:Access URL
* #param[in] sProxyIp:Proxy IP
* #param[in] uProxyPort:Proxy Port
* #param[in] uTimeOut:Time out
* #param[in] isSSL:HTTPS true,else false
* #param[out] sRetContent:Return the URL content
*/
uint32_t Get(const std::string &sUrl,
const std::string& sProxyIp,
uint32_t uProxyPort,
uint32_t uTimeOut,
bool isSSL,
std::string &sRetContent);
private:
MyProxy(); //Constructor hidden
MyProxy(MyProxy const &); //Copy-Constructor hidden
MyProxy & operator= (MyProxy const &); //Assign operator hidden
~MyProxy(); //Destructor hidden
inline void _setCurlopt(CURL *pCurl,
const std::string &sUrl,
std::string &sWriterData,
const uint32_t uTimeOut,
bool isSSL);
//Callback function, write data to writerData
static int Writer(char *data,
uint32_t size,
uint32_t nmemb,
std::string *writerData);
private:
std::string m_sErrMsg;
static char s_ErrBuffer[CURL_ERROR_SIZE];
static const uint32_t m_MAXBUF = 2 * 1024 * 1024 - 128;
};
}
//////////////////////////////////////////////////////////////////////////
#include <iostream>
#include <string>
#include "MyProxy.h"
#include "Log.h"
#include <curl.h>
using namespace CommUnit;
char MyProxy::s_ErrBuffer[CURL_ERROR_SIZE] = { 0 };
MyProxy::MyProxy(void)
{
CURLcode oCURLcode = curl_global_init(CURL_GLOBAL_ALL);
if (oCURLcode != CURLE_OK)
{
Log("ERR: %s curl_init failed!", __func__);
}
}
MyProxy::~MyProxy(void)
{
curl_global_cleanup();
}
uint32_t MyProxy::Get(const std::string &sUrl,
const std::string& sProxyIp,
uint32_t uProxyPort,
uint32_t uTimeOut,
bool isSSL,
std::string &sRetContent)
{
sRetContent.clear();
CURL *pCurl = curl_easy_init();
CURLcode oCURLcode;
if (nullptr == pCurl)
{
Log("ERR: %s curl_easy_init failed!", __func__);
return ERR_CURL_INIT_FAILED;
}
_setCurlopt(pCurl, sUrl, sRetContent, uTimeOut, isSSL);
if (0 == sProxyIp.length()|| 0 == uProxyPort)
{
Log("ERR: %s SetProxy: ProxyIp [%s], ProxyPort[%u] failed",__func__, sProxyIp.c_str(), uProxyPort);
return ERR_SET_PROXY_FAILED;
}
Log("INFO: %s SetProxy: ProxyIp [%s], ProxyPort[%u] failed", __func__, sProxyIp.c_str(), uProxyPort);
curl_easy_setopt(pCurl, CURLOPT_PROXY, sProxyIp.c_str());
curl_easy_setopt(pCurl, CURLOPT_PROXYPORT, uProxyPort);
int iTimes = 0;
while (true)
{
oCURLcode = curl_easy_perform(pCurl);
if (oCURLcode != CURLE_OK && ++iTimes < 3)
usleep(5);
else
break;
}
if (oCURLcode != CURLE_OK)
{
Log("ERR: %s curl_easy_perform failed!", __func__);
}
curl_easy_cleanup(pCurl);
return oCURLcode;
}
void MyProxy::_setCurlopt(CURL *pCurl,
const std::string &sUrl,
std::string &sWriterData,
const uint32_t uTimeOut,
bool isSSL)
{
curl_easy_setopt(pCurl, CURLOPT_ERRORBUFFER, s_ErrBuffer);
curl_easy_setopt(pCurl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(pCurl, CURLOPT_URL, sUrl.c_str());
curl_easy_setopt(pCurl, CURLOPT_TIMEOUT, uTimeOut);
Log("INFO: %s Set Url:[%s],TimeOut:[%d]", __func__, sUrl.c_str(), uTimeOut);
curl_easy_setopt(pCurl, CURLOPT_WRITEFUNCTION, MyProxy::Writer);
curl_easy_setopt(pCurl, CURLOPT_WRITEDATA, &sWriterData);
//Skip peer and hostname verification
if (isSSL)
{
curl_easy_setopt(pCurl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(pCurl, CURLOPT_SSL_VERIFYHOST, 0L);
}
}
int MyProxy::Writer(char *data,
uint32_t size,
uint32_t nmemb,
std::string *writerData)
{
if (writerData == nullptr)
{
Log("ERR: %s writerData is null!", __func__);
return 0;
}
int len = size * nmemb;
if ((writerData->size() + len) > m_MAXBUF)
{
Log("ERR: %s writerData size over MAXBUF!", __func__);
return 0;
}
writerData->append(data, len);
return len;
}
I want to implement a proxy client with libcurl that can fetch the content of a given URL (HTTPS). Moreover, it needs to be thread-safe.
But when I created 200 threads with pthreads to test my code, a segmentation fault occurred sometimes.
How can I solve this problem?
Is it related to sRetContent (std::string)?
Thanks!
Errmsg:
double free or corruption (!prev): 0x0ac72840 ***
Segmentation fault
My understanding is that libcurl is not thread-safe when you are using HTTPS (and it looks like you are), because it relies on underlying SSL libraries. See the libcurl documentation and the OpenSSL documentation for more info.
If your libcurl was compiled with OpenSSL, for example, then you have to install a few callback functions or you can run into issues. This is the sort of thing you need to do (compiles on Windows):
#include <curl/curl.h>
#include <openssl/crypto.h>
#include <windows.h>
static HANDLE *lock_cs; /* one mutex per OpenSSL lock */
void win32_locking_callback(int mode, int type, const char *file, int line)
{
if (mode & CRYPTO_LOCK)
{
WaitForSingleObject(lock_cs[type],INFINITE);
}
else
{
ReleaseMutex(lock_cs[type]);
}
}
void thread_setup(void)
{
int i;
lock_cs=(HANDLE*)OPENSSL_malloc(CRYPTO_num_locks() * sizeof(HANDLE));
for (i=0; i<CRYPTO_num_locks(); i++)
{
lock_cs[i]=CreateMutex(NULL,FALSE,NULL);
}
CRYPTO_set_locking_callback((void (*)(int,int,const char *,int))win32_locking_callback);
}
void thread_cleanup(void)
{
int i;
CRYPTO_set_locking_callback(NULL);
for (i=0; i<CRYPTO_num_locks(); i++)
CloseHandle(lock_cs[i]);
OPENSSL_free(lock_cs);
}
I always call thread_setup() after my call to curl_global_init(CURL_GLOBAL_ALL), and then thread_cleanup() right before my call to curl_global_cleanup().
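For illustration, a minimal sketch of that call order (assuming the thread_setup()/thread_cleanup() helpers shown above):
#include <curl/curl.h>
int main(void)
{
    curl_global_init(CURL_GLOBAL_ALL); /* initialize libcurl before any threads start */
    thread_setup();                    /* install the OpenSSL locking callbacks */
    /* ... create worker threads here; each thread uses its own CURL easy handle ... */
    thread_cleanup();                  /* remove the callbacks and free the mutexes */
    curl_global_cleanup();
    return 0;
}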
I use this sort of code with libcurl often in load test scenarios and have never run into any issues. If you continue to run into problems, it is not libcurl, but something not being done properly in your code.
libcurl is thread-safe as long as you play by the rules
I've never really done anything multithreaded or asynchronous in C++; so far I have only used cURL for single synchronous requests.
To better visualize what I'm trying to do, I wrote a simple JavaScript example that does what I want to do with cURL in C++.
function AddRequest( method, url, data, id ) {
var httpObj = new ActiveXObject("Msxml2.XMLHTTP.6.0"); //new XMLHttpRequest();
httpObj.onreadystatechange = function() {
if (httpObj.readyState == 4)
ResponseCallback( httpObj, id );
};
httpObj.Open( method, url, true );
httpObj.Send( data );
}
function ResponseCallback( httpObj, id ) {
WScript.Echo( id ); //alert( id );
WScript.Echo( httpObj.ResponseText ); //alert( httpObj.ResponseText );
}
//It could now be used like this:
AddRequest("GET","http://example.com/","",1);
AddRequest("GET","https://www.facebook.com","",2);
WScript.Echo( "all requests sent" ); //alert( "all requests sent" );
//these requests are all done at the same time
//and every time a request has finished it calls the ResponseCallback() function,
//telling it which request has finished
cURL just seems to be COMPLETELY different and unnecessarily more complicated than XMLHttpRequest, even though both just send HTTP requests...
Here is my first approach (based on hogren's answer):
#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;
bool printing = false; //will allow us to prevent prints overlapping each other
struct requestStruct { //will allow us to pass more than one argument to the threaded functions
int id;
const char* url;
const char* method;
const char* body;
map<const char*, const char*> headers;
const char* proxy;
int timeout;
};
struct responseStruct { //will allow us to return more than one value from the Request function
long statusCode;
//map<const char*, const char*> headers;
const char* body;
};
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
((string*)stream)->append((char*)ptr, 0, size* count);
return size* count;
}
static void *ResponseCallback(int id, struct responseStruct *response) {
long statusCode = response -> statusCode;
//map<const char*, const char*> headers = response -> headers;
const char* body = response -> body;
//while (printing) {} //wait for other threads to stop printing
printing = true; //tell other threads to not print anything
cout << id << " response received! Code: " << statusCode << endl << body << endl;
printing = false; //tell other threads printing is okay again
return NULL;
}
struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
CURL *curl;
curl = curl_easy_init();
long statusCode = 0;
map<const char*, const char*> respHeaders;
string respBody;
string _url(url);
string _method(method);
string _proxy(proxy);
struct curl_slist *headerList = NULL;
string headerString;
curl_easy_setopt(curl, CURLOPT_URL, url); //set url
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
for (auto header=headers.begin(); header!=headers.end(); ++header) { //make header list
headerString = header->first;
headerString.append(": ").append(header->second);
headerList = curl_slist_append(headerList, headerString.c_str());
//cout << headerString << '\n';
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
if (_proxy != "") //set proxy
curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
if (timeout != 0) //set timeout
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects
//curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
//curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
//??
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
curl_easy_perform(curl); //send the request
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code
struct responseStruct response;
response.statusCode = statusCode;
//response.headers;
response.body = respBody.c_str();
curl_easy_cleanup(curl);
return response;
}
static void *AddRequest( void *arguments ) {
// get arguments:
struct requestStruct *args = (struct requestStruct*)arguments;
int id = args->id;
const char* url = args->url;
const char* method = args->method;
const char* body = args->body;
map<const char*, const char*> headers = args->headers;
const char* proxy = args->proxy;
int timeout = args->timeout;
// print arguments:
//while (printing) {} //wait for other threads to stop printing
//printing = true; //tell other threads to not print anything
// cout << id << endl << url << endl << method << endl;
//printing = false; //tell the other threads it's okay to print again now
struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);
ResponseCallback(id,&response);
pthread_exit(0);
return NULL;
}
int main() {
//map<const char*, const char*> headers;
//headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
//struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
//cout << response.body << endl;
pthread_t threads[3];
struct requestStruct reqArguments[3];
map<const char*, const char*> headers;
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
const char* proxy = "";
reqArguments[0].id = 0;
reqArguments[0].url = "https://www.facebook.com/";
reqArguments[0].method = "GET";
reqArguments[0].headers = headers;
reqArguments[0].body = "";
reqArguments[0].proxy = proxy;
reqArguments[0].timeout = 6000;
pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments
reqArguments[1].id = 1;
reqArguments[1].url = "https://www.facebook.com/";
reqArguments[1].method = "GET";
reqArguments[1].headers = headers;
reqArguments[1].body = "";
reqArguments[1].proxy = proxy;
reqArguments[1].timeout = 6000;
pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments
reqArguments[2].id = 2;
reqArguments[2].url = "https://www.facebook.com/";
reqArguments[2].method = "GET";
reqArguments[2].headers = headers;
reqArguments[2].body = "";
reqArguments[2].proxy = proxy;
reqArguments[2].timeout = 6000;
pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments
getchar(); //prevent console from closing instantly
return 0;
}
I'm not really sure if I'm doing the whole pthread thing correctly.
There are some issues:
1. For some reason only the first request succeeds; the others aren't even sent,
UNLESS I uncomment the first 4 lines of the main function, which do a direct request without a new thread, but I obviously don't want to use that code.
2. The HttpRequest() function doesn't return the response HTML properly; I only receive garbage.
I think issue 2 might be a pointer-related problem with the return struct of HttpRequest(), but I wasn't able to fix it. :(
3. My last and not-so-important problem is that I don't know how to receive the response headers and put them in a map.
Btw: I'm compiling with Visual C++ 2010 and I'm debugging the HTTP traffic with Fiddler.
EDIT: This is your code, corrected.
There was not really a single error, but after several tests I saw that launching several curl_easy_perform() calls at the same time caused issues, so I added a delay (5000 ms is long; you can reduce it).
Also, pthread_exit() caused problems with the response (no response code).
#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;
bool printing = false; //will allow us to prevent prints overlapping each other
#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS__) || defined(__TOS_WIN__)
#include <windows.h>
inline void delay( unsigned long ms )
{
Sleep( ms );
}
#else /* presume POSIX */
#include <unistd.h>
inline void delay( unsigned long ms )
{
usleep( ms * 1000 );
}
#endif
struct requestStruct { //will allow us to pass more than one argument to the threaded functions
int id;
const char* url;
const char* method;
const char* body;
map<const char*, const char*> headers;
const char* proxy;
int timeout;
};
struct responseStruct { //will allow us to return more than one value from the Request function
long statusCode;
//map<const char*, const char*> headers;
const char* body;
};
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
((string*)stream)->append((char*)ptr, 0, size* count);
return size* count;
}
static void *ResponseCallback(int id, struct responseStruct *response) {
long statusCode = response -> statusCode;
//map<const char*, const char*> headers = response -> headers;
const char* body = response -> body;
//while (printing) {} //wait for other threads to stop printing
printing = true; //tell other threads to not print anything
cout << id << " response received! Code: " << statusCode << endl << body << endl;
printing = false; //tell other threads printing is okay again
return NULL;
}
struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
CURL *curl;
curl = curl_easy_init();
long statusCode = 0;
map<const char*, const char*> respHeaders;
string respBody;
string _url(url);
string _method(method);
string _proxy(proxy);
struct curl_slist *headerList = NULL;
string headerString;
curl_easy_setopt(curl, CURLOPT_URL, url); //set url
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
for (std::map<const char*, const char*>::iterator header=headers.begin(); header!=headers.end(); ++header) { //make header list
headerString = header->first;
headerString.append(": ").append(header->second);
headerList = curl_slist_append(headerList, headerString.c_str());
//cout << headerString << '\n';
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
if (_proxy != "") //set proxy
curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
if (timeout != 0) //set timeout
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects
//curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
//curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
//??
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
static int i=0;
delay(5000*(i++));
std::cout << "url: " << _url << ";" << std::endl;
curl_easy_perform(curl); //send the request
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code
struct responseStruct response;
response.statusCode = statusCode;
//response.headers;
response.body = respBody.c_str();
curl_easy_cleanup(curl);
return response;
}
static void *AddRequest( void *arguments ) {
// get arguments:
struct requestStruct *args = (struct requestStruct*)arguments;
int id = args->id;
const char* url = args->url;
const char* method = args->method;
const char* body = args->body;
map<const char*, const char*> headers = args->headers;
const char* proxy = args->proxy;
int timeout = args->timeout;
// print arguments:
//while (printing) {} //wait for other threads to stop printing
//printing = true; //tell other threads to not print anything
// cout << id << endl << url << endl << method << endl;
//printing = false; //tell the other threads it's okay to print again now
struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);
ResponseCallback(id,&response);
/* this code cause trouble (no response code) */
//pthread_exit(0);
return NULL;
}
int main() {
//map<const char*, const char*> headers;
//headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
//struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
//cout << response.body << endl;
pthread_t threads[3];
struct requestStruct reqArguments[3];
map<const char*, const char*> headers;
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
const char* proxy = "";
reqArguments[0].id = 0;
reqArguments[0].url = "https://www.duckduckgo.com/";
reqArguments[0].method = "GET";
reqArguments[0].headers = headers;
reqArguments[0].body = "";
reqArguments[0].proxy = proxy;
reqArguments[0].timeout = 6000;
pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments
reqArguments[1].id = 1;
reqArguments[1].url = "https://www.google.com/";
reqArguments[1].method = "GET";
reqArguments[1].headers = headers;
reqArguments[1].body = "";
reqArguments[1].proxy = proxy;
reqArguments[1].timeout = 6000;
pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments
reqArguments[2].id = 2;
reqArguments[2].url = "https://www.facebook.com/";
reqArguments[2].method = "GET";
reqArguments[2].headers = headers;
reqArguments[2].body = "";
reqArguments[2].proxy = proxy;
reqArguments[2].timeout = 6000;
pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments
// getchar();
// that is cleaner
for (int i=0; i<3; ++i) {
int rc = pthread_join(threads[i], NULL);
printf("In main: thread %d is complete\n", i);
}
return 0;
}
For the last question, about the headers, please post another question on Stack Overflow, since there are already several topics in this one (I think).
And a little piece of advice: working with objects makes code much easier to write and to read.
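For example, a small RAII wrapper around the easy handle (just a sketch of that idea, not part of the corrected code above) makes it hard to forget curl_easy_cleanup():
#include <curl/curl.h>
// Minimal RAII wrapper for a CURL easy handle (illustrative only).
class CurlEasy {
public:
    CurlEasy() : m_handle(curl_easy_init()) {}
    ~CurlEasy() { if (m_handle) curl_easy_cleanup(m_handle); }
    CURL *get() const { return m_handle; }
private:
    CurlEasy(const CurlEasy &);            // non-copyable, in the same pre-C++11 style as the code above
    CurlEasy &operator=(const CurlEasy &);
    CURL *m_handle;
};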
END EDIT
Here is a copy of the official example of multi-threading with libcurl:
http://curl.haxx.se/libcurl/c/multithread.html
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at http://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
/* A multi-threaded example that uses pthreads extensively to fetch
* X remote files at once */
#include <stdio.h>
#include <pthread.h>
#include <curl/curl.h>
#define NUMT 4
/*
List of URLs to fetch.
If you intend to use a SSL-based protocol here you MUST setup the OpenSSL
callback functions as described here:
http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION
*/
const char * const urls[NUMT]= {
"http://curl.haxx.se/",
"ftp://cool.haxx.se/",
"http://www.contactor.se/",
"www.haxx.se"
};
static void *pull_one_url(void *url)
{
CURL *curl;
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_perform(curl); /* ignores error */
curl_easy_cleanup(curl);
return NULL;
}
/*
int pthread_create(pthread_t *new_thread_ID,
const pthread_attr_t *attr,
void * (*start_func)(void *), void *arg);
*/
int main(int argc, char **argv)
{
pthread_t tid[NUMT];
int i;
int error;
/* Must initialize libcurl before any threads are started */
curl_global_init(CURL_GLOBAL_ALL);
for(i=0; i< NUMT; i++) {
error = pthread_create(&tid[i],
NULL, /* default attributes please */
pull_one_url,
(void *)urls[i]);
if(0 != error)
fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i, error);
else
fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
}
/* now wait for all threads to terminate */
for(i=0; i< NUMT; i++) {
error = pthread_join(tid[i], NULL);
fprintf(stderr, "Thread %d terminated\n", i);
}
return 0;
}
For interactive use, you could turn the urls array into a vector.
I hope this helps!
Disclaimer: I am not asking anyone to debug this code; I am more interested in whether anyone can see that I am using libcurl improperly, because as far as I can tell I am following the documentation exactly.
The problem is in the MakeRequest() method. At curl_easy_perform(), I get the following output:
* About to connect() to dynamodb.us-east-1.amazonaws.com port 80 (#0)
* Trying 72.21.195.244... * connected
Then a segfault.
Here is the stack trace:
Thread [1] 30267 [core: 0] (Suspended : Signal : SIGSEGV:Segmentation fault)
Curl_getformdata() at 0x7ffff79069bb
Curl_http() at 0x7ffff790b178
Curl_do() at 0x7ffff791a298
Curl_do_perform() at 0x7ffff7925457
CurlHttpClient::MakeRequest() at CurlHttpClient.cpp:91 0x7ffff7ba17f5
AWSClient::MakeRequest() at AWSClient.cpp:54 0x7ffff7bbac4d
DynamoDbV2Client::GetItem() at DynamoDbV2Client.cpp:34 0x7ffff7bb7380
GetItemResultTest_TestLiveRequest_Test::TestBody() at GetItemResultTest.cpp:88 0x43db5a
testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>() at gtest-all.cc:3,562 0x46502f
testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>() at gtest-all.cc:3,598 0x4602f6
<...more frames...>
Here is the code in question.
#include "http/curl/CurlHttpClient.h"
#include "http/standard/StandardHttpResponse.h"
#include "utils/StringUtils.h"
#include <curl/curl.h>
#include <sstream>
#include <algorithm>
#include <functional>
#include <vector>
bool CurlHttpClient::isInit = false;
void SetOptCodeForHttpMethod(CURL* requestHandle, HttpMethod method)
{
switch (method)
{
case GET:
curl_easy_setopt(requestHandle, CURLOPT_HTTPGET, 1);
break;
case POST:
curl_easy_setopt(requestHandle, CURLOPT_HTTPPOST, 1);
break;
case PUT:
curl_easy_setopt(requestHandle, CURLOPT_PUT, 1);
break;
default:
curl_easy_setopt(requestHandle, CURLOPT_CUSTOMREQUEST, "DELETE");
break;
}
}
CurlHttpClient::CurlHttpClient()
{
if (!isInit)
{
isInit = true;
curl_global_init(CURL_GLOBAL_ALL);
}
}
CurlHttpClient::~CurlHttpClient()
{
}
HttpResponse* CurlHttpClient::MakeRequest(const HttpRequest& request) const
{
struct curl_slist* headers = NULL;
std::stringstream headerStream;
HeaderValueCollection requestHeaders = request.GetHeaders();
for (HeaderValueCollection::iterator iter = requestHeaders.begin();
iter != requestHeaders.end(); ++iter)
{
headerStream.str("");
headerStream << iter->first << ": " << iter->second;
headers = curl_slist_append(headers, headerStream.str().c_str());
}
CURL* singleRequestHandle = curl_easy_init();
HttpResponse* response = NULL;
if (singleRequestHandle)
{
if (headers)
{
curl_easy_setopt(singleRequestHandle, CURLOPT_HTTPHEADER, headers);
}
if(request.GetMethod() == HttpMethod::POST)
{
curl_easy_setopt(singleRequestHandle, CURLOPT_POSTFIELDS, request.GetUri().GetFormParameters().c_str());
}
response = new StandardHttpResponse(request);
SetOptCodeForHttpMethod(singleRequestHandle, request.GetMethod());
std::string url = request.GetURIString(false);
curl_easy_setopt(singleRequestHandle, CURLOPT_URL, url.c_str());
curl_easy_setopt(singleRequestHandle, CURLOPT_WRITEFUNCTION, &CurlHttpClient::WriteData);
curl_easy_setopt(singleRequestHandle, CURLOPT_WRITEDATA, response);
curl_easy_setopt(singleRequestHandle, CURLOPT_HEADERFUNCTION, &CurlHttpClient::WriteHeader);
curl_easy_setopt(singleRequestHandle, CURLOPT_HEADERDATA, response);
if (request.GetContentBody() != NULL)
{
curl_easy_setopt(singleRequestHandle, CURLOPT_POSTFIELDSIZE, request.GetContentBody()->tellp());
curl_easy_setopt(singleRequestHandle, CURLOPT_READFUNCTION, &CurlHttpClient::ReadBody);
curl_easy_setopt(singleRequestHandle, CURLOPT_READDATA, &request);
}
curl_easy_setopt(singleRequestHandle, CURLOPT_VERBOSE, 1L);
curl_easy_perform(singleRequestHandle);
int responseCode;
curl_easy_getinfo(singleRequestHandle, CURLINFO_RESPONSE_CODE, &responseCode);
response->SetResponseCode((HttpResponseCode) responseCode);
char* contentType = NULL;
curl_easy_getinfo(singleRequestHandle, CURLINFO_CONTENT_TYPE, &contentType);
response->SetContentType(contentType);
curl_easy_cleanup(singleRequestHandle);
}
if (headers)
{
curl_slist_free_all(headers);
}
return response;
}
size_t CurlHttpClient::WriteData(char *ptr, size_t size, size_t nmemb, void* userdata)
{
if (ptr)
{
HttpResponse* response = (HttpResponse*)userdata;
if (!response->GetResponseBody())
{
std::streambuf* strBuffer = new std::stringbuf;
response->SetResponseBody(new std::iostream(strBuffer));
}
int sizeToWrite = size * nmemb;
response->GetResponseBody()->write(ptr, sizeToWrite);
return sizeToWrite;
}
return 0;
}
size_t CurlHttpClient::WriteHeader(char *ptr, size_t size, size_t nmemb, void* userdata)
{
if (ptr)
{
HttpResponse* response = (HttpResponse*)userdata;
std::string headerLine(ptr);
std::vector<std::string> keyValuePair = StringUtils::Split(headerLine, ':');
if (keyValuePair.size() == 2)
{
std::string headerName = keyValuePair[0];
headerName = StringUtils::Trim(headerName);
std::string headerValue = keyValuePair[1];
headerValue = StringUtils::Trim(headerValue);
response->AddHeader(headerName, headerValue);
}
return size * nmemb;
}
return 0;
}
size_t CurlHttpClient::ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata)
{
HttpRequest* request = (HttpRequest*)userdata;
std::shared_ptr<std::iostream> outputStream = request->GetContentBody();
if (outputStream != NULL && size * nmemb)
{
size_t written = outputStream->readsome(ptr, size * nmemb);
return written;
}
return 0;
}
For reference here is the definition for CurlHttpClient:
//Curl implementation of an http client. Right now it is only synchronous.
class CurlHttpClient : public HttpClient
{
public:
//Creates client, initializes curl handle if it hasn't been created already.
CurlHttpClient();
//cleans up curl lib
virtual ~CurlHttpClient();
//Makes the request and receives the response synchronously
virtual HttpResponse* MakeRequest(const HttpRequest& request) const;
private:
//Callback to read the content from the content body of the request
static size_t ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata);
//callback to write the content from the response to the response object
static size_t WriteData( char* ptr, size_t size, size_t nmemb, void* userdata);
//callback to write the headers from the response to the response
static size_t WriteHeader( char* ptr, size_t size, size_t nmemb, void* userdata);
//init flag.
static bool isInit;
};
One definite problem I see with the code is
curl_easy_setopt(requestHandle, CURLOPT_HTTPPOST, 1);
CURLOPT_HTTPPOST expects a pointer to a struct curl_httppost. Passing 1 creates a dangling pointer. You probably want to use CURLOPT_POST instead.
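For example, the switch could be written like this (a sketch; only the POST case changes from the SetOptCodeForHttpMethod() shown above, and the option values are passed as long, as curl_easy_setopt() expects):
void SetOptCodeForHttpMethod(CURL* requestHandle, HttpMethod method)
{
    switch (method)
    {
    case GET:
        curl_easy_setopt(requestHandle, CURLOPT_HTTPGET, 1L);
        break;
    case POST:
        curl_easy_setopt(requestHandle, CURLOPT_POST, 1L); /* a long 1, not a curl_httppost pointer */
        break;
    case PUT:
        curl_easy_setopt(requestHandle, CURLOPT_PUT, 1L);
        break;
    default:
        curl_easy_setopt(requestHandle, CURLOPT_CUSTOMREQUEST, "DELETE");
        break;
    }
}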
I'm having some trouble with a C++ program here. Basically I've written a simple wrapper for HTTP requests, with the ability to do multiple requests at once.
It works absolutely fine, but when I do HTTPS requests it crashes randomly in multithreaded mode. I'm using curl and POSIX threads.
Backtrace looks like this:
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x80996)[0x7fea9046d996]
/lib/x86_64-linux-gnu/libc.so.6(+0x82b80)[0x7fea9046fb80]
/lib/x86_64-linux-gnu/libc.so.6(realloc+0xf2)[0x7fea90470ae2]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(CRYPTO_realloc+0x49)[0x7fea8f9c6169]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(lh_insert+0x101)[0x7fea8fa4bfb1]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(+0xe844e)[0x7fea8fa4e44e]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_get_state+0xde)[0x7fea8fa4eeee]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_clear_error+0x15)[0x7fea8fa4f065]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x24e79)[0x7fea90f10e79]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x39ea0)[0x7fea90f25ea0]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0xf8fd)[0x7fea90efb8fd]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x219f5)[0x7fea90f0d9f5]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x35538)[0x7fea90f21538]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_multi_perform+0x91)[0x7fea90f21d31]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_easy_perform+0x107)[0x7fea90f19457]
./exbot[0x40273a]
/lib/x86_64-linux-gnu/libpthread.so.0(+0x7f6e)[0x7fea90cd6f6e]
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)[0x7fea904e79cd]
Could this be a bug in libcrypto?
Can I somehow tell curl not to use libcrypto? Any alternatives?
It only crashes when using HTTPS requests and works fine even with 10000 simultaneous HTTP queries.
Cheers,
Thomas
Just for completeness, here is my code:
// simple wrapper for http requests
#ifndef _REQUEST_H_
#define _REQUEST_H_
#include <curl/curl.h>
#include <pthread.h>
#include <string>
#include <iostream>
//////////////////////////////////
// MACROS
//////////////////////////////////
#define ERR(_msg) std::cerr << __FUNCTION__ << ": " << _msg << std::endl
//////////////////////////////////
// REQUEST WRAPPER
//////////////////////////////////
typedef unsigned int uint;
class RequestWrapper
{
private: // non copyable
RequestWrapper();
RequestWrapper(const RequestWrapper &that);
RequestWrapper &operator=(const RequestWrapper &that);
public:
struct Response
{
Response() : msg(""), success(false) {}
std::string msg;
bool success;
};
static Response simpleGET(std::string url, uint timeout);
static size_t write(char *content, size_t size, size_t nmemb, void *userp);
};
//////////////////////////////////
// GET
//////////////////////////////////
inline size_t RequestWrapper::write(char *content, size_t size, size_t nmemb, void *userp)
{
std::string *buf = static_cast<std::string *>(userp);
size_t realsize = size * nmemb;
for (uint i = 0; i < realsize; ++i)
{
buf->push_back(content[i]);
}
return realsize;
}
inline RequestWrapper::Response RequestWrapper::simpleGET(std::string url, uint timeout)
{
Response resp;
CURL *curl;
CURLcode res;
std::string buf;
// send request
buf.clear();
curl = curl_easy_init();
if (!curl)
{
//ERR("libcurl init failed");
return resp;
}
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void *>(&buf));
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
res = curl_easy_perform(curl);
if(res != CURLE_OK)
{
//ERR("libcurl request failed, CODE: " << res);
return resp;
}
curl_easy_cleanup(curl);
// done
resp.msg = buf;
resp.success = true;
return resp;
}
//////////////////////////////////
// MULTITHREADED REQUEST
//////////////////////////////////
class RequestList
{
private:
std::vector<std::string> _reqs;
static void *sender(void *payload);
static pthread_mutex_t _mutex;
public:
inline void add(std::string request)
{
_reqs.push_back(request);
}
inline void clear()
{
_reqs.clear();
}
std::vector<std::string> send(uint timeout) const;
struct Payload
{
std::string url;
std::vector<std::string> *out;
uint tout, index;
Payload(std::string url,
std::vector<std::string> *out,
uint tout, uint index) : url(url), out(out), tout(tout), index(index) { }
Payload() : url(""), out(NULL), tout(0), index(0) { }
};
};
//////////////////////////////////
// SEND MT REQUEST
//////////////////////////////////
pthread_mutex_t RequestList::_mutex;
void *RequestList::sender(void *payload)
{
Payload *pl = static_cast<Payload *>(payload);
RequestWrapper::Response resp = RequestWrapper::simpleGET(pl->url, pl->tout);
pthread_mutex_lock(&_mutex);
if (resp.success)
{
pl->out->at(pl->index) = resp.msg;
std::cerr << ".";
}
else
{
std::cerr << "x";
}
pthread_mutex_unlock(&_mutex);
return NULL;
}
inline std::vector<std::string> RequestList::send(uint timeout) const
{
std::vector<std::string> resp;
resp.resize(_reqs.size());
Payload *payloads = new Payload[_reqs.size()];
pthread_t *tids = new pthread_t[_reqs.size()];
// create mutex
pthread_mutex_init(&_mutex, NULL);
// prepare payload and create thread
for (uint i = 0; i < _reqs.size(); ++i)
{
payloads[i] = Payload(_reqs[i], &resp, timeout, i);
pthread_create(&tids[i], NULL, RequestList::sender, static_cast<void *>(&payloads[i]));
}
// wait for threads to finish
for (uint i = 0; i < _reqs.size(); ++i)
{
pthread_join(tids[i], NULL);
}
std::cerr << std::endl;
//destroy mutex
pthread_mutex_destroy(&_mutex);
delete[] payloads;
delete[] tids;
return resp;
}
#endif
Libcrypto is part of OpenSSL, which is not thread-safe unless you provide the necessary callbacks. According to the documentation, on a POSIX-compliant system (which has thread-local errno) the default thread-id implementation is acceptable, so you just need a locking function:
void locking_function(int mode, int n, const char *file, int line);
This function will need to maintain a set of CRYPTO_num_locks() mutexes and lock or unlock the n-th mutex depending on the value of mode. You can read the documentation for more details. The libcurl website actually has some sample code showing how to do this.
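A minimal pthreads-based sketch of such a locking callback (targeting the legacy OpenSSL 1.0.x locking API; the ssl_locks array and the init/cleanup helper names are illustrative, not a fixed API):
#include <openssl/crypto.h>
#include <pthread.h>
static pthread_mutex_t *ssl_locks; /* CRYPTO_num_locks() mutexes */
static void locking_function(int mode, int n, const char *file, int line)
{
    (void)file; (void)line;
    if (mode & CRYPTO_LOCK)
        pthread_mutex_lock(&ssl_locks[n]);
    else
        pthread_mutex_unlock(&ssl_locks[n]);
}
static void ssl_locks_init(void) /* call once, after curl_global_init() */
{
    ssl_locks = (pthread_mutex_t *)OPENSSL_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t));
    for (int i = 0; i < CRYPTO_num_locks(); ++i)
        pthread_mutex_init(&ssl_locks[i], NULL);
    CRYPTO_set_locking_callback(locking_function);
}
static void ssl_locks_cleanup(void) /* call before curl_global_cleanup() */
{
    CRYPTO_set_locking_callback(NULL);
    for (int i = 0; i < CRYPTO_num_locks(); ++i)
        pthread_mutex_destroy(&ssl_locks[i]);
    OPENSSL_free(ssl_locks);
}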
Alternatively, you can build libcurl with a different SSL library that is thread safe, such as GnuTLS.
I'm working on a C++ application that spawns a bunch of threads and hands them URLs for cURL to download in parallel.
I'm employing a method that should be safe for downloading images, videos, etc.: I use memcpy instead of assuming the data is a string or character array.
I pass each thread a structure, thread_status, which is used for a number of things. The structure lets the parent process know the thread is done downloading. It also stores the data cURL is downloading and keeps track of its size as cURL returns more buffers for writing.
I pass each downloading thread a (void *) pointer to its structure, which is allocated at initialization. The first page is downloaded properly, but after that I keep getting errors from realloc().
Here is the simplest example that illustrates my problem. This sample is not multithreaded, but it uses a similar structure to keep track of itself.
#include <string>
#include <assert.h>
#include <iostream>
#include <curl/curl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h> /* for pthread_t in thread_status */
#define NOT_READY 1
#define READY 0
using namespace std;
struct thread_status {
int id;
pthread_t *pid;
int readyState;
char *url;
void *data;
size_t bufferlen;
size_t writepos;
int initialized;
} ;
size_t static
writefunction( void *ptr, size_t size,
size_t nmemb, void *userdata)
{
size_t nbytes = size*nmemb;
struct thread_status **this_status;
this_status = (struct thread_status **) userdata;
if (!(*this_status)->initialized){
(*this_status)->data = (void *)malloc(1024);
(*this_status)->bufferlen = 1024;
(*this_status)->writepos = 0;
(*this_status)->initialized = true;
}
if ((*this_status)->bufferlen < ((*this_status)->writepos + nbytes)){
(*this_status)->bufferlen = (*this_status)->bufferlen + nbytes;
(*this_status)->data = realloc((*this_status)->data, (size_t) ((*this_status)->writepos + nbytes));
}
assert((*this_status)->data != NULL);
memcpy((*this_status)->data + (*this_status)->writepos, ptr, nbytes);
(*this_status)->writepos += nbytes;
return nbytes;
}
void *pull_data (void *my_struct){
struct thread_status *this_struct;
this_struct = (struct thread_status *) my_struct;
this_struct->initialized = false;
cout<<(char *)this_struct->url<<"\n";
CURL *curl;
curl = curl_easy_init();
size_t rc = 0;
while(true){
curl_easy_setopt(curl,
CURLOPT_WRITEFUNCTION, writefunction);
curl_easy_setopt(curl,
CURLOPT_WRITEDATA, (void *) &this_struct);
curl_easy_setopt(curl,
CURLOPT_NOSIGNAL, true);
curl_easy_setopt(curl,
CURLOPT_URL, (char *)this_struct->url);
if (curl_easy_perform(curl) != 0){
cout<<"curl did not perform\n";
exit(1);
} else {
if (this_struct->data != NULL){
// Use a binary write.
rc = fwrite(this_struct->data, this_struct->writepos, 1, stdout);
free(this_struct->data);
} else {
cout<<"Data is NULL\n";
}
}
// Tell the babysitter the thread is ready.
this_struct->readyState = READY;
// This would pause the thread until the parent thread has processed the data in it.
// while(this_struct->readyState == READY){;}
// Now get ready for another round!
this_struct->writepos = (size_t) 0;
this_struct->initialized = false;
this_struct->bufferlen = (size_t) 0;
break;
}
curl_easy_cleanup(curl);
return (void *)"a";
}
int main(){
char *urls[] = { "http://www.example.com/", "http://www.google.com", "http://www.touspassagers.com/", "http://www.facebook.com/" };
int i=0;
struct thread_status mystatuses[4];
for (i=0;i<4;i++){
struct thread_status my_status;
char *data;
my_status.id = i;
my_status.readyState = NOT_READY;
my_status.url = urls[i];
my_status.data = data;
my_status.bufferlen = 0;
my_status.writepos = 0;
my_status.initialized = false;
mystatuses[i] = my_status;
}
for (i=0;i<4;i++){
cout<<"pulling #"<<i<<"\n";
pull_data((void *)&mystatuses[i]);
}
}
If anyone can enlighten me as to the source of my error or a remedy for it I would appreciate it.
You might consider using valgrind to help locate the source of the memory problems.
Got it!
Apparently 1 KB isn't enough memory to hold the first cURL buffer. I changed 1024 to nbytes and it works!
Before, the data memcpy wrote into the buffer ran past the allocated memory, resulting in the corruption.
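For reference, here is a sketch of the write callback with the buffer growth kept consistent (it assumes data starts out NULL and bufferlen/writepos start at 0; this reworks the writefunction() above rather than reproducing the code from the linked post):
size_t writefunction(void *ptr, size_t size, size_t nmemb, void *userdata)
{
    size_t nbytes = size * nmemb;
    struct thread_status *st = *(struct thread_status **) userdata;
    if (st->bufferlen < st->writepos + nbytes) {
        size_t newlen = st->writepos + nbytes;   /* grow to at least what we need */
        void *grown = realloc(st->data, newlen); /* realloc(NULL, n) behaves like malloc */
        if (grown == NULL)
            return 0;                            /* tell libcurl the write failed */
        st->data = grown;
        st->bufferlen = newlen;                  /* keep bufferlen equal to the real allocation */
    }
    memcpy((char *) st->data + st->writepos, ptr, nbytes);
    st->writepos += nbytes;
    return nbytes;
}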
I did a post about it if anyone cares to see the full implementation:
http://www.touspassagers.com/2011/01/a-working-curlopt_writefunction-function-for-libcurl/