Disclaimer: I am not asking anyone to debug this code, I am more interested to know if anyone sees that I am using libcurl improperly, because as far as I can tell, I am following the documentation exactly.
The problem is in the MakeRequest() method. At curl_easy_perform(), I get std output of
* About to connect() to dynamodb.us-east-1.amazonaws.com port 80 (#0)
* Trying 72.21.195.244... * connected
Then a segfault.
Here is the stack trace:
Thread [1] 30267 [core: 0] (Suspended : Signal : SIGSEGV:Segmentation fault)
Curl_getformdata() at 0x7ffff79069bb
Curl_http() at 0x7ffff790b178
Curl_do() at 0x7ffff791a298
Curl_do_perform() at 0x7ffff7925457
CurlHttpClient::MakeRequest() at CurlHttpClient.cpp:91 0x7ffff7ba17f5
AWSClient::MakeRequest() at AWSClient.cpp:54 0x7ffff7bbac4d
DynamoDbV2Client::GetItem() at DynamoDbV2Client.cpp:34 0x7ffff7bb7380
GetItemResultTest_TestLiveRequest_Test::TestBody() at GetItemResultTest.cpp:88 0x43db5a
testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>() at gtest-all.cc:3,562 0x46502f
testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>() at gtest-all.cc:3,598 0x4602f6
<...more frames...>
Here is the code in question.
#include "http/curl/CurlHttpClient.h"
#include "http/standard/StandardHttpResponse.h"
#include "utils/StringUtils.h"
#include <curl/curl.h>
#include <sstream>
#include <algorithm>
#include <functional>
#include <vector>
bool CurlHttpClient::isInit = false;
// Configures the libcurl easy handle for the HTTP verb of the request.
//   requestHandle - easy handle being prepared for a transfer.
//   method        - verb to use; anything other than GET, POST or PUT is sent as DELETE.
// libcurl reads option values through varargs, so numeric flags are passed as `long`.
void SetOptCodeForHttpMethod(CURL* requestHandle, HttpMethod method)
{
    switch (method)
    {
    case GET:
        curl_easy_setopt(requestHandle, CURLOPT_HTTPGET, 1L);
        break;
    case POST:
        // CURLOPT_POST takes a boolean flag. CURLOPT_HTTPPOST (used here before) expects a
        // pointer to a curl_httppost list, so passing 1 handed libcurl an invalid pointer.
        curl_easy_setopt(requestHandle, CURLOPT_POST, 1L);
        break;
    case PUT:
        curl_easy_setopt(requestHandle, CURLOPT_PUT, 1L);
        break;
    default:
        curl_easy_setopt(requestHandle, CURLOPT_CUSTOMREQUEST, "DELETE");
        break;
    }
}
// Runs libcurl's global initialisation the first time a client is constructed;
// later constructions see the isInit flag already set and do nothing.
CurlHttpClient::CurlHttpClient()
{
    if (isInit)
    {
        return;
    }
    isInit = true;
    curl_global_init(CURL_GLOBAL_ALL);
}
// Empty destructor: nothing is released here, and curl_global_cleanup() is not called.
CurlHttpClient::~CurlHttpClient()
{
}
// Performs `request` synchronously with a fresh libcurl easy handle.
//   request - the request to send (headers, method, URI and optional content body).
// Returns a StandardHttpResponse allocated with `new`, or NULL when no easy handle could
// be created. The response code is taken from CURLINFO_RESPONSE_CODE and stays 0 when
// libcurl received no response code from the server.
HttpResponse* CurlHttpClient::MakeRequest(const HttpRequest& request) const
{
    struct curl_slist* headers = NULL;
    std::stringstream headerStream;
    HeaderValueCollection requestHeaders = request.GetHeaders();
    for (HeaderValueCollection::iterator iter = requestHeaders.begin();
         iter != requestHeaders.end(); ++iter)
    {
        headerStream.str("");
        headerStream << iter->first << ": " << iter->second;
        // curl_slist_append returns NULL on failure; keep the list built so far instead of
        // overwriting (and leaking) it.
        struct curl_slist* extended = curl_slist_append(headers, headerStream.str().c_str());
        if (extended)
        {
            headers = extended;
        }
    }

    // CURLOPT_POSTFIELDS does not copy its argument, so the buffer has to stay alive until
    // curl_easy_perform() returns. Holding it in a local guarantees that.
    std::string formParameters;

    CURL* singleRequestHandle = curl_easy_init();
    HttpResponse* response = NULL;
    if (singleRequestHandle)
    {
        if (headers)
        {
            curl_easy_setopt(singleRequestHandle, CURLOPT_HTTPHEADER, headers);
        }
        if (request.GetMethod() == HttpMethod::POST)
        {
            formParameters = request.GetUri().GetFormParameters();
            curl_easy_setopt(singleRequestHandle, CURLOPT_POSTFIELDS, formParameters.c_str());
        }
        response = new StandardHttpResponse(request);
        SetOptCodeForHttpMethod(singleRequestHandle, request.GetMethod());
        std::string url = request.GetURIString(false);
        curl_easy_setopt(singleRequestHandle, CURLOPT_URL, url.c_str());
        curl_easy_setopt(singleRequestHandle, CURLOPT_WRITEFUNCTION, &CurlHttpClient::WriteData);
        curl_easy_setopt(singleRequestHandle, CURLOPT_WRITEDATA, response);
        curl_easy_setopt(singleRequestHandle, CURLOPT_HEADERFUNCTION, &CurlHttpClient::WriteHeader);
        curl_easy_setopt(singleRequestHandle, CURLOPT_HEADERDATA, response);
        if (request.GetContentBody() != NULL)
        {
            // tellp() yields a stream position object; libcurl expects a plain long here.
            const std::streamoff bodySize = request.GetContentBody()->tellp();
            curl_easy_setopt(singleRequestHandle, CURLOPT_POSTFIELDSIZE, static_cast<long>(bodySize));
            curl_easy_setopt(singleRequestHandle, CURLOPT_READFUNCTION, &CurlHttpClient::ReadBody);
            curl_easy_setopt(singleRequestHandle, CURLOPT_READDATA, &request);
        }
        curl_easy_setopt(singleRequestHandle, CURLOPT_VERBOSE, 1L);
        curl_easy_perform(singleRequestHandle);

        // CURLINFO_RESPONSE_CODE writes a long; an int target is the wrong size on LP64.
        long responseCode = 0;
        curl_easy_getinfo(singleRequestHandle, CURLINFO_RESPONSE_CODE, &responseCode);
        response->SetResponseCode(static_cast<HttpResponseCode>(responseCode));

        // libcurl leaves the content type NULL when the server sent none.
        // NOTE(review): assumes SetContentType must not receive NULL - confirm its signature.
        char* contentType = NULL;
        curl_easy_getinfo(singleRequestHandle, CURLINFO_CONTENT_TYPE, &contentType);
        if (contentType)
        {
            response->SetContentType(contentType);
        }
        curl_easy_cleanup(singleRequestHandle);
    }
    if (headers)
    {
        curl_slist_free_all(headers);
    }
    return response;
}
size_t CurlHttpClient::WriteData(char *ptr, size_t size, size_t nmemb, void* userdata)
{
if (ptr)
{
HttpResponse* response = (HttpResponse*)userdata;
if (!response->GetResponseBody())
{
std::streambuf* strBuffer = new std::stringbuf;
response->SetResponseBody(new std::iostream(strBuffer));
}
int sizeToWrite = size * nmemb;
response->GetResponseBody()->write(ptr, sizeToWrite);
return sizeToWrite;
}
return 0;
}
size_t CurlHttpClient::WriteHeader(char *ptr, size_t size, size_t nmemb, void* userdata)
{
if (ptr)
{
HttpResponse* response = (HttpResponse*)userdata;
std::string headerLine(ptr);
std::vector<std::string> keyValuePair = StringUtils::Split(headerLine, ':');
if (keyValuePair.size() == 2)
{
std::string headerName = keyValuePair[0];
headerName = StringUtils::Trim(headerName);
std::string headerValue = keyValuePair[1];
headerValue = StringUtils::Trim(headerValue);
response->AddHeader(headerName, headerValue);
}
return size * nmemb;
}
return 0;
}
size_t CurlHttpClient::ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata)
{
HttpRequest* request = (HttpRequest*)userdata;
std::shared_ptr<std::iostream> outputStream = request->GetContentBody();
if (outputStream != NULL && size * nmemb)
{
size_t written = outputStream->readsome(ptr, size * nmemb);
return written;
}
return 0;
}
For reference here is the definition for CurlHttpClient:
// libcurl-backed implementation of HttpClient. Requests are performed synchronously only.
class CurlHttpClient : public HttpClient
{
public:
// Creates a client; the first construction also calls curl_global_init().
CurlHttpClient();
// Destructor. NOTE(review): the definition in this file is empty, so no libcurl cleanup happens here.
virtual ~CurlHttpClient();
// Performs the request and receives the response synchronously.
virtual HttpResponse* MakeRequest(const HttpRequest& request) const;
private:
// libcurl read callback: supplies bytes from the content body of the request.
static size_t ReadBody(char* ptr, size_t size, size_t nmemb, void* userdata);
// libcurl write callback: stores response body bytes in the response object.
static size_t WriteData( char* ptr, size_t size, size_t nmemb, void* userdata);
// libcurl header callback: stores response headers in the response object.
static size_t WriteHeader( char* ptr, size_t size, size_t nmemb, void* userdata);
// Set to true once curl_global_init() has been called.
static bool isInit;
};
One definite problem I see with the code is
curl_easy_setopt(requestHandle, CURLOPT_HTTPPOST, 1);
CURLOPT_HTTPPOST expects a pointer to a struct curl_httppost. Passing 1 hands libcurl an invalid pointer (address 1), which it then dereferences while building the form data. You probably want to use CURLOPT_POST instead.
Related
Closed. This question is not reproducible or was caused by typos. It is not currently accepting answers.
This question was caused by a typo or a problem that can no longer be reproduced. While similar questions may be on-topic here, this one was resolved in a way less likely to help future readers.
Closed last month.
Improve this question
I'm trying to write C++ code that downloads data from some URLs, but it throws a write access violation:
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
#endif
#include <curl/curl.h>
#include <string>
/* URLs to download; one transfer is started per entry. */
static const char* urls[] = {
    "http://www.example.com",
    "http://www.example1.com",
};
#define MAX_PARALLEL 10 /* number of simultaneous transfers */
/* Number of entries in urls. Parenthesised so the division cannot be regrouped by
 * operators next to the macro (e.g. `100 / NUM_URLS`). */
#define NUM_URLS (sizeof(urls) / sizeof(urls[0]))
/* libcurl write callback: appends the received chunk to the std::string passed as buffer.
 *   ptr    - received bytes (not NUL-terminated).
 *   size   - element size.
 *   nmemb  - element count; the chunk is size * nmemb bytes long.
 *   buffer - std::string* registered with CURLOPT_WRITEDATA.
 * Returns the number of bytes consumed. libcurl aborts the transfer when this differs
 * from size * nmemb, so the full product is used rather than nmemb alone. */
static size_t write_cb(void* ptr, size_t size, size_t nmemb, void* buffer)
{
    const size_t total = size * nmemb;
    static_cast<std::string*>(buffer)->append(static_cast<const char*>(ptr), total);
    return total;
}
static void add_transfer(CURLM* cm, int i, int* left)
{
CURL* eh = curl_easy_init();
curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(eh, CURLOPT_URL, urls[i]);
curl_easy_setopt(eh, CURLOPT_PRIVATE, urls[i]);
curl_easy_setopt(eh, CURLOPT_WRITEDATA, &write_cb);
curl_easy_setopt(eh, CURLOPT_VERBOSE, 1L);
curl_multi_add_handle(cm, eh);
(*left)++;
}
int main(void)
{
CURLM* cm;
unsigned int transfers = 0;
int msgs_left = -1;
int left = 0;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
/* Limit the amount of simultaneous connections curl should allow: */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX_PARALLEL);
for (transfers = 0; transfers < MAX_PARALLEL && transfers < NUM_URLS;
transfers++)
add_transfer(cm, transfers, &left);
do {
int still_alive = 1;
curl_multi_perform(cm, &still_alive);
CURLMsg* msg;
int queued;
CURLMcode mc = curl_multi_perform(cm, &still_alive);
if (cm)
/* wait for activity, timeout or "nothing" */
mc = curl_multi_poll(cm, NULL, 0, 1000, NULL);
if (mc)
break;
do {
msg = curl_multi_info_read(cm, &queued);
if (msg) {
if (msg->msg == CURLMSG_DONE) {
/* a transfer ended */
fprintf(stderr, "Transfer completed\n");
}
}
} while (msg);
if (left)
curl_multi_wait(cm, NULL, 0, 1000, NULL);
} while (left);
curl_multi_cleanup(cm);
curl_global_cleanup();
return EXIT_SUCCESS;
}
It's crashing on the line:
_Mypair._Myval2._Mysize = _Old_size + _Count;
The full error message is:
Exception thrown: write access violation.
this was 0x7FF7941D39D0.
How can I make this code download each Url data without any error?
In your write_cb() callback, you are expecting the buffer parameter to point at a std::string object, but in add_transfer() you are setting CURLOPT_WRITEDATA to point at write_cb itself rather than at a std::string object.
Try something more like this instead:
// One download: the address to fetch and the buffer that receives its body.
struct url_info {
    const char* url;
    std::string data;
};

// Table of downloads; every receive buffer starts out empty.
static url_info urls[] = {
    {"http://www.example.com", std::string()},
    {"http://www.example1.com", std::string()}
};

// Number of entries in the table above.
static const int NUM_URLS = sizeof(urls) / sizeof(*urls);
// libcurl write callback: appends the size * nmemb received bytes to the std::string
// passed as buffer and reports that many bytes as consumed.
static size_t write_cb(void* ptr, size_t size, size_t nmemb, void* buffer)
{
    std::string* sink = static_cast<std::string*>(buffer);
    const char* chunk = static_cast<char*>(ptr);
    const size_t byteCount = size * nmemb;
    sink->append(chunk, chunk + byteCount);
    return byteCount;
}
// Sketch of the corrected add_transfer(): only the lines that change are shown, and "..."
// stands for the code that was elided.
static void add_transfer(CURLM* cm, int i, int* left)
{
...
curl_easy_setopt(eh, CURLOPT_URL, urls[i].url);
// Give the write callback this entry's own std::string to append to.
curl_easy_setopt(eh, CURLOPT_WRITEDATA, &urls[i].data);
// Attach the whole table entry so it can be fetched back with CURLINFO_PRIVATE.
curl_easy_setopt(eh, CURLOPT_PRIVATE, &urls[i]);
...
}
// Sketch of the corrected message loop in main(): "..." stands for the code that was elided.
int main(void)
{
...
// Drain every pending message from the multi handle.
while ((msg = curl_multi_info_read(cm, &queued)) != NULL) {
if (msg->msg == CURLMSG_DONE) {
/* a transfer ended */
url_info *info;
// Retrieve the pointer stored with CURLOPT_PRIVATE; the API is typed as char**, hence the cast.
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, reinterpret_cast<char**>(&info));
// use info->url and info->data as needed...
std::cerr << "Transfer completed from " << info->url << ", bytes received: " << info->data.size() << "\n";
}
}
...
}
I'm trying to use libcurl (http://curl.haxx.se/libcurl/c/) to download data from a website and store it in a text file. Here is my code:
// CLASS SinaStk
size_t save_data(char *buffer, size_t size, size_t nmemb, FILE* userdata){
locale loc = std::locale::global(std::locale("")); //TRY TO OPEN FILE WITH CHINESE
userdata = fopen(fpath.c_str(), "w");
if (userdata == NULL)
printf("File not open!\n");
locale::global(loc);
size_t writelen=size * nmemb;
fwrite(buffer, size, nmemb, userdata);
return writelen;
};
virtual void downloadUrl()
{
CURL* stkCURL=NULL;
CURLcode res;
FILE * fp=NULL;
curl_global_init(CURL_GLOBAL_WIN32);
stkCURL = curl_easy_init();
curl_easy_setopt(stkCURL, CURLOPT_URL,"http://hq.sinajs.cn/list=s_sh000001");
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA,fp);
res=curl_easy_perform(stkCURL); //<-STOP!!!!
fclose(fp);
curl_easy_cleanup(stkCURL);
curl_global_cleanup();
return;
};
and when I debug my code, it always stop and then jump to xstring:
// Excerpt from the standard library's xstring where the debugger stops. The function only
// reads a member of *this.
// NOTE(review): the access violation at address 0x0000009E reported below suggests the
// std::string object itself (`this`) is invalid - confirm in the debugger.
size_type size() const _NOEXCEPT
{ // return length of sequence
return (this->_Mysize); // <-STOP!!!
}
0xC0000005: Access violation reading location 0x0000009E
I have no idea about the problem for almost a week. I am upset, I asked people around me and nobody knows why.
Thanks for reading, I am really confused.
=============
Problem is solved! Thanks you guys! now my code is:
//CLASS StkApiInfo
// Handles one chunk of downloaded data: echoes it to cout and writes it to the output file.
//   buffer - received bytes; libcurl does not NUL-terminate them.
//   size   - element size.
//   nmemb  - element count; the chunk is size * nmemb bytes long.
// Returns size * nmemb so libcurl treats the whole chunk as consumed.
// NOTE(review): the file is reopened with ios::out for every chunk, which truncates it, so
// for a response delivered in several chunks only the last one remains on disk. Opening
// the file once per download would fix that but needs a change outside this method.
size_t writeData(char* buffer, size_t size, size_t nmemb){
    const size_t total = size * nmemb;
    if (stkFile.is_open()){
        stkFile.close();
        stkFile.clear();
    }
    fpath = "D:\\Code\\代码\\数据文件\\" + fname + ".txt";
    stkFile.open(fpath.c_str(), ios::out);
    // Write exactly `total` bytes. Streaming `buffer` with << would treat it as a C string
    // and read past the end of the chunk until it happened to find a NUL byte.
    cout.write(buffer, static_cast<std::streamsize>(total)) << size << nmemb;
    stkFile.write(buffer, static_cast<std::streamsize>(total)) << endl;
    stkFile.close();
    stkFile.clear();
    return total;
}
//CLASS SinaStk : public StkApiInfo
static size_t save_data(char *buffer, size_t size, size_t nmemb, void* userdata){
SinaStk* self = (SinaStk*)userdata;
return self->writeData(buffer, size, nmemb);
};
virtual void downloadUrl()
{
CURL* stkCURL = NULL;
CURLcode res;
curl_global_init(CURL_GLOBAL_WIN32);
stkCURL = curl_easy_init();
if (stkCURL)
{
curl_easy_setopt(stkCURL, CURLOPT_URL, stkUrl.c_str());
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA, this);
res = curl_easy_perform(stkCURL);
//if (res != CURLE_OK)
curl_easy_cleanup(stkCURL);
curl_global_cleanup();
}
return;
};
The callback passed with CURLOPT_WRITEFUNCTION must be of type write_callback (with exactly that signature) and therefore cannot be a non-static class method. The usual workaround is to define the callback as a non-member function or a static method and pass this as its user-data argument:
static size_t save_data(char *buffer, size_t size, size_t nmemb, void* userdata)
{
SinaStk* self = (SinaStk*) userdata;
return self->doStuff(buffer, size, nmemb);
}
// Sketch: only the two options relevant to the callback are shown; "//..." marks elided code.
virtual void downloadUrl()
{
//...
// Register the static trampoline as the write callback...
curl_easy_setopt(stkCURL, CURLOPT_WRITEFUNCTION, &SinaStk::save_data);
// ...and pass this object as the userdata it receives.
curl_easy_setopt(stkCURL, CURLOPT_WRITEDATA, this);
//...
}
If you need to access additional data (like the FILE* in your example), you can either store it as a class field or introduce a temporary structure that contains this plus the additional data fields, and pass its address as the callback argument.
I'm having some trouble with a C++ program here. Basically I've written a simple wrapper for http requests, with the ability to do multiple requests at once.
Works absolutely fine, but when I do httpS requests, it crashes randomly in multithreaded mode. I'm using curl and posix threads.
Backtrace looks like this:
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x80996)[0x7fea9046d996]
/lib/x86_64-linux-gnu/libc.so.6(+0x82b80)[0x7fea9046fb80]
/lib/x86_64-linux-gnu/libc.so.6(realloc+0xf2)[0x7fea90470ae2]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(CRYPTO_realloc+0x49)[0x7fea8f9c6169]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(lh_insert+0x101)[0x7fea8fa4bfb1]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(+0xe844e)[0x7fea8fa4e44e]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_get_state+0xde)[0x7fea8fa4eeee]
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0(ERR_clear_error+0x15)[0x7fea8fa4f065]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x24e79)[0x7fea90f10e79]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x39ea0)[0x7fea90f25ea0]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0xf8fd)[0x7fea90efb8fd]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x219f5)[0x7fea90f0d9f5]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(+0x35538)[0x7fea90f21538]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_multi_perform+0x91)[0x7fea90f21d31]
/usr/lib/x86_64-linux-gnu/libcurl.so.4(curl_easy_perform+0x107)[0x7fea90f19457]
./exbot[0x40273a]
/lib/x86_64-linux-gnu/libpthread.so.0(+0x7f6e)[0x7fea90cd6f6e]
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)[0x7fea904e79cd]
Could this be a bug in libcrypto?
Can I somehow tell curl not to use libcrypto? Any alternatives?
It only crashes when using HTTPS requests and works fine with even 10000 simultaneous HTTP queries.
Cheers,
Thomas
Just for completeness my code:
// simple wrapper for http requests
#ifndef _REQUEST_H_
#define _REQUEST_H_
#include <curl/curl.h>
#include <pthread.h>
#include <iostream>
#include <string>
#include <vector>
//////////////////////////////////
// MACROS
//////////////////////////////////
// Writes "<function name>: <_msg>" followed by a newline to std::cerr. _msg is pasted into
// a << chain, so it may itself contain further << operands.
#define ERR(_msg) std::cerr << __FUNCTION__ << ": " << _msg << std::endl
//////////////////////////////////
// REQUEST WRAPPER
//////////////////////////////////
// Shorthand for unsigned int; used for timeouts and indices in this header.
typedef unsigned int uint;
// Static-only helper around libcurl for simple HTTP GET requests.
class RequestWrapper
{
private: // non copyable
// Constructors and assignment are declared private, so the class cannot be instantiated
// or copied from outside; only the static members below are used.
RequestWrapper();
RequestWrapper(const RequestWrapper &that);
RequestWrapper &operator=(const RequestWrapper &that);
public:
// Result of a request: the response body and whether the transfer succeeded.
struct Response
{
// Starts out as an empty, unsuccessful response.
Response() : msg(""), success(false) {}
std::string msg;
bool success;
};
// Performs a GET on url; timeout is handed to CURLOPT_TIMEOUT.
static Response simpleGET(std::string url, uint timeout);
// libcurl write callback: appends received bytes to the std::string passed as userp.
static size_t write(char *content, size_t size, size_t nmemb, void *userp);
};
//////////////////////////////////
// GET
//////////////////////////////////
// libcurl write callback: appends the size * nmemb received bytes to the std::string that
// userp points to and reports them all as consumed.
inline size_t RequestWrapper::write(char *content, size_t size, size_t nmemb, void *userp)
{
    const size_t byteCount = size * nmemb;
    static_cast<std::string *>(userp)->append(content, byteCount);
    return byteCount;
}
// Performs a blocking HTTP GET.
//   url     - address to fetch; redirects are followed.
//   timeout - maximum time for the whole transfer, in seconds (CURLOPT_TIMEOUT).
// Returns a Response whose success flag is true and whose msg holds the body when the
// transfer completed; otherwise the default (empty, unsuccessful) Response.
inline RequestWrapper::Response RequestWrapper::simpleGET(std::string url, uint timeout)
{
    Response resp;
    std::string buf;

    CURL *curl = curl_easy_init();
    if (!curl)
    {
        //ERR("libcurl init failed");
        return resp;
    }
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void *>(&buf));
    // libcurl reads this option as a long through varargs, so convert explicitly.
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, static_cast<long>(timeout));
    // This function is run from several threads (see RequestList); libcurl's documentation
    // asks multi-threaded programs that use timeouts to disable its signal handling.
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);

    const CURLcode res = curl_easy_perform(curl);
    // Release the handle on every path; the early return on failure used to leak it.
    curl_easy_cleanup(curl);
    if (res != CURLE_OK)
    {
        //ERR("libcurl request failed, CODE: " << res);
        return resp;
    }

    resp.msg = buf;
    resp.success = true;
    return resp;
}
//////////////////////////////////
// MULTITHREADED REQUEST
//////////////////////////////////
// Collects request URLs and sends them all at once, one thread per request.
class RequestList
{
private:
// URLs queued by add().
std::vector<std::string> _reqs;
// Thread entry point; payload points to a Payload.
static void *sender(void *payload);
// Mutex locked by sender() while it stores a result and prints progress.
static pthread_mutex_t _mutex;
public:
// Queues one more request URL.
inline void add(std::string request)
{
_reqs.push_back(request);
}
// Removes all queued requests.
inline void clear()
{
_reqs.clear();
}
// Sends every queued request; the returned vector has one slot per request.
std::vector<std::string> send(uint timeout) const;
// Per-thread argument block handed to sender().
struct Payload
{
std::string url; // address to fetch
std::vector<std::string> *out; // result vector the thread writes into
uint tout, index; // timeout passed to simpleGET, and the slot in *out to fill
Payload(std::string url,
std::vector<std::string> *out,
uint tout, uint index) : url(url), out(out), tout(tout), index(index) { }
Payload() : url(""), out(NULL), tout(0), index(0) { }
};
};
//////////////////////////////////
// SEND MT REQUEST
//////////////////////////////////
// Storage for the mutex that serialises result storage and progress output in sender().
// NOTE(review): this is a non-inline definition inside a header, so including the header
// from more than one translation unit would define it twice - confirm how it is included.
pthread_mutex_t RequestList::_mutex;

// Thread entry point: fetches the payload's URL and, on success, stores the body in the
// payload's slot of the shared result vector. Prints "." for success and "x" for failure.
//   payload - pointer to the Payload describing this request.
// Always returns NULL.
// Marked inline like the other functions defined in this header, so the definition may
// appear in several translation units.
inline void *RequestList::sender(void *payload)
{
    Payload *pl = static_cast<Payload *>(payload);
    // The network transfer runs outside the lock; only the shared state is guarded.
    RequestWrapper::Response resp = RequestWrapper::simpleGET(pl->url, pl->tout);
    pthread_mutex_lock(&_mutex);
    if (resp.success)
    {
        pl->out->at(pl->index) = resp.msg;
        std::cerr << ".";
    }
    else
    {
        std::cerr << "x";
    }
    pthread_mutex_unlock(&_mutex);
    return NULL;
}
inline std::vector<std::string> RequestList::send(uint timeout) const
{
std::vector<std::string> resp;
resp.resize(_reqs.size());
Payload *payloads = new Payload[_reqs.size()];
pthread_t *tids = new pthread_t[_reqs.size()];
// create mutex
pthread_mutex_init(&_mutex, NULL);
// prepare payload and create thread
for (uint i = 0; i < _reqs.size(); ++i)
{
payloads[i] = Payload(_reqs[i], &resp, timeout, i);
pthread_create(&tids[i], NULL, RequestList::sender, static_cast<void *>(&payloads[i]));
}
// wait for threads to finish
for (uint i = 0; i < _reqs.size(); ++i)
{
pthread_join(tids[i], NULL);
}
std::cerr << std::endl;
//destroy mutex
pthread_mutex_destroy(&_mutex);
delete[] payloads;
delete[] tids;
return resp;
}
#endif
Libcrypto is part of OpenSSL, which is not thread-safe unless you provide the necessary callbacks. According to the documentation, on a POSIX-compliant system (which has thread-local errno) the default thread-id implementation is acceptable, so you just need a locking function:
void locking_function(int mode, int n, const char *file, int line);
This function will need to maintain a set of CRYPTO_num_locks() mutexes, and lock or unlock the n-th mutex depending on the value of mode. You can read the documentation for more details. The libcurl website actually has some sample code showing how to do this.
Alternatively, you can build libcurl with a different SSL library that is thread safe, such as GnuTLS.
I read a few articles on c++ / curl here on stackoverflow and assembled the following.
The main goal is to handle the whole request in an instance of a class -- and maybe later in a secondary thread.
My problem is: "content_" seems to stay empty though its the same addr and
HttpFetch.h:
// Fetches one URL with libcurl and collects the response body in content_.
// NOTE(review): the class holds a raw CURL* and declares no copy constructor, assignment
// operator or destructor here, so a copy would share the same handle pointer.
class HttpFetch
{
private:
// libcurl easy handle used for the transfer.
CURL *curl;
// Static write callback registered with libcurl; p is the std::string to append to.
static size_t handle(char * data, size_t size, size_t nmemb, void * p);
// NOTE(review): declared, but no definition appears in the code shown.
size_t handle_impl(char * data, size_t size, size_t nmemb);
public:
// Response body accumulated by handle().
std::string content_;
// NOTE(review): static member that the code shown never uses - confirm it is needed.
static std::string url_;
// Prepares a transfer for url.
HttpFetch(std::string url);
// Runs the transfer.
void start();
// NOTE(review): declared, but no definition appears in the code shown.
std::string data();
};
HttpFetch.cpp:
// Creates the easy handle and configures it to download `url` into content_.
//   url - address to fetch; libcurl keeps its own copy of the string.
// If no handle can be created, curl stays null and no options are set.
HttpFetch::HttpFetch(std::string url) {
    curl_global_init(CURL_GLOBAL_ALL); //pretty obvious
    curl = curl_easy_init();
    content_.append("Test");
    std::cout << &content_ << "\n";
    if (!curl) {
        return;
    }
    // CURLOPT_URL expects a NUL-terminated char*. Passing &url handed libcurl the address
    // of the std::string object itself, not of its characters.
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &content_);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &HttpFetch::handle);
    //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); //tell curl to output its progress
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    //std::cout << &content_ << "\n";
}
// Runs the transfer and then releases the easy handle.
// Does nothing when there is no handle, either because creation failed or because a
// previous start() already released it.
void HttpFetch::start() {
    if (!curl) {
        return;
    }
    curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    // Forget the freed handle so a second start() cannot use it.
    curl = NULL;
}
// libcurl write callback: appends the size * nmemb received bytes to the std::string that
// p points to, then prints that string's address (the pointer, not its contents) to
// std::cout. Returns the number of bytes consumed.
size_t HttpFetch::handle(char * data, size_t size, size_t nmemb, void * p)
{
    const size_t received = size * nmemb;
    std::string *target = reinterpret_cast<std::string*>(p);
    target->append(data, received);
    std::cout << target << "\n";
    return received;
}
main.cpp:
#include "HttpFetch.h"
// Fetches one page and prints the collected body.
int main(int argc, const char * argv[])
{
    // Construct the object directly. `*new HttpFetch(...)` created a heap object that was
    // never freed and copied it, while libcurl kept writing into the original's content_.
    HttpFetch call("http://www.example.com");
    call.start();
    ::std::cout << call.content_ << "\n";
    return 0;
}
Thanks in advance
There are several problems with your code. The main problem is the line
HttpFetch call = *new HttpFetch("http://www.example.com");
You create a new HttpFetch instance and copy it to another one. So you have two instances and two content strings. To remove this issue change it to:
HttpFetch call("http://www.example.com");
Another error is the line
curl_easy_setopt(curl, CURLOPT_URL, &url);
which should be
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
You could have avoided such issues if you had thought about resource management in a class like HttpFetch. Since HttpFetch manages a resource (a curl handle), you have to think about how to initialize and clean up this resource and how to handle copy, assignment, and move. If you use C++11, the easiest solution is to use a std::unique_ptr, which handles all of that for you.
class HttpFetch
{
public:
HttpFetch(const std::string& url);
void start();
void Print(std::ostream& stream);
private:
typedef void (*cleanup)(CURL*);
typedef std::unique_ptr<CURL, cleanup> CurlHandle;
CurlHandle curlHandle;
std::string content_;
static size_t handle(char * data, size_t size, size_t nmemb, void * p);
};
// Creates the easy handle, with curl_easy_cleanup as its deleter, and configures it to
// download url into content_, following redirects.
HttpFetch::HttpFetch(const std::string& url)
    : curlHandle(curl_easy_init(), &curl_easy_cleanup)
{
    CURL* const easy = curlHandle.get();
    curl_easy_setopt(easy, CURLOPT_URL, url.c_str());
    curl_easy_setopt(easy, CURLOPT_WRITEDATA, &content_);
    curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &HttpFetch::handle);
    curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, 1L);
}
// libcurl write callback: appends the size * nmemb received bytes to the std::string that
// p points to and reports them all as consumed.
size_t HttpFetch::handle(char * data, size_t size, size_t nmemb, void * p){
    const size_t received = size * nmemb;
    std::string& target = *static_cast<std::string*>(p);
    target.append(data, received);
    return received;
}
void HttpFetch::start() {
content_.clear();
curl_easy_perform(curlHandle.get());
}
// Writes the collected response body to stream.
void HttpFetch::Print(std::ostream& stream){
stream << content_;
}
// Fetches one page and prints its body to std::cout.
int main()
{
//HttpFetch call = *new HttpFetch("..."); // this is a compiler error now
HttpFetch call("http://www.google.com");
call.start();
call.Print(std::cout);
}
With a unique_ptr member, your class HttpFetch becomes non-copyable and move-only. This makes sense unless you provide logic to copy or share a CURL handle between different instances of HttpFetch.
I'm trying to save site's source code to vector, where every line of source code is a new vector element, because I only need to use one specific line (number 47) in my program. Any idea how to do this?
Load the data from the URL.
Using cURL:
std::vector<char> LoadFromUrl(const std::string& url)
{
struct Content
{
std::vector<char> data;
static size_t Write(char * data, size_t size, size_t nmemb, void * p)
{
return static_cast<Content*>(p)->WriteImpl(data, size, nmemb);
}
size_t WriteImpl(char* ptr, size_t size, size_t nmemb)
{
data.insert(end(data), ptr, ptr + size * nmemb);
return size * nmemb;
}
};
Content content;
CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &content);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &Content::Write);
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_perform(curl);
content.data.push_back('\0');
return content.data;
}
Tokenize the data using strtok or boost tokenizer or your own implementation:
// Downloads url and splits the body at '\n' using strtok, which modifies the buffer in
// place and yields no tokens for empty lines.
std::vector<std::string> LoadLines(const std::string& url)
{
    std::vector<char> raw = LoadFromUrl(url);
    std::vector<std::string> result;
    char* piece = strtok(&raw.front(), "\n");
    while (piece)
    {
        result.push_back(std::string(piece));
        piece = strtok(0, "\n");
    }
    return result;
}
// Downloads the page and prints each of its lines, followed by a newline, to std::cout.
int main()
{
    const std::vector<std::string> lines = LoadLines(
        "http://stackoverflow.com/questions/10773009/save-sites-source-code-to-vectorstring");
    for (const std::string& line : lines)
    {
        std::cout << line << "\n";
    }
}