C++ libcurl multithreading - c++

I'm trying to build a small multithreaded program that takes a list of subdomains and tests whether each one is alive over HTTP or HTTPS. My problem is that the program doesn't produce the correct output: each run gives different output, and it also freezes and doesn't continue execution. I followed http://www.cplusplus.com/reference/thread/thread/thread/ when implementing the multithreading.
int main(int argc, char const *argv[] )
{
if (argc < 2){
cout << "Usage httplive <path to subdomains>" << endl;
}
ifstream http(argv[1]);
string line;
vector <std::thread> thread_pool;
while (getline(http, line)){
thread_pool.push_back(thread(httpTest,line, true));
thread_pool.push_back(thread(httpTest, line, false));
}
for (auto& t : thread_pool){
t.join();
}
return 0;
}
/**
 * Probes a single host and prints its full URL to stdout when the transfer
 * completes successfully.
 *
 * @param line  host name without a scheme; taken by value and prefixed locally
 * @param Flag  true selects "https://", false selects "http://"
 */
void httpTest(string line, bool Flag){
    CURL *curl = curl_easy_init();
    if (!curl) {
        return;
    }
    line = (Flag ? "https://" : "http://") + line;
    curl_easy_setopt(curl, CURLOPT_URL, line.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    // Required when using timeouts from multiple threads: stops libcurl from
    // relying on signals, which are process-wide.
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    // Without limits an unresponsive host blocks its thread (and the final
    // join) indefinitely.
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 5L);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L);
    const CURLcode res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    if (res == CURLE_OK) {
        // Emit URL and newline as one insertion so that concurrent threads
        // are less likely to interleave text within a line.
        cout << (line + "\n") << flush;
    }
}

Related

curl Programming to access https site

I downloaded the source code of Curl and built the library (libcurl.lib). Following is the code to read from the site and dump the contents.
The code works well for http sites and fails for https. I tried downloading the OpenSSL libraries, but I am unable to link them because more linker errors are thrown.
What is the best solution to handle this?
#include "stdafx.h"
#include <iostream>
#include <string>
#include <curl/curl.h>
/**
 * libcurl write callback: appends the received chunk to a std::string.
 *
 * @param contents  start of the received bytes
 * @param size      size of one element
 * @param nmemb     number of elements in the chunk
 * @param userp     pointer to the destination std::string
 * @return number of bytes appended (size * nmemb)
 */
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
    const size_t byteCount = size * nmemb;
    std::string *sink = static_cast<std::string*>(userp);
    const char *chunk = static_cast<const char*>(contents);
    sink->append(chunk, byteCount);
    return byteCount;
}
int main(void)
{
CURL *curl;
CURLcode res;
std::string readBuffer;
std::string curl_url = "https://www.example.com/";
curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_URL, curl_url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
std::cout << "Error from cURL: " << curl_easy_strerror(res) << std::endl;
}
curl_easy_cleanup(curl);
std::cout << "Finished reading from the website" << std::endl;
std::cout << readBuffer << std::endl;
}
return 0;
}

Writing Data Fails With CURLOPT_WRITEFUNCTION After Many Writeback Calls

Essentially, I have a write callback that I set for CURLOPT_WRITEFUNCTION. I also set my pointer for writing the data to in CURLOPT_WRITEDATA.
I run an infinite while loop for around 5 seconds, then curl fails to write data to the void* up pointer defined in CURLOPT_WRITEDATA; after that one failure it starts working again. I get tons of successful writes sprinkled with failures. Is this a memory issue, and if so, is there a way to avoid this issue altogether?
// these locations will likely be different on your local.
#include "../cygwin64/usr/include/curl/curl.h"
#include "../cygwin64/usr/include/json/json.h"
/**
 * libcurl write callback: appends the received chunk to a std::string.
 *
 * @param buf    start of the received bytes (not NUL-terminated)
 * @param size   size of one element
 * @param nmemb  number of elements in the chunk
 * @param up     pointer to the destination std::string
 * @return number of bytes consumed (size * nmemb)
 */
size_t write_callback(char *buf, size_t size, size_t nmemb, void* up) {
    const size_t num_bytes = size * nmemb;
    // One bulk append replaces the per-byte loop, whose signed int index was
    // compared against an unsigned size_t.
    static_cast<std::string*>(up)->append(buf, num_bytes);
    return num_bytes;
}
/**
 * Creates an easy handle whose response body is appended to `chunk` and whose
 * request carries `headers`.
 *
 * @param headers  request header list; only referenced, not copied or freed here
 * @param chunk    string that write_callback appends the response body to
 * @return the configured handle, or NULL when curl_easy_init() fails
 */
CURL* init_curl(struct curl_slist *headers, std::string* chunk) {
    CURL *curl = curl_easy_init();
    if (!curl) {
        // Nothing to configure; the caller checks for NULL.
        return NULL;
    }
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, static_cast<void*>(chunk));
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    return curl;
}
bool curl_get(struct curl_slist *headers, const std::string& url,
Json::Value* json_res) {
// this is where my data should be written to
std::string data;
CURL* curl = init_curl(headers, &data);
CURLcode res;
bool success = true;
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
res = curl_easy_perform(curl);
if(res != CURLE_OK) {
std::cout << "Could not perform get for " << url << std::endl;
std::cout << curl_easy_strerror(res) << std::endl;
success = false;
} else {
Json::Value json_data;
Json::CharReaderBuilder json_reader;
std::istringstream stream_data(data);
std::string errs;
if(Json::parseFromStream(json_reader, stream_data, &json_data, &errs)) {
std::cout << "successfully parsed JSON data for: " << url << std::endl;
*json_res = json_data;
} else {
std::cout << "failed to parse JSON data for: " << url << std::endl;
std::cout << errs << std::endl;
std::cout << json_data << std::endl;
std::cout << "finished failing" << std::endl;
success = false;
}
}
} else {
success = false;
}
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
return success;
}
/**
 * Polls the quotes endpoint forever, printing each parsed response or a
 * failure message. The loop has no exit, so the statements after it are
 * never reached.
 */
int main(int argc, char** argv) {
    curl_global_init(CURL_GLOBAL_ALL);

    // Single-entry header list asking for a JSON response.
    struct curl_slist *headers = curl_slist_append(NULL, "Accept: application/json");

    for (;;) {
        Json::Value response;
        const bool ok = curl_get(headers, "https://api.robinhood.com/quotes/?symbols=AMZN", &response);
        if (!ok) {
            // reaches here sometimes because response (my data) is null
            std::cout << "failed to get last trade price" << std::endl;
            continue;
        }
        std::cout << response << std::endl;
    }

    curl_global_cleanup();
    return 0;
}
I expect no failures, but I get intermittent failures to write the data to my WRITEDATA chunk.

c++ : libcurl request in file format error

I'm a beginner in C++. I want to send a request to an API; for this I use libcurl, store the response in a string, and copy the string to a file. This is my test file:
#include <iostream>
#include <string>
#include <curl/curl.h>
#include <fstream>
/**
 * Write callback: appends the received chunk to `buffer`.
 *
 * @param data    start of the received bytes
 * @param size    size of one element
 * @param nmemb   number of elements in the chunk
 * @param buffer  destination string; may be NULL
 * @return size * nmemb converted to int, or 0 when `buffer` is NULL
 */
int MyCurlObject::curlWriter(char *data, size_t size, size_t nmemb, std::string *buffer) {
    if (buffer == NULL) {
        return 0;
    }
    const size_t total = size * nmemb;
    buffer->append(data, total);
    return static_cast<int>(total);
}
int main (){
std::string url = "https://www.google.com/";
std::string content;
curl = curl_easy_init();
if(!curl)
{
std::cerr << "impossible d'initialiser curl." << std::endl;
}
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &MyCurlObject::curlWriter);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &content);
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk);
const CURLcode rc = curl_easy_perform(curl);
if( rc != CURLE_OK ) {
std::cout << "Error from cURL: " << curl_easy_strerror(rc) << std::endl;
}
std::ofstream file(fileName);
if(!file){
std::cerr << "can't open this file : " << fileName << std::endl;
}
file << content;
file.close();
return 0;
}
My file contains the whole string, but the lines in the file do not contain an end-of-line symbol (I displayed all symbols with Notepad++ and I only see CR at the end of each line), and if I do this:
// Open the file and, if that worked, echo every line (without its line
// terminator) while counting how many lines getline() produced.
std::ifstream file(name);
if(file)
{
    int nbrOfLine = 0;
    for (std::string crash; getline(file, crash); ++nbrOfLine)
    {
        std::cout << crash;
    }
}
it returns 1, but my file contains 1500 lines.
thank you in advance

Way to get word frequency from string?

Hey guys, I have the following issue. I've been using C++ to scrape a website, and I want to find the 5 most frequent words in outputHTML, which is a string. Currently I have the following code. Any hint would be awesome.
// Fetch http://example.com, collecting the body in htmlOutput through
// WriteCallback, then print what was collected.
curl = curl_easy_init();
if (curl) {
    // The three options are independent of one another.
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &htmlOutput);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
    curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");

    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);

    std::cout << htmlOutput << std::endl;
}
Here's some hints for more awesomeness:
std::istringstream awsome_stream(web_text);
std::string word;
std::map<std::string, unsigned int> kewl_words;
while (awsome_stream >> word)
{
kewl_words[word]++;
}
std::cout << "Occurances of 'div': " << kewl_words["div"] << "\n";

C++ libcurl seg fault in write callback function

I'm trying to get something done quick and dirty. I saw another SO question and tried to reuse the code. I'm hitting a couple rest services (not multithreaded) that return json and when the CURLOPT_WRITEFUNCTION is called it throws a seg fault. I'm still trying to grasp all the c++ concepts so it's been pretty difficult diagnosing.
Here's what I see
// Points at the buffer most recently appended to by writer().
static std::string *DownloadedResponse;

/**
 * Write callback: appends the received chunk to `buffer_in`, records that
 * buffer in DownloadedResponse, and logs each step to stdout.
 *
 * @param data       start of the received bytes
 * @param size       size of one element
 * @param nmemb      number of elements in the chunk
 * @param buffer_in  destination string; may be NULL
 * @return size * nmemb, or 0 when `buffer_in` is NULL
 */
static size_t writer(char *data, size_t size, size_t nmemb, std::string *buffer_in)
{
    cout << "In writer callback" << endl;
    // Without a destination there is nothing to do.
    if (buffer_in == NULL)
    {
        return 0;
    }
    cout << "Buffer not null" << endl;
    const size_t total = size * nmemb;
    buffer_in->append(data, total);
    cout <<" Buffer appended, seting response" << endl;
    DownloadedResponse = buffer_in;
    cout << "Set downloadedResponse" << endl;
    return total;
}
std::string downloadJSON(std::string URL)
{
CURL *curl;
CURLcode res;
struct curl_slist *headers=NULL; // init to NULL is important
std::ostringstream oss;
curl_slist_append(headers, "Accept: application/json");
curl_slist_append( headers, "Content-Type: application/json");
curl_slist_append( headers, "charsets: utf-8");
curl = curl_easy_init();
if (curl)
{
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_URL, URL.c_str());
curl_easy_setopt(curl, CURLOPT_HTTPGET,1);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,writer); // I comment this to display response in stdout.
cout << "calling easy_perform" << endl;
res = curl_easy_perform(curl);
cout << "call made.." << endl;
if (CURLE_OK == res)
{
char *ct;
res = curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct);
if((CURLE_OK == res) && ct)
{
cout << "returning downloaded resposne" << endl;
return *DownloadedResponse;
}
}
else
{
cout << "CURLCode: " << res << endl;
}
}
cout << "Returning null" << endl;
return NULL;
}
Output
$ ./test-rest
calling easy_perform
In writer callback
Buffer not nullSegmentation fault (core dumped)
How am I improperly using the string in the writer callback function?
You forgot to pass in a string pointer/reference with CURLOPT_WRITEDATA.