Hey guys, I have the following issue. I've been using C++ with libcurl to scrape a website, and I want to find the 5 most frequent words in htmlOutput, which is a std::string. Currently I have the following code. Any hint would be awesome.
// Download http://example.com with a libcurl easy handle. The body is delivered
// to WriteCallback, which is given &htmlOutput as its user pointer
// (curl, res, htmlOutput and WriteCallback are declared elsewhere).
curl = curl_easy_init();
if (curl) {
    curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &htmlOutput);
    res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    if (res != CURLE_OK) {
        // Report the failure instead of printing an empty or partial page.
        std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res) << std::endl;
    } else {
        std::cout << htmlOutput << std::endl;
    }
}
Here are some hints for more awesomeness (web_text below stands for the downloaded page, i.e. your htmlOutput string):
std::istringstream awsome_stream(web_text);
std::string word;
std::map<std::string, unsigned int> kewl_words;
while (awsome_stream >> word)
{
kewl_words[word]++;
}
std::cout << "Occurances of 'div': " << kewl_words["div"] << "\n";
Related
I'm trying to build a small multithreaded program that takes a list of subdomains and tests whether each one is alive over HTTP or HTTPS. My problem is that the program doesn't produce the correct output: each run gives different output, and it sometimes freezes and doesn't continue execution. I followed http://www.cplusplus.com/reference/thread/thread/thread/ when implementing the multithreading.
int main(int argc, char const *argv[] )
{
if (argc < 2){
cout << "Usage httplive <path to subdomains>" << endl;
}
ifstream http(argv[1]);
string line;
vector <std::thread> thread_pool;
while (getline(http, line)){
thread_pool.push_back(thread(httpTest,line, true));
thread_pool.push_back(thread(httpTest, line, false));
}
for (auto& t : thread_pool){
t.join();
}
return 0;
}
/// Probes a single host and prints its URL when the transfer succeeds.
///
/// @param line  Host name without a scheme; taken by value, the scheme is
///              prepended to this private copy.
/// @param Flag  true selects "https://", false selects "http://".
void httpTest(string line, bool Flag){
    CURL *curl = curl_easy_init();
    if (!curl) {
        return;
    }

    line = Flag ? "https://" + line : "http://" + line;
    curl_easy_setopt(curl, CURLOPT_URL, line.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    // Required in multi-threaded programs: stops libcurl from using signals
    // for timeouts, which is not thread-safe.
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    // Bound both the connect phase and the whole transfer so that a dead or
    // stalled host cannot block its thread (and therefore join()) forever.
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 5L);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);

    const CURLcode res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);

    if (res == CURLE_OK) {
        // Emit URL and newline as one string so lines from concurrently
        // running probes are less likely to be interleaved mid-line.
        cout << (line + "\n") << flush;
    }
}
Basically, my program/code I'm using now is returning a message from a PHP script. The issue I'm having is when I give it a combined string for the URL, it doesn't return anything. But when I enter it manually, it returns the correct information. Here is the code I am using below:
std::string Login(std::string uname, std::string pass)
{
CURL* curl;
CURLcode res;
std::string readBuffer;
std::string path = "localhost/files/login.php?username=" + uname + "&password=" + pass;
std::cout << uname << " " << pass;
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, "localhost/files/login.php?username=123&password=123");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
return readBuffer;
}
return "Failed";
}
This way works because I enter the URL manually, but when I do this it doesn't return anything:
// Build the login URL from the caller-supplied credentials.
std::string path = "localhost/files/login.php?username=" + uname + "&password=" + pass;
// NOTE(review): this is the call that returns nothing. It hands libcurl a
// std::string object, but CURLOPT_URL needs a null-terminated char*;
// pass path.c_str() instead.
curl_easy_setopt(curl, CURLOPT_URL, path);
I'm uncertain whether I'm using the wrong variable or something else. I'm new to using PHP and libCurl and anything web related.
CURLOPT_URL expects a C-style null-terminated char* string pointer as input, not a std::string. You can use the std::string::c_str() method to get a compatible const char* pointer from the std::string:
// c_str() supplies the null-terminated const char* that CURLOPT_URL expects.
curl_easy_setopt(curl, CURLOPT_URL, path.c_str());
Sending messages to curl via multiple threads, and once in a while I get one of the following errors.
curl_easy_perform(): failed ssl connect error. sschannel: next
initializesecuritycontext failed: SEC_E_MESSAGE_ALTERED
curl_easy_perform(): failed ssl connect error. sschannel: next
initializesecuritycontext failed: SEC_E_BUFFER_SMALL
As of now, I'm working around this by re-sending the request. But why does this error happen (the same request works within the next 40 seconds), and what can be done to avoid it?
Source code is written in C++. LibCurl was built using Microsoft visual studio 2010.
Following is the code that invokes the curl library.
// POST requestToPost to the endpoint and collect the response headers and body.
// res, requestToPost, headerInfo, header_data, write_data, curlErrorbuffer and
// responseInfo are declared elsewhere.
// NOTE(review): this code runs on several threads - confirm that
// curlErrorbuffer, headerInfo and responseInfo are per-thread (or guarded);
// sharing them across concurrent transfers is a data race.
CURL *curl = curl_easy_init();
if (curl) {
    std::stringstream resPonseInfo;
    std::stringstream headerResponse;

    // Start from an empty error text so a stale message from an earlier
    // transfer is never reported for this one.
    curlErrorbuffer[0] = '\0';

    curl_easy_setopt(curl, CURLOPT_URL, "connection-page");
    // Numeric options are read as long; an int literal has the wrong type
    // for the varargs call.
    curl_easy_setopt(curl, CURLOPT_POST, 1L);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, requestToPost.c_str());
    // size() is the real payload length; strlen() would stop at an embedded NUL.
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, static_cast<long>(requestToPost.size()));
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerInfo);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_data);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curlErrorbuffer);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resPonseInfo);
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headerResponse);
    curl_easy_setopt(curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);

    res = curl_easy_perform(curl);
    if (res != CURLE_OK)
    {
        // Dump everything known about the failed exchange for diagnosis.
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
        std::cout << "Request === " << std::endl;
        std::cout << requestToPost << std::endl;
        std::cout << "Error === " << std::endl;
        std::cout << curlErrorbuffer << std::endl;
        std::cout << "Header == " << std::endl << headerResponse.str() << std::endl;
        std::cout << "Response == " << std::endl << resPonseInfo.str() << std::endl;
    }
    else
    {
        std::cout << "Response from the http post was successful " << std::endl;
        responseInfo = resPonseInfo.str();
    }

    curl_easy_cleanup(curl);
    curl = NULL;
}
"Sending messages to curl via multiple threads..." - given described symptoms most logical would be to assume a multi-threading related issue
libcurl itself is thread safe, but the shared data and handles you use with it are not. You might want to consult this page: https://curl.haxx.se/libcurl/c/threadsafe.html and ensure your threads are not stepping on each other's toes.
One (possibly) easy way to confirm the above hypothesis: try running your program in single-threaded mode (if you can) and see if the issue recurs. If it does, then it's definitely not threading.
Another way to verify (if the above is not an option): put a mutex around your curl operation (even before you start setting up curl options) and see if that helps avoid those errors.
I'm a beginner in C++. I want to send a request to an API; for this I use libcurl, store the response in a string, and copy the string to a file. This is my test file:
#include <iostream>
#include <string>
#include <curl/curl.h>
#include <fstream>
/// Write callback handed to libcurl: appends the received chunk to *buffer.
///
/// @param data    Start of the received bytes.
/// @param size    Size of one element.
/// @param nmemb   Number of elements.
/// @param buffer  Destination string; may be null.
/// @return size * nmemb when buffer is non-null, otherwise 0.
///
/// NOTE(review): libcurl documents write callbacks as returning size_t; the
/// int return type is dictated by the class declaration - confirm there.
int MyCurlObject::curlWriter(char *data, size_t size, size_t nmemb, std::string *buffer) {
    if (buffer == NULL) {
        return 0;
    }
    const size_t chunkBytes = size * nmemb;
    buffer->append(data, chunkBytes);
    return static_cast<int>(chunkBytes);
}
int main (){
std::string url = "https://www.google.com/";
std::string content;
curl = curl_easy_init();
if(!curl)
{
std::cerr << "impossible d'initialiser curl." << std::endl;
}
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &MyCurlObject::curlWriter);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &content);
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk);
const CURLcode rc = curl_easy_perform(curl);
if( rc != CURLE_OK ) {
std::cout << "Error from cURL: " << curl_easy_strerror(rc) << std::endl;
}
std::ofstream file(fileName);
if(!file){
std::cerr << "can't open this file : " << fileName << std::endl;
}
file << content;
file.close();
return 0;
}
My file contains the whole string, but the lines in the file do not end with a proper end-of-line sequence (I displayed all symbols with Notepad++ and I only see CR at the end of each line), and if I do this:
// Echo each line of the file `name` to stdout (no separators are written
// between lines) while counting how many lines std::getline extracts.
std::ifstream file(name);
if (file)
{
    int nbrOfLine = 0;
    for (std::string current; std::getline(file, current); ++nbrOfLine)
    {
        std::cout << current;
    }
}
it returns 1 (only a single line is counted), but my file contains 1500 lines.
Thank you in advance.
// Fetch https://www.google.com with a libcurl easy handle and print the result.
CURL *curl;
CURLcode res;
// Set up libcurl's global state with the default flags before creating a handle.
curl_global_init(CURL_GLOBAL_DEFAULT);
curl = curl_easy_init();
if (curl) {
// Passed to the writer callback (defined elsewhere) through CURLOPT_WRITEDATA.
string html;
curl_easy_setopt(curl, CURLOPT_URL, "https://www.google.com");
// Follow HTTP redirects.
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &html);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer);
// CA certificate file used to verify the server.
curl_easy_setopt(curl, CURLOPT_CAINFO, "D:\\certification\\DigiCertHighAssuranceEVRootCA.crt");
res = curl_easy_perform(curl);
// NOTE(review): printed unconditionally, so the "Error : " line also appears on success.
// NOTE(review): "Unsupported protocol" for an https URL presumably means this
// libcurl build lacks TLS support, which no curl_easy_setopt option can add - confirm
// against the library build.
cout << "Error : " << curl_easy_strerror(res) << endl;
cout << html;
// NOTE(review): the snippet ends here; the closing brace of `if (curl)` and any
// cleanup are not shown.
The error message is "Unsupported protocol".
An http site works fine, but an https site doesn't.
What should I add via curl_easy_setopt()?