How can I extract the domain from a URL? - c++

I'm currently making a few changes in the rTorrent source. I have the following code:
// Formats the string argument as "URL: <url>." and returns it as a
// torrent::Object.
//
// rawArgs  command argument; its as_string() value is the URL.
//
// The text is assembled in a std::string instead of a fixed 50-byte buffer,
// so URLs longer than ~43 characters are no longer silently truncated.
torrent::Object
apply_to_domain(const torrent::Object& rawArgs) {
  return std::string("URL: ") + rawArgs.as_string().c_str() + ".";
}
I need to extract the domain from url. There's a regex.h included in the source but I'm not sure if I can use that or if I need to use a different regex library.
Link to regex.h

The only thing that "regex" implementation handles is the wildcard character, *. (BTW, I'm just assuming it's a wildcard, since it's the only character that's recognised and the comments seem to hint as much, but I haven't actually verified it.)
Use a proper regex library like Boost.Regex.

// This is a hacked up whole string pattern matching. Replace with
// TR1's regex when that becomes widely available. It is intended for
// small strings.
That's not going to work for extracting the domain. Use Boost or VSCRT TR1 instead.

See *get_active_tracker_domain* in command_pyroscope.cc

In windows:
#include <winsock2.h>
#include <windows.h>
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cctype>
#include <locale>
#pragma comment(lib,"ws2_32.lib")
using namespace std;
string website_HTML;
locale local;
//***************************
void get_Website(char *url );
void extract_URL();
//***************************
// Entry point: fetches the front page of a fixed host with get_Website()
// and then prints the "http:" URLs found in it with extract_URL().
int main ()
{
    // get_Website() takes a non-const char*, and binding a string literal to
    // char* is ill-formed since C++11, so the host name lives in a writable
    // array instead.
    char url[] = "www.bbc.com";
    get_Website(url);
    extract_URL();
    return 0;
}
//***************************
void get_Website(char *url )
{
WSADATA wsaData;
SOCKET Socket;
SOCKADDR_IN SockAddr;
int lineCount=0;
int rowCount=0;
struct hostent *host;
char *get_http= new char[256];
memset(get_http,' ', sizeof(get_http) );
strcpy(get_http,"GET / HTTP/1.1\r\nHost: ");
strcat(get_http,url);
strcat(get_http,"\r\nConnection: close\r\n\r\n");
if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0)
{
cout << "WSAStartup failed.\n";
exit(0);
}
Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
host = gethostbyname(url);
SockAddr.sin_port=htons(80);
SockAddr.sin_family=AF_INET;
SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr);
cout << "Connecting to ["<< url<<"]...\n";
if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0)
{
cout << "Could not connect\n";
exit(0);
}
cout << "Connected. (success!)\n";
std::cout << std::flush;
send(Socket,get_http, strlen(get_http),0 );
char buffer[10000];
int nDataLength;
int i = 0;
while ((nDataLength = recv(Socket,buffer,10000,0)) > 0)
{
while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r')
{
website_HTML+=buffer[i];
i += 1;
}
}
cout<<"\n"<<i<<" bytes downloaded \n\n";
closesocket(Socket);
WSACleanup();
delete[] get_http;
}
void extract_URL()
{
for (size_t i=0; i<website_HTML.length(); ++i) website_HTML[i]= tolower(website_HTML[i],local);
std::string to_find = "http:";
std::vector<string> extracted_website_URL;
std::string string_to_split;
char chr_String[1000];
int count = 0;
char seps[] = "\"";
char *token;
cout << "\nExtracting url.. ";
for (int j = 0; j < website_HTML.length() - to_find.length(); j++)
{
if (website_HTML.substr(j, to_find.length()) == to_find)
{
count++;
string_to_split=website_HTML.substr(j, to_find.length()+256);
strcpy(chr_String , string_to_split.c_str() );
token = strtok( chr_String, seps );
extracted_website_URL.push_back(token);
//cout<<website_HTML.substr(j, to_find.length()+30)<<" \n";
}
std::cout << "\b\\" << std::flush;
std::cout << "\b|" << std::flush;
std::cout << "\b/" << std::flush;
std::cout << "\b-" << std::flush;
}
for(j=0;j<extracted_website_URL.size();j++) cout<<extracted_website_URL[j] <<" \n";
cout<<"\n"<<extracted_website_URL.size()<<" URL's extracted ";
cout<<"\n\n";
}

something basic but that may do the job:
#include <regex>
// Returns the host part of `url` ("scheme://[userinfo@]host[:port][/...]").
//
// url  absolute URL.
//
// If `url` has no "scheme://" prefix followed by a host, it is returned
// unchanged (same fallback as before). Bracketed IPv6 literals are not
// supported.
std::string getHostFromUrl(const std::string & url) {
    // The scheme is matched non-greedily up to the FIRST "://"; the previous
    // "^.*://" was greedy and picked up a URL embedded in the query string.
    // An optional "userinfo@" is skipped, and the host ends at '/', '?', ':'
    // or '#'.
    static const std::regex urlRe("^[^:/?#]*://(?:[^/?#@]*@)?([^/?:#]+)");
    std::smatch match;
    if (!std::regex_search(url, match, urlRe)) {
        return url;
    }
    return match[1].str();
}

Try C++11 Regex:
#include <iostream>
#include <string>
#include <regex>
// Prints every "http(s)://..." prefix found in a sample sentence, one per
// line, by iterating over all non-overlapping regex matches.
int main()
{
    const std::string text(
        "The link of this question: https://stackoverflow.com/questions/3073753/how-can-i-extract-the-domain-from-a-url "
        "Other urls are https://www.google.com, facebook.com. https://my_site.online.com:1234");
    const std::regex url_prefix("https?:\\/\\/(www\\.)?[-a-zA-Z0-9#:%._\\+~#=]{1,256}");

    const std::sregex_iterator no_more_matches;
    for (std::sregex_iterator hit(text.begin(), text.end(), url_prefix);
         hit != no_more_matches; ++hit)
    {
        std::cout << hit->str() << '\n';
    }
}

in Qt, you can use QUrl:
// Parse the address with QUrl and log only its host component.
const QUrl parsedUrl(QString("https://somedomain.com/index/of/somepage/blah/blah"));
qDebug() << "qu.host " << parsedUrl.host();
it will give you : somedomain.com

Related

wcstombs_s is undefined (gcc, M1 Mac)

I'm trying to make my own programming language and a compiler for it. While creating the lexical analyzer I ran into trouble with this function: wcstombs_s is reported as undefined, even though I included stdlib.h and stdio.h. The platform I use is an M1 Mac with VS Code and gcc.
There is code:
#define _CRT_SECURE_NO_WARNINGS
#include "Log.h"
#include <cstdarg>
#include <ctime>
namespace Log
{
    // Converts a wide string to a multibyte string using the current C locale.
    // Returns an empty string when `text` is null or contains a character the
    // locale cannot represent.
    static std::string narrow(const wchar_t *text)
    {
        if (!text)
            return std::string();

        // First pass: ask how many bytes are needed (a wide character may take
        // more than one byte, so wcslen() is not a valid size).
        std::mbstate_t state = std::mbstate_t();
        const wchar_t *cursor = text;
        const std::size_t needed = std::wcsrtombs(nullptr, &cursor, 0, &state);
        if (needed == static_cast<std::size_t>(-1))
            return std::string();

        std::string out(needed, '\0');
        if (needed != 0)
        {
            state = std::mbstate_t();
            cursor = text;
            std::wcsrtombs(&out[0], &cursor, needed, &state);
        }
        return out;
    }

    // Opens `logfile` for writing and returns a LOG holding the file name and
    // the open stream.
    //
    // On failure (allocation or open) the error is reported through
    // Error::ErrorDisplay and a LOG with a null stream is returned; the other
    // functions in this namespace ignore such a LOG. Previously the failure
    // path fell off the end of the function without returning a value and
    // leaked the stream.
    LOG getlog(wchar_t logfile[])
    {
        LOG struc{};
        try
        {
            struc.stream = new (std::nothrow) std::ofstream;
            if (!struc.stream)
                throw ERROR_THROW(112);
#ifdef _MSC_VER
            // open(const wchar_t*) is a Microsoft extension.
            struc.stream->open(logfile);
#else
            struc.stream->open(narrow(logfile));
#endif
            if (!struc.stream->is_open())
                throw ERROR_THROW(112);
            // Bounded copy: struc.logfile holds PARM_MAX_SIZE characters.
            std::wcsncpy(struc.logfile, logfile, PARM_MAX_SIZE - 1);
            struc.logfile[PARM_MAX_SIZE - 1] = L'\0';
            return struc;
        }
        catch (Error::ERROR e)
        {
            Error::ErrorDisplay(e);
        }
        delete struc.stream;
        struc.stream = nullptr;
        return struc;
    }

    // Writes each narrow string argument to the log, in order.
    // The argument list must end with an empty string ("").
    //
    // The arguments are read with va_list; stepping a pointer from &c is
    // undefined behaviour and does not work on ABIs that pass arguments in
    // registers.
    void WriteLine(LOG log, char *c, ...)
    {
        if (!log.stream)
            return;
        va_list args;
        va_start(args, c);
        for (const char *part = c; part && *part; part = va_arg(args, const char *))
        {
            *log.stream << part;
        }
        va_end(args);
    }

    // Writes each wide string argument to the log, converted to multibyte
    // text. The argument list must end with an empty string (L"").
    void WriteLine(LOG log, wchar_t *c, ...)
    {
        if (!log.stream)
            return;
        va_list args;
        va_start(args, c);
        for (const wchar_t *part = c; part && *part; part = va_arg(args, const wchar_t *))
        {
            // narrow() replaces the MSVC-only wcstombs_s and sizes the buffer
            // in bytes rather than in wide characters.
            *log.stream << narrow(part);
        }
        va_end(args);
    }

    // Writes the protocol header followed by the current local time.
    void WriteLog(LOG log)
    {
        if (!log.stream)
            return;
        std::string line = "----Протокол------ ";
        const time_t now = time(NULL);
        // std::ctime is the portable counterpart of ctime_s; it returns a
        // pointer to a shared static buffer, so it is not thread-safe.
        // Appending the C string also stops before the terminating NUL, which
        // the old fixed 26-iteration loop copied into the log.
        const char *stamp = std::ctime(&now);
        if (stamp)
            line += stamp;
        *log.stream << line;
    }

    // Writes the -in, -out and -log parameter values.
    void WriteParm(LOG log, Parm::PARM parm)
    {
        if (!log.stream)
            return;
        // narrow() always yields a terminated string; wcstombs into a fixed
        // buffer left it unterminated when full and unwritten on error.
        *log.stream << "---- Параметры ---- \n-in: " << narrow(parm.in)
                    << "\n-out: " << narrow(parm.out)
                    << "\n-log: " << narrow(parm.log);
    }

    // Writes the input statistics; negative counters are printed as 0.
    void WriteIn(LOG log, In::IN in)
    {
        if (!log.stream)
            return;
        *log.stream << "\n----Исходные данные------\nКол-во символов: " << (in.size < 0 ? 0 : in.size) << std::endl
                    << "Проигнорировано: " << (in.ignor < 0 ? 0 : in.ignor) << std::endl
                    << "Кол-во строк: " << (in.lines < 0 ? 0 : in.lines) << std::endl;
    }

    // Writes one error record: id, message, line and column.
    void WriteError(LOG log, Error::ERROR error)
    {
        if (!log.stream)
            return;
        *log.stream << "Ошибка " << error.id << ": " << error.message << ", Строка " << error.inext.line << ", Позиция " << error.inext.col << std::endl;
    }

    // Closes the log file and destroys the stream.
    // `log` is passed by value, so the caller's copy still holds the deleted
    // pointer afterwards and must not be used again.
    void Close(LOG log)
    {
        if (!log.stream)
            return;
        log.stream->close();
        delete log.stream;
    }
}
And header file:
#pragma once
#include <fstream>
#include <iostream>
#include <cwchar>
#include <string>
#include <stdlib.h>
#include <stdio.h>
#include "In.h"
#include "Parm.h"
#include "Error.h"
// Logging interface: a LOG value plus free functions that write to it.
namespace Log
{
// A log: the name of the log file and the stream writing to it.
struct LOG
{
wchar_t logfile[PARM_MAX_SIZE]; // log file name
std::ofstream *stream; // output stream; allocated in getlog(), deleted in Close()
};
// LOG with an empty file name and a null stream.
static const LOG INITLOG{L"", NULL};
// Creates the stream, opens `logfile` and returns the resulting LOG.
LOG getlog(wchar_t logfile[]);
// Writes a variable number of narrow strings to the log; the definition
// stops at the first empty string.
void WriteLine(LOG log, char *c, ...);
// Same for wide strings, which are converted to multibyte text first.
void WriteLine(LOG log, wchar_t *c, ...);
// Writes the protocol header line with the current time.
void WriteLog(LOG log);
// Writes the in/out/log values of `parm`.
void WriteParm(LOG log, Parm::PARM parm);
// Writes the size, ignor and lines counters of `in`.
void WriteIn(LOG log, In::IN in);
// Writes the id, message, line and column of `error`.
void WriteError(LOG log, Error::ERROR error);
// Closes the stream and deletes it.
void Close(LOG log);
}
I tried to change include path and change versions of c++, std and compilers

Segmentation fault with client C++ program

I am having some difficulty with finding out how to correct the segmentation fault.
client.cpp
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <arpa/inet.h>
using namespace std;
// Resolves `hostname`, opens a TCP socket and connects it to `portnum`.
//
// hostname  host to connect to; taken as const char* so callers can pass
//           std::string::c_str() directly (char* arguments still convert).
// portnum   TCP port in host byte order.
//
// Returns the connected socket descriptor, or -1 on failure. errno is set to
// ECONNREFUSED when the name cannot be resolved and to EAFNOSUPPORT when the
// resolved address is not IPv4; otherwise it is whatever socket()/connect()
// left.
int call_socket(const char *hostname, unsigned short portnum)
{
    struct hostent *hp = gethostbyname(hostname);
    if (hp == NULL)
    {
        errno = ECONNREFUSED;
        return(-1);
    }

    struct sockaddr_in sa;
    memset(&sa, 0, sizeof(sa));

    // sockaddr_in can only hold an IPv4 address; copying a longer h_length
    // into sin_addr would overflow it.
    if (hp->h_addrtype != AF_INET || hp->h_length > (int)sizeof(sa.sin_addr))
    {
        errno = EAFNOSUPPORT;
        return(-1);
    }
    memcpy((char*) &sa.sin_addr, hp->h_addr, hp->h_length);
    sa.sin_family = hp->h_addrtype;
    sa.sin_port = htons((u_short)portnum);

    const int s = socket(hp->h_addrtype, SOCK_STREAM, 0);
    if (s < 0)
    {
        return(-1);
    }
    if (connect(s, (struct sockaddr*)&sa, sizeof (sa)) < 0)
    {
        close(s);
        return(-1);
    }
    return(s);
}
int main(int argc, char* argv[])
{
int n;
int s;
int b;
char username[256];
string hostname;
string usern;
string terminated = "Server Terminated";
string doesnotexist = "does not exist.";
hostname = "localhost";
char *hostn[hostname.size()+1];
strcpy(*hostn, hostname.c_str());
cout << "Enter a server port number: " << endl;
cin >> s;
while (s <2000 || s > 65535)
{
cout << "Server port number must be between 2000 and 65535. Please enter the server port number again." << endl;
cin >> s;
}
if ((b = call_socket(*hostn,s)) < 0)
{
perror("call Socket");
exit(1);
}
cout << "Enter a user name: " << endl;
cin >> usern;
strcpy(username, usern.c_str());
n = write(b, username, strlen(username));
if (n < 0)
{
cout << "Error writing to socket" << endl;
}
bzero(username, 256);
n = read(b,username,255);
if (n < 0)
{
cout << "Error reading from socket" << endl;
}
if (username == terminated)
{
printf("%s\n",username);
}
else if (username == doesnotexist)
{
cout << "The username: " << usern << ", ";
printf("%s\n",username);
}
else
{
cout << "The public key for " << usern << " is: ";
printf("%s\n",username);
}
close(b);
return 0;
}
I tried valgrind and got: Conditional jump or move depends on uninitialised value(s), Use of uninitialised value of size 8, Invalid write of size 8. As I am inexperienced with valgrind, I don't really know how to fix it.
Your problem is almost certainly here:
char *hostn[hostname.size()+1];
strcpy(*hostn, hostname.c_str());
This has several issues:
hostn is a variable-length-array, which is not a standard part of C++
hostn is an array of char*, not an array of char
The pointers in hostn are uninitialized, and thus don't point anywhere useful.
You pass the first element of hostn to strcpy as the destination
Since strcpy will attempt to dereference the pointer passed to it and that pointer is uninitialized, that means your program's behavior is undefined.
Note that all of this could be avoided easily. Just get rid of hostn entirely. It's not serving any purpose beyond what hostname.c_str() already serves. Simply change call_socket to accept a const char* instead of a char*, and you can call it directly using hostname.c_str() (i.e. call_socket(hostname.c_str(), s))

C++ regex program not working as expected on Raspberry Pi

I'm having a confusing situation.
I wrote a program in C++11 which searches for timestamps within an email file and replaces them with ones that are x hours older. It receives the number of hours via localhost UDP packets.
If I compile (using CLion and the c++ compiler) and run my program on my x86_64 Ubuntu machine, everything works just fine. But when I compile (tested with the c++ and g++ compilers) and run it on my Raspberry Pi (running Raspbian), it doesn't find any regex matches to alter in the email file.
It doesn't work when I pass an email file as a parameter on the Raspberry Pi, whilst on my Ubuntu machine it worked fine. I used the same email file on both machines.
Here's the significant code (all of main.cpp)
#include <iostream>
#include <cstdio>
#include <cstring>
#include <regex>
#include <fstream>
#include <vector>
#include <exception>
#include <string>
#include <ctime>
#include <iomanip>
#include <sstream>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <stdexcept>
#include<arpa/inet.h>
#include "func.h"
using namespace std;
int main(int argc, char* argv[]) {
string filename = "/SOME/TEST/EMAIL.EML";
string StampPat = "[MTWFS][ouehra][neduit]\\x2C(\\s+|\\S)[0-9]+\\s[A-S][a-z][a-z]((\\s)|(\\r\\s))[0-2][0-9][0-9][0-9]((\\s)|(\\r\\s))[0-2][0-9]\\x3A[0-5][0-9]\\x3A[0-5][0-9]";
regex DatePat(StampPat,regex_constants::ECMAScript);
if (argc == 2)
{
filename=argv[1];
}
string currentline ="Hello world";
string previousline = "";
string memoryForFileLines="";
smatch results;
fstream email_file;
email_file.open(filename,fstream::in | fstream::out);
long long int oldreadpos = 0;
long long int readpos = 0;
long long int newWritePos = 0;
int matches = 0;
int setbackHours = 0;
bool previousmatch = false;
string stimestamp;
cout << stimestamp << "\n";
int sockfd = socket(AF_INET,SOCK_DGRAM,0);
struct sockaddr_in serv, client;
serv.sin_family = AF_INET;
serv.sin_port = htons(1337);
serv.sin_addr.s_addr = inet_addr("127.0.0.1");
socklen_t l = sizeof(client);
socklen_t m = sizeof(serv);
char buffer[10] = "REQTO";
char buffer2[5]="0000";
int sent = sendto(sockfd,buffer,5,0,(struct sockaddr *)&serv,m);
recvfrom(sockfd,buffer,10,0,(struct sockaddr *)&client,&l);
char *ptr;
if (strncmp(buffer,"RCVTO",5) == 0)
{
cout << "RECEIVED OFFSET INFO\n";
char *endOfBuffer = &buffer[10];
setbackHours = (int)strtol(buffer+6,&endOfBuffer,10);
}
while (getline(email_file,currentline) && currentline.find("Content-Type"))
{
//tm dateMod;
memoryForFileLines = "";
memoryForFileLines.append(currentline);
bool match = regex_search(memoryForFileLines,results,DatePat,regex_constants::match_any);
if (match) {
cout << "Found on position " << results.position(0) << " " << memoryForFileLines << "\n";
newWritePos = readpos + results.position(0);
email_file.seekp(newWritePos);
stimestamp = modTimestamp(setbackHours,results.str(),true, false);
cout <<"new timestamp: "<< stimestamp << std::endl;
email_file << stimestamp;
email_file.seekg(readpos+currentline.length());
matches++;
previousmatch = true;
}
else if (!previousmatch)
{
memoryForFileLines = "";
memoryForFileLines.append(previousline);
memoryForFileLines.append(currentline);
match = regex_search(memoryForFileLines,results,DatePat);
if(match)
{
cout << "Found on position " << results.position(0) << " " << memoryForFileLines << "\n";
newWritePos = oldreadpos + results.position(0);
email_file.seekp(newWritePos);
unsigned long oldsize = results.str().size();
stimestamp = modTimestamp(setbackHours,results.str(),true,true);
cout <<"new timestamp: "<< stimestamp << std::endl;
email_file << stimestamp;
matches++;
}
} else
{
previousmatch=false;
}
previousline="";
previousline.append(currentline);
oldreadpos = readpos;
readpos = email_file.tellg();
}
cout << matches << std::endl;
email_file.close();
sent = sendto(sockfd,"MAILD",5,0,(struct sockaddr *)&serv,m);
return 0;
}

Target address reversed

I used to code in Python. Now I'm trying C++. When I run the program, the target address appears reversed (checked with Wireshark), even if I use htonl. In Python this same program worked fine.
Follow the code. At the bottom I printed the result.
I'm using Ubuntu 12.04LTS and g++ (Ubuntu/Linaro 4.6.3).
//UdpClient.cpp
#include <iostream>
#include <string>
#include <sstream>
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
#include <cstdlib>
#include <arpa/inet.h>
#include <typeinfo>
using namespace std;
int main(int argc, char* argv[])
{
int s, p, rb,rs;
int bytesend;
char buf[1024];
int len;
char ent[16];
char Porta[5];
unsigned long EndServ;
struct sockaddr_in UdpServer, UdpClient;
int UdpServerLen = sizeof(UdpServer);
//do text
string msg("The quick brown fox jumps over the lazy dog\n");
len = msg.copy(buf, msg.size(), 0);
buf[len] = '\0';
//do socket
s = socket(AF_INET, SOCK_DGRAM, 0);
if (s == -1){
cout << "No socket done\n";
}
else {
cout << "Socket done\n";
}
//populate UdpClient struct
UdpClient.sin_family = AF_INET;
UdpClient.sin_addr.s_addr = INADDR_ANY;
UdpClient.sin_port = 0;
//populate UdpServer struct
UdpServer.sin_family = AF_INET;
UdpServer.sin_addr.s_addr = inet_addr(argv[1]);
//check if addres is correct
cout << "ServerAddress: " << hex << UdpServer.sin_addr.s_addr << endl;
UdpServer.sin_port = htons(atoi(argv[2]));
//bind socket
rb = bind(s, (struct sockaddr *)&UdpClient, sizeof(UdpClient));
if (rb == 0){
cout << "Bind OK!\n";
}
else {
cout << "Bind NOK!!!\n";
close(s);
exit(1);
}
//send text to Server
cout << "UdpServSiz: " << sizeof(UdpServer) << endl;
rs = sendto(s, buf, 1024, 0, (struct sockaddr *)&UdpServer, sizeof(UdpServer));
if (rs == -1){
cout << "Message NOT sent!!!\n";
}
else {
cout << "Message SENT!!!\n";
}
close(s);
return 0;
}
/*
edison#edison-AO532h:~/CmmPGMs$ ./UdpClient 127.0.0.1 6789
Socket done
ServerAddress: 100007f (using htonl or not!!)
Bind OK!
Message SENT!!!
edison#edison-AO532h:~/CmmPGMs$
*/
Sounds like you're on ARM (Linaro)? In which case the endianness of the processor matches network order, so htonl and ntohl basically do nothing.

Boost asio: Send OpenCV IplImage from Ubuntu-Server to Win7-Client

I try to transmit an OpenCV IplImage from a Server (Ubuntu x64) to a Client (Win7 x64) using the boost asio library.
The following code works fine if both (Client and Server) are on the same operating system. But when the server is on Ubuntu and the client on Win7 it doesn't work. The image header is correct, but something with the image data is wrong.
I think this is because of the different bit-order between the two OS. Is it? How can I resolve this problem?
And second: The transmission with this code is very slow.
How can I improve the speed?
Client:
#define _WIN32_WINNT 0x0601
#include <iostream>
#include <sstream>
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
using boost::asio::ip::tcp;
using namespace std;
// Splits a serialized image message into its header fields and pixel data.
//
// str    whole message: eight delimiter-separated header fields, one
//        delimiter, then the raw image bytes
// delim  set of delimiter characters
// parts  receives the header fields, appended in order
//
// Returns a new[]-allocated buffer of parts[6] (the image size) bytes that
// the caller must delete[], or NULL when fewer than eight fields are present
// or the size is negative. If the message holds fewer data bytes than the
// header announces, the rest of the buffer is zero-filled; previously that
// case read past the end of the source string.
char* splitImage(const std::string& str, const std::string& delim, std::vector<std::string>& parts) {
    const std::size_t kHeaderFields = 8;
    std::size_t start = 0;
    std::size_t end = 0;
    for (std::size_t field = 0; end < str.size() && field < kHeaderFields; ++field) {
        start = end;
        while (start < str.size() && delim.find(str[start]) != std::string::npos) {
            ++start; // skip leading delimiters
        }
        end = start;
        while (end < str.size() && delim.find(str[end]) == std::string::npos) {
            ++end; // advance to the end of the field
        }
        if (end != start) { // ignore zero-length fields
            parts.push_back(std::string(str, start, end - start));
        }
    }

    // parts[6] was read without checking that the header is complete.
    if (parts.size() < kHeaderFields) {
        return NULL;
    }
    const int size = std::atoi(parts[6].c_str());
    if (size < 0) {
        return NULL;
    }

    char* imgdata = new char[size](); // value-initialised: all zero
    const std::size_t dataBegin = end + 1; // skip the delimiter after the last field
    if (dataBegin < str.size()) {
        const std::size_t available = str.size() - dataBegin;
        const std::size_t wanted = static_cast<std::size_t>(size);
        str.copy(imgdata, wanted < available ? wanted : available, dataBegin);
    }
    return imgdata;
}
int main(int argc, char* argv[])
{
string header;
try
{
boost::asio::io_service io_service;
tcp::resolver resolver(io_service);
tcp::resolver::query query("localhost", "1234");
tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
tcp::resolver::iterator end;
tcp::socket socket(io_service);
boost::system::error_code error = boost::asio::error::host_not_found;
while (error && endpoint_iterator != end)
{
socket.close();
socket.connect(*endpoint_iterator++, error);
}
if (error)
throw boost::system::system_error(error);
stringstream ss;
cout << "reading";
for (;;)
{
boost::array<char, 1024> buf;
boost::system::error_code error;
size_t len = socket.read_some(boost::asio::buffer(buf), error);
cout << ".";
if (error == boost::asio::error::eof)
break; // Connection closed cleanly by peer.
else if (error)
throw boost::system::system_error(error); // Some other error.
ss.write(buf.data(), len);
header = ss.str();
}
cout << "done: data size: "<< header.size() << endl;
}
catch (std::exception& e)
{
std::cerr << e.what() << std::endl;
return 1;
}
vector<string> parts;
char* imgdata = splitImage(header,"#",parts);
IplImage img2;
img2.nSize = atoi(parts[0].c_str());
img2.ID = 0;
img2.nChannels = atoi(parts[1].c_str());
img2.depth = atoi(parts[2].c_str());
img2.dataOrder = atoi(parts[3].c_str());;
img2.height = atoi(parts[4].c_str());
img2.width = atoi(parts[5].c_str());
img2.roi = NULL;
img2.maskROI = NULL;
img2.imageId = NULL;
img2.tileInfo = NULL;
img2.imageSize = atoi(parts[6].c_str());
img2.widthStep = atoi(parts[7].c_str());
img2.imageData = imgdata;
cvNamedWindow("Image:",1);
cvShowImage("Image:",&img2);
cvWaitKey();
cvDestroyWindow("Image:");
delete[] imgdata;
return 0;
}
Server:
#define _WIN32_WINNT 0x0601
#define WIN32_LEAN_AND_MEAN
#include <string>
#include <iostream>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
#include <boost/asio.hpp>
#define DELIMITER "#"
using boost::asio::ip::tcp;
using namespace std;
IplImage *img;
// Builds the textual header for the global image `img`: nSize, nChannels,
// depth, dataOrder, height, width, imageSize and widthStep, each followed by
// DELIMITER.
string serializeImageHeader(){
    stringstream header;
    header << img->nSize     << DELIMITER
           << img->nChannels << DELIMITER
           << img->depth     << DELIMITER
           << img->dataOrder << DELIMITER
           << img->height    << DELIMITER
           << img->width     << DELIMITER
           << img->imageSize << DELIMITER
           << img->widthStep << DELIMITER;
    return header.str();
}
int main()
{
try
{
boost::asio::io_service io_service;
tcp::acceptor acceptor(io_service, tcp::endpoint(tcp::v4(), 1234));
img = cvLoadImage("Test.bmp");
cout << "Server is running" << endl;
for (;;)
{
tcp::socket socket(io_service);
acceptor.accept(socket);
cout << "socket accepted" << endl;
string message = serializeImageHeader().append(img->imageData, img->imageSize);
boost::system::error_code ignored_error;
boost::asio::write(socket, boost::asio::buffer(message),
boost::asio::transfer_all(), ignored_error);
cout << "sent: size: "<< message.size() << endl;
}
}
catch (std::exception& e)
{
std::cerr << e.what() << std::endl;
}
cvReleaseImage(&img);
return 0;
}
You might try a library like Boost.Serialization rather than rolling your own version to see if it makes a difference:
7) Data Portability - Streams of bytes created on one platform should be
readable on any other.
You should also create a new image on the destination with the values for the image header parameters you copied over, then copy the image data into the new image (ideally a row at a time).
There is no guarantee that OpenCV lays out the image data in the same way; it may have different padding or different row strides.