Boost Log severity level output in upper case - c++

I try to setup a logger with Boost Log V2. The output should look like (file blubb.log):
2019-08-24 23:24:08 - ERROR: Hi from source code.
I'm currently struggling with the uppercase of the severity level. The rest works so far.
The severity level provides a ::to_string() conversion, but I couldn't get it working with severity_type. So I tried to put the data into a stringstream.
logger.cpp (without includes)
namespace logging = boost::log;
namespace src = boost::log::sources;
namespace expr = boost::log::expressions;
namespace sinks = boost::log::sinks;
namespace keywords = boost::log::keywords;
BOOST_LOG_ATTRIBUTE_KEYWORD(att_channel, "Channel", std::string);
typedef src::severity_channel_logger<logging::trivial::severity_level, std::string> severity_channel_logger_t;
typedef sinks::synchronous_sink< sinks::text_ostream_backend > synchronous_sink_t;
static std::map<std::string, severity_channel_logger_t> map_logger;
static std::unordered_set<std::string> profiles = { "blubb", "configreader", "connector", "downloader", "mlog", "output", "provider", "rest" };
static std::unordered_map<std::string, logging::trivial::severity_level> levels = {
{ "trace" , logging::trivial::trace},
{ "debug" , logging::trivial::debug},
{ "info" , logging::trivial::info},
{ "warning" , logging::trivial::warning},
{ "error" , logging::trivial::error},
{ "fatal" , logging::trivial::fatal}
};
/// Creates one severity/channel logger and one file sink for every entry in
/// `profiles`, then calls `logging::add_common_attributes()`.
Logger::Logger()
{
    for (const std::string& profile : profiles)
    {
        map_logger[profile] = severity_channel_logger_t(keywords::channel = profile);
        create_channel_logger(profile);
    }
    logging::add_common_attributes();
}
/// Streams `severity` into a scratch buffer and returns a stream holding the
/// upper-cased, whitespace-stripped characters of that text.
///
/// `toupper` returns an `int`. Inserting that `int` into a stream prints the
/// numeric character code instead of the letter, so each result is converted
/// back to `char` before it is written.
std::stringstream Logger::sev_toupper(logging::trivial::severity_type severity)
{
    std::stringstream in;
    std::stringstream out;
    in << severity;
    char c;
    while (in >> c)
    {
        // Go through unsigned char: toupper is undefined for negative values.
        out << static_cast<char>(toupper(static_cast<unsigned char>(c)));
    }
    return out;
}
void Logger::logmsg(std::string channel, const std::string level, const std::string message)
{
// fallback channel, if not in list
if (profiles.find(channel) == profiles.end())
channel = "rest";
BOOST_LOG_SEV(map_logger[channel], levels[level]) << message;
}
// Adds a file log for `channel`: the file is named "<channel>.log", opened in
// append mode, flushed automatically, and filtered on att_channel == channel.
// Each line is formatted as "<timestamp> - <severity>: <message>".
void Logger::create_channel_logger(std::string channel)
{
logging::add_file_log
(
keywords::file_name = channel+".log",
keywords::open_mode = std::ios_base::app,
keywords::filter = att_channel == channel,
keywords::auto_flush = true,
keywords::format =
(
expr::stream
<< expr::format_date_time<boost::posix_time::ptime>("TimeStamp", "%Y-%m-%d %H:%M:%S")
<< " - "
// NOTE(review): sev_toupper() is an ordinary function call, so it runs once
// here while the formatter expression is being built, not once per record.
// The c_str() pointer also refers to a temporary string. Presumably this is
// why the severity comes out as garbage - confirm against the Boost.Log
// formatter documentation.
<< std::right << std::setw(7) << sev_toupper(logging::trivial::severity).str().c_str()
<< std::left << ": " << expr::smessage
)
);
}
main.cpp
int main(int, char*[])
{
Logger * lg = new Logger();
lg->logmsg("blubb", "error", "Hi, from Source Code!");
delete lg;
}
My function sev_toupper() produces garbage. expr::stream seems to handle logging::trivial::severity differently than std::stringstream does.

I would avoid using stringstreams for something simple as a single word. You can use std::transform to run each character of a string through a function that transforms it, like so:
#include <algorithm>
#include <cctype>
/// Returns the textual name of `severity` converted to upper case.
std::string Logger::sev_toupper(logging::trivial::severity_type severity)
{
    std::string text = logging::trivial::to_string(severity);
    // Use a lambda rather than the bare name `toupper`: the bare name is
    // ambiguous once the <locale> overload is visible, and the <cctype>
    // version is undefined for negative char values.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
    return text;
}

Related

Search partial filenames in C++ using boost filesystem

The question is simple: I want to find a file path inside a directory, but I only have part of the filename. Here are the functions I use for this task:
// Appends to `ret` the path of every regular file below `root` (searched
// recursively) whose extension equals `ext`. Does nothing when `root` is
// missing or is not a directory.
void getfiles(const fs::path& root, const string& ext, vector<fs::path>& ret)
{
    if (!(fs::exists(root) && fs::is_directory(root)))
        return;
    const fs::recursive_directory_iterator last;
    for (fs::recursive_directory_iterator entry(root); entry != last; ++entry)
    {
        if (!fs::is_regular_file(*entry))
            continue;
        if (entry->path().extension() == ext)
            ret.push_back(entry->path());
    }
}
// Searches `dir_path` recursively for the first entry whose filename contains
// `file_name` (case-insensitive). On success stores the entry's path in
// `path_found` and returns true; otherwise returns false. Every comparison
// result is also written to cerr.
bool find_file(const filesystem::path& dir_path, const filesystem::path file_name, filesystem::path& path_found) {
    const fs::recursive_directory_iterator last;
    const auto name_matches = [file_name](fs::path candidate) {
        cerr<<boost::algorithm::icontains(candidate.filename().native() ,file_name.native())<<endl;
        return boost::algorithm::icontains(candidate.filename().native() ,file_name.native());
    };
    const auto hit = find_if(fs::recursive_directory_iterator(dir_path), last, name_matches);
    if (hit != last) {
        path_found = hit->path();
        return true;
    }
    return false;
}
int main (int argc, char* argv[])
{
vector<fs::path> inputClass ;
fs::path textFiles,datasetPath,imgpath;
textFiles=argv[1];
datasetPath=argv[2];
getfiles(textFiles,".txt",inputClass);
for (int i=0;i<inputClass.size();i++)
{
ifstream lblFile(inputClass[i].string().c_str());
string line;
fs::path classname=inputClass[i].parent_path()/inputClass[i].stem().string();
cerr<<classname.stem()<<endl;
while (getline(lblFile,line))
{
bool find=find_file(datasetPath,line,imgpath);
if (find)
{
while(!fs::exists(classname))
fs::create_directories (classname);
fs::copy(imgpath,classname/imgpath.filename());
cerr<<"Found\n";
}
else
cerr<<"Not Found \n";
}
lblFile.close();
}
}
Console out:
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
0
Not Found
But when I set the search string manually, it works fine! I tried other ways of searching the string, such as std::find, but every method fails to find the substring. There seems to be a problem with the input string (line), yet when I printed all of its characters I saw no special characters or anything unusual.
If I set the search string manually, it works as desired:
string search="E9408000EC0";
cerr<<e.filename().native()<<"||"<<search<<endl;
cerr<<boost::algorithm::icontains(e.filename().native() ,search)<<endl;
the results for above change is like
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
1
Found
I cannot reproduce this.
The only hunch I have is that on your platform, perhaps the string() accessor is not returning the plain string, but e.g. the quoted path. That would break the search. Consider using the native() accessor instead.
(In fact, since file_name is NOT a path but a string pattern, I suggest passing the argument as std::string_view or similar instead.)
Live On Coliru
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
namespace fs = boost::filesystem;
// Copies to `out` every entry found recursively under `dir_path` whose
// filename contains `file_name`, compared case-insensitively.
template <typename Out>
void find_file(const fs::path& dir_path, const fs::path file_name, Out out) {
    const auto name_contains = [file_name](fs::path candidate) {
        return boost::algorithm::icontains(candidate.filename().native(),
                                           file_name.native());
    };
    fs::recursive_directory_iterator first(dir_path);
    fs::recursive_directory_iterator last;
    std::copy_if(first, last, out, name_contains);
}
int main() {
fs::path d = "a/b/c/e";
fs::create_directories(d);
{
std::ofstream ofs(d / "1200E9408000EC0.jpeg");
}
std::cout << fs::path("000EC0").native() << "\n";
std::vector<fs::path> found;
find_file(".", "000EC0", back_inserter(found));
for (auto &f : found)
{
std::cout << "Found: " << f << "\n";
}
}
Prints
000EC0
Found: "./a/b/c/e/1200E9408000EC0.jpeg"
UPDATE: Code Review
In response to the updated question, I came up with a somewhat improved tester that works with boost::filesystem and with std::filesystem just the same.
There are many small improvements (removing repetition, explicit conversions, using optional to return optional matches, etc.).
I also added a whitespace trim to avoid choking on extraneous whitespace in the input lines:
Live On Coliru (-DUSE_BOOST_FS)
Live On Coliru (std library)
#include <boost/algorithm/string.hpp>
#include <fstream>
#include <iostream>
using boost::algorithm::icontains;
using boost::algorithm::trim;
#if defined(USE_BOOST_FS)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
using boost::system::error_code;
#else
#include <filesystem>
namespace fs = std::filesystem;
using std::error_code;
#endif
// Appends to `ret` every regular file below `root` (recursive) whose
// extension equals `ext`; returns immediately if `root` is not an existing
// directory.
void getfiles(
    const fs::path& root, const std::string& ext, std::vector<fs::path>& ret)
{
    if (!(exists(root) && is_directory(root)))
        return;
    fs::recursive_directory_iterator cursor(root);
    const fs::recursive_directory_iterator last;
    while (cursor != last) {
        const bool wanted =
            is_regular_file(*cursor) && cursor->path().extension() == ext;
        if (wanted)
            ret.push_back(cursor->path());
        ++cursor;
    }
}
// Returns the path of the first entry under `dir_path` (recursive) whose
// filename contains `partial`, ignoring case, or std::nullopt when no entry
// matches. Every tested entry is reported on std::cerr.
std::optional<fs::path> find_file(const fs::path& dir_path, fs::path partial)
{
    const fs::recursive_directory_iterator last;
    const auto hit = std::find_if(
        fs::recursive_directory_iterator(dir_path), last,
        [partial](fs::path candidate) {
            auto needle = partial.native();
            auto matches = icontains(candidate.filename().native(), needle);
            std::cerr << candidate << " Matches: " << std::boolalpha << matches
                      << std::endl;
            return matches;
        });
    if (hit == last)
        return std::nullopt;
    return std::make_optional(hit->path());
}
// Collects every ".txt" file found below `textFiles` and returns the list.
auto readInputClass(fs::path const& textFiles)
{
    std::vector<fs::path> labelFiles;
    getfiles(textFiles, ".txt", labelFiles);
    return labelFiles;
}
// Usage: <program> <text-files-dir> <dataset-dir>
// Each line of each ".txt" file is a pattern. The first dataset file whose
// name contains the pattern is copied into a directory named after the text
// file.
int main(int argc, char** argv)
{
    std::vector<std::string> const args(argv, argv + argc);
    auto const textFiles = readInputClass(args.at(1));
    std::string const datasetPath = args.at(2);
    for (fs::path classname : textFiles) {
        // Open the text file before its extension is stripped below.
        std::ifstream lblFile(classname);
        // The base name without extension is the output directory.
        classname.replace_extension();
        if (!fs::exists(classname) && fs::create_directories(classname))
            std::cerr << classname << " created" << std::endl;
        std::string line;
        while (getline(lblFile, line)) {
            trim(line);
            auto found = find_file(datasetPath, line);
            if (!found) {
                std::cerr << "Not Found \n";
                continue;
            }
            auto dest = classname / found->filename();
            error_code ec;
            copy(*found, dest, ec);
            std::cerr << dest << " (" << ec.message() << ")\n";
        }
    }
}
Testing from scratch with
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo 'E9408000EC0 ' > textfiles/490.txt
Running
./a.out textfiles/ dataset/
Prints
"textfiles/490" created
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (Success)
Or on subsequent run
"dataset/fsdfdsfdfsf.jpeg" Matches: false
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (File exists)
BONUS
Doing some more diagnostics and avoiding repeatedly traversing the filesystem for each pattern. The main program is now:
Live On Coliru
// Usage: <program> <text-files-dir> <dataset-dir>
// Loads all pattern-to-class mappings first, then walks the dataset once.
int main(int argc, char** argv)
{
    std::vector<std::string> const args(argv, argv + argc);
    Paths const classes = getfiles(args.at(1), ".txt");
    Mappings map = readClassMappings(classes);
    std::cout << "Procesing " << map.size() << " patterns from ";
    std::cout << classes.size() << " classes" << std::endl;
    processDatasetDir(args.at(2), map);
}
And the remaining functions are implemented as:
// Be smart about case-insensitive patterns: a Pattern is a std::string whose
// ordering ignores letter case, so ordered containers treat "abc" and "ABC"
// as the same key.
struct Pattern : std::string {
    using std::string::string;
    using std::string::operator=;
#ifdef __cpp_lib_three_way_comparison
    // Case-insensitive three-way comparison.
    std::weak_ordering operator<=>(Pattern const& other) const {
        if (boost::ilexicographical_compare(*this, other)) {
            return std::weak_ordering::less;
        }
        if (boost::ilexicographical_compare(other, *this)) {
            // `other` sorts before *this, so *this is the greater one.
            // Returning `less` here as well would break the strict weak
            // ordering that std::set relies on.
            return std::weak_ordering::greater;
        }
        return std::weak_ordering::equivalent;
    }
#else
    // Case-insensitive "less than" for pre-C++20 library implementations.
    bool operator<(Pattern const& other) const {
        return boost::ilexicographical_compare(*this, other);
    }
#endif
};
using Paths = std::vector<fs::path>;
using Mapping = std::pair<Pattern, fs::path>;
using Patterns = std::set<Pattern>;
using Mappings = std::set<Mapping>;
// Reads every file in `classes` line by line. Each trimmed line becomes a
// Pattern mapped to the file's path without its extension. A (pattern, path)
// pair that is already present is reported on std::cerr and skipped.
Mappings readClassMappings(Paths const& classes)
{
    Mappings result;
    for (fs::path location : classes) {
        std::ifstream lblFile(location);
        location.replace_extension();
        Pattern pattern;
        while (getline(lblFile, pattern)) {
            trim(pattern);
            auto const inserted = result.emplace(pattern, location);
            if (inserted.second)
                continue;
            std::cerr << "WARNING: " << std::quoted(pattern)
                      << " duplicates " << std::quoted(inserted.first->first)
                      << std::endl;
        }
    }
    return result;
}
// Walks `datasetPath` recursively once. Every regular file whose name
// contains a pattern (case-insensitive) is copied into that pattern's
// location, which is created on demand. Prints a summary to std::cout and
// returns the number of successful copies.
size_t processDatasetDir(const fs::path& datasetPath, Mappings const& patterns)
{
    size_t copied = 0, failed = 0;
    Patterns matched;
    fs::recursive_directory_iterator const last;
    for (fs::recursive_directory_iterator cur(datasetPath); cur != last; ++cur) {
        if (!cur->is_regular_file())
            continue;
        fs::path const& entry = *cur;
        for (auto const& mapping : patterns) {
            Pattern const& pattern = mapping.first;
            fs::path const& location = mapping.second;
            if (!icontains(cur->path().filename().native(), pattern))
                continue;
            matched.emplace(pattern);
            if (!exists(location) && fs::create_directories(location))
                std::cerr << location << " created" << std::endl;
            auto dest = location / entry.filename();
            error_code ec;
            copy(entry, dest, ec);
            std::cerr << dest << " (" << ec.message() << ") from "
                      << std::quoted(pattern) << "\n";
            if (ec)
                failed += 1;
            else
                copied += 1;
        }
    }
    std::cout << "Copied:" << copied
              << ", missing:" << patterns.size() - matched.size()
              << ", failed: " << failed << std::endl;
    return copied;
}
With some more "random" test data:
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo .jPeg > textfiles/all_of_them.txt
echo $'E9408000EC0 \n e9408000ec0\nE9408\nbOgUs' > textfiles/490.txt
Running as
./a.out textfiles/ dataset/
Prints:
WARNING: "e9408000ec0" duplicates "E9408000EC0"
Procesing 4 patterns from 2 classes
"textfiles/all_of_them" created
"textfiles/all_of_them/1200E9408000EC0.jpeg" (Success) from ".jPeg"
"textfiles/490" created
"textfiles/490/1200E9408000EC0.jpeg" (Success) from "E9408"
"textfiles/490/1200E9408000EC0.jpeg" (File exists) from "E9408000EC0"
"textfiles/all_of_them/vfv343434.jpeg" (Success) from ".jPeg"
"textfiles/all_of_them/fsdfdsfdfsf.jpeg" (Success) from ".jPeg"
Copied:4, missing:1, failed: 1

X3: Linker Error (unresolved external symbol "parse_rule") on nonterminal parser

First of all I am using MSVC 2017 (latest version).
Here is my code for the nonterminal parser:
player.hpp
namespace parse
{
    namespace impl
    {
        namespace x3 = boost::spirit::x3;
        // Give every rule its own tag type. Sharing a single tag between two
        // rules makes them indistinguishable wherever X3 keys on the rule ID
        // alone.
        using player_type = x3::rule<struct player_tag, PlayerIterator>;
        using player_vector_type = x3::rule<struct player_vector_tag, std::vector<PlayerIterator>>;
        // Declares the parse_rule overloads; the matching definitions and
        // instantiations are expected in a separate translation unit.
        BOOST_SPIRIT_DECLARE(player_type);
        BOOST_SPIRIT_DECLARE(player_vector_type);
    } //impl
    // Accessors for the two rule objects.
    impl::player_type player();
    impl::player_vector_type player_vector();
} //parse
player.cpp
namespace parse
{
    namespace impl
    {
        const player_type player = "player";
        const player_vector_type player_vector = "player_vector";
        // Forwards to whichever PlayerManager::find overload fits the key.
        auto find_player = [](auto const &key) { return PlayerManager::find(key); };
        // Semantic action for `player`: visit the parsed variant instead of
        // switching on which() and calling boost::get by hand.
        auto player_find = [](auto &ctx)
        {
            return x3::_val(ctx) = boost::apply_visitor(find_player, x3::_attr(ctx));
        };
        // Semantic action for `player_vector`.
        auto player_vector_find = [](auto &ctx)
        {
            return x3::_val(ctx) = PlayerManager::vector_find(x3::_attr(ctx));
        };
        // One whitespace-delimited word. A bare +x3::char_ under a skipper
        // would consume all remaining input and leave nothing for whatever
        // follows the rule; lexeme stops the word at the first blank.
        auto const name = x3::lexeme[+x3::graph];
        auto const player_def = (x3::int_ | name)[player_find];
        auto const player_vector_def = name[player_vector_find];
        BOOST_SPIRIT_DEFINE(player);
        BOOST_SPIRIT_DEFINE(player_vector);
        // NOTE(review): iterator_type and context_type are not visible here.
        // They must match the iterator and skipper used at the call site
        // exactly, otherwise the linker reports unresolved parse_rule symbols.
        BOOST_SPIRIT_INSTANTIATE(player_type, iterator_type, context_type);
        BOOST_SPIRIT_INSTANTIATE(player_vector_type, iterator_type, context_type);
    } //impl
    parse::impl::player_type player() { return impl::player; }
    parse::impl::player_vector_type player_vector() { return impl::player_vector; }
}//parse
I get linker LNK2019 errors about "unresolved external symbols referenced":
Pastebin.com link with the errors
Any ideas about them?
Thanks in advance.
EDIT:
That's how I call it in my source file:
void test(std::string &params)
{
std::tuple<PlayerIterator, std::vector<PlayerIterator>, std::string> tuple;
if (!x3::phrase_parse(params.begin(), params.end(), parse::player()>> parse::player_vector() >> (+x3::char_), x3::space,tuple))
{
std::cout << "Error: Parsing failed" << std::endl;
return;
}
std::cout << "Parsing succeded" << std::endl;
std::cout << "Found player, size of player vector: "<< std::get<1>(tuple).size() << ", also parsed string:" << std::get<2>(tuple);
return;
};
I'm willing to bet $10 that you mismatched the context or iterator types on the instantiations.
E.g. in your test function, the argument is std::string&, hence params.begin() will be std::string::iterator. If you had the iterator_type configured as follows:
using iterator_type = std::string::const_iterator; // very sensible!
you would have unresolved externals because the iterator type doesn't match the one actually required.
Same thing for the context. To match your invocation it needs to be exactly:
using context_type = x3::phrase_parse_context<x3::space_type>::type;
Sadly you didn't show the whole code, so you'll have to check on your own.
Notes
re-using the tag type is recipe for disaster. I don't think it can work. The rule tags are what dispatches the implementation function in the case of separated compilation units. Fix it:
using player_type = x3::rule<struct player_tag, PlayerIterator>;
using player_vector_type = x3::rule<struct player_vector_tag, std::vector<PlayerIterator>>;
copying the rules seems wasteful, consider returning by reference:
impl::player_type const& player();
impl::player_vector_type const& player_vector();
Note: this should be fine w.r.t. static initialization order fiasco
using which() on a variant is an anti-pattern. You can replace
auto player_find = [](auto &ctx) {
auto &attr = x3::_attr(ctx);
if (attr.which() == 0)
return x3::_val(ctx) = PlayerManager::find(boost::get<int>(attr));
return x3::_val(ctx) = PlayerManager::find(boost::get<std::string>(attr));
};
With
auto find = [](auto const& key) { return PlayerManager::find(key); };
auto player_find = [](auto &ctx) {
return x3::_val(ctx) = boost::apply_visitor(find, x3::_attr(ctx));
};
(+x3::char_) always matches all input
(+x3::graph) still matches all input because of the skipper
Instead you wanted a lexeme:
auto const name = x3::lexeme[+x3::graph];
auto const player_def = (x3::int_ | name) [player_find];
auto const player_vector_def = name[ player_vector_find];
May I suggest to write the test function a lot more concisely:
// Parses `params` as "<player> <player_vector> <comment>", skipping
// whitespace, and prints the result to std::cout.
void test(std::string const &params) {
    PlayerIterator player;
    PlayerIterators vec;
    std::string comment;
    auto tuple = std::tie(player, vec, comment);
    // The remainder of the input, taken verbatim.
    auto comment_ = x3::lexeme[+x3::char_];
    if (!phrase_parse(params.cbegin(), params.cend(), parse::player() >> parse::player_vector() >> comment_, x3::space, tuple)) {
        std::cout << "Error: Parsing failed" << std::endl;
        return;
    }
    std::cout << "Parsing succeded" << std::endl;
    std::cout << "Found player, size of player vector: " << vec.size() << "\n";
    std::cout << "Also parsed string: " << std::quoted(comment);
}
Full Demo
See it Live On Wandbox
stuff.h
Contains mockup PlayerManager
#pragma once
#include <string>
#include <vector>
#include <iostream>
// Empty stand-in for the real player handle.
struct PlayerIterator { };
using PlayerIterators = std::vector<PlayerIterator>;

// Mock-up manager: each lookup prints its own signature and returns an empty
// result.
struct PlayerManager {
    static PlayerIterator find(std::string const&) {
        std::cout << __PRETTY_FUNCTION__ << "\n";
        return {};
    }
    static PlayerIterator find(int) {
        std::cout << __PRETTY_FUNCTION__ << "\n";
        return {};
    }
    static PlayerIterators vector_find(std::string const&) {
        std::cout << __PRETTY_FUNCTION__ << "\n";
        return {};
    }
};
test.h
#pragma once
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted.hpp>
#include "stuff.h"
namespace x3 = boost::spirit::x3;
// Public parser interface: declares the two rule types and the accessors that
// return the rule objects.
namespace parse
{
namespace impl
{
// Each rule gets its own tag type (player_tag / player_vector_tag).
using player_type = x3::rule<struct player_tag, PlayerIterator>;
using player_vector_type = x3::rule<struct player_vector_tag, PlayerIterators>;
// Declarations only; the matching BOOST_SPIRIT_DEFINE / BOOST_SPIRIT_INSTANTIATE
// calls are expected in a separate .cpp file.
BOOST_SPIRIT_DECLARE(player_type)
BOOST_SPIRIT_DECLARE(player_vector_type)
} //impl
// Accessors returning the rule objects by const reference.
impl::player_type const& player();
impl::player_vector_type const& player_vector();
} //parse
test.cpp
#include "stuff.h"
#include "test.h"
using iterator_type = std::string::const_iterator;
using context_type = x3::phrase_parse_context<x3::space_type>::type;
// Rule definitions, semantic actions, and explicit instantiations for the
// rules declared in test.h.
namespace parse {
namespace impl {
const player_type player = "player";
const player_vector_type player_vector = "player_vector";
// Forwards to the PlayerManager::find overload that fits the key type.
auto find = [](auto const& key) { return PlayerManager::find(key); } ;
// Semantic actions: store the lookup result as the rule's value.
auto player_find = [](auto &ctx) { return x3::_val(ctx) = boost::apply_visitor(find, x3::_attr(ctx)); } ;
auto player_vector_find = [](auto &ctx) { return x3::_val(ctx) = PlayerManager::vector_find(x3::_attr(ctx)); } ;
// One run of graphic characters, parsed with the skipper disabled.
auto const name = x3::lexeme[+x3::graph];
auto const player_def = (x3::int_ | name) [player_find];
auto const player_vector_def = name[ player_vector_find];
BOOST_SPIRIT_DEFINE(player)
BOOST_SPIRIT_DEFINE(player_vector)
// Instantiated for the iterator_type / context_type aliases declared above
// this namespace; callers must use exactly these types.
BOOST_SPIRIT_INSTANTIATE(player_type, iterator_type, context_type)
BOOST_SPIRIT_INSTANTIATE(player_vector_type, iterator_type, context_type)
} // namespace impl
parse::impl::player_type const& player() { return impl::player; }
parse::impl::player_vector_type const& player_vector() { return impl::player_vector; }
} // namespace parse
main.cpp
#include "stuff.h"
#include "test.h"
#include <iostream>
#include <iomanip>
// Parses `params` as "<player> <player_vector> <comment>", skipping
// whitespace, and prints the result to std::cout.
void test(std::string const &params) {
    PlayerIterator player;
    PlayerIterators vec;
    std::string comment;
    auto tuple = std::tie(player, vec, comment);
    // The remainder of the input, taken verbatim.
    auto comment_ = x3::lexeme[+x3::char_];
    if (!phrase_parse(params.cbegin(), params.cend(), parse::player() >> parse::player_vector() >> comment_, x3::space, tuple)) {
        std::cout << "Error: Parsing failed" << std::endl;
        return;
    }
    std::cout << "Parsing succeded" << std::endl;
    std::cout << "Found player, size of player vector: " << vec.size() << "\n";
    std::cout << "Also parsed string: " << std::quoted(comment);
}
// Runs test() on one sample input: an integer, a word, and trailing text.
int main() {
test("42 someword # bogus trailing comment");
}
Prints:
static PlayerIterator PlayerManager::find(int)
static PlayerIterators PlayerManager::vector_find(const std::string &)
Parsing succeded
Found player, size of player vector: 0
Also parsed string: "# bogus trailing comment"

Boost Log thread-safe is not working

I'm trying to make Log Macro with Boost and my macro is non thread-safe. Here is my sample code:
Initialization
/// Creates a cilog_backend, wraps it in a cilog_async_sink_t, installs the
/// record formatter, and registers the sink with the logging core.
///
/// @param app_name       Passed to the backend as its file-name suffix.
/// @param target         Directory passed to the backend as its target path.
/// @param rotation_size  Size threshold passed to the backend.
/// @param auto_flush     Whether the backend flushes after every record.
/// @return The sink that was registered.
inline boost::shared_ptr<cilog_async_sink_t> init_async_logger(const std::string& app_name,
    const std::string& target = "./log", int64_t rotation_size = 1024 * 1024 * 1024,
    bool auto_flush = true) {
    namespace expr = boost::log::expressions;
    namespace attrs = boost::log::attributes;
    namespace keywords = boost::log::keywords;
    boost::log::add_common_attributes();
    boost::shared_ptr<cilog_backend> backend(
        new cilog_backend(boost::filesystem::path(target), app_name, rotation_size, auto_flush));
    boost::shared_ptr<cilog_async_sink_t> sink(new cilog_async_sink_t(backend));
    sink->set_formatter(
        expr::stream << expr::format_date_time<boost::posix_time::ptime>("TimeStamp", "[%Y-%m-%d_%H:%M:%S.%f] ")
                     << "[" << expr::attr<severity_level, severity_tag>("Severity") << "] "
                     << "[" << expr::attr<attrs::current_process_id::value_type>("ProcessID") << "] "
                     << "[" << expr::attr<attrs::current_thread_id::value_type>("ThreadID") << "] "
                     << expr::smessage);
    boost::log::core::get()->add_sink(sink);
    // The unused locked_backend() local was removed. It was never
    // dereferenced and was destroyed at the end of this function, so it could
    // not protect anything once initialisation had finished.
    return sink;
}
Format
// Sink backend that appends formatted log records to a file and asks for a
// rotation when the date changes, the size limit is reached, or the stream
// goes bad.
// NOTE(review): rotate_file() and generate_filepath() are called below but are
// not defined in this excerpt.
// NOTE(review): synchronized_feeding presumably asks the frontend to serialize
// calls to consume() - confirm against the Boost.Log sink documentation.
class cilog_backend: public boost::log::sinks::basic_formatted_sink_backend<char,
boost::log::sinks::synchronized_feeding> {
private:
bool auto_flush_;
boost::filesystem::ofstream file_;
boost::filesystem::path target_path_;
boost::filesystem::path file_path_;
std::string file_name_suffix_;
uintmax_t rotation_size_;
uintmax_t characters_written_;
boost::gregorian::date current_date_;
public:
// Stores the configuration; the file itself is opened by the first consume().
explicit cilog_backend(boost::filesystem::path const& target_path,
std::string const& file_name_suffix, uintmax_t rotation_size,
bool auto_flush) :
auto_flush_(auto_flush), target_path_(target_path), file_name_suffix_(file_name_suffix),
rotation_size_(rotation_size), characters_written_(0),
current_date_(boost::gregorian::day_clock::local_day()) {
}
// Writes one formatted record followed by '\n'.
void consume(boost::log::record_view const& /*rec*/,
string_type const& formatted_message) {
// Rotate first if the local date differs from the one stored in current_date_.
if (current_date_ != boost::gregorian::day_clock::local_day())
rotate_file();
// Open the file on demand, creating its parent directories.
if (!file_.is_open()) {
file_path_ = generate_filepath();
boost::filesystem::create_directories(file_path_.parent_path());
file_.open(file_path_, std::ofstream::out | std::ofstream::app);
if (!file_.is_open()) return; // failed to open file
// Start the counter from the current put position reported by the stream.
// NOTE(review): tellp() can report failure as -1, which would wrap to a huge
// unsigned value here - confirm whether that case needs handling.
characters_written_ = static_cast<std::streamoff>(file_.tellp());
}
file_.write(formatted_message.data(), static_cast<std::streamsize>(formatted_message.size()));
file_.put('\n');
// +1 accounts for the newline written above.
characters_written_ += formatted_message.size() + 1;
if (auto_flush_)
file_.flush();
// Rotate when the size limit is reached or the stream is in a bad state.
if ((file_.is_open() && (characters_written_ >= rotation_size_)) || (!file_.good()))
rotate_file();
}
};
I found some code for thread-safe on Boost:
cilog_async_sink_t::locked_backend_ptr p = sink->locked_backend();
in my init but not working.
Can anyone advise me?

Read every word in a string C++

I am trying to read every word in a string. I want a string to go in and the first word to come out; then I'll process it, then the second, and so on. But the internet isn't helping me. I know it's probably right under my nose, but I can't figure it out!
// Returns the first whitespace-delimited word of `filecontent`, or an empty
// string when it contains no word.
//
// The input is no longer replaced by a line read from cin, and every path now
// returns a value. Falling off the end of a value-returning function is
// undefined behaviour, which is where the garbage output came from.
std::string lex(std::string filecontent) {
    std::istringstream iss(filecontent);
    std::string word;
    if (iss >> word) {
        return word;
    }
    return std::string();
}
// Loads the text, echoes it, prints what lex() returns for it, then waits for
// a key press via getchar().
int main() {
string data = load_file(); // Returns a string of words
cout << data;
cout << lex(data);
getchar();
}
Right now this works... sort of. It prints out a lot of random gibberish and strange characters. The content of the file I'm reading is fine; I checked this at cout << data and it is what I expect. Any ideas?
Here is the solution I think you are looking for:
// Prints every whitespace-delimited token of the loaded text on its own line.
int main() {
    string data = load_file(); // Returns a string of words
    istringstream iss(data);
    // Test the extraction itself. Checking the stream before reading, as
    // `while (iss)` does, runs the body once more after the last token and
    // prints an empty one.
    for (string tok; iss >> tok;)
    {
        cout << "token: " << tok << endl;
        //you can do what ever you want with the token here
    }
}
Have a look at this, it should help you.
main.cpp
#include "stdafx.h"
#include "Utility.h"
// Reads sample.txt line by line, splits each line on spaces, and prints the
// words; finally prints the words of the last line that was read.
int main() {
    using namespace util;
    std::string fileName( "sample.txt" );
    if ( fileName.empty() ) {
        std::cout << "Missing or invalid filename." << std::endl;
        return RETURN_ERROR;
    }
    std::string line;
    std::vector<std::string> results;
    std::fstream fin;
    // Try To Open File For Reading
    fin.open( fileName.c_str(), std::ios_base::in );
    if ( !fin.is_open() ) {
        std::cout << "Can not open file(" << fileName << ") for reading." << std::endl;
        return RETURN_ERROR;
    }
    // Read Line By Line To Get Data Contents Store Into String To Be Parsed.
    // The read itself is the loop condition: testing eof() before reading
    // lets a failed final getline() be processed as if it were a real line.
    while ( std::getline( fin, line ) ) {
        // Parse Each Line Using Space Character As Delimiter
        results = Utility::splitString( line, " " );
        // Print The Results On Each Iteration Of This While Loop
        // This Is Where You Would Parse The Data Or Store Results Into
        // Class Objects, Variables Or Structures.
        for ( unsigned u = 0; u < results.size(); u++ ) {
            std::cout << results[u] << " ";
        }
        std::cout << std::endl;
    }
    // Close File Pointer
    fin.close();
    // Now Print The Full Vector Of Results - This Is To Show You That Each
    // New Line Will Be Overwritten And That Only The Last Line Of The File Will
    // Be Stored After The While Loop.
    std::cout << "\n-------------------------------------\n";
    for ( unsigned u = 0; u < results.size(); u++ ) {
        std::cout << results[u] << " ";
    }
    Utility::pressAnyKeyToQuit();
    return RETURN_OK;
} // main
sample.txt
Please help me parse this text file
It spans multiple lines of text
I would like to get each individual word
stdafx.h - Some of these include files may not be needed; they are here because I have a larger solution that requires them.
#ifndef STDAFX_H
#define STDAFX_H
#include <Windows.h>
#include <stdio.h>
#include <tchar.h>
#include <conio.h>
#include <string>
#include <sstream>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <vector>
#include <array>
#include <memory>
#include <queue>
#include <functional>
#include <algorithm>
// User Application Specific
// #include "ExceptionHandler.h" - One Of My Class Objects Not Used Here
// Shared return codes and sentinel constants for the utility library.
namespace util {
// Process exit codes returned from main().
enum ReturnCode {
RETURN_OK = 0,
RETURN_ERROR = 1,
}; // ReturnCode
// Sentinel values; defined in stdafx.cpp.
extern const unsigned INVALID_UNSIGNED;
// NOTE(review): declared as `unsigned`, not `unsigned short`, although the name
// suggests the latter - confirm this is intended.
extern const unsigned INVALID_UNSIGNED_SHORT;
} // namespace util
#endif // STDAFX_H
stdafx.cpp
#include "stdafx.h"
namespace util {
// Largest value representable by `unsigned` (-1 converted to unsigned).
const unsigned INVALID_UNSIGNED = static_cast<unsigned>( -1 );
// Largest value representable by `unsigned short`, stored in an `unsigned`.
const unsigned INVALID_UNSIGNED_SHORT = static_cast<unsigned short>( -1 );
} // namespace util
Utility.h
#ifndef UTILITY_H
#define UTILITY_H
namespace util {
// Collection of static string helpers; the class cannot be instantiated or
// copied (private constructor, copy operations declared but not implemented).
class Utility {
public:
// Prints a prompt and blocks until a key is pressed.
static void pressAnyKeyToQuit();
// Return a copy of `str` converted to upper / lower case.
static std::string toUpper(const std::string& str);
static std::string toLower(const std::string& str);
// Returns `str` without leading and trailing characters found in `elementsToTrim`.
static std::string trim(const std::string& str, const std::string elementsToTrim = " \t\n\r");
// Convert the whole of `str` to a number; on failure they throw a std::string
// describing the bad input.
static unsigned convertToUnsigned(const std::string& str);
static int convertToInt(const std::string& str);
static float convertToFloat(const std::string& str);
// Splits `strStringToSplit` at every occurrence of `strDelimiter`; empty pieces
// are kept unless `keepEmpty` is false.
static std::vector<std::string> splitString(const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty = true);
private:
Utility(); // Private - Not A Class Object
Utility(const Utility& c); // Not Implemented
Utility& operator=(const Utility& c); // Not Implemented
// Parses `uNumValues` comma-separated values from `str` into `pValue`.
template<typename T>
static bool stringToValue(const std::string& str, T* pValue, unsigned uNumValues);
// Parses one leading value from `str`; `remainder` receives the number of
// characters consumed. Specialized in Utility.cpp.
template<typename T>
static T getValue(const std::string& str, std::size_t& remainder);
}; // Utility
// The template definitions are pulled in here, inside namespace util.
#include "Utility.inl"
} // namespace util
#endif // UTILITY_H
Utility.inl
// ----------------------------------------------------------------------------
// stringToValue()
// Parses exactly `uNumValues` comma-separated values of type T from `str`
// into pValue[0 .. uNumValues-1].
//
// Returns true only when the whole string is consumed and holds exactly the
// requested number of values. Returns false for a null `pValue` or a zero
// `uNumValues`. Exceptions thrown by getValue<T> (and by std::string::at)
// propagate to the caller.
//
// `static` belongs on the in-class declaration only; repeating it on an
// out-of-class member definition is ill-formed.
template<typename T>
bool Utility::stringToValue(const std::string& str, T* pValue, unsigned uNumValues) {
    // With zero values requested, `uNumValues - 1` would wrap around and
    // pValue[0] would be written even though the caller asked for nothing.
    if (pValue == nullptr || uNumValues == 0) {
        return false;
    }
    const std::size_t numCommas =
        static_cast<std::size_t>(std::count(str.begin(), str.end(), ','));
    if (numCommas != uNumValues - 1) {
        return false;
    }
    std::size_t remainder = 0;
    pValue[0] = getValue<T>(str, remainder);
    if (uNumValues == 1) {
        // A single value must account for every character of the string.
        return str.size() == remainder;
    }
    std::size_t offset = remainder;
    if (str.at(offset) != ',') {
        return false;
    }
    const unsigned uLastIdx = uNumValues - 1;
    for (unsigned u = 1; u < uNumValues; ++u) {
        // Skip the comma, parse the next value, then advance past it.
        pValue[u] = getValue<T>(str.substr(++offset), remainder);
        offset += remainder;
        if ((u < uLastIdx && str.at(offset) != ',') ||
            (u == uLastIdx && offset != str.size()))
        {
            return false;
        }
    }
    return true;
} // stringToValue
Utility.cpp
#include "stdafx.h"
#include "Utility.h"
namespace util {
// ----------------------------------------------------------------------------
// pressAnyKeyToQuit()
// Prints a prompt and then calls _getch() to wait for a key press.
void Utility::pressAnyKeyToQuit() {
std::cout << "\nPress any key to quit" << std::endl;
_getch();
} // pressAnyKeyToQuit
// ----------------------------------------------------------------------------
// toUpper()
// Returns a copy of `str` with every character converted to upper case.
std::string Utility::toUpper( const std::string& str ) {
    std::string result = str;
    // Convert through unsigned char: ::toupper is undefined for negative
    // values, which plain char yields for non-ASCII bytes where char is
    // signed.
    std::transform( str.begin(), str.end(), result.begin(),
                    []( unsigned char ch ) { return static_cast<char>( ::toupper( ch ) ); } );
    return result;
} // toUpper
// ----------------------------------------------------------------------------
// toLower()
// Returns a copy of `str` with every character converted to lower case.
std::string Utility::toLower( const std::string& str ) {
    std::string result = str;
    // Convert through unsigned char: ::tolower is undefined for negative
    // values, which plain char yields for non-ASCII bytes where char is
    // signed.
    std::transform( str.begin(), str.end(), result.begin(),
                    []( unsigned char ch ) { return static_cast<char>( ::tolower( ch ) ); } );
    return result;
} // toLower
// ----------------------------------------------------------------------------
// trim()
// Removes Elements To Trim From Left And Right Side Of The str
// Strips every leading and trailing character contained in `elementsToTrim`
// from `str`; returns an empty string when nothing else remains.
std::string Utility::trim( const std::string& str, const std::string elementsToTrim ) {
    const std::string::size_type first = str.find_first_not_of( elementsToTrim );
    if ( first != std::string::npos ) {
        const std::string::size_type last = str.find_last_not_of( elementsToTrim );
        const std::string::size_type length = last - first + 1;
        return str.substr( first, length );
    }
    return std::string(); // Nothing Left
} // trim
// ----------------------------------------------------------------------------
// getValue()
// float specialization: parses a leading float with std::stof; `remainder`
// receives the number of characters consumed.
template<>
float Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stof( str, &remainder );
} // getValue <float>
// ----------------------------------------------------------------------------
// getValue()
// int specialization: parses a leading int with std::stoi.
template<>
int Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoi( str, &remainder );
} // getValue <int>
// ----------------------------------------------------------------------------
// getValue()
// unsigned specialization: parses with std::stoul.
// NOTE(review): std::stoul yields unsigned long and the result is narrowed to
// unsigned without a range check - confirm that this is acceptable.
template<>
unsigned Utility::getValue( const std::string& str, std::size_t& remainder ) {
return std::stoul( str, &remainder );
} // getValue <unsigned>
// ----------------------------------------------------------------------------
// convertToUnsigned()
// Converts the whole of `str` to unsigned; throws a std::string describing
// the input when stringToValue() rejects it.
unsigned Utility::convertToUnsigned( const std::string& str ) {
    unsigned parsed = 0;
    if ( stringToValue( str, &parsed, 1 ) ) {
        return parsed;
    }
    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to unsigned";
    throw message.str();
} // convertToUnsigned
// ----------------------------------------------------------------------------
// convertToInt()
// Converts str To A Single int Value Through stringToValue.
// Throws A std::string Describing The Input When The Conversion Fails.
int Utility::convertToInt( const std::string& str ) {
    int value = 0;
    if ( stringToValue( str, &value, 1 ) ) {
        return value;
    }

    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to int";
    throw message.str();
} // convertToInt
// ----------------------------------------------------------------------------
// convertToFloat()
// Converts str To A Single float Value Through stringToValue.
// Throws A std::string Describing The Input When The Conversion Fails.
float Utility::convertToFloat(const std::string& str) {
    float value = 0;
    if ( stringToValue( str, &value, 1 ) ) {
        return value;
    }

    std::ostringstream message;
    message << __FUNCTION__ << " Bad conversion of [" << str << "] to float";
    throw message.str();
} // convertToFloat
// ----------------------------------------------------------------------------
// splitString()
// Cuts strStringToSplit At Every Occurrence Of strDelimiter And Returns The
// Pieces In Order. Empty Pieces Are Kept Only When keepEmpty Is true. An
// Empty Delimiter Yields The Whole Input As The Only Piece.
std::vector<std::string> Utility::splitString( const std::string& strStringToSplit, const std::string& strDelimiter, const bool keepEmpty ) {
    std::vector<std::string> pieces;

    if ( strDelimiter.empty() ) {
        pieces.push_back( strStringToSplit );
        return pieces;
    }

    std::string::size_type pieceStart = 0;
    for ( ;; ) {
        const std::string::size_type hit = strStringToSplit.find( strDelimiter, pieceStart );
        const bool isLastPiece = ( hit == std::string::npos );

        // The Final Piece Runs To The End Of The Input
        const std::string piece = isLastPiece
            ? strStringToSplit.substr( pieceStart )
            : strStringToSplit.substr( pieceStart, hit - pieceStart );

        if ( keepEmpty || !piece.empty() ) {
            pieces.push_back( piece );
        }
        if ( isLastPiece ) {
            break;
        }
        pieceStart = hit + strDelimiter.size();
    }

    return pieces;
} // splitString
} // namespace util
In my small utility library I have a function that will split a string that can use any delimiter that the user defines. It will search for the first occurrence of that character delimiter and it will save everything before it into a string and it will push that string into a vector of strings, and it will continue this for every occurrence of that character until it is finished parsing the full string that is passed to it. It will then return a vector of strings back to the user. This is very helpful when engaged in parsing text files or even just data types with long strings that need to be broken down. Now if there is a case where you are parsing a text file and lets say you need to have more than one word as a single string, this can be done but requires more work on your part. For example a text file might have personal record on a single line.
LastName, FirstName MiddleInitial Age Phone# Address
Cook, John S 33 1-888-323-4545 324 Complex Avenue
You would want 324 Complex Avenue to be stored in a single string, and you would not want the comma after the last name to be stored. Your structure in code to store this info might look like this:
// One parsed line of the personal-record text file.
// middleInitial and age have no default; the parsing code assigns every
// field before a record is stored.
struct PersonalRecord {
    std::string firstName;
    std::string lastName;
    char        middleInitial;
    unsigned    age;
    std::string phoneNumber;
    std::string address;      // was `std:string` (single colon), which does not compile
};
After reading this line from your file, you would have to parse it several times within that same iteration of the while loop.
You would first start by using a temporary string and vector of strings and use the utility function splitString with the delimeter being the comma. So this would save 2 strings in the temp vector of strings the first being: Cook and the second being the rest of the line after the comma including the leading space. The reason you have the temp string and temp vector of strings is that you will need to pop values at when needed. So in this case we would have to do the following, but first how do we resolve the case with multiple words to one string? We can change the line of text in the text file to be enclosed with double quotes as such:
textfile
Cook, John S 33 1-888-323-4545 "324 Complex Avenue"
Evens, Sue A 24 1-888-323-6996 "128 Mission Rd"
Adams, Chris B 49 1-777-293-8234 "2304 Helms Drive"
Then parse it with this logic flow or algorithm.
main.cpp
#including "stdafx.h"
#including "Utility.h"
// Reads "personalRecord.txt" line by line, splits every line into its six
// fields (last name, first name, middle initial, age, phone number, address),
// prints them, and collects them into a vector of PersonalRecord.
// Lines that cannot be split into six fields are reported and skipped instead
// of indexing past the end of a vector.
int main() {
    using namespace util;

    std::string strFilename( "personalRecord.txt" );
    std::ifstream file;
    std::string strLine;
    std::vector<std::string> vTemp;
    std::vector<std::string> vResult;
    std::vector<PersonalRecord> vData;

    // Open File For Reading
    file.open( strFilename.c_str() );

    // Check For Error Of Opening File
    if ( !file.is_open() ) {
        std::cout << "Error opening file (" << strFilename << ")" << std::endl;
        return RETURN_ERROR;
    }

    // Read Until getline Fails. Testing eof() Before Reading Would Process
    // One Extra, Empty Line After The Last Real One.
    while ( std::getline( file, strLine ) ) {
        vResult.clear();

        // Nothing To Parse On A Blank Line
        if ( strLine.empty() ) {
            continue;
        }

        // Split Off The Last Name At The Comma
        vTemp = Utility::splitString( strLine, "," );
        if ( vTemp.size() < 2 ) {
            std::cout << "Skipping malformed line (missing comma): " << strLine << std::endl;
            continue;
        }

        // Save First String For Later
        const std::string lastName = vTemp[0];

        // Split The Remainder Using A Double Quote Delimiter
        vTemp = Utility::splitString( vTemp[1], "\"" );

        if ( vTemp.size() > 1 ) {
            // The Piece After The Closing Double Quote Is Not Needed
            vTemp.pop_back();
            const std::string address = vTemp.back();
            vTemp.pop_back(); // Remove Wanted String From vTemp

            if ( vTemp.empty() ) {
                std::cout << "Skipping malformed line (unbalanced quotes): " << strLine << std::endl;
                continue;
            }

            // Parse What Is Left Using The Space Delimiter
            vResult = Utility::splitString( vTemp[0], " " );
            if ( vResult.empty() ) {
                std::cout << "Skipping malformed line (no fields): " << strLine << std::endl;
                continue;
            }

            // Drop The Piece Produced By The Trailing Space, Append The
            // Address, And Put The Last Name In The Slot Produced By The
            // Leading Space
            vResult.pop_back();
            vResult.push_back( address );
            vResult[0] = lastName;
        } else if ( vTemp.size() == 1 ) {
            // Just Parse vTemp Using Space Delimiter
            vResult = Utility::splitString( vTemp[0], " " );
        }

        // Print Out Results For Validity
        for ( unsigned u = 0; u < vResult.size(); u++ ) {
            std::cout << vResult.at( u ) << " ";
        }
        std::cout << std::endl;

        // With This Structure There Should Be 6 Entries In vResult
        if ( vResult.size() < 6 ) {
            std::cout << "Skipping malformed line (expected 6 fields): " << strLine << std::endl;
            continue;
        }

        PersonalRecord temp;
        temp.lastName      = vResult[0];
        temp.firstName     = vResult[1];
        temp.middleInitial = vResult[2].empty() ? '\0' : vResult[2][0];
        try {
            temp.age = Utility::convertToUnsigned( vResult[3] );
        } catch ( const std::string& error ) {
            // convertToUnsigned Reports A Bad Conversion By Throwing A std::string
            std::cout << error << std::endl;
            continue;
        }
        temp.phoneNumber   = vResult[4];
        temp.address       = vResult[5];
        vData.push_back( temp );
    } // while

    // Close File
    file.close();

    std::cout << std::endl << std::endl;

    // Print Using Structure For Validity
    std::cout << "---------------------------------------\n";
    for ( unsigned u = 0; u < vData.size(); u++ ) {
        std::cout << vData[u].lastName << " "
                  << vData[u].firstName << " "
                  << vData[u].middleInitial << " "
                  << vData[u].age << " "
                  << vData[u].phoneNumber << " "
                  << vData[u].address << std::endl;
    }

    Utility::pressAnyKeyToQuit();
    return RETURN_OK;
} // main
So both consideration and care have to be taken when parsing text or strings. You have to account for every single character, including your carriage returns, spaces, etc. So the format that the text file is written in has to be considered.
Yes, splitString() will also parse tabs; you would just have to use "\t" for tabs, etc. Just remember that it will make a split at every occurrence. So if you have a sentence that has a colon ":" in it, but then you decide to use the colon as your delimiter between values, it will split that sentence as well. Now you could have different rules for each line of text from the file, and if you know what line you are on you can parse each line accordingly. This is why most people prefer to write their code to read and parse binary, because it is much easier to program than writing a text parser.
I chose to use the PersonalRecord structure to show you how you can extract strings from a line of text and to convert them to basic types such as int, float or double by using some of my other functions in my Utility class. All methods in this class are declared as static and the constructor is private, so the class name acts as a wrapper or a namespace so to speak. You can not create an instance of a Utility util; // invalid object. Just include the header file and use the class name with the scope resolution operator :: to access any of the functions and make sure you are using the namespace util.

Easy way to parse a url in C++ cross platform?

I need to parse a URL to get the protocol, host, path, and query in an application I am writing in C++. The application is intended to be cross-platform. I'm surprised I can't find anything that does this in the boost or POCO libraries. Is it somewhere obvious I'm not looking? Any suggestions on appropriate open source libs? Or is this something I just have to do my self? It's not super complicated but it seems like such a common task I am surprised there isn't a common solution.
There is a library that's proposed for Boost inclusion and allows you to parse HTTP URI's easily. It uses Boost.Spirit and is also released under the Boost Software License. The library is cpp-netlib which you can find the documentation for at http://cpp-netlib.github.com/ -- you can download the latest release from http://github.com/cpp-netlib/cpp-netlib/downloads .
The relevant type you'll want to use is boost::network::http::uri and is documented here.
Wstring version of above, added other fields I needed. Could definitely be refined, but good enough for my purposes.
#include <string>
#include <algorithm> // find
// Splits a URI of the form [protocol://]host[:port][/path][?query] into its
// parts. Every part that is absent from the input is left empty.
struct Uri
{
public:
    // QueryString keeps its leading '?', Path keeps its leading '/'.
    std::wstring QueryString, Path, Protocol, Host, Port;

    // Parses uri and returns the components found. Never throws on malformed
    // input; unrecognised parts simply stay empty.
    static Uri Parse(const std::wstring &uri)
    {
        Uri result;

        typedef std::wstring::const_iterator iterator_t;

        if (uri.empty())
            return result;

        const iterator_t uriEnd = uri.end();

        // Everything from the first '?' on is the query. All other delimiters
        // are searched only in front of it, so a ':' or '/' inside the query
        // cannot be mistaken for a protocol, port or path delimiter.
        const iterator_t queryStart = std::find(uri.begin(), uriEnd, L'?');

        // protocol: the first ':' counts only when it starts "://" and is
        // followed by at least one more character
        static const wchar_t separator[] = L"://";
        iterator_t hostStart = uri.begin();
        const iterator_t colon = std::find(uri.begin(), queryStart, L':');
        if (colon != queryStart && (uriEnd - colon) > 3 &&
            std::equal(separator, separator + 3, colon))
        {
            result.Protocol.assign(uri.begin(), colon);
            hostStart = colon + 3; // skip "://"
        }

        // host: runs up to the port, the path or the query, whichever is first
        const iterator_t pathStart = std::find(hostStart, queryStart, L'/');
        const iterator_t hostEnd = std::find(hostStart, pathStart, L':');
        result.Host.assign(hostStart, hostEnd);

        // port: present only when a ':' was found in front of the path
        if (hostEnd != pathStart)
            result.Port.assign(hostEnd + 1, pathStart);

        // path
        if (pathStart != queryStart)
            result.Path.assign(pathStart, queryStart);

        // query
        if (queryStart != uriEnd)
            result.QueryString.assign(queryStart, uriEnd);

        return result;
    } // Parse
}; // uri
Tests/Usage
// Protocol "http", Host "localhost", Port "80", Path "/foo.html", QueryString "?&q=1:2:3"
Uri u0 = Uri::Parse(L"http://localhost:80/foo.html?&q=1:2:3");
// Protocol "https", Host "localhost", Port "80", Path "/foo.html", QueryString "?&q=1"
Uri u1 = Uri::Parse(L"https://localhost:80/foo.html?&q=1");
// No protocol: Host "localhost", Path "/foo"
Uri u2 = Uri::Parse(L"localhost/foo");
// Protocol "https", Host "localhost", Path "/foo"
Uri u3 = Uri::Parse(L"https://localhost/foo");
// No protocol: Host "localhost", Port "8080"
Uri u4 = Uri::Parse(L"localhost:8080");
// Host "localhost", QueryString "?&foo=1"
Uri u5 = Uri::Parse(L"localhost?&foo=1");
// The ':' characters inside the query do not produce a port: Host "localhost", QueryString "?&foo=1:2:3"
Uri u6 = Uri::Parse(L"localhost?&foo=1:2:3");
u0.QueryString, u0.Path, u0.Protocol, u0.Host, u0.Port....
Terribly sorry, couldn't help it. :s
url.hh
#ifndef URL_HH_
#define URL_HH_
#include <string>
// Holds the parts of a URL string: protocol, host, path and query.
struct url {
// Builds a url from url_s. The definition is not part of this snippet;
// presumably it forwards to parse() -- confirm against the implementation.
url(const std::string& url_s); // omitted copy, ==, accessors, ...
private:
// Splits url_s into protocol_, host_, path_ and query_ (defined in url.cc).
void parse(const std::string& url_s);
private:
// Components filled in by parse().
std::string protocol_, host_, path_, query_;
};
#endif /* URL_HH_ */
url.cc
#include "url.hh"
#include <string>
#include <algorithm>
#include <cctype>
#include <functional>
using namespace std;
// ctors, copy, equality, ...
// Splits url_s into protocol_, host_, path_ and query_.
//   protocol_ : lower-cased text in front of "://"
//   host_     : lower-cased text between "://" and the first '/'
//   path_     : from that '/' up to (not including) the first '?'
//   query_    : everything after the '?'
// When url_s contains no "://", the whole input is lower-cased into protocol_
// and the other members stay empty.
void url::parse(const std::string& url_s)
{
    // std::tolower is only defined for values representable as unsigned char,
    // so convert before the call. (std::ptr_fun, used here before, was
    // removed from the standard library in C++17.)
    const auto to_lower = [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    };

    // Start from a clean state so that parsing a second URL does not append
    // to the protocol and host of the first one.
    protocol_.clear();
    host_.clear();
    path_.clear();
    query_.clear();

    const std::string prot_end("://");
    std::string::const_iterator prot_i = std::search(url_s.begin(), url_s.end(),
                                                     prot_end.begin(), prot_end.end());
    protocol_.reserve(std::distance(url_s.begin(), prot_i));
    std::transform(url_s.begin(), prot_i,
                   std::back_inserter(protocol_),
                   to_lower); // protocol is icase
    if (prot_i == url_s.end())
        return;

    std::advance(prot_i, prot_end.length());
    const std::string::const_iterator path_i = std::find(prot_i, url_s.end(), '/');
    host_.reserve(std::distance(prot_i, path_i));
    std::transform(prot_i, path_i,
                   std::back_inserter(host_),
                   to_lower); // host is icase

    std::string::const_iterator query_i = std::find(path_i, url_s.end(), '?');
    path_.assign(path_i, query_i);
    if (query_i != url_s.end())
        ++query_i; // skip the '?' itself
    query_.assign(query_i, url_s.end());
}
main.cc
// ...
url u("HTTP://stackoverflow.com/questions/2616011/parse-a.py?url=1");
cout << u.protocol() << '\t' << u.host() << ...
POCO's URI class can parse URLs for you. The following example is shortened version of the one in POCO URI and UUID slides:
#include "Poco/URI.h"
#include <iostream>
int main(int argc, char** argv)
{
Poco::URI uri1("http://www.appinf.com:88/sample?example-query#frag");
std::string scheme(uri1.getScheme()); // "http"
std::string auth(uri1.getAuthority()); // "www.appinf.com:88"
std::string host(uri1.getHost()); // "www.appinf.com"
unsigned short port = uri1.getPort(); // 88
std::string path(uri1.getPath()); // "/sample"
std::string query(uri1.getQuery()); // "example-query"
std::string frag(uri1.getFragment()); // "frag"
std::string pathEtc(uri1.getPathEtc()); // "/sample?example-query#frag"
return 0;
}
For completeness, there is one written in C that you could use (with a little wrapping, no doubt): https://uriparser.github.io/
[RFC-compliant and supports Unicode]
Here's a very basic wrapper I've been using for simply grabbing the results of a parse.
#include <string>
#include <uriparser/Uri.h>
namespace uriparser
{
    // Owns one parsed URI. The constructor parses the string and the
    // destructor releases what the uriparser library allocated for it.
    //
    // The text ranges inside uriParse_ point into uri_, so a copy must own its
    // own string and its own parse result. The compiler-generated copy shared
    // both with the source object, which ended in a double free; copying now
    // re-parses instead.
    class Uri
    {
    public:
        // Parses uri; check isValid() before using the accessors.
        Uri(std::string uri)
            : uri_(uri)
        {
            parse();
        }

        Uri(const Uri & other)
            : uri_(other.uri_)
        {
            parse();
        }

        Uri & operator=(const Uri & other)
        {
            if (this != &other)
            {
                uriFreeUriMembersA(&uriParse_);
                uri_ = other.uri_;
                parse();
            }
            return *this;
        }

        ~Uri() { uriFreeUriMembersA(&uriParse_); }

        // True when the library accepted the string as a URI.
        bool isValid() const { return isValid_; }

        std::string scheme()   const { return fromRange(uriParse_.scheme); }
        std::string host()     const { return fromRange(uriParse_.hostText); }
        std::string port()     const { return fromRange(uriParse_.portText); }
        // Path segments joined with a leading "/" in front of each one.
        std::string path()     const { return fromList(uriParse_.pathHead, "/"); }
        std::string query()    const { return fromRange(uriParse_.query); }
        std::string fragment() const { return fromRange(uriParse_.fragment); }

    private:
        std::string uri_;
        UriUriA     uriParse_;
        bool        isValid_;

        // Parses uri_ into uriParse_ and records whether that succeeded.
        void parse()
        {
            UriParserStateA state_;
            state_.uri = &uriParse_;
            isValid_   = uriParseUriA(&state_, uri_.c_str()) == URI_SUCCESS;
        }

        // Copies the characters of one text range; a range without a start or
        // end pointer yields an empty string.
        std::string fromRange(const UriTextRangeA & rng) const
        {
            if (!rng.first || !rng.afterLast)
                return std::string();
            return std::string(rng.first, rng.afterLast);
        }

        // Concatenates delim + segment for every segment of the list.
        std::string fromList(UriPathSegmentA * xs, const std::string & delim) const
        {
            UriPathSegmentStructA * head(xs);
            std::string accum;

            while (head)
            {
                accum += delim + fromRange(head->text);
                head = head->next;
            }

            return accum;
        }
    };
}
//sudo apt-get install libboost-all-dev; #install boost
//g++ urlregex.cpp -lboost_regex; #compile
#include <string>
#include <iostream>
#include <boost/regex.hpp>
using namespace std;
int main(int argc, char* argv[])
{
string url="https://www.google.com:443/webhp?gws_rd=ssl#q=cpp";
boost::regex ex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");
boost::cmatch what;
if(regex_match(url.c_str(), what, ex))
{
cout << "protocol: " << string(what[1].first, what[1].second) << endl;
cout << "domain: " << string(what[2].first, what[2].second) << endl;
cout << "port: " << string(what[3].first, what[3].second) << endl;
cout << "path: " << string(what[4].first, what[4].second) << endl;
cout << "query: " << string(what[5].first, what[5].second) << endl;
cout << "fragment: " << string(what[6].first, what[6].second) << endl;
}
return 0;
}
The Poco library now has a class for dissecting URI's and feeding back the host, path segments and query string etc.
https://pocoproject.org/pro/docs/Poco.URI.html
QT has QUrl for this. GNOME has SoupURI in libsoup, which you'll probably find a little more light-weight.
Facebook's Folly library can do the job for you easily. Simply use the Uri class:
#include <folly/Uri.h>
int main() {
folly::Uri folly("https://code.facebook.com/posts/177011135812493/");
folly.scheme(); // https
folly.host(); // code.facebook.com
folly.path(); // posts/177011135812493/
}
I know this is a very old question, but I've found the following useful:
http://www.zedwood.com/article/cpp-boost-url-regex
It gives 3 examples:
(With Boost)
//sudo apt-get install libboost-all-dev;
//g++ urlregex.cpp -lboost_regex
#include <string>
#include <iostream>
#include <boost/regex.hpp>
using std::string;
using std::cout;
using std::endl;
using std::stringstream;
void parse_url(const string& url) //with boost
{
boost::regex ex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");
boost::cmatch what;
if(regex_match(url.c_str(), what, ex))
{
string protocol = string(what[1].first, what[1].second);
string domain = string(what[2].first, what[2].second);
string port = string(what[3].first, what[3].second);
string path = string(what[4].first, what[4].second);
string query = string(what[5].first, what[5].second);
cout << "[" << url << "]" << endl;
cout << protocol << endl;
cout << domain << endl;
cout << port << endl;
cout << path << endl;
cout << query << endl;
cout << "-------------------------------" << endl;
}
}
// Runs parse_url over three sample URLs.
int main(int argc, char* argv[])
{
    static const char* const samples[] = {
        "http://www.google.com",
        "https://mail.google.com/mail/",
        "https://www.google.com:443/webhp?gws_rd=ssl"
    };
    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
        parse_url(samples[i]);
    return 0;
}
(Without Boost)
#include <string>
#include <iostream>
using std::string;
using std::cout;
using std::endl;
using std::stringstream;
// Returns str without its leading and trailing spaces, tabs, carriage returns
// and newlines. A string made up only of those characters yields an empty
// string (it used to be returned unchanged).
std::string _trim(const std::string& str)
{
    const char* const whitespace = " \n\r\t";

    const std::string::size_type start = str.find_first_not_of(whitespace);
    if (start == std::string::npos)
        return std::string(); // empty or whitespace only

    // A kept character exists, so the search from the right succeeds as well.
    const std::string::size_type until = str.find_last_not_of(whitespace);
    return str.substr(start, until - start + 1);
}
// Splits raw_url by hand (no regular expression) and prints the URL followed
// by its protocol, domain, port, path and query. Only the "https://" and
// "http://" prefixes are recognised as a protocol.
void parse_url(const std::string& raw_url) //no boost
{
    const std::string url = _trim(raw_url);

    // length of the "scheme://" prefix, 0 when there is none
    std::string::size_type prefix = 0;
    if (url.compare(0, 8, "https://") == 0)
        prefix = 8;
    else if (url.compare(0, 7, "http://") == 0)
        prefix = 7;

    std::string protocol;
    if (prefix > 0)
        protocol = url.substr(0, prefix - 3);

    // the first '/' after the prefix separates the domain from the path
    std::string domain;
    std::string path;
    const std::string::size_type slash = url.find('/', prefix + 1);
    if (slash == std::string::npos)
    {
        domain = url.substr(prefix);
    }
    else
    {
        domain = url.substr(prefix, slash - prefix);
        path   = url.substr(slash);
    }

    // drop the fragment
    const std::string::size_type hash = path.find('#');
    if (hash != std::string::npos)
        path.erase(hash);

    // split "domain:port"
    std::string port;
    const std::string::size_type colon = domain.find(':');
    if (colon != std::string::npos)
    {
        port = domain.substr(colon + 1);
        domain.erase(colon);
    }

    // split "path?query"
    std::string query;
    const std::string::size_type qmark = path.find('?');
    if (qmark != std::string::npos)
    {
        query = path.substr(qmark + 1);
        path.erase(qmark);
    }

    std::cout << "[" << raw_url << "]" << std::endl;
    std::cout << "protocol: " << protocol << std::endl;
    std::cout << "domain: " << domain << std::endl;
    std::cout << "port: " << port << std::endl;
    std::cout << "path: " << path << std::endl;
    std::cout << "query: " << query << std::endl;
}
// Runs parse_url over three sample URLs.
int main(int argc, char* argv[])
{
    const char* const samples[] = {
        "http://www.google.com",
        "https://mail.google.com/mail/",
        "https://www.google.com:443/webhp?gws_rd=ssl"
    };
    const unsigned count = sizeof(samples) / sizeof(samples[0]);
    for (unsigned i = 0; i != count; ++i)
    {
        parse_url(samples[i]);
    }
    return 0;
}
(Different way without Boost)
#include <string>
#include <stdint.h>
#include <cstring>
#include <sstream>
#include <algorithm>
#include <iostream>
using std::cerr; using std::cout; using std::endl;
using std::string;
// Splits an http/https/ftp URL into protocol, domain, port, path and query
// and exposes them through read-only reference members.
//
// The public members are references to the private ones. The copy constructor
// and assignment operator are written out so that a copy refers to its OWN
// data; the compiler-generated copy constructor would bind the copy's
// references to the source object.
class HTTPURL
{
private:
    std::string _protocol; // http vs https
    std::string _domain;   // mail.google.com
    uint16_t    _port;     // 80,443
    std::string _path;     // /mail/
    std::string _query;    // [after ?] a=b&c=b

public:
    const std::string &protocol;
    const std::string &domain;
    const uint16_t    &port;
    const std::string &path;
    const std::string &query;

    // Parses url. An unknown protocol leaves `protocol` empty. A missing,
    // non-numeric or out-of-range port falls back to 443 for https and to 80
    // otherwise. A URL without a path gets the path "/".
    HTTPURL(const std::string& url)
        : _port(0),
          protocol(_protocol), domain(_domain), port(_port), path(_path), query(_query)
    {
        const std::string u = _trim(url);
        size_t offset = 0, slash_pos, hash_pos, colon_pos, qmark_pos;
        std::string urlpath, urldomain, urlport;

        static const char* allowed[] = { "https://", "http://", "ftp://", NULL };
        for (int i = 0; allowed[i] != NULL && this->_protocol.length() == 0; i++)
        {
            const char* c = allowed[i];
            if (u.compare(0, strlen(c), c) == 0)
            {
                offset = strlen(c);
                this->_protocol = std::string(c, offset - 3); // without "://"
            }
        }
        const uint16_t default_port = this->_protocol == "https" ? 443 : 80;

        // the first '/' after the prefix separates the domain from the path
        slash_pos = u.find_first_of('/', offset + 1);
        urlpath   = slash_pos == std::string::npos ? "/" : u.substr(slash_pos);
        urldomain = std::string(u.begin() + offset,
                                slash_pos != std::string::npos ? u.begin() + slash_pos : u.end());

        // drop the fragment
        hash_pos = urlpath.find("#");
        if (hash_pos != std::string::npos)
            urlpath = urlpath.substr(0, hash_pos);

        // split "domain:port"
        colon_pos = urldomain.find(":");
        if (colon_pos != std::string::npos)
        {
            urlport   = urldomain.substr(colon_pos + 1);
            urldomain = urldomain.substr(0, colon_pos);
        }

        // split "path?query"
        qmark_pos = urlpath.find("?");
        if (qmark_pos != std::string::npos)
        {
            this->_query = urlpath.substr(qmark_pos + 1);
            urlpath      = urlpath.substr(0, qmark_pos);
        }

        this->_domain = _tolower(urldomain);
        this->_path   = urlpath;
        this->_port   = _parse_port(urlport, default_port);
    }

    // Copies the values and binds the references to this object's members.
    HTTPURL(const HTTPURL& other)
        : _protocol(other._protocol), _domain(other._domain), _port(other._port),
          _path(other._path), _query(other._query),
          protocol(_protocol), domain(_domain), port(_port), path(_path), query(_query)
    {
    }

    // Copies the values only; the references keep pointing at this object.
    HTTPURL& operator=(const HTTPURL& other)
    {
        _protocol = other._protocol;
        _domain   = other._domain;
        _port     = other._port;
        _path     = other._path;
        _query    = other._query;
        return *this;
    }

private:
    // Removes leading and trailing spaces, tabs, newlines and carriage
    // returns; a whitespace-only input becomes empty.
    static inline std::string _trim(const std::string& input)
    {
        const char* const whitespace = " \t\n\r";
        const size_t startpos = input.find_first_not_of(whitespace);
        if (startpos == std::string::npos)
            return std::string();
        const size_t endpos = input.find_last_not_of(whitespace);
        return input.substr(startpos, endpos - startpos + 1);
    }

    // Lower-cases every character. ::tolower needs an unsigned char value;
    // a negative char is undefined behaviour.
    static inline std::string _tolower(const std::string& input)
    {
        std::string str = input;
        for (size_t i = 0; i < str.size(); ++i)
            str[i] = static_cast<char>(::tolower(static_cast<unsigned char>(str[i])));
        return str;
    }

    // Reads a port number from text. Returns fallback when text does not
    // start with a number or the number does not fit into 0..65535.
    static inline uint16_t _parse_port(const std::string& text, uint16_t fallback)
    {
        long value = -1;
        std::istringstream in(text);
        if (!(in >> value) || value < 0 || value > 65535)
            return fallback;
        return static_cast<uint16_t>(value);
    }
};
int main(int argc, char **argv)
{
HTTPURL u("https://Mail.google.com:80/mail/?action=send#action=send");
cout << "protocol: " << u.protocol << endl;
cout << "domain: " << u.domain << endl;
cout << "port: " << u.port << endl;
cout << "path: " << u.path << endl;
cout << "query: " << u.query << endl;
return 0;
}
This library is very tiny and lightweight: https://github.com/corporateshark/LUrlParser
However, it is parsing only, no URL normalization/validation.
Also of interest could be http://code.google.com/p/uri-grammar/ which like Dean Michael's netlib uses boost spirit to parse a URI. Came across it at Simple expression parser example using Boost::Spirit?
There is the newly released google-url lib:
http://code.google.com/p/google-url/
The library provides a low-level url parsing API as well as a higher-level abstraction called GURL. Here's an example using that:
#include <googleurl\src\gurl.h>
wchar_t url[] = L"http://www.facebook.com";
GURL parsedUrl (url);
assert(parsedUrl.DomainIs("facebook.com"));
Two small complaints I have with it: (1) it wants to use ICU by default to deal with different string encodings and (2) it makes some assumptions about logging (but I think they can be disabled). In other words, the library is not completely stand-alone as it exists, but I think it's still a good basis to start with, especially if you are already using ICU.
May I offer another self-contained solution based on std::regex :
// Building blocks of the URI regular expression. They are concatenated in this
// order, so the capture-group numbers used by the caller depend on the number
// of parentheses in each part -- keep the group structure unchanged.
//
// The user-info part ends with '@' (RFC 3986). It was written with '#', which
// made "host#fragment" parse as user "host" and host "fragment". The user and
// host classes also exclude '?' and '#' so that they cannot run into the query
// or the fragment.
const char* SCHEME_REGEX = "((http[s]?)://)?"; // match http or https before the ://
const char* USER_REGEX = "(([^@/:?#\\s]+)@)?"; // match anything other than @ / : ? # or whitespace before the ending @
const char* HOST_REGEX = "([^@/:?#\\s]+)"; // mandatory. match anything other than @ / : ? # or whitespace
const char* PORT_REGEX = "(:([0-9]{1,5}))?"; // after the : match 1 to 5 digits
const char* PATH_REGEX = "(/[^:#?\\s]*)?"; // after the / match anything other than : # ? or whitespace
const char* QUERY_REGEX = "(\\?(([^?;&#=]+=[^?;&#=]+)([;|&]([^?;&#=]+=[^?;&#=]+))*))?"; // after the ? match any number of x=y pairs, separated by & or ;
const char* FRAGMENT_REGEX = "(#([^#\\s]*))?"; // after the # match anything other than # or whitespace
// Matches i_uri against the expression assembled from the *_REGEX parts and,
// on success, stores scheme, user, host, port, path, query and fragment in the
// m_* members. Returns false, leaving the members untouched, when i_uri does
// not match.
bool parseUri(const std::string &i_uri)
{
    // assembled and compiled once, on the first call
    static const std::regex regExpr = [] {
        std::string pattern("^");
        pattern += SCHEME_REGEX;
        pattern += USER_REGEX;
        pattern += HOST_REGEX;
        pattern += PORT_REGEX;
        pattern += PATH_REGEX;
        pattern += QUERY_REGEX;
        pattern += FRAGMENT_REGEX;
        pattern += "$";
        return std::regex(pattern);
    }();

    std::smatch groups;
    const bool matched = std::regex_match(i_uri.cbegin(), i_uri.cend(), groups, regExpr);
    if (!matched)
        return false;

    // group numbers follow the parentheses of the concatenated expression
    m_scheme.assign(groups[2].first, groups[2].second);
    m_user.assign(groups[4].first, groups[4].second);
    m_host.assign(groups[5].first, groups[5].second);
    m_port.assign(groups[7].first, groups[7].second);
    m_path.assign(groups[8].first, groups[8].second);
    m_query.assign(groups[10].first, groups[10].second);
    m_fragment.assign(groups[15].first, groups[15].second);
    return true;
}
I added explanations for each part of the regular expression. This way allows you to choose exactly the relevant parts to parse for the URL that you're expecting to get. Just remember to change the desired regular expression group indices accordingly.
A small dependency you can use is uriparser, which recently moved to GitHub.
You can find a minimal example in their code: https://github.com/uriparser/uriparser/blob/63384be4fb8197264c55ff53a135110ecd5bd8c4/tool/uriparse.c
This will be more lightweight than Boost or Poco. The only catch is that it is C.
There is also a Buckaroo package:
buckaroo add github.com/buckaroo-pm/uriparser
I tried a couple of the solutions here, but then decided to write my own that could just be dropped into a project without any external dependencies (except c++17).
Right now, it passes all tests. But, if you find any cases that don't succeed, please feel free to create a Pull Request or an Issue.
I'll keep it up to date and improve its quality. Suggestions welcome! I'm also trying out this design to only have a single, high-quality class per repository so that the header and source can just be dropped into a project (as opposed to building a library or header-only). It appears to be working out well (I'm using git submodules and symlinks in my own projects).
https://github.com/homer6/url
You could try the open-source library called C++ REST SDK (created by Microsoft, distributed under the Apache License 2.0). It can be built for several platforms including Windows, Linux, OSX, iOS, Android). There is a class called web::uri where you put in a string and can retrieve individual URL components. Here is a code sample (tested on Windows):
#include <cpprest/base_uri.h>
#include <iostream>
#include <ostream>
web::uri sample_uri( L"http://dummyuser#localhost:7777/dummypath?dummyquery#dummyfragment" );
std::wcout << L"scheme: " << sample_uri.scheme() << std::endl;
std::wcout << L"user: " << sample_uri.user_info() << std::endl;
std::wcout << L"host: " << sample_uri.host() << std::endl;
std::wcout << L"port: " << sample_uri.port() << std::endl;
std::wcout << L"path: " << sample_uri.path() << std::endl;
std::wcout << L"query: " << sample_uri.query() << std::endl;
std::wcout << L"fragment: " << sample_uri.fragment() << std::endl;
The output will be:
scheme: http
user: dummyuser
host: localhost
port: 7777
path: /dummypath
query: dummyquery
fragment: dummyfragment
There are also other easy-to-use methods, e.g. to access individual attribute/value pairs from the query, split the path into components, etc.
If you use oatpp for web request handling, you can find its built-in URL parsing useful:
std::string url = /* ... */;
oatpp::String oatUrl(url.c_str(), url.size(), false);
oatpp::String oatHost = oatpp::network::Url::Parser::parseUrl(oatUrl).authority.host->toLowerCase();
std::string host(oatHost->c_str(), oatHost->getSize());
The above snippet retrieves the hostname. In a similar way:
oatpp::network::Url parsedUrl = oatpp::network::Url::Parser::parseUrl(oatUrl);
// parsedUrl.authority.port
// parsedUrl.path
// parsedUrl.scheme
// parsedUrl.queryParams
There is yet another library, https://snapwebsites.org/project/libtld, which handles all possible top-level domains and URI schemes.
I have developed an "object oriented" solution, one C++ class, that works with one regex like #Mr.Jones and #velcrow solutions. My Url class performs url/uri 'parsing'.
I think I improved velcrow regex to be more robust and includes also the username part.
Follows the first version of my idea, I have released the same code, improved, in my GPL3 licensed open source project Cpp URL Parser.
Omitted #ifdef/ndef bloat part, follows Url.h
#include <string>
#include <iostream>
#include <boost/regex.hpp>
using namespace std;
// Holds one URL and the components that the regular expression `ex`
// extracts from it. All members are public and can be read directly.
class Url {
public:
// Expression used by update() to split rawUrl; set by the default constructor.
boost::regex ex;
// The URL text most recently passed to the constructor or to update().
string rawUrl;
// Components filled in by update() when rawUrl matches `ex`.
string username;
string protocol;
string domain;
string port;
string path;
string query;
string fragment;
// Sets up `ex`; the components stay empty.
Url();
// Sets up `ex` and parses rawUrl.
Url(string &rawUrl);
// Parses rawUrl into the components and returns *this.
Url &update(string &rawUrl);
};
This is the code of the Url.cpp implementation file:
#include "Url.h"
// Sets up the expression that update() uses to split a URL. Capture groups:
//   1 protocol, 2 username, 3 domain, 4 port, 5 path, 6 query, 7 fragment.
//
// The user name ends with '@' (RFC 3986 user-info delimiter); it was written
// with '#', which is the fragment delimiter. The domain class excludes '/',
// otherwise it swallows the path whenever the URL has no port.
Url::Url() {
    this->ex = boost::regex("(ssh|sftp|ftp|smb|http|https):\\/\\/(?:([^@/?# ]*)@)?([^:/?# ]+)(?::(\\d+))?([^?# ]*)(?:\\?([^# ]*))?(?:#([^ ]*))?");
}
// Sets up the expression through the default constructor, then stores and
// parses rawUrl; update() does both.
Url::Url(std::string &rawUrl) : Url() {
    update(rawUrl);
}
// Stores rawUrl and splits it into protocol, username, domain, port, path,
// query and fragment. When rawUrl does not match the expression, every
// component is left empty, so a failed update can no longer be mistaken for
// the components of the previously parsed URL. Returns *this.
Url &Url::update(std::string &rawUrl) {
    this->rawUrl = rawUrl;

    // forget the components of the previous URL
    protocol.clear();
    username.clear();
    domain.clear();
    port.clear();
    path.clear();
    query.clear();
    fragment.clear();

    boost::cmatch what;
    if (regex_match(rawUrl.c_str(), what, ex)) {
        protocol = std::string(what[1].first, what[1].second);
        username = std::string(what[2].first, what[2].second);
        domain   = std::string(what[3].first, what[3].second);
        port     = std::string(what[4].first, what[4].second);
        path     = std::string(what[5].first, what[5].second);
        query    = std::string(what[6].first, what[6].second);
        fragment = std::string(what[7].first, what[7].second);
    }
    return *this;
}
Usage example:
string urlString = "http://gino#ciao.it:67/ciao?roba=ciao#34";
Url *url = new Url(urlString);
std::cout << " username: " << url->username << " URL domain: " << url->domain;
std::cout << " port: " << url->port << " protocol: " << url->protocol;
You can also update the Url object to represent (and parse) another URL:
url.update("http://gino#nuovociao.it:68/nuovociao?roba=ciaoooo#")
I'm not a full-time C++ developer, so I'm not sure I followed C++ best practices 100%.
Any tip is appreciated.
P.s: let's look at Cpp URL Parser, there are refinements there.
Have fun
simple solution to get the protocol, host, path
// Splits uri into protocol, host and path.
// Returns 0 on success and -1 when uri contains no "://".
// The three parts are local to this function; it only demonstrates the split.
// A URI without a path (e.g. "http://example.com") now yields an empty path;
// it used to throw std::out_of_range from substr(npos).
int url_get(const std::string& uri)
{
    //parse URI
    const std::size_t separator = uri.find("://");
    if (separator == std::string::npos)
    {
        return -1;
    }
    const std::size_t host_begin = separator + 3; //"://"
    const std::size_t path_begin = uri.find('/', host_begin);
    const bool has_path = (path_begin != std::string::npos);

    const std::string protocol = uri.substr(0, separator);
    const std::string host = has_path ? uri.substr(host_begin, path_begin - host_begin)
                                      : uri.substr(host_begin);
    const std::string path = has_path ? uri.substr(path_begin) : std::string();

    // the parts are not used any further in this example
    (void)protocol;
    (void)host;
    (void)path;
    return 0;
}