I need to parse a date-time string like 2012-12-21 12:10:35 into a time_t value using boost::spirit. here is my code snippet:
tc_ = lexeme[int_[phx::ref(tm_.tm_year)=(_1-1900)]>>'-'
>>int_[phx::ref(tm_.tm_mon)=(_1-1)]>>'-'
>>int_[phx::ref(tm_.tm_mday)=_1]>>+space
>>int_[phx::ref(tm_.tm_hour)=_1]>>':'
>>int_[phx::ref(tm_.tm_min)=_1]>>':'
>>int_[phx::ref(tm_.tm_sec)=_1]] [_val = (long)mktime(&tm_)];
where tc_ is a qi rule of type: qi::rule<Iterator, long(), Skipper>, tm_ is a member variable of type struct tm.
The code compiles, but doesn't work. it seems that mktime() didn't get called at all. what am I doing wrong?
you can do it in c++ 11 vi the regex.
This will be portable and standard, if your compiler is recent enough.
#include <iostream>
#include <string>
#include <regex>
using namespace std;
int main()
{
std::regex txt_regex("([0-9]{4})[-]([0-9]{2})[-]([0-9]{2})[ ]([0-9]{2})([:])([0-9]{2})([:])([0-9]{2})");//
string strTmp;
strTmp="2010-12-15 15:25:46";
std::smatch match;
std::regex_search( strTmp, match, txt_regex );
if(regex_match(strTmp,txt_regex))
cout<<"Ok"<<endl;
else
{
cout<<"Invalid input"<<endl;
return 0;
}
if ( match.empty() )
{
std::cout << "...no more matches" << std::endl;
return 0;
}
for ( auto x : match )
{
std::cout << "found: " << x << std::endl;
}
string str = match.suffix().str();
cout <<str <<std::endl;
return 0;
}
With this you can display the different parts of the string to display and then fill the structure.
Hope it helps and open for comments as usual (if something is not clear or incomplete).
Related
Extraction Function
string extractStr(string str, string regExpStr) {
regex regexp(regExpStr);
smatch m;
regex_search(str, m, regexp);
string result = "";
for (string x : m)
result = result + x;
return result;
}
The Main Code
#include <iostream>
#include <regex>
using namespace std;
string extractStr(string, string);
int main(void) {
string test = "(1+1)*(n+n)";
cout << extractStr(test, "n\\+n") << endl;
cout << extractStr(test, "(\\d)\\+\\1") << endl;
cout << extractStr(test, "([a-zA-Z])[+-/*]\\1") << endl;
cout << extractStr(test, "([a-zA-Z])[+-/*]([a-zA-Z])") << endl;
return 0;
}
The Output
String = (1+1)*(n+n)
n\+n = n+n
(\d)\+\1 = 1+11
([a-zA-Z])[+-/*]\1 = n+nn
([a-zA-Z])[+-/*]([a-zA-Z]) = n+nnn
If anyone could kindly point the error I've done or point me to a similar question in SO that I've missed while searching, it would be greatly appreciated.
Regexes in C++ don't work quite like "normal" regexes. Specialy when you are looking for multiple groups later. I also have some C++ tips in here (constness and references).
#include <cassert>
#include <iostream>
#include <sstream>
#include <regex>
#include <string>
// using namespace std; don't do this!
// https://stackoverflow.com/questions/1452721/why-is-using-namespace-std-considered-bad-practice
// pass strings by const reference
// 1. const, you promise not to change them in this function
// 2. by reference, you avoid making copies
std::string extractStr(const std::string& str, const std::string& regExpStr)
{
std::regex regexp(regExpStr);
std::smatch m;
std::ostringstream os; // streams are more efficient for building up strings
auto begin = str.cbegin();
bool comma = false;
// C++ matches regexes in parts so work you need to loop
while (std::regex_search(begin, str.end(), m, regexp))
{
if (comma) os << ", ";
os << m[0];
comma = true;
begin = m.suffix().first;
}
return os.str();
}
// small helper function to produce nicer output for your tests.
void test(const std::string& input, const std::string& regex, const std::string& expected)
{
auto output = extractStr(input, regex);
if (output == expected)
{
std::cout << "test succeeded : output = " << output << "\n";
}
else
{
std::cout << "test failed : output = " << output << ", expected : " << expected << "\n";
}
}
int main(void)
{
std::string input = "(1+1)*(n+n)";
test(input, "n\\+n", "n+n");
test(input, "(\\d)\\+\\1", "1+1");
test(input, "([a-zA-Z])[+-/*]\\1", "n+n");
return 0;
}
I'm getting myself familiarized with boost spirit v3. The question I want to ask is how to state the fact that you don't want to use skip parser in any way.
Consider a simple example of parsing comma-separated sequence of integers:
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/home/x3.hpp>
int main()
{
using namespace boost::spirit::x3;
const std::string input{"2,4,5"};
const auto parser = int_ % ',';
std::vector<int> numbers;
auto start = input.cbegin();
auto r = phrase_parse(start, input.end(), parser, space, numbers);
if(r && start == input.cend())
{
// success
for(const auto &item: numbers)
std::cout << item << std::endl;
return 0;
}
std::cerr << "Input was not parsed successfully" << std::endl;
return 1;
}
This works totally fine. However, I would like to forbid having spaces in between (i.e. "2, 4,5" should not be parsed well).
I tried using eps as a skip parser in phrase_parse, but as you can guess, the program ended up in the infinite loop because eps matches to an empty string.
Solution I found is to use no_skip directive (https://www.boost.org/doc/libs/1_75_0/libs/spirit/doc/html/spirit/qi/reference/directive/no_skip.html). So the parser now becomes:
const auto parser = no_skip[int_ % ','];
This works fine, but I don't find it to be an elegant solution (especially providing "space" parser in phrase_parse when I want no whitespace skips). Are there no skip parsers that would simply do nothing? Am I missing something?
Thanks for Your time. Looking forward to any replies.
You can use either no_skip[] or lexeme[]. They're almost identical, except for pre-skip (Boost Spirit lexeme vs no_skip).
Are there no skip parsers that would simply do nothing? Am I missing something?
A wild guess, but you might be missing the parse API that doesn't accept a skipper in the first place
Live On Coliru
#include <iostream>
#include <iomanip>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
int main() {
std::string const input{ "2,4,5" };
auto f = begin(input), l = end(input);
const auto parser = x3::int_ % ',';
std::vector<int> numbers;
auto r = parse(f, l, parser, numbers);
if (r) {
// success
for (const auto& item : numbers)
std::cout << item << std::endl;
} else {
std::cerr << "Input was not parsed successfully" << std::endl;
return 1;
}
if (f!=l) {
std::cout << "Remaining input " << std::quoted(std::string(f,l)) << "\n";
return 2;
}
}
Prints
2
4
5
Below is my following code
#include <iostream>
#include <stdlib.h>
#include <boost/regex.hpp>
#include <string>
using namespace std;
using namespace boost;
int main() {
std::string s = "Hello my name is bob";
boost::regex re("name");
boost::cmatch matches;
try{
// if (boost::regex_match(s.begin(), s.end(), re))
if (boost::regex_match(s.c_str(), matches, re)){
cout << matches.size();
// matches[0] contains the original string. matches[n]
// contains a sub_match object for each matching
// subexpression
for (int i = 1; i < matches.size(); i++){
// sub_match::first and sub_match::second are iterators that
// refer to the first and one past the last chars of the
// matching subexpression
string match(matches[i].first, matches[i].second);
cout << "\tmatches[" << i << "] = " << match << endl;
}
}
else{
cout << "No Matches(" << matches.size() << ")\n";
}
}
catch (boost::regex_error& e){
cout << "Error: " << e.what() << "\n";
}
}
It always outputs with no Matches.
Im sure that regex should work.
I used this example
http://onlamp.com/pub/a/onlamp/2006/04/06/boostregex.html?page=3
from boost regex:
Important
Note that the result is true only if the expression matches the whole of the input sequence. If you want to search for an expression somewhere within the sequence then use regex_search. If you want to match a prefix of the character string then use regex_search with the flag match_continuous set.
Try boost::regex re("(.*)name(.*)"); if you want to use the expression with regex_match.
I have some difficulties in understanding if-then-else conditionals in regular expressions.
After reading If-Then-Else Conditionals in Regular Expressions I decided to write a simple test. I use C++, Boost 1.38 Regex and MS VC 8.0.
I have written this program:
#include <iostream>
#include <string>
#include <boost/regex.hpp>
int main()
{
std::string str_to_modify = "123";
//std::string str_to_modify = "ttt";
boost::regex regex_to_search ("(\\d\\d\\d)");
std::string regex_format ("(?($1)$1|000)");
std::string modified_str =
boost::regex_replace(
str_to_modify,
regex_to_search,
regex_format,
boost::match_default | boost::format_all | format_no_copy );
std::cout << modified_str << std::endl;
return 0;
}
I expected to get "123" if str_to_modify has "123" and to get "000" if I str_to_modify has "ttt". However I get ?123123|000 in the first case and nothing in second one.
Coluld you tell me, please, what is wrong with my test?
The second example that still doesn't work :
#include <iostream>
#include <string>
#include <boost/regex.hpp>
int main()
{
//std::string str_to_modify = "123";
std::string str_to_modify = "ttt";
boost::regex regex_to_search ("(\\d\\d\\d)");
std::string regex_format ("(?1foo:bar");
std::string modified_str =
boost::regex_replace(str_to_modify, regex_to_search, regex_format,
boost::match_default | boost::format_all | boost::format_no_copy );
std::cout << modified_str << std::endl;
return 0;
}
I think the format string should be (?1$1:000) as described in the Boost.Regex docs.
Edit: I don't think regex_replace can do what you want. Why don't you try the following instead? regex_match will tell you whether the match succeeded (or you can use match[i].matched to check whether the i-th tagged sub-expression matched). You can format the match using the match.format member function.
#include <iostream>
#include <string>
#include <boost/regex.hpp>
int main()
{
boost::regex regex_to_search ("(\\d\\d\\d)");
std::string str_to_modify;
while (std::getline(std::cin, str_to_modify))
{
boost::smatch match;
if (boost::regex_match(str_to_modify, match, regex_to_search))
std::cout << match.format("foo:$1") << std::endl;
else
std::cout << "error" << std::endl;
}
}
From the following code, I expect to get this output from the corresponding input:
Input: FOO Output: Match
Input: FOOBAR Output: Match
Input: BAR Output: No Match
Input: fOOBar Output: No Match
But why it gives "No Match" for input FOOBAR?
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <boost/regex.hpp>
using namespace std;
using namespace boost;
int main ( int arg_count, char *arg_vec[] ) {
if (arg_count !=2 ) {
cerr << "expected one argument" << endl;
return EXIT_FAILURE;
}
string InputString = arg_vec[1];
string toMatch = "FOO";
const regex e(toMatch);
if (regex_match(InputString, e,match_partial)) {
cout << "Match" << endl;
} else {
cout << "No Match" << endl;
}
return 0;
}
Update:
Finally it works with the following approach:
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <boost/regex.hpp>
using namespace std;
using namespace boost;
bool testSearchBool(const boost::regex &ex, const string st) {
cout << "Searching " << st << endl;
string::const_iterator start, end;
start = st.begin();
end = st.end();
boost::match_results<std::string::const_iterator> what;
boost::match_flag_type flags = boost::match_default;
return boost::regex_search(start, end, what, ex, flags);
}
int main ( int arg_count, char *arg_vec[] ) {
if (arg_count !=2 ) {
cerr << "expected one argument" << endl;
return EXIT_FAILURE;
}
string InputString = arg_vec[1];
string toMatch = "FOO*";
static const regex e(toMatch);
if ( testSearchBool(e,InputString) ) {
cout << "MATCH" << endl;
}
else {
cout << "NOMATCH" << endl;
}
return 0;
}
Use regex_search instead of regex_match.
Your regular expression has to account for characters at the beginning and end of the sub-string "FOO".
I'm not sure but "FOO*" might do the trick
match_partial would only return true if the partial string was found at the end of the text input, not the beginning.
A partial match is one that matched
one or more characters at the end of
the text input, but did not match all
of the regular expression (although it
may have done so had more input been
available)
So FOOBAR matched with "FOO" would return false.
As the other answer suggests, using regex.search would allow you to search for sub-strings more effectively.