XML SAX parser using xerces in c++ - c++

#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <xercesc/util/XMLString.hpp>
#include <conio.h>
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <iostream>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
using namespace std;
using namespace xercesc;
class ErnstSax2Handler : public DefaultHandler
{
public:
ErnstSax2Handler(void);
virtual ~ErnstSax2Handler(void);
void startElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const Attributes& attrs
);
void endElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname
);
void characters(
const XMLCh* const chars,
const XMLSize_t length
);
void fatalError(const SAXParseException&);
protected:
// define variables to save the state
};
void ErnstSax2Handler::startElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const Attributes& attrs)
{
char* name = XMLString::transcode(localname);
cout<<name;
// ...
XMLString::release(&name);
}
void ErnstSax2Handler::endElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname)
{
char* name = XMLString::transcode(localname);
//...
XMLString::release(&name);
}
void ErnstSax2Handler::fatalError(const SAXParseException& exception)
{
char* message = XMLString::transcode(exception.getMessage());
cout << "Fatal Error: " << message
<< " at line: " << exception.getLineNumber()
<< endl;
XMLString::release(&message);
}
void ErnstSax2Handler::characters(
const XMLCh* const chars,
const XMLSize_t length
)
{
// This is called when the parser is reading text.
// You will need to save what state you are in via
// startElement / endElement.
}
int main () {
try {
XMLPlatformUtils::Initialize();
}
catch (const XMLException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Error during initialization! :\n";
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return 1;
}
char* xmlFile = "test.xml";
SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true); // optional
DefaultHandler* defaultHandler = new DefaultHandler();
xercesc::ContentHandler* h = new DefaultHandler();
parser->setContentHandler(h);
parser->setErrorHandler(defaultHandler);
try {
parser->parse(xmlFile);
}
catch (const XMLException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return -1;
}
catch (const SAXParseException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return -1;
}
catch (...) {
cout << "Unexpected Exception \n" ;
return -1;
}
getch();
delete parser;
delete defaultHandler;
return 0;
}
When I run above code in visual studio 2010, it won't print any element name and their content on the screen. Can anybody help me how to call any handler and how to get content from elements(nodes)?
Thanx in advance.

you are creating the default handlers and pass those to the parser which is not what you want to do as I guess:
DefaultHandler* defaultHandler = new DefaultHandler();
xercesc::ContentHandler* h = new DefaultHandler();
parser->setContentHandler(h);
parser->setErrorHandler(defaultHandler);
Instead you need to create your own classes:
DefaultHandler* defaultHandler = new ErnstSax2Handler();
xercesc::ContentHandler* h = new ErnstSax2Handler();
Hope this helps.

If someone would like to compile this code: you must remove the declaration of the default constructor from the class, or you must provide an implementation.
eg.
public:
//ErnstSax2Handler(void);
//virtual ~ErnstSax2Handler(void);

Related

Dynamically generate protobuf Message and return a pointer to it

First of all I'm not very experienced with C++, so maybe I'm overseeing something here.
I'm trying to dynamically generate protobuf Messages from .proto files with the following code:
int init_msg(const std::string & filename, protobuf::Arena* arena, protobuf::Message** new_msg){
using namespace google::protobuf;
using namespace google::protobuf::compiler;
DiskSourceTree source_tree;
source_tree.MapPath("file", filename);
MuFiErCo error_mist;
Importer imp(&source_tree, &error_mist);
printf("Lade Datei:%s \n", filename.c_str());
const FileDescriptor* f_desc = imp.Import("file");
const Descriptor* desc = f_desc->FindMessageTypeByName("TestNachricht");
const Message* new_msg_proto = dmf.GetPrototype(desc);
*new_msg = new_msg_proto->New(arena);
//Debug
cout << (*new_msg)->GetTypeName() << endl;
return 0;
}
int main(int argc, char* argv[]){
protobuf::Arena arena;
protobuf::Message *adr2, *adr1;
init_msg("schema-1.proto", &arena, &adr1);
init_msg("schema-1.proto", &arena, &adr2);
printf("MSG_Pointer: %p, %p\n", adr1, adr2);
cout << adr1->GetTypeName() << endl;
arena.Reset();
return 0;
}
I thought if i use Arena, the new Message is also available outside the scope of the function.
But there is always a segfault if i try to access the Message.
I guess it's a simple error, but I couldn't figure out, how to solve this.
Here is the ouput:
Lade Datei:schema-1.proto
packet.TestNachricht
Lade Datei:schema-1.proto
packet.TestNachricht
MSG_Pointer: 0x1b293b0, 0x1b287f0
Speicherzugriffsfehler (Speicherabzug geschrieben)
The problem, I think, is that FileDescriptor et al are destroyed when
init_msg returns, leaving the newly created message with no way to
interrogate its .proto definition. You'd need to move Importer
instance to main and keep it alive. This has nothing to do with
arenas. – Igor Tandetnik
That was the solution.
Here is some working example code
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <memory>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/message.h>
#include <google/protobuf/compiler/importer.h>
#include <google/protobuf/dynamic_message.h>
#include <google/protobuf/arena.h>
using namespace std;
using namespace google::protobuf;
class MuFiErCo : public compiler::MultiFileErrorCollector
{
public:
void AddError(const string & filename, int line, int column, const string & message){
printf("Err: %s\n", message.c_str());
}
void AddWarning(const string & filename, int line, int column, const string & message){
printf("Warn: %s\n", message.c_str());
}
};
compiler::Importer* init_proto_dir(Arena* arena, const std::string &root_dir){
using namespace compiler;
static DiskSourceTree source_tree;
source_tree.MapPath("", root_dir);
static MuFiErCo error_mist;
static Importer* imp = Arena::Create<Importer>(arena, &source_tree, &error_mist);
return imp;
}
void init_proto_def(compiler::Importer* imp, const std::string &proto_file){
using namespace compiler;
imp->Import(proto_file);
return;
}
Message* init_msg(compiler::Importer* imp, Arena* arena, const std::string &msg_name){
const DescriptorPool* pool = imp->pool();
static DynamicMessageFactory dmf;
const Descriptor* desc = pool->FindMessageTypeByName(msg_name);
const Message* msg_proto = dmf.GetPrototype(desc);
return msg_proto->New(arena);
}
int set_value(Message* msg, const char* value_name, unsigned long int value){
const Message::Reflection* reflec = msg->GetReflection();
const Descriptor* desc = msg->GetDescriptor();
const FieldDescriptor* fdesc = desc->FindFieldByName(value_name);
reflec->SetUInt64(msg, fdesc, value);
return 0;
}
int main(int argc, char* argv[]){
Arena arena;
compiler::Importer* imp = init_proto_dir(&arena, "");
init_proto_def(imp, "schema-1.proto");
Message* msg = init_msg(imp, &arena, "packet.TestNachricht");
set_value(msg, "zahl", 23434);
cout << msg->DebugString() << endl;
return 0;
}

C++ decode e-mail's subject

I've downloaded mails with Poco/Net/POP3ClientSession, I wanted to convert e-mail subject into human readable, so I tried to use neagoegab's solution from here:
https://stackoverflow.com/a/8104496/1350091
unfortunately it doesn't work:
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MailMessage.h>
#include <iostream>
#include <string>
using namespace std;
using namespace Poco::Net;
#include <iconv.h>
const size_t BUF_SIZE=1024;
class IConv {
iconv_t ic_;
public:
IConv(const char* to, const char* from)
: ic_(iconv_open(to,from)) { }
~IConv() { iconv_close(ic_); }
bool convert(char* input, char* output, size_t& out_size) {
size_t inbufsize = strlen(input)+1;
return iconv(ic_, &input, &inbufsize, &output, &out_size);
}
};
int main()
{
POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
cout << "id: " << messages[0].id << " size: " << messages[0].size << endl;
MailMessage message;
session.retrieveMessage(messages[0].id, message);
const string subject = message.getSubject();
cout << "Original subject: " << subject << endl;
IConv iconv_("UTF8","ISO-8859-2");
char from[BUF_SIZE];// "=?ISO-8859-2?Q?Re: M=F3j sen o JP II?=";
subject.copy(from, sizeof(from));
char to[BUF_SIZE] = "bye";
size_t outsize = BUF_SIZE;//you will need it
iconv_.convert(from, to, outsize);
cout << "converted: " << to << endl;
}
The output is:
id: 1 size: 2792
Original subject: =?ISO-8859-2?Q?Re: M=F3j sen o JP II?=
converted: =?ISO-8859-2?Q?Re: M=F3j sen o JP II?=
The interesting thing is that when I try to convert the subject with POCO it fails:
cout << "Encoded with POCO: " << MailMessage::encodeWord("Re: Mój sen o JP II", "ISO-8859-2") << endl; // output: Encoded with POCO: =?ISO-8859-2?q?Re=3A_M=C3=B3j_sen_o_JP_II?=
But the subject I want to receive is:
"Re: Mój sen o JP II"
The only succesfull way I found to convert the subject is:
https://docs.python.org/2/library/email.header.html#email.header.decode_header
So my question is -how to convert e-mail's subject in C++ into some format like UTF-8?
The relevant RFC to your situation is RFC 2047. That RFC specifies how non-ASCII data should be encoded in mail messages. The basic gist is that all bytes besides printable ASCII characters are escaped as an '=' character followed by two hexadecimal digits. Since "ó" is represented by the byte 0xF3 in ISO-8859-2, and 0xF3 is not a printable ASCII character, it is encoded as "=F3". You'll need to decode all of the encoded characters in your message.
I found out how to solve the problem (I'm not sure that it is 100% correct solution), but it looks like it is enough to use:
Poco::UTF8Encoding::convert to convert from characterCode to utf8:
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/UTF8Encoding.h>
#include <iostream>
#include <string>
using namespace std;
using namespace Poco::Net;
class EncoderLatin2
{
public:
EncoderLatin2(const string& encodedSubject)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
int charsetBeginPosition = strlen("=?");
int charsetEndPosition = encodedSubject.find("?", charsetBeginPosition);
charset = encodedSubject.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedSubject[encodingPosition];
if ("ISO-8859-2" != charset)
throw std::invalid_argument("Invalid encoding!");
const int lenghtOfEncodedText = encodedSubject.length() - encodingPosition-strlen("?=")-2;
extractedEncodedSubjectToConvert = encodedSubject.substr(encodingPosition+2, lenghtOfEncodedText);
}
string convert()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
private:
string charset;
char encoding;
Poco::UTF8Encoding encodingConverter;
string extractedEncodedSubjectToConvert;
};
int main()
{
POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
MessageHeader header;
MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
EncoderLatin2 encoder(subject);
cout << "Original subject: " << subject << endl;
cout << "Encoded: " << encoder.convert() << endl;
}
I found another solution, better than before.
Some e-mails subjects has different encodings, I noticed:
Latin2, encoded like: =?ISO-8859-2?Q?...?=
UTF-8 Base64 like:
=?utf-8?B?Wm9iYWN6Y2llIGNvIGRsYSBXYXMgcHJ6eWdvdG93YWxpxZtteSAvIHN0eWN6ZcWEIHcgTGFzZXJwYXJrdQ==?=
UTF-8 quoted printable like:
=?utf-8?Q?...?=
No encoding (if only ASCII characters) like: ...
So with POCO (Base64Decoder, Latin2Encoding, UTF8Encoding, QuotedPrintableDecoder) I managed to convert all the cases:
#include <iostream>
#include <string>
#include <sstream>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
if (encodedSubject.size() < 4)
return false;
if (0 != encodedSubject.find(sequenceBeginEncodedText))
return false;
const unsigned positionOfLastTwoCharacters = encodedSubject.size() - strlen(sequenceEndEncodedText);
return positionOfLastTwoCharacters == encodedSubject.rfind(sequenceEndEncodedText);
}
string decodeFromLatin2()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
static Poco::UTF8Encoding encodingConverter;
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
string decodeFromUtf8()
{
if('B' == toupper(encoding))
{
return decodeFromBase64();
}
else // if Q:
{
return decodeFromQuatedPrintable();
}
}
string decodeFromBase64()
{
istringstream is(extractedEncodedSubjectToConvert);
Poco::Base64Decoder e64(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(e64, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
string decodeFromQuatedPrintable()
{
replaceAll(extractedEncodedSubjectToConvert, "_", " ");
istringstream is(extractedEncodedSubjectToConvert);
Poco::Net::QuotedPrintableDecoder qp(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(qp, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
private:
string charset;
char encoding;
string extractedEncodedSubjectToConvert;
bool isStringEncoded;
static constexpr const char* sequenceBeginEncodedText = "=?";
static constexpr const char* sequenceEndEncodedText = "?=";
};
int main()
{
Poco::Net::POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
Poco::Net::POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
Poco::Net::MessageHeader header;
Poco::Net::MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
Encoder encoder(subject);
cout << "Original subject: " << subject << endl;
cout << "Encoded: " << encoder.convert() << endl;
}

std::vector segv because of an improbable corruption

I have a segmentation fault in the following code ( on _answers.push_back(tmp); ).
Gdb said
(gdb) p tmp
$7 = "HTTP/1.0 200 OK\r\nContent-Type: text/plain; charset=UTF-8\r\nSet-Cookie: color=black;path=/\r\nSet-Cookie: code=f69a2d941420d23be97bbb1ae963295647a91c4f3faf9c5fa80727399927d9d5;path=/\r\nSet-Cookie: game=c1e"...
(gdb) call _answers.size()
$8 = 271275648142580811
So I guess the array has been corrupted. But I don't know where it happened.
// Network.hpp
#pragma once
#include <utility>
#include <string>
#include <vector>
#include <boost/asio.hpp>
#include <boost/bind.hpp>
class Network
{
public:
Network(std::string const &, std::string const &);
~Network() {};
void init();
void connect();
void update();
void sendQuery(const std::string);
bool isConnected();
void reset();
std::string getAnswer();
void handleRead(const boost::system::error_code &, size_t);
void handleWrite(const boost::system::error_code &);
boost::asio::io_service _io_service;
private:
boost::asio::ip::tcp::resolver _resolver;
boost::asio::ip::tcp::socket _sock;
boost::asio::ip::tcp::resolver::iterator _it;
char _buff[2048];
std::vector<std::string> _answers;
std::string const &_host;
std::string const &_port;
bool _answered;
};
// Network.cpp
Network::Network(std::string const &host, std::string const &port) : _resolver(_io_service), _sock(_io_service), _host(host), _port(port), _answered(true) {}
void Network::connect() {
_answers.reserve(2048);
boost::asio::ip::tcp::resolver::query query(_host, _port);
boost::asio::ip::tcp::resolver::iterator iterator = _resolver.resolve(query);
boost::asio::connect(_sock.lowest_layer(), iterator);
}
void Network::handleRead(const boost::system::error_code &err, size_t bread) {
_answered = true;
if (err && err.value() != 2)
throw Gomoku::NetworkException(err.message());
if (bread > 0) {
std::string tmp(_buff);
_answers.push_back(tmp);
}
memset(_buff, 0, 2048);
}
void Network::handleWrite(const boost::system::error_code &err) {
if (err)
throw Gomoku::NetworkException(err.message());
}
void Network::reset() {
_io_service.poll();
_io_service.reset();
_answers.clear();
_answered = true;
}
void Network::sendQuery(const std::string req) {
_io_service.poll();
_io_service.reset();
if (_answered == 0)
return;
_answered = false;
connect();
const char *str = new char[req.length()];
str = req.c_str();
boost::asio::async_write(_sock, boost::asio::buffer(str, req.length()), boost::bind(&Network::handleWrite, this, boost::asio::placeholders::error));
boost::asio::async_read(_sock, boost::asio::buffer(_buff, 2048), boost::bind(&Network::handleRead, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
std::string Network::getAnswer() {
if (_answers.empty())
return "";
std::string tmp = _answers.back();
_answers.pop_back();
return tmp;
}
// Player.cpp
Player::Player(std::string const &host, std::string const &port) : _network(host, port) {
_myTurn = false;
_whiteScore = _blackScore = 0;
_host = host + ":" + port;
initMap();
}
void Player::connect() {
std::string str = "GET /players/connect/ HTTP/1.0\r\nHost: " + _host + "\r\nAccept: */*\r\n\r\n";
_network.sendQuery(str);
}
void Player::sendClick(std::pair<int, int> click, std::string const &header) {
std::stringstream ss;
ss << "POST /game/play/" << click.first << "/" << click.second << header << _cookie << "\r\n\r\n";
std::string req = ss.str();
_network.sendQuery(req);
_network._io_service.run();
_network._io_service.reset();
std::string ans = _network.getAnswer();
parseAnswer(ans);
req = "GET /game/map.txt" + header + _cookie + "\r\n\r\n";
_network.sendQuery(req);
}
I also saw the following code by following the segv trace (basic_string.h:400):
: _M_dataplus(_M_local_data(), __str._M_get_allocator()) // TODO A traits
At first glance: std::string tmp(_buff); is wrong, because _buff may not be null terminated, thus reading 271275648142580811 bytes into memory.
Additionally, sendQuery's parameter is a local string, so as soon as the function exits, the string is free'd, and async_write continues to read from invalid memory, which is undefined behavior. Anything can happen.
Also, all requests use the same incoming buffer, so if multiple occur at the same time, you get undefined or useless behavior.

xml file validation with in-memory schema in xerces c++

#include <xercesc/framework/Wrapper4InputSource.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOMLSParser.hpp>
#include <xercesc/dom/DOMImplementation.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/sax/ErrorHandler.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/sax/Parser.hpp>
#include <xercesc/validators/common/Grammar.hpp>
class CErrorHandler : public xercesc::DefaultHandler
{
public:
CErrorHandler();
virtual ~CErrorHandler();
void startElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const XERCES_CPP_NAMESPACE::Attributes& attrs
);
void endElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname
);
void characters(const XMLCh* const chars,
const XMLSize_t length
);
void fatalError(const xercesc::SAXParseException&);
};
CErrorHandler::CErrorHandler()
{
}
CErrorHandler::~CErrorHandler()
{
}
void CErrorHandler::startElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const xercesc::Attributes& attrs)
{
char* name = xercesc::XMLString::transcode(localname);
std::cout << name;
xercesc::XMLString::release(&name);
}
void CErrorHandler::endElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname)
{
char* name = xercesc::XMLString::transcode(localname);
xercesc::XMLString::release(&name);
}
void CErrorHandler::fatalError(const xercesc::SAXParseException& exception)
{
char* message = xercesc::XMLString::transcode(exception.getMessage());
std::cout << "Error: " << message << " at line: " << exception.getLineNumber() << std::endl;
xercesc::XMLString::release(&message);
}
void CErrorHandler::characters(const XMLCh* const chars,
const XMLSize_t length
)
{
}
bool validateSchema()
{
std::string XSD_SHEMA ="<?xml version=\"1.0\" encoding=\"UTF-8\" ?>...";
try
{
xercesc::XMLPlatformUtils::Initialize();
}
catch (const XERCES_CPP_NAMESPACE::XMLException& toCatch)
{
char* message = xercesc::XMLString::transcode(toCatch.getMessage());
std::cout << "Error during initialization!" << std::endl;
std::cout << "Exception message is: " << message;
XERCES_CPP_NAMESPACE::XMLString::release(&message);
return false;
}
xercesc::SAX2XMLReader* parser = xercesc::XMLReaderFactory::createXMLReader();
parser->setFeature( xercesc::XMLUni::fgSAX2CoreValidation, true);
parser->setFeature( xercesc::XMLUni::fgSAX2CoreNameSpaces, true);
xercesc::DefaultHandler* defaultHandler = new CErrorHandler();
xercesc::ContentHandler* h = new CErrorHandler();
xercesc::MemBufInputSource mis(reinterpret_cast< const XMLByte* >(XSD_SHEMA.c_str() ), XSD_SHEMA.size (), "/schema.xsd");
xercesc::Wrapper4InputSource wmis (&mis, false);
parser->loadGrammar (&wmis, xercesc::Grammar::SchemaGrammarType, true);
parser->setFeature (xercesc::XMLUni::fgXercesUseCachedGrammarInParse, true);
void* id = (void*)("file:///schema.xsd");
parser->setProperty (xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, id);
parser->setContentHandler(h);
parser->setErrorHandler(defaultHandler);
try
{
parser->parse(mXMLFilePath.c_str());
}
catch (const xercesc::XMLException& toCatch)
{
char* message = xercesc::XMLString::transcode(toCatch.getMessage());
std::cout << "Exception message is: "<< message << std::endl;;
xercesc::XMLString::release(&message);
return false;
}
catch (const xercesc::SAXParseException& toCatch)
{
char* message = xercesc::XMLString::transcode(toCatch.getMessage());
std::cout << "Exception message is: " << message << std::endl;;
xercesc::XMLString::release(&message);
return false;
}
catch (...)
{
std::cout << "Unexpected Exception" ;
return false;
}
delete parser;
delete defaultHandler;
return true;
}
I am trying to validate xml file with path mXMLFilePath and xsd schema in string XSD_SHEMA in c++ with Xerces lib.
I created CErrorHandler class and initialized it, set schema nolocation parameter for not setting in xml path to schema.
It build`s, but not work. Have somebody any ideas?
The Xerces library (for both parsing and loading a grammar) can handle input sources (aka classes implementing the InputSource interface). MemBufInputSource would be the class for cases when something exists only in-memory.
XMLPlatformUtils::Initialize();
XercesDOMParser* domParser;
domParser = new XercesDOMParser();
char *str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" elementFormDefault=\"qualified\" attributeFormDefault=\"unqualified\"> \r\n </xs:schema>";
std::string strContent = str;
xercesc::MemBufInputSource pMemBufIS((XMLByte*)strContent.c_str(),
strContent.size(), "xsd");
if (domParser->loadGrammar(pMemBufIS, Grammar::SchemaGrammarType) == NULL)
{
fprintf(stderr, "couldn't load schema\n");
return false;
}
domParser->setValidationScheme(XercesDOMParser::Val_Auto);
domParser->setDoNamespaces(true);
domParser->setDoSchema(true);
domParser->setValidationConstraintFatal(true);
domParser->setExternalNoNamespaceSchemaLocation("C:\\User\\b.xsd");
domParser->setValidationConstraintFatal(true);
domParser->parse("file.xml");
if (domParser->getErrorCount() == 0)
printf("XML file validated against the schema successfully\n");
else
printf("XML file doesn't conform to the schema\n");
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/sax/ErrorHandler.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/validators/common/Grammar.hpp>
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/util/XMLString.hpp>
#include <string>
#include <iostream>
class CErrorHandler : public xercesc::ErrorHandler
{
public:
/** Warning message method */
void warning(const xercesc::SAXParseException& ex);
/** Error message method */
void error(const xercesc::SAXParseException& ex);
/** Fatal error message method */
void fatalError(const xercesc::SAXParseException& ex);
/** Errors resetter method */
void resetErrors();
private:
/** Based message reporter method */
void reportParseException(const xercesc::SAXParseException& ex);
};
void CErrorHandler::reportParseException(const xercesc::SAXParseException& ex)
{
char* message = xercesc::XMLString::transcode(ex.getMessage());
std::cout << message << " at line " << ex.getLineNumber() << " column " << ex.getColumnNumber() << std::endl;
xercesc::XMLString::release(&message);
}
void CErrorHandler::warning(const xercesc::SAXParseException& ex)
{
reportParseException(ex);
}
void CErrorHandler::error(const xercesc::SAXParseException& ex)
{
reportParseException(ex);
}
void CErrorHandler::fatalError(const xercesc::SAXParseException& ex)
{
reportParseException(ex);
}
void CErrorHandler::resetErrors()
{
}
class CXmlValidator
{
public:
/** Constructor method */
CXmlValidator();
/** Xml file setter method */
void setFilePath(const std::string &filePath);
/** Destructor method */
~CXmlValidator();
/** Xml file with schema validation method */
bool validateSchema();
private:
/** Xml file */
std::string mXMLFilePath;
};
CXmlValidator::CXmlValidator():
mXMLFilePath("")
{
}
CXmlValidator::~CXmlValidator()
{
}
void CXmlValidator::setFilePath(const std::string &filePath)
{
mXMLFilePath = filePath;
}
bool CXmlValidator::validateSchema()
{
std::cout << std::endl;
xercesc::XMLPlatformUtils::Initialize();
std::string xsdFile = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>...";
xercesc::SAX2XMLReader *parser = xercesc::XMLReaderFactory::createXMLReader();
xercesc::ErrorHandler *handler = new CErrorHandler();
xercesc::MemBufInputSource inMemorySchemaSource(reinterpret_cast<const XMLByte*>(xsdFile.c_str()), xsdFile.size (), "/schema.xsd");
parser->loadGrammar(inMemorySchemaSource, xercesc::Grammar::SchemaGrammarType, true);
parser->setFeature(xercesc::XMLUni::fgXercesUseCachedGrammarInParse, true);
parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, true);
parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpaces, true);
parser->setProperty(xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, const_cast<void*>(static_cast<const void*>("")));
parser->setErrorHandler(handler);
parser->parse("file.xml");
if (parser->getErrorCount() != 0)
{
std::cout << "ERROR: XML file '" << mXMLFilePath << "' not confirm to the schema" << std::endl;
return false;
}
else
{
return true;
}
}
Here is correct realizations of error handler and validator classes, if somebody will need them

Reading xml attributes' value using xerces

I have a C++ program that uses xerces library to write data into an xml file. Can anyone help me out in parsing(reading) the attribute values from the xml file using the same xerces library?
I got the below program that is said to serve the purpose:
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <xercesc/util/XMLString.hpp>
#include <conio.h>
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <iostream>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
using namespace std;
using namespace xercesc;
class ErnstSax2Handler : public DefaultHandler
{
public:
ErnstSax2Handler(void);
virtual ~ErnstSax2Handler(void);
void startElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const Attributes& attrs
);
void endElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname
);
void characters(
const XMLCh* const chars,
const XMLSize_t length
);
void fatalError(const SAXParseException&);
protected:
// define variables to save the state
};
void ErnstSax2Handler::startElement(const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname,
const Attributes& attrs)
{
char* name = XMLString::transcode(localname);
cout<<name;
// ...
XMLString::release(&name);
}
void ErnstSax2Handler::endElement(
const XMLCh* const uri,
const XMLCh* const localname,
const XMLCh* const qname)
{
char* name = XMLString::transcode(localname);
//...
XMLString::release(&name);
}
void ErnstSax2Handler::fatalError(const SAXParseException& exception)
{
char* message = XMLString::transcode(exception.getMessage());
cout << "Fatal Error: " << message
<< " at line: " << exception.getLineNumber()
<< endl;
XMLString::release(&message);
}
void ErnstSax2Handler::characters(
const XMLCh* const chars,
const XMLSize_t length
)
{
// This is called when the parser is reading text.
// You will need to save what state you are in via
// startElement / endElement.
}
int main () {
try {
XMLPlatformUtils::Initialize();
}
catch (const XMLException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Error during initialization! :\n";
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return 1;
}
char* xmlFile = "test.xml";
SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true); // optional
DefaultHandler* defaultHandler = new DefaultHandler();
xercesc::ContentHandler* h = new DefaultHandler();
parser->setContentHandler(h);
parser->setErrorHandler(defaultHandler);
try {
parser->parse(xmlFile);
}
catch (const XMLException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return -1;
}
catch (const SAXParseException& toCatch) {
char* message = XMLString::transcode(toCatch.getMessage());
cout << "Exception message is: \n"
<< message << "\n";
XMLString::release(&message);
return -1;
}
catch (...) {
cout << "Unexpected Exception \n" ;
return -1;
}
//getch();
delete parser;
delete defaultHandler;
return 0;
}
But ended up getting a lot of errors like this:
error LNK2019: unresolved external symbol "__declspec(dllimport) public: static void __cdecl xercesc_3_1::XMLString::release(char * *,class xercesc_3_1::MemoryManager * const)" (_imp?release#XMLString#xercesc_3_1##SAXPAPADQAVMemoryManager#2##Z) referenced in function "public: virtual void __thiscall ErnstSax2Handler::startElement(wchar_t const * const,wchar_t const * const,wchar_t const * const,class xercesc_3_1::Attributes const &)" (?startElement#ErnstSax2Handler##UAEXQB_W00ABVAttributes#xercesc_3_1###Z) XmlRead.obj
error LNK2019: unresolved external symbol "__declspec(dllimport) public: static void __cdecl xercesc_3_1::XMLString::release(char * *,class xercesc_3_1::MemoryManager * const)" (_imp?release#XMLString#xercesc_3_1##SAXPAPADQAVMemoryManager#2##Z) referenced in function "public: virtual void __thiscall ErnstSax2Handler::startElement(wchar_t const * const,wchar_t const * const,wchar_t const * const,class xercesc_3_1::Attributes const &)" (?startElement#ErnstSax2Handler##UAEXQB_W00ABVAttributes#xercesc_3_1###Z) XmlRead.obj
.
Can anyone suggest me a different program or a fix to the above program?
I had encountered the same error while working with Xerces Sax Parser.
I was able to resolve the issue by specifying the additional dependencies properly.
You need to specify Xerces(both library as well as the header)as additional dependencies in your project.This will hopefully solve the issue..
Also in your code,you have created the Default Handler,i.e
DefaultHandler* defaultHandler = new DefaultHandler();
xercesc::ContentHandler* h = new DefaultHandler();
Instead you have to create the instance of your handler i.e
DefaultHandler* defaultHandler = new ErnstSax2Handler();
xercesc::ContentHandler* h = new ErnstSax2Handler();
whenever the parser encounters the Start tag or content tag or end tag(or any other),the instance of your handler will be created and functions in your handler will be executed.
Useful Link: http://www.onlamp.com/pub/a/onlamp/2005/11/10/xerces_sax.html