I've got the following code:
// Assigns a TokenType to every token in `tokens`, based on the token's Codepoints.
std::for_each(tokens.begin(), tokens.end(), [&](Token& t) {
// Lookup table from token text to TokenType. It is a function-local static
// initialised from an immediately-invoked lambda.
static const std::unordered_map<std::wstring, Wide::Lexer::TokenType> mapping([]() -> std::unordered_map<std::wstring, Wide::Lexer::TokenType>
{
// Maps strings to TokenType enumerated values
std::unordered_map<std::wstring, Wide::Lexer::TokenType> result;
// RESERVED WORD
result[L"namespace"] = Wide::Lexer::TokenType::Namespace;
result[L"for"] = Wide::Lexer::TokenType::For;
result[L"while"] = Wide::Lexer::TokenType::While;
result[L"do"] = Wide::Lexer::TokenType::Do;
result[L"type"] = Wide::Lexer::TokenType::Type;
// PUNCTUATION
result[L"{"] = Wide::Lexer::TokenType::OpenCurlyBracket;
result[L"}"] = Wide::Lexer::TokenType::CloseCurlyBacket;
return result;
}());
// Known text: take the mapped type and stop processing this token.
if (mapping.find(t.Codepoints) != mapping.end()) {
t.type = mapping.find(t.Codepoints)->second;
return;
}
// Anything that is not in the table is classified as an identifier.
t.type = Wide::Lexer::TokenType::Identifier; // line 121
});
This iterates through a list of tokens and, judging by the contents of each token's codepoints, assigns it a value from the associated enum. If no match is found, the token is given the value "Identifier". But this fails to compile.
1>Lexer.cpp(121): error C2065: '__this' : undeclared identifier
1>Lexer.cpp(121): error C2227: left of '->Identifier' must point to class/struct/union/generic type
This is the full error, no warnings, no other errors. What? How can I fix this error?
Edit: I did some significant refactoring, and I've got the exact same problem in a somewhat simpler lambda.
// Finishes the token being accumulated in `current`: classifies it, appends it
// to `tokens`, and resets `current` to a default-constructed Token.
auto end_current_token = [&] {
// Nothing to do while `current` still equals a default-constructed Token.
if (current != Wide::Lexer::Token()) {
// Default classification; the two lookups below may override it.
current.type = Wide::Lexer::TokenType::Identifier; // error line
if (reserved_words.find(current.Codepoints) != reserved_words.end())
current.type = reserved_words.find(current.Codepoints)->second;
// The punctuation lookup is keyed on the first code point only and runs
// last, so it wins when both tables match.
if (punctuation.find(current.Codepoints[0]) != punctuation.end())
current.type = punctuation.find(current.Codepoints[0])->second;
tokens.push_back(current);
current = Wide::Lexer::Token();
}
};
I've cleaned and rebuilt the project.
I fixed the problem.
// Finishes the token being accumulated in `current`: classifies it, appends it
// to `tokens`, and resets `current` to a default-constructed Token.
// This variant carries a workaround for the C2065/C2227 errors reported on the
// Identifier assignment.
auto end_current_token = [&] {
// Nothing to do while `current` still equals a default-constructed Token.
if (current != Wide::Lexer::Token()) {
// WORKAROUND compiler bug- dead code
// The compiler complained that `__this` was undeclared and that the left of
// `->Identifier` was not a class type. Declaring a local pointer named
// `__this` to a struct with an `Identifier` member satisfies both complaints.
// NOTE(review): `__this` contains a double underscore, which makes it an
// identifier reserved for the implementation.
struct bug_workaround_type {
int Identifier;
};
bug_workaround_type bug;
bug_workaround_type* __this = &bug;
// Default classification; the two lookups below may override it.
current.type = Wide::Lexer::TokenType::Identifier;
if (reserved_words.find(current.Codepoints) != reserved_words.end())
current.type = reserved_words.find(current.Codepoints)->second;
// The punctuation lookup is keyed on the first code point only and runs
// last, so it wins when both tables match.
if (punctuation.find(current.Codepoints[0]) != punctuation.end())
current.type = punctuation.find(current.Codepoints[0])->second;
tokens.push_back(current);
current = Wide::Lexer::Token();
}
};
No, really. Now it compiles and runs just fine.
FWIW I tried to concoct a minimal working sample in order to compile on VS2010 and compiled the following without error.
#include <string>
#include <vector>
#include <algorithm>
#include <unordered_map>
namespace Wide {
namespace Lexer {

/// Categories a lexed token can be assigned to.
/// The numeric values are spelled out; they match the implicit numbering
/// that declaration order alone would give.
enum TokenType
{
    OpenCurlyBracket = 0,
    CloseCurlyBacket = 1,
    Namespace        = 2,
    For              = 3,
    While            = 4,
    Do               = 5,
    Type             = 6,
    Identifier       = 7
};

} // namespace Lexer
} // namespace Wide
// Minimal stand-in for the lexer's token: the token text plus its classification.
struct Token
{
// Text of the token.
std::wstring Codepoints;
// Category assigned to the token; left uninitialised by default construction.
Wide::Lexer::TokenType type;
};
// Attempted minimal reproduction: runs the classification lambda from the
// question over an (empty) vector of tokens.
int main()
{
std::vector<Token> tokens;
std::for_each(tokens.begin(), tokens.end(), [&](Token& t) {
// Lookup table from token text to TokenType. It is a function-local static
// initialised from an immediately-invoked lambda.
static const std::unordered_map<std::wstring, Wide::Lexer::TokenType> mapping([]() -> std::unordered_map<std::wstring, Wide::Lexer::TokenType>
{
// Maps strings to TokenType enumerated values
std::unordered_map<std::wstring, Wide::Lexer::TokenType> result;
// RESERVED WORD
result[L"namespace"] = Wide::Lexer::TokenType::Namespace;
result[L"for"] = Wide::Lexer::TokenType::For;
result[L"while"] = Wide::Lexer::TokenType::While;
result[L"do"] = Wide::Lexer::TokenType::Do;
result[L"type"] = Wide::Lexer::TokenType::Type;
// PUNCTUATION
result[L"{"] = Wide::Lexer::TokenType::OpenCurlyBracket;
result[L"}"] = Wide::Lexer::TokenType::CloseCurlyBacket;
return result;
}());
// Known text: take the mapped type and stop processing this token.
if (mapping.find(t.Codepoints) != mapping.end()) {
t.type = mapping.find(t.Codepoints)->second;
return;
}
// The statement the error in the question pointed at (line 121 of Lexer.cpp there).
t.type = Wide::Lexer::TokenType::Identifier; // line 121
});
}
Could you bisect the minimum edit that shows the problem, starting from this code?
I've got the same problem right now. I used other types, but for your case it will be like this:
auto end_current_token = [&] {
using Wide::Lexer::TokenType; // <-- this line solves problem
if (current != Wide::Lexer::Token()) {
current.type = Wide::Lexer::TokenType::Identifier;
Now it compiles well.
Related
I am working on a project based on LLVM 7.0.0. I migrated it from LLVM 3.5 to 7.0.0 and the project builds fine, but when I run llc on a .bc file I hit an assertion failure that confuses me. I have searched everywhere on the Internet and found no solution. Here is the stack dump message:
llc: llvm/include/llvm/Support/Casting.h:106: static bool llvm::isa_impl_cl<To, const From*>::doit(const From*) [with To = llvm::StructType; From = llvm::CompositeType]: Assertion `Val && "isa<> used on a null pointer"' failed.
Bug in code here:
// Accumulates the offset obtained by applying the index list `Indices` to a
// value of type `ElemTy`, walking the indexed types with a GEP type iterator.
// Struct steps add the field offset from the struct layout; all other steps
// add index * allocation size of the indexed element type.
// Every index is cast<> to ConstantInt, so non-constant indices are not handled here.
int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
ArrayRef<Value *> Indices) const {
int64_t Result = 0;
generic_gep_type_iterator<Value* const*>
GTI = gep_type_begin(ElemTy, Indices),
GTE = gep_type_end(ElemTy, Indices);
for (; GTI != GTE; ++GTI) { // stack dump here when ++GTI
Value *Idx = GTI.getOperand();
if (StructType *STy = GTI.getStructTypeOrNull()) {
assert(Idx->getType()->isIntegerTy(32) && "Illegal struct idx");
unsigned FieldNo = cast<ConstantInt>(Idx)->getZExtValue();
// Get structure layout information...
const StructLayout *Layout = getStructLayout(STy);
// Add in the offset, as calculated by the structure layout info...
Result += Layout->getElementOffset(FieldNo);
} else {
// Get the array index and the size of each array element.
// A zero index contributes nothing, so the multiplication is skipped.
if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue())
Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType());
}
}
return Result;
}
// Pre-increment: steps into the currently indexed type and advances to the
// next index operand.
generic_gep_type_iterator& operator++() { // Preincrement
Type *Ty = getIndexedType(); // program is stack dump here.
if (auto *STy = dyn_cast<SequentialType>(Ty)) {
// Sequential type: continue with its element type and remember its length.
CurTy = STy->getElementType();
NumElements = STy->getNumElements();
} else
// dyn_cast yields nullptr when Ty is not a StructType, so CurTy can become
// null here.
// NOTE(review): a null CurTy would presumably trip the isa<> assertion on
// the following getIndexedType() call — confirm against the stack dump.
CurTy = dyn_cast<StructType>(Ty);
++OpIt;
return *this;
}
// Checked downcast: returns Val cast to X when isa<X>(Val) holds, otherwise nullptr.
// Val itself must be non-null: per the assertion text quoted above, isa<> fails
// with "isa<> used on a null pointer" when handed a null pointer.
template <class X, class Y>
LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) {
return isa<X>(Val) ? cast<X>(Val) : nullptr; //stack dump here
}
Then when I debug the program, I find this message:
llvm/include/llvm/IR/GetElementPtrTypeIterator.h:
// FIXME: Make this the iterator's operator*() after the 4.0 release.
// operator*() had a different meaning in earlier releases, so we're
// temporarily not giving this iterator an operator*() to avoid a subtle
// semantics break.
// Returns the type the iterator currently indexes into: CurTy itself when it
// holds a Type*, otherwise the member type of the held StructType selected by
// the current operand.
// NOTE(review): CurTy is presumably a pointer union of StructType* and Type* —
// its declaration is not visible here.
Type *getIndexedType() const {
if (auto *T = CurTy.dyn_cast<Type *>())
return T;
return CurTy.get<StructType *>()->getTypeAtIndex(getOperand());
}
// Returns the index operand that OpIt currently refers to, with constness cast away.
Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
// Pre-increment: steps into the currently indexed type and advances to the
// next index operand. Arrays and vectors continue with their element type;
// anything else is stored as the result of dyn_cast<StructType>, which is
// null when the type is not a struct.
generic_gep_type_iterator &operator++() { // Preincrement
Type *Ty = getIndexedType();
if (auto *ATy = dyn_cast<ArrayType>(Ty))
CurTy = ATy->getElementType();
else if (auto *VTy = dyn_cast<VectorType>(Ty))
CurTy = VTy->getElementType();
else
CurTy = dyn_cast<StructType>(Ty);
++OpIt;
return *this;
}
// Post-increment: advances the iterator and returns a copy of its previous state.
generic_gep_type_iterator operator++(int) { // Postincrement
generic_gep_type_iterator tmp = *this;
++*this;
return tmp;
}
I am confused by the comment "// FIXME: Make this the iterator's operator*() after the 4.0 release." — what does it want me to do? Is there anything I need to add or fix at that position in order to resolve the stack dump?
Any suggestions will be appreciated, thanks a lot!
You are having:
if (auto *STy = dyn_cast<SequentialType>(Ty)) {
CurTy = STy->getElementType();
NumElements = STy->getNumElements();
} else
CurTy = dyn_cast<StructType>(Ty);
so, if dyn_cast fails, then CurTy might be nullptr and on next getIndexedType() you'll obtain the assertion. Likely you're passing neither SequentialType nor StructType here.
Consider the following sample text line:
"Hello : World 2020 :tag1:tag2:tag3"
I want to design a spirit X3 parser that can extract:
Content := "Hello : world 2020 "
Tags := { tag1,tag2,tag3 }
The problem: Content is defined as leftover char sequence(excluding eol) after matching the tags and I am not sure how to write a rule that can synthesize two attributes: one representing the extracted tags and another representing leftover characters(the content)
So far I've written the rule for extracting the tags:
...
namespace ast {
// Parse result for one sample line.
struct sample {
// Text of the line that is left over once the trailing tag list is removed.
std::u32string content;
// Tags extracted from the end of the line.
std::vector<std::u32string> tags;
};
//BOOST FUSION STUFF .....
}
namespace grammar {
using x3 = boost::spirit::x3;
using x3::unicode::lit;
using x3::unicode::char_;
using x3::unicode::alnum;
auto const tag
= x3::rule<class tag_class, std::u32string> {"tag"}
%=
lit(U":")
>>
+(alnum | lit(U"_") | lit(U"#") | lit(U"#") | lit(U"%") )
;
auto const tags
= x3::rule<class tags_class, std::vector<std::u32string>{"tags"}
%= +tag >> lit(U":");
}
But stuck over here:
auto const sample_rule =
= x3::rule<class sample_rule_class, ast::sample> {"sample"}
= ?? // something like (+char_ - (eol|tags);
I'm sure there is a much more elegant solution out there. In the meantime, here is a messy one:
Parse each sample line as a single string unit.
Use a semantic action to filter out the tags from each matched string unit.
Discard the filtered tags from the string unit so that only the content is left.
sample_ast.h
#pragma once
#include <string>
#include <vector>

namespace ast {
    // Parse result for one sample line.
    struct sample {
        // Text of the line that is left over once the trailing tag list is removed.
        std::u32string content;
        // Tags extracted from the end of the line.
        std::vector<std::u32string> tags;
    };
}
sample.h
#pragma once
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/char_encoding/unicode.hpp>
#include <boost/spirit/home/x3.hpp>
#include "sample_ast.h"
//tags property is intentionally ignored.
//It will be synthesized
//manually using semantic actions
BOOST_FUSION_ADAPT_STRUCT( ast::sample,content )
namespace grammar {
    namespace detail {
        // A namespace alias is spelled `namespace x = ...;` — `using x = ...;`
        // only works for types.
        namespace x3 = boost::spirit::x3;
        using x3::unicode::char_;
        using x3::eol;
        using x3::eoi;
        using x3::lexeme;

        // One whole input line (everything up to end-of-line / end-of-input),
        // captured as a single string.
        auto const sample_line
            = x3::rule<class sample_line_class, std::u32string>{"sample_line"}
            = lexeme[ +(char_ - (eol|eoi)) ];

        auto filter_tags = /*.... definition moved to next page for clarity */

        // A sample is a full line post-processed by the filter_tags semantic
        // action. Actions attach as parser[action], and %= keeps automatic
        // attribute propagation enabled despite the action.
        auto const sample
            = x3::rule<class sample, ast::sample >{"sample"}
            %= sample_line[ filter_tags ];
    }
}
namespace grammar {
    using grammar::detail::sample;
}
filter_tags definition
iterate the matched data right to left collecting
colon separated tags until an invalid tag char is
encountered or all chars have been exhausted.
pos_saved is used to track the beginning
of the tag list, which is used to discard the tags
from the content after collecting them into the ast.
// Semantic action: walks the matched line from right to left, collects the
// colon-separated tags found at its end into the ast, then erases the tag
// list from the content string. Fails the parse when no content is left.
auto filter_tags = []( auto& context )
{
    auto &attr = _attr(context); // content string
    auto &val = _val(context);   // ast::sample

    // The do/while below dereferences the first reverse position
    // unconditionally, so an empty match has to be rejected up front.
    if( attr.empty() ) {
        _pass(context) = false;
        return;
    }

    // Holds the ':' marker pushed for the first colon seen, with the
    // characters of the tag currently being read on top of it.
    std::stack<char32_t> mem;
    auto pos = attr.rbegin();
    const auto pos_end = attr.rend();
    // Start of the part to erase; attr.end() means "nothing to erase".
    auto pos_saved = attr.end();
    do{
        //tag start or end
        if( *pos == U':' ){
            if( mem.empty() ) { //tag start
                mem.push(U':');
            }
            else { //tag end
                //tag closed state:
                //all chars for the current tag
                //are ready for transfer into
                //the ast.
                std::u32string tag;
                while( mem.top() != U':' ){
                    //since we're reverse iterating the data
                    //the tags wont be backwards
                    tag.push_back( mem.top());
                    mem.pop();
                }
                // NOTE(review): tags are appended in right-to-left order,
                // i.e. the last tag of the line comes first.
                val.tags.push_back(tag);
                //update the start offset of
                //that tags
                // NOTE(review): base() designates the element AFTER the ':'
                // that pos refers to, so this colon stays in the content —
                // confirm that is intended.
                pos_saved = pos.base();
            }
        } else { // tag char or not
            using u = spirit::char_encoding::unicode;
            if( !mem.empty() ) {
                if(u::isalnum(*pos)) mem.push( *pos ); //tag char found
                else break; //invalid tag char found
            }
            else {
                //space after tag list but before content end
                if(u::isspace(*pos)) pos_saved = pos.base();
            }
        }
    }while(++pos != pos_end);
    if( pos_saved != attr.end()) attr.erase(pos_saved, attr.end() );
    if( attr.empty() ) _pass(context) = false;
};
So the question explains the problem...
Background:
I'm trying to solve this problem from HackerRank.
It's basically an html tag parser. Valid input guaranteed, attributes are strings only.
My Approach
I created a custom Tag class that can store a map<string,Tag> of other Tag's, as well as a map<string,string> of attributes. The parsing seems to be working correctly.
The Problem
During the querying part, I get a BAD_ACCESS error on the following query/html combo:
4 1
<a value = "GoodVal">
<b value = "BadVal" size = "10">
</b>
</a>
a.b~size
The error occurs when I try to access the b Tag from a. Specifically, it's in the t=t.tags[tag_name], Line 118 below.
Code
#include <cmath>
#include <cstdio>
#include <vector>
#include <iostream>
#include <algorithm>
#include <sstream>
#include <map>
#include <stack>
using namespace std;
// One parsed markup tag: its name, its attributes and the tags nested directly inside it.
class Tag {
public:
    // Unnamed tag with no children and no attributes.
    Tag() = default;
    // Tag with the given name, no children and no attributes.
    Tag(string name) : name(name) {}

    string name;
    // Directly nested tags, keyed by tag name.
    map<string, Tag> tags;
    // Attribute values, keyed by attribute name.
    map<string, string> attribs;
};
// Reads `lines` lines of tag markup followed by `queries` queries of the form
// tag1.tag2~attribute, and prints the attribute value or "Not Found!" for each.
int main() {
int lines, queries;
std::cin>>lines>>queries;
// NOTE(review): `std:` (single colon) parses as a label named `std`; the
// declaration is really `string str;`, resolved through `using namespace std`.
std:string str;
// Consume the remainder of the first line after the two counts.
getline(cin, str);
// Names of the tags that are currently open, innermost on top.
stack<string> open;
auto tags = map<string, Tag>();
for (int i = 0; i < lines; i++) {
getline(cin, str);
if (str.length()>1){
// If it's not </tag>, then it's an opening tag
if (str[1] != '/') {
// Parse tag name
auto wordidx = str.find(" ");
// find() returns an unsigned npos; -1 converts to the same value.
if (wordidx == -1) {
// No space, i.e. no attributes: the name runs up to the closing '>'.
// NOTE(review): the float literal makes this arithmetic go through float.
wordidx = str.length()-1.f;
}
string name = str.substr(1,wordidx-1);
auto t = Tag(name);
string sub = str.substr(wordidx);
auto equalidx=sub.find("=");
// Parse Attributes
while (equalidx != std::string::npos) {
// Assumes the form ` key = "value"`: one leading space and one space before '='.
string key = sub.substr(1,equalidx-2);
sub = sub.substr(equalidx);
auto attrib_start = sub.find("\"");
sub = sub.substr(attrib_start+1);
auto attrib_end = sub.find("\"");
string val = sub.substr(0, attrib_end);
sub = sub.substr(attrib_end+1);
t.attribs[key] = val;
equalidx=sub.find("=");
}
// If we're in a tag, push to that, else push to the base tags
// NOTE(review): the parent is looked up by name in the top-level map only,
// so a tag nested more than one level deep is attached to a top-level
// entry named after its parent rather than to the real parent.
if (open.size() == 0) {
tags[name] = t;
} else {
tags[open.top()].tags[name]=t;
}
open.push(name);
} else {
// Pop the stack if we reached a closing tag
auto wordidx = str.find(">");
string name = str.substr(2,wordidx-2);
// Sanity check, but we're assuming valid input
if (name.compare(open.top())) {
cout<<"FUCK"<<name<<open.top()<<endl;
return 9;
}
open.pop();
}
} else {
std::cout<<"FUCK\n";
}
}
//
// Parse in queries
//
for (int i = 0; i < queries; i++) {
getline(cin, str);
// Copy of the tag reached so far while walking the dotted path.
Tag t = Tag();
bool defined = false;
auto next_dot = str.find(".");
while (next_dot!=string::npos) {
string name = str.substr(0,next_dot);
if (defined && t.tags.find(name) == t.tags.end()) {
//TAG NOT IN T
cout<<"Not Found!"<<endl;
// NOTE(review): this `continue` applies to the enclosing while loop, not
// to the query loop; str and next_dot are unchanged at this point.
continue;
}
// NOTE(review): when `defined` is true this assigns to t an element stored
// inside t.tags itself.
t = !defined ? tags[name] : t.tags[name];
defined = true;
str = str.substr(next_dot+1);
next_dot = str.find(".");
}
// What remains of str is "<tag>~<attribute>".
auto splitter = str.find("~");
string tag_name = str.substr(0,splitter);
string attrib_name = str.substr(splitter+1);
if (!defined) {
t = tags[tag_name];
} else if (t.tags.find(tag_name) == t.tags.end()) {
//TAG NOT IN T
cout<<"Not Found!"<<endl;
continue;
} else {
// NOTE(review): same pattern as above — the source of the copy lives inside
// the object being assigned to.
t = t.tags[tag_name];
}
// T is now set, check the attribute
if (t.attribs.find(attrib_name) == t.attribs.end()) {
cout<<"Not Found!"<<endl;
} else {
cout<<t.attribs[attrib_name]<<endl;
}
}
return 0;
}
What I've tried
This is fixed by just defining Tag x = t.tags[tag_name]; in the line above as a new variable, and then doing t = x; but why is this even happening?
Also, the following query also then fails: a.b.c~height, but it fails on Line 99 when it tried to get a.tags["b"]. No idea why. I was gonna just go with the hacky fix above, but this seems like a big core issue that i'm doing wrong.
I would suggest running this on an IDE and verifying that the parsing is indeed correct.
t=t.tags[tag_name]
This expression is unsafe because you are copy-assigning an object that is owned by that object over the owning object.
Consider what happens on this line:
The map lookup is performed and returns a Tag&.
You try to copy-assign this to t, invoking the implicit copy-assignment operator.
This operator copy-assigns t.tags from the tags attribute of the copy source -- which lives in t.tags.
The result is that the object you're copying into t is destroyed in the middle of that copy. This causes undefined behavior, and an immediate crash is honestly the best possible outcome as it told you exactly where the problem was. (This kind of problem frequently manifests at some point later in the program, at which point you've lost the state necessary to figure out what caused the UB.)
One workaround would be to move the source object into a temporary and then move-assign that temporary over t:
t = Tag{std::move(t.tags[tag_name])};
This lifts the data we want to assign to t out of t before we try to put it in t. Then, when t's assignment operator goes to replace t.tags, the data you're trying to assign to t doesn't live there anymore.
However, this overall approach involves a lot of unnecessary copying. It would be better to declare t as Tag const *t; instead -- have it be a pointer to a tag. Then you can just move that pointer around to point at other tags in your data structure without making copies.
Side note: I just did this problem the other day! Here's a hint that might help you simplify things: do you actually need a structure of tags? Is there a simpler type of lookup structure that would work instead of nested tags?
I want to transform an expression into negation normal form. For this, I have a binary expression tree using smart pointers. The problem is that removing double negations is not working when they occur in binary expressions although the function removeDoubleNot() is called at the right time. So e.g. ¬(A∨¬B) becomes ¬A∧¬¬B instead of ¬A∧B, but it works on ¬¬B alone. I assume the mistake is in evaluate() but I could not find it yet. Maybe the recursion is wrong?
// It is assumed that all Expressions are valid
std::shared_ptr<Expression> NNF::removeDoubleNot(std::shared_ptr<Not> expr) {
// Left is a Not -> remove both Nots
if (auto node = dynamic_cast<Not *>(expr->getLeft().get()))
return node->getLeft();
return expr;
}
// Applies De Morgan's laws to a negation: a negated And becomes an Or of
// negated operands, a negated Or becomes an And of negated operands.
// Any other operand leaves `expr` unchanged.
std::shared_ptr<Expression> NNF::applyDeMorgan(std::shared_ptr<Not> expr) {
    // Negated conjunction -> disjunction of negations.
    if (auto conjunction = dynamic_cast<And *>(expr->getLeft().get())) {
        auto negatedLhs = std::make_shared<Not>(conjunction->getLeft());
        auto negatedRhs = std::make_shared<Not>(conjunction->getRight());
        return std::make_shared<Or>(negatedLhs, negatedRhs);
    }

    // Negated disjunction -> conjunction of negations.
    if (auto disjunction = dynamic_cast<Or *>(expr->getLeft().get())) {
        auto negatedLhs = std::make_shared<Not>(disjunction->getLeft());
        auto negatedRhs = std::make_shared<Not>(disjunction->getRight());
        return std::make_shared<And>(negatedLhs, negatedRhs);
    }

    return expr;
}
// Rewrites an implication as a disjunction: the result is an Or whose left
// operand is the negated left side of `expr` and whose right operand is the
// unchanged right side.
std::shared_ptr<Expression> NNF::removeImplication(const std::shared_ptr<Implication> &expr) {
    auto negatedPremise = std::make_shared<Not>(expr->getLeft());
    auto conclusion = expr->getRight();
    return std::make_shared<Or>(negatedPremise, conclusion);
}
std::shared_ptr<Expression> NNF::moveNegationInwards(const std::shared_ptr<Not> ¬Expr) {
expr = applyDeMorgan(node);
if (auto node = std::dynamic_pointer_cast<Not>(expr))
expr = removeDoubleNot(node);
return expr;
}
// Rewrites `expr`, dispatching on its dynamic type (Implication, other
// Binary, Not), and recurses into the children of the rewritten node.
// Returns nullptr for a null input and `expr` itself for any other node type.
// NOTE(review): the results of the recursive evaluate() calls are discarded,
// so a rewritten child is never stored back into its parent.
std::shared_ptr<Expression> NNF::evaluate(std::shared_ptr<Expression> expr) {
if (expr == nullptr)
return nullptr;
// Implication
if(auto node = std::dynamic_pointer_cast<Implication>(expr)){
auto ret = removeImplication(node);
evaluate(ret->getLeft());
evaluate(ret->getRight());
return ret;
}
// Other binary than implication
if(auto node = dynamic_cast<Binary*>(expr.get())){
evaluate(node->getLeft());
evaluate(node->getRight());
return expr;
}
// Not
if(auto node = std::dynamic_pointer_cast<Not>(expr)) {
// The result may be an And/Or (De Morgan applied), the inner operand of a
// double negation, or the original Not.
auto ret = moveNegationInwards(node);
evaluate(ret->getLeft());
evaluate(ret->getRight());
return ret;
}
return expr;
}
When you call evaluate(ret->getLeft()) you are not using the return value, thus you never change your current child expressions.
So you need to change this to:
ret->setLeft(evaluate(ret->getLeft()));
The same goes for right.
You might want to consider using [[nodiscard]] to get compiler warnings on mistakes like these.
SQLParser.h:
// Wraps an ANTLR4 lexer, token stream and parser for one SQL vendor so that
// several input texts can be tokenized and parsed through the same object.
class SQLParser{
/*____Variables____*/
private:
// Vendor name given at construction; parse() compares it against "tsql" and "mysql".
std::string _vendor;
// NOTE(review): raw pointers — ownership is not visible in this header;
// presumably created in the constructor and released in ~SQLParser, confirm.
antlr4::CommonTokenStream* _tokenStream;
antlr4::Parser* _parser;
antlr4::Lexer* _lexer;
/*____Functions____*/
public:
SQLParser(const std::string& Vendor);
~SQLParser();
// Loads `text` into the lexer's input stream and returns the refilled token stream.
antlr4::CommonTokenStream* get_tokens(const std::string& text);
// Returns the text of every token produced for `text`.
std::vector<std::string> get_lexems(const std::string& text);
// Parses `text` from the vendor grammar's root rule; returns nullptr when parsing is cancelled.
antlr4::ParserRuleContext* parse(const std::string& text);
bool check_syntax(const std::string& text);
void print_string_tree(const std::string& text); // parse and print in LISP format
};
SQLParser.cpp:
...
// Loads `text` into the lexer's existing input stream, then resets and refills
// the shared token stream and returns it.
// NOTE(review): the dynamic_cast result is used unchecked; a lexer whose input
// stream is not an ANTLRInputStream would dereference a null pointer.
// NOTE(review): neither the lexer nor the token stream's token source is
// re-initialised here — confirm that reset() + fill() is enough to discard the
// tokens of the previous text.
CommonTokenStream* SQLParser::get_tokens(const std::string& text){
(dynamic_cast<ANTLRInputStream*>(_lexer->getInputStream()))->load(text);
_tokenStream->reset();
_tokenStream->fill();
return _tokenStream;
}
// Tokenizes `text` and returns the text of every token in stream order.
std::vector<std::string> SQLParser::get_lexems(const std::string& text){
    // Refill the shared token stream from the new text first.
    get_tokens(text);

    std::vector<std::string> lexems;
    auto&& streamTokens = _tokenStream->getTokens();
    for (auto it = streamTokens.begin(); it != streamTokens.end(); ++it) {
        auto token = *it;
        lexems.push_back(token->getText());
    }
    return lexems;
}
// Tokenizes `text` and parses it from the root rule of the grammar selected
// by `_vendor`.
// Returns the parse tree, or nullptr when the vendor is not recognised, when
// the stored parser is not of the vendor's parser type, or when parsing is
// cancelled with a ParseCancellationException.
ParserRuleContext* SQLParser::parse(const std::string& text){
    get_tokens(text);
    _parser->setInputStream(_tokenStream);
    // Initialised so that an unknown vendor yields nullptr instead of an
    // indeterminate pointer.
    ParserRuleContext* tree = nullptr;
    try{
        if(_vendor == "tsql"){
            if(auto* vendorParser = dynamic_cast<tsqlParser*>(_parser)){
                tree = vendorParser->root();
            }
        }
        else if(_vendor == "mysql"){
            if(auto* vendorParser = dynamic_cast<mysqlParser*>(_parser)){
                tree = vendorParser->root();
            }
        }
    }
    // Catch the public exception type rather than the standard library's
    // internal std::_Nested_exception wrapper: a handler for the base class
    // also catches the object thrown by std::throw_with_nested.
    catch(const ParseCancellationException&){
        return nullptr;
    }
    return tree;
}
An object SQLParser is created for each concrete vendor.
I want to use this object to parse several input texts, but I have problems with the TokenStream's size. I expected its size to change dynamically.
For example, main like this:
main.cpp:
#include <iostream>
#include <string>
#include <antlr4-runtime.h>
#include "SQLParser.h"
using namespace antlr4;
// Demo: parses two statements with the same SQLParser instance and prints
// each parse tree.
int main(){
SQLParser parser("tsql");
std::cout << "'select 1;': ";
parser.print_string_tree("select 1;");
// NOTE(review): the label printed here says 'select 1,2,3;' while the text
// actually parsed on the next line is "select 1,2;".
std::cout << "\n\n'select 1,2,3;': ";
parser.print_string_tree("select 1,2;");
std::cout << "\n";
return 0;
}
is giving output like this:
'select 1;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ;)))) <EOF>)
'select 1,2,3;': (root (sql_clauses (sql_clause (dml_clause (select_statement (query_expression (query_specification select (select_list (select_list_elem (expression (constant 1)))))) ,)))) )
How should I use TokenStream to avoid this error?
I have a similar setup like you. Context class keeps lexer + parser + listeners etc. together which act as a whole. To restart parsing with new input you have to make your token stream reload all tokens again. In my context class I do it so:
struct MySQLParserContextImpl : public MySQLParserContext {
ANTLRInputStream input;
MySQLLexer lexer;
CommonTokenStream tokens;
MySQLParser parser;
ContextErrorListener errorListener;
bool caseSensitive;
std::vector<ParserErrorInfo> errors;
...
// Loads `text` into the shared input stream and runs a full (non-fast) parse
// of the given unit; returns whatever startParsing returns.
ParseTree *parse(const std::string &text, MySQLParseUnit unit) {
input.load(text);
return startParsing(false, unit);
}
// Syntax check only: removes the parse listeners, loads `text`, runs a fast
// parse and reports whether the error list stayed empty.
bool errorCheck(const std::string &text, MySQLParseUnit unit) {
parser.removeParseListeners();
input.load(text);
startParsing(true, unit);
return errors.empty();
}
private:
// Invokes the parser rule that corresponds to `unit` and returns its parse
// tree. Units without a dedicated case fall back to the generic query rule.
ParseTree *parseUnit(MySQLParseUnit unit) {
switch (unit) {
case MySQLParseUnit::PuCreateSchema:
return parser.createDatabase();
case MySQLParseUnit::PuCreateTable:
return parser.createTable();
case MySQLParseUnit::PuCreateTrigger:
return parser.createTrigger();
case MySQLParseUnit::PuCreateView:
return parser.createView();
case MySQLParseUnit::PuCreateFunction:
return parser.createFunction();
case MySQLParseUnit::PuCreateProcedure:
return parser.createProcedure();
case MySQLParseUnit::PuCreateUdf:
return parser.createUdf();
case MySQLParseUnit::PuCreateRoutine:
return parser.createRoutine();
case MySQLParseUnit::PuCreateEvent:
return parser.createEvent();
case MySQLParseUnit::PuCreateIndex:
return parser.createIndex();
case MySQLParseUnit::PuGrant:
return parser.grant();
case MySQLParseUnit::PuDataType:
return parser.dataTypeDefinition();
case MySQLParseUnit::PuCreateLogfileGroup:
return parser.createLogfileGroup();
case MySQLParseUnit::PuCreateServer:
return parser.createServer();
case MySQLParseUnit::PuCreateTablespace:
return parser.createTablespace();
default:
return parser.query();
}
}
// Re-initialises lexer, token stream and parser for the text currently loaded
// in `input`, then parses the given unit.
// `fast` disables parse-tree construction and skips the second (LL) attempt,
// returning nullptr when the first attempt is cancelled.
// Returns the parse tree of the last attempt.
ParseTree *startParsing(bool fast, MySQLParseUnit unit) {
errors.clear();
lexer.reset();
lexer.setInputStream(&input); // Not just reset(), which only rewinds the current position.
// Re-attach the lexer as the token source of the token stream.
tokens.setTokenSource(&lexer);
parser.reset();
parser.setBuildParseTree(!fast);
// First parse with the bail error strategy to get quick feedback for correct queries.
parser.setErrorHandler(std::make_shared<BailErrorStrategy>());
parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::SLL);
ParseTree *tree;
try {
tree = parseUnit(unit);
} catch (ParseCancellationException &) {
if (fast)
tree = nullptr;
else {
// If parsing was cancelled we either really have a syntax error or we need to do a second step,
// now with the default strategy and LL parsing.
tokens.reset();
parser.reset();
parser.setErrorHandler(std::make_shared<DefaultErrorStrategy>());
parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(PredictionMode::LL);
tree = parseUnit(unit);
}
}
if (errors.empty() && !lexer.hitEOF) {
// There is more input than needed for the given parse unit. Make this a fail as we don't allow
// extra input after the specific rule.
// This part is only needed if the grammar has no explicit EOF token at the end of the parsed rule.
Token *token = tokens.LT(1);
// The last field is the length of the offending token.
ParserErrorInfo info = {"extraneous input found, expecting end of input",
token->getType(),
token->getStartIndex(),
token->getLine(),
token->getCharPositionInLine(),
token->getStopIndex() - token->getStartIndex() + 1};
errors.push_back(info);
}
return tree;
}
...