Commit 9e911419 authored by twanvl

better script parser with respect to newlines

parent 2e870711
...@@ -27,7 +27,7 @@ enum TokenType ...@@ -27,7 +27,7 @@ enum TokenType
, TOK_OPER // + - * / . ; , TOK_OPER // + - * / . ;
, TOK_LPAREN // ( { [ , TOK_LPAREN // ( { [
, TOK_RPAREN // ) } ] , TOK_RPAREN // ) } ]
, TOK_NEWLINE // newline , TOK_DUMMY // placeholder for putBack
, TOK_EOF // end of input , TOK_EOF // end of input
}; };
...@@ -35,6 +35,7 @@ enum TokenType ...@@ -35,6 +35,7 @@ enum TokenType
struct Token { struct Token {
TokenType type; TokenType type;
String value; String value;
bool newline; ///< Is there a newline between this token and the previous one?
inline operator == (TokenType t) const { return type == t; } inline operator == (TokenType t) const { return type == t; }
inline operator != (TokenType t) const { return type != t; } inline operator != (TokenType t) const { return type != t; }
...@@ -65,10 +66,13 @@ class TokenIterator { ...@@ -65,10 +66,13 @@ class TokenIterator {
size_t pos; size_t pos;
vector<Token> buffer; // buffer of unread tokens, front() = current vector<Token> buffer; // buffer of unread tokens, front() = current
stack<bool> open_braces; // braces we entered, true if the brace was from a smart string escape stack<bool> open_braces; // braces we entered, true if the brace was from a smart string escape
bool newline; ///< Did we just pass a newline?
/// Add a token to the buffer, with the current newline value, resets newline
void addToken(TokenType type, const String& value);
/// Read the next token, and add it to the buffer /// Read the next token, and add it to the buffer
void addToken(); void readToken();
/// Read the next token which is a string (after the opening ") /// Read the next token which is a string (after the opening ")
void addStringToken(); void readStringToken();
}; };
// ----------------------------------------------------------------------------- : Characters // ----------------------------------------------------------------------------- : Characters
...@@ -92,7 +96,7 @@ TokenIterator::TokenIterator(const String& str) ...@@ -92,7 +96,7 @@ TokenIterator::TokenIterator(const String& str)
const Token& TokenIterator::peek(size_t offset) { const Token& TokenIterator::peek(size_t offset) {
// read the next token until we have enough // read the next token until we have enough
while (buffer.size() <= offset) { while (buffer.size() <= offset) {
addToken(); readToken();
} }
return buffer[offset]; return buffer[offset];
} }
...@@ -103,71 +107,70 @@ const Token& TokenIterator::read() { ...@@ -103,71 +107,70 @@ const Token& TokenIterator::read() {
} }
void TokenIterator::putBack() { void TokenIterator::putBack() {
Token t = {TOK_NEWLINE, _("\n")}; // Don't use addToken, because it changes newline
// Also, we want to push_front
Token t = {TOK_DUMMY, _(""), false};
buffer.insert(buffer.begin(), t); buffer.insert(buffer.begin(), t);
} }
void TokenIterator::addToken() { void TokenIterator::addToken(TokenType type, const String& value) {
Token t = {type, value, newline};
buffer.push_back(t);
newline = false;
}
void TokenIterator::readToken() {
if (pos >= input.size()) { if (pos >= input.size()) {
// EOF // EOF
Token t = {TOK_EOF, _("end of input")}; addToken(TOK_EOF, _("end of input"));
buffer.push_back(t);
return; return;
} }
// read a character from the input // read a character from the input
Char c = input.GetChar(pos++); Char c = input.GetChar(pos++);
if (c == _('\n')) { if (c == _('\n')) {
Token t = {TOK_NEWLINE, _("newline")}; newline = true;
buffer.push_back(t);
} else if (isSpace(c)) { } else if (isSpace(c)) {
// ignore // ignore
} else if (isAlpha(c)) { } else if (isAlpha(c)) {
// name // name
size_t start = pos - 1; size_t start = pos - 1;
while (pos < input.size() && isAlnum_(input.GetChar(pos))) ++pos; while (pos < input.size() && isAlnum_(input.GetChar(pos))) ++pos;
Token t = {TOK_NAME, cannocial_name_form(input.substr(start, pos-start)) }; // convert name to cannocial form addToken(TOK_NAME, cannocial_name_form(input.substr(start, pos-start))); // convert name to cannocial form
buffer.push_back(t);
} else if (isDigit(c)) { } else if (isDigit(c)) {
// number // number
size_t start = pos - 1; size_t start = pos - 1;
while (pos < input.size() && isDigitOrDot(input.GetChar(pos))) ++pos; while (pos < input.size() && isDigitOrDot(input.GetChar(pos))) ++pos;
String num = input.substr(start, pos-start); String num = input.substr(start, pos-start);
Token t = { addToken(
num.find_first_of('.') == String::npos ? TOK_INT : TOK_DOUBLE, num.find_first_of('.') == String::npos ? TOK_INT : TOK_DOUBLE,
num num
}; );
buffer.push_back(t);
} else if (isOper(c)) { } else if (isOper(c)) {
// operator // operator
Token t = { TOK_OPER };
if (pos < input.size() && isLongOper(input.substr(pos - 1, 2))) { if (pos < input.size() && isLongOper(input.substr(pos - 1, 2))) {
// long operator // long operator
t.value = input.substr(pos - 1, 2); addToken(TOK_OPER, input.substr(pos - 1, 2));
pos += 1; pos += 1;
} else { } else {
t.value = input.substr(pos - 1, 1); addToken(TOK_OPER, input.substr(pos - 1, 1));
} }
buffer.push_back(t);
} else if (c==_('"')) { } else if (c==_('"')) {
// string // string
addStringToken(); readStringToken();
} else if (c == _('}') && !open_braces.empty() && open_braces.top()) { } else if (c == _('}') && !open_braces.empty() && open_braces.top()) {
// closing smart string, resume to string parsing // closing smart string, resume to string parsing
// "a{e}b" --> "a" "{ e }" "b" // "a{e}b" --> "a" "{ e }" "b"
open_braces.pop(); open_braces.pop();
Token t2 = {TOK_RPAREN, _("}\"")}; addToken(TOK_RPAREN, _("}\""));
buffer.push_back(t2); readStringToken();
addStringToken();
} else if (isLparen(c)) { } else if (isLparen(c)) {
// parenthesis/brace // parenthesis/brace
open_braces.push(false); open_braces.push(false);
Token t = { TOK_LPAREN, String(1,c) }; addToken(TOK_LPAREN, String(1,c));
buffer.push_back(t);
} else if (isRparen(c)) { } else if (isRparen(c)) {
// parenthesis/brace // parenthesis/brace
if (!open_braces.empty()) open_braces.pop(); if (!open_braces.empty()) open_braces.pop();
Token t = { TOK_RPAREN, String(1,c) }; addToken(TOK_RPAREN, String(1,c));
buffer.push_back(t);
} else if(c==_('#')) { } else if(c==_('#')) {
// comment until end of line // comment until end of line
while (pos < input.size() && input[pos] != _('\n')) ++pos; while (pos < input.size() && input[pos] != _('\n')) ++pos;
...@@ -176,33 +179,32 @@ void TokenIterator::addToken() { ...@@ -176,33 +179,32 @@ void TokenIterator::addToken() {
} }
} }
void TokenIterator::addStringToken() { void TokenIterator::readStringToken() {
Token t = {TOK_STRING}; String str;
while (true) { while (true) {
if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant")); if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant"));
Char c = input[pos++]; //% input.GetChar(pos++); Char c = input[pos++]; //% input.GetChar(pos++);
// parse the string constant // parse the string constant
if (c == _('"')) { if (c == _('"')) {
// end of string // end of string
buffer.push_back(t); addToken(TOK_STRING, str);
return; return;
} else if (c == _('\\')) { } else if (c == _('\\')) {
// escape // escape
if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant")); if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant"));
c = input[pos++]; c = input[pos++];
if (c == _('n')) t.value += _('\n'); if (c == _('n')) str += _('\n');
if (c == _('<')) t.value += _('\1'); // escape for < if (c == _('<')) str += _('\1'); // escape for <
else t.value += c; // \ or { or " else str += c; // \ or { or "
} else if (c == _('{')) { } else if (c == _('{')) {
// smart string // smart string
// "a{e}b" --> "a" "{ e }" "b" // "a{e}b" --> "a" "{ e }" "b"
buffer.push_back(t); addToken(TOK_STRING, str);
open_braces.push(true); open_braces.push(true);
Token t2 = {TOK_LPAREN, _("\"{")}; addToken(TOK_LPAREN, _("\"{"));
buffer.push_back(t2);
return; return;
} else { } else {
t.value += c; str += c;
} }
} }
} }
...@@ -257,7 +259,6 @@ ScriptP parse(const String& s) { ...@@ -257,7 +259,6 @@ ScriptP parse(const String& s) {
// Expect a token, throws if it is not found // Expect a token, throws if it is not found
void expectToken(TokenIterator& input, const Char* expect) { void expectToken(TokenIterator& input, const Char* expect) {
Token token = input.read(); Token token = input.read();
while (token == TOK_NEWLINE) token = input.read(); // skip newlines
if (token != expect) { if (token != expect) {
throw ScriptParseError(expect, token.value); throw ScriptParseError(expect, token.value);
} }
...@@ -371,8 +372,6 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) { ...@@ -371,8 +372,6 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) {
script.addInstruction(I_PUSH_CONST, toScript(d)); script.addInstruction(I_PUSH_CONST, toScript(d));
} else if (token == TOK_STRING) { } else if (token == TOK_STRING) {
script.addInstruction(I_PUSH_CONST, toScript(token.value)); script.addInstruction(I_PUSH_CONST, toScript(token.value));
} else if (token == TOK_NEWLINE) {
continue; // ignore
} else { } else {
throw ScriptParseError(_("Unexpected token '") + token.value + _("'")); throw ScriptParseError(_("Unexpected token '") + token.value + _("'"));
} }
...@@ -382,123 +381,104 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) { ...@@ -382,123 +381,104 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) {
void parseOper(TokenIterator& input, Script& script, Precedence minPrec, InstructionType closeWith, int closeWithData) { void parseOper(TokenIterator& input, Script& script, Precedence minPrec, InstructionType closeWith, int closeWithData) {
parseExpr(input, script, minPrec); // first argument parseExpr(input, script, minPrec); // first argument
bool newlines = false; // did we skip any newlines?
// read any operators after an expression // read any operators after an expression
// EBNF: expr = expr | expr oper expr // EBNF: expr = expr | expr oper expr
// without left recursion: expr = expr (oper expr)* // without left recursion: expr = expr (oper expr)*
while (true) { while (true) {
const Token& token = input.read(); const Token& token = input.read();
bool newlines2 = newlines; if (token != TOK_OPER && token != TOK_NAME && token!=TOK_LPAREN) {
newlines = false; // not an operator-like token
if (token == TOK_OPER || token == TOK_NAME) { input.putBack();
if (minPrec <= PREC_SEQ && token==_(";")) { break;
Token next = input.peek(1); }
if (next == TOK_RPAREN || next == TOK_EOF) { if (minPrec <= PREC_SEQ && token==_(";")) {
// allow ; at end of expression without errors Token next = input.peek(1);
return; if (next == TOK_RPAREN || next == TOK_EOF) {
} // allow ; at end of expression without errors
script.addInstruction(I_POP); // discard result of first expression return;
parseOper(input, script, PREC_SET);
} else if (minPrec <= PREC_SET && token==_(":=")) {
// We made a mistake, the part before the := should be a variable name,
// not an expression. Remove that instruction.
Instruction instr = script.getInstructions().back();
if (instr.instr != I_GET_VAR) {
throw ScriptParseError(_("Can only assign to variables"));
} else {
script.getInstructions().pop_back();
parseOper(input, script, PREC_SET, I_SET_VAR, instr.data);
}
} }
else if (minPrec <= PREC_AND && token==_("and")) parseOper(input, script, PREC_CMP, I_BINARY, I_AND); script.addInstruction(I_POP); // discard result of first expression
else if (minPrec <= PREC_AND && token==_("or" )) parseOper(input, script, PREC_CMP, I_BINARY, I_OR); parseOper(input, script, PREC_SET);
else if (minPrec <= PREC_CMP && token==_("=")) parseOper(input, script, PREC_ADD, I_BINARY, I_EQ); } else if (minPrec <= PREC_SET && token==_(":=")) {
else if (minPrec <= PREC_CMP && token==_("==")) parseOper(input, script, PREC_ADD, I_BINARY, I_EQ); // We made a mistake, the part before the := should be a variable name,
else if (minPrec <= PREC_CMP && token==_("!=")) parseOper(input, script, PREC_ADD, I_BINARY, I_NEQ); // not an expression. Remove that instruction.
else if (minPrec <= PREC_CMP && token==_("<")) parseOper(input, script, PREC_ADD, I_BINARY, I_LT); Instruction instr = script.getInstructions().back();
else if (minPrec <= PREC_CMP && token==_(">")) parseOper(input, script, PREC_ADD, I_BINARY, I_GT); if (instr.instr != I_GET_VAR) {
else if (minPrec <= PREC_CMP && token==_("<=")) parseOper(input, script, PREC_ADD, I_BINARY, I_LE); throw ScriptParseError(_("Can only assign to variables"));
else if (minPrec <= PREC_CMP && token==_(">=")) parseOper(input, script, PREC_ADD, I_BINARY, I_GE);
else if (minPrec <= PREC_ADD && token==_("+")) parseOper(input, script, PREC_MUL, I_BINARY, I_ADD);
else if (minPrec <= PREC_ADD && token==_("-")) parseOper(input, script, PREC_MUL, I_BINARY, I_SUB);
else if (minPrec <= PREC_MUL && token==_("*")) parseOper(input, script, PREC_UNARY, I_BINARY, I_MUL);
else if (minPrec <= PREC_MUL && token==_("/")) parseOper(input, script, PREC_UNARY, I_BINARY, I_DIV);
else if (minPrec <= PREC_MUL && token==_("mod")) parseOper(input, script, PREC_UNARY, I_BINARY, I_MOD);
else if (minPrec <= PREC_FUN && token==_(".")) { // get member by name
const Token& token = input.read();
if (token == TOK_NAME || token == TOK_INT || token == TOK_DOUBLE || token == TOK_STRING) {
script.addInstruction(I_MEMBER_C, token.value);
} else {
throw ScriptParseError(_("name"), input.peek().value);
}
} else { } else {
input.putBack(); script.getInstructions().pop_back();
newlines = newlines2; // remember newlines parseOper(input, script, PREC_SET, I_SET_VAR, instr.data);
break; // unknown operator
} }
} else if (token==TOK_LPAREN) { }
if (minPrec <= PREC_FUN && token==_("(")) { else if (minPrec <= PREC_AND && token==_("and")) parseOper(input, script, PREC_CMP, I_BINARY, I_AND);
// function call, read arguments else if (minPrec <= PREC_AND && token==_("or" )) parseOper(input, script, PREC_CMP, I_BINARY, I_OR);
vector<int> arguments; else if (minPrec <= PREC_CMP && token==_("=")) parseOper(input, script, PREC_ADD, I_BINARY, I_EQ);
Token t = input.peek(); else if (minPrec <= PREC_CMP && token==_("==")) parseOper(input, script, PREC_ADD, I_BINARY, I_EQ);
while (t != _(")")) { else if (minPrec <= PREC_CMP && token==_("!=")) parseOper(input, script, PREC_ADD, I_BINARY, I_NEQ);
if (input.peek(2) == _(":")) { else if (minPrec <= PREC_CMP && token==_("<")) parseOper(input, script, PREC_ADD, I_BINARY, I_LT);
// name: ... else if (minPrec <= PREC_CMP && token==_(">")) parseOper(input, script, PREC_ADD, I_BINARY, I_GT);
arguments.push_back(stringToVariable(t.value)); else if (minPrec <= PREC_CMP && token==_("<=")) parseOper(input, script, PREC_ADD, I_BINARY, I_LE);
input.read(); // skip the name else if (minPrec <= PREC_CMP && token==_(">=")) parseOper(input, script, PREC_ADD, I_BINARY, I_GE);
input.read(); // and the : else if (minPrec <= PREC_ADD && token==_("+")) parseOper(input, script, PREC_MUL, I_BINARY, I_ADD);
parseOper(input, script, PREC_SEQ); else if (minPrec <= PREC_ADD && token==_("-")) parseOper(input, script, PREC_MUL, I_BINARY, I_SUB);
} else { else if (minPrec <= PREC_MUL && token==_("*")) parseOper(input, script, PREC_UNARY, I_BINARY, I_MUL);
// implicit "input" argument else if (minPrec <= PREC_MUL && token==_("/")) parseOper(input, script, PREC_UNARY, I_BINARY, I_DIV);
arguments.push_back(stringToVariable(_("input"))); else if (minPrec <= PREC_MUL && token==_("mod")) parseOper(input, script, PREC_UNARY, I_BINARY, I_MOD);
parseOper(input, script, PREC_SEQ); else if (minPrec <= PREC_FUN && token==_(".")) { // get member by name
} const Token& token = input.read();
t = input.peek(); if (token == TOK_NAME || token == TOK_INT || token == TOK_DOUBLE || token == TOK_STRING) {
if (t == _(",")) { script.addInstruction(I_MEMBER_C, token.value);
// Comma separating the arguments } else {
input.read(); throw ScriptParseError(_("name"), input.peek().value);
t = input.peek(); }
} } else if (minPrec <= PREC_FUN && token==_("[")) { // get member by expr
parseOper(input, script, PREC_ALL, I_BINARY, I_MEMBER);
expectToken(input, _("]"));
} else if (minPrec <= PREC_FUN && token==_("(")) {
// function call, read arguments
vector<int> arguments;
Token t = input.peek();
while (t != _(")")) {
if (input.peek(2) == _(":")) {
// name: ...
arguments.push_back(stringToVariable(t.value));
input.read(); // skip the name
input.read(); // and the :
parseOper(input, script, PREC_SEQ);
} else {
// implicit "input" argument
arguments.push_back(stringToVariable(_("input")));
parseOper(input, script, PREC_SEQ);
} }
input.read(); // skip the ) t = input.peek();
// generate instruction if (t == _(",")) {
script.addInstruction(I_CALL, (unsigned int)arguments.size()); // Comma separating the arguments
FOR_EACH(arg,arguments) { input.read();
script.addInstruction(I_NOP, arg); t = input.peek();
} }
} else if (minPrec <= PREC_FUN && token==_("[")) { // get member by expr
parseOper(input, script, PREC_ALL, I_BINARY, I_MEMBER);
expectToken(input, _("]"));
} else if (minPrec <= PREC_STRING && token==_("\"{")) {
// for smart strings: "x" {{ e }} "y"
parseOper(input, script, PREC_ALL, I_BINARY, I_ADD); // e
expectToken(input, _("}\""));
parseOper(input, script, PREC_NONE, I_BINARY, I_ADD); // y
} else {
input.putBack();
newlines = newlines2; // remember newlines
break; // unknown LPAREN, has to be {
} }
} else if (token == TOK_NEWLINE) { input.read(); // skip the )
const Token& next = input.peek(1); // generate instruction
if (minPrec <= PREC_NEWLINE && (next == TOK_NAME || next == TOK_LPAREN)) { script.addInstruction(I_CALL, (unsigned int)arguments.size());
// function as ; FOR_EACH(arg,arguments) {
script.addInstruction(I_POP); script.addInstruction(I_NOP, arg);
parseOper(input, script, PREC_SET);
} else {
// skip newlines
newlines = true;
} }
} else if (minPrec <= PREC_STRING && token==_("\"{")) {
// for smart strings: "x" {{ e }} "y"
parseOper(input, script, PREC_ALL, I_BINARY, I_ADD); // e
expectToken(input, _("}\""));
parseOper(input, script, PREC_NONE, I_BINARY, I_ADD); // y
} else if (minPrec <= PREC_NEWLINE && token.newline) {
// newline functions as ;
// only if we don't match another token!
input.putBack();
script.addInstruction(I_POP);
parseOper(input, script, PREC_SET);
} else { } else {
input.putBack(); input.putBack();
newlines = newlines2; // remember newlines
break; break;
} }
} }
if (newlines) {
// we accidentally ate a newline, restore it
input.putBack();
}
// add closing instruction // add closing instruction
if (closeWith != I_NOP) { if (closeWith != I_NOP) {
script.addInstruction(closeWith, closeWithData); script.addInstruction(closeWith, closeWithData);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment