better script parser with respect to newlines

9e911419 · twanvl · 2e870711 · 9e911419
Commit 9e911419 authored Oct 12, 2006 by twanvl
Show whitespace changes
Inline Side-by-side

Showing with 120 additions and 140 deletions

src/script/parser.cpp src/script/parser.cpp +120 -140

No files found.
--- a/src/script/parser.cpp
+++ b/src/script/parser.cpp
@@ -27,7 +27,7 @@ enum TokenType
 ,	TOK_OPER	// + - * / . ;
 ,	TOK_LPAREN	// ( { [
 ,	TOK_RPAREN	// ) } ]
-,	TOK_NEWLINE	// newline
+,	TOK_DUMMY	// placeholder for putBack
 ,	TOK_EOF		// end of input
 };

@@ -35,6 +35,7 @@ enum TokenType
 struct Token {
 	TokenType type;
 	String    value;
+	bool      newline; ///< Is there a newline between this token and the previous one?
 	
 	inline operator == (TokenType     t) const { return type  == t; }
 	inline operator != (TokenType     t) const { return type  != t; }
@@ -65,10 +66,13 @@ class TokenIterator {
 	size_t pos;
 	vector<Token> buffer;      // buffer of unread tokens, front() = current
 	stack<bool>   open_braces; // braces we entered, true if the brace was from a smart string escape
+	bool          newline;    ///< Did we just pass a newline?
+	/// Add a token to the buffer, with the current newline value, resets newline
+	void addToken(TokenType type, const String& value);
 	/// Read the next token, and add it to the buffer
-	void addToken();
+	void readToken();
 	/// Read the next token which is a string (after the opening ")
-	void addStringToken();
+	void readStringToken();
 };

 // ----------------------------------------------------------------------------- : Characters
@@ -92,7 +96,7 @@ TokenIterator::TokenIterator(const String& str)
 const Token& TokenIterator::peek(size_t offset) {
 	// read the next token until we have enough
 	while (buffer.size() <= offset) {
-		addToken();
+		readToken();
 	}
 	return buffer[offset];
 }
@@ -103,71 +107,70 @@ const Token& TokenIterator::read() {
 }

 void TokenIterator::putBack() {
-	Token t = {TOK_NEWLINE, _("\n")};
+	// Don't use addToken, because it changes newline
+	// Also, we want to push_front
+	Token t = {TOK_DUMMY, _(""), false};
 	buffer.insert(buffer.begin(), t);
 }

-void TokenIterator::addToken() {
+void TokenIterator::addToken(TokenType type, const String& value) {
+	Token t = {type, value, newline};
+	buffer.push_back(t);
+	newline = false;
+}
+
+void TokenIterator::readToken() {
 	if (pos >= input.size()) {
 		// EOF
-		Token t = {TOK_EOF, _("end of input")};
-		buffer.push_back(t);
+		addToken(TOK_EOF, _("end of input"));
 		return;
 	}
 	// read a character from the input
 	Char c = input.GetChar(pos++);
 	if (c == _('\n')) {
-		Token t = {TOK_NEWLINE, _("newline")};
-		buffer.push_back(t);
+		newline = true;
 	} else if (isSpace(c)) {
 		// ignore
 	} else if (isAlpha(c)) {
 		// name
 		size_t start = pos - 1;
 		while (pos < input.size() && isAlnum_(input.GetChar(pos))) ++pos;
-		Token t = {TOK_NAME, cannocial_name_form(input.substr(start, pos-start)) }; // convert name to cannocial form
-		buffer.push_back(t);
+		addToken(TOK_NAME, cannocial_name_form(input.substr(start, pos-start))); // convert name to cannocial form
 	} else if (isDigit(c)) {
 		// number
 		size_t start = pos - 1;
 		while (pos < input.size() && isDigitOrDot(input.GetChar(pos))) ++pos;
 		String num = input.substr(start, pos-start);
-		Token t = {
+		addToken(
 			num.find_first_of('.') == String::npos ? TOK_INT : TOK_DOUBLE,
 			num
-		};
-		buffer.push_back(t);
+		);
 	} else if (isOper(c)) {
 		// operator
-		Token t = { TOK_OPER };
 		if (pos < input.size() && isLongOper(input.substr(pos - 1, 2))) {
 			// long operator
-			t.value = input.substr(pos - 1, 2);
+			addToken(TOK_OPER, input.substr(pos - 1, 2));
 			pos += 1;
 		} else {
-			t.value = input.substr(pos - 1, 1);
+			addToken(TOK_OPER, input.substr(pos - 1, 1));
 		}
-		buffer.push_back(t);
 	} else if (c==_('"')) {
 		// string
-		addStringToken();
+		readStringToken();
 	} else if (c == _('}') && !open_braces.empty() && open_braces.top()) {
 		// closing smart string, resume to string parsing
 		//   "a{e}b"  -->  "a"  "{  e  }"  "b"
 		open_braces.pop();
-		Token t2 = {TOK_RPAREN, _("}\"")};
-		buffer.push_back(t2);
-		addStringToken();
+		addToken(TOK_RPAREN, _("}\""));
+		readStringToken();
 	} else if (isLparen(c)) {
 		// paranthesis/brace
 		open_braces.push(false);
-		Token t = { TOK_LPAREN, String(1,c) };
-		buffer.push_back(t);
+		addToken(TOK_LPAREN, String(1,c));
 	} else if (isRparen(c)) {
 		// paranthesis/brace
 		if (!open_braces.empty()) open_braces.pop();
-		Token t = { TOK_RPAREN, String(1,c) };
-		buffer.push_back(t);
+		addToken(TOK_RPAREN, String(1,c));
 	} else if(c==_('#')) {
 		// comment untill end of line
 		while (pos < input.size() && input[pos] != _('\n')) ++pos;
@@ -176,33 +179,32 @@ void TokenIterator::addToken() {
 	}
 }

-void TokenIterator::addStringToken() {
-	Token t = {TOK_STRING};
+void TokenIterator::readStringToken() {
+	String str;
 	while (true) {
 		if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant"));
 		Char c = input[pos++];			//% input.GetChar(pos++);
 		// parse the string constant
 		if (c == _('"')) {
 			// end of string
-			buffer.push_back(t);
+			addToken(TOK_STRING, str);
 			return;
 		} else if (c == _('\\')) {
 			// escape
 			if (pos >= input.size()) throw ScriptParseError(_("Unexpected end of input in string constant"));
 			c = input[pos++];
-			if (c == _('n')) t.value += _('\n');
-			if (c == _('<')) t.value += _('\1'); // escape for <
-			else             t.value += c;       // \ or { or "
+			if      (c == _('n')) str += _('\n'); // \n escape: newline only, must not also fall through and append 'n'
+			else if (c == _('<')) str += _('\1'); // escape for <
+			else                  str += c;       // \ or { or "
 		} else if (c == _('{')) {
 			// smart string
 			//   "a{e}b"  -->  "a"  "{  e  }"  "b"
-			buffer.push_back(t);
+			addToken(TOK_STRING, str);
 			open_braces.push(true);
-			Token t2 = {TOK_LPAREN, _("\"{")};
-			buffer.push_back(t2);
+			addToken(TOK_LPAREN, _("\"{"));
 			return;
 		} else {
-			t.value += c;
+			str += c;
 		}
 	}
 }
@@ -257,7 +259,6 @@ ScriptP parse(const String& s) {
 // Expect a token, throws if it is not found
 void expectToken(TokenIterator& input, const Char* expect) {
 	Token token = input.read();
-	while (token == TOK_NEWLINE) token = input.read(); // skip newlines
 	if (token != expect) {
 		throw ScriptParseError(expect, token.value);
 	}
@@ -371,8 +372,6 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) {
 			script.addInstruction(I_PUSH_CONST, toScript(d));
 		} else if (token == TOK_STRING) {
 			script.addInstruction(I_PUSH_CONST, toScript(token.value));
-		} else if (token == TOK_NEWLINE) {
-			continue; // ignore
 		} else {
 			throw ScriptParseError(_("Unexpected token '") + token.value + _("'"));
 		}
@@ -382,15 +381,16 @@ void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) {

 void parseOper(TokenIterator& input, Script& script, Precedence minPrec, InstructionType closeWith, int closeWithData) {
 	parseExpr(input, script, minPrec); // first argument
-	bool newlines = false; // did we skip any newlines?
 	// read any operators after an expression
 	// EBNF:                    expr = expr | expr oper expr
 	// without left recursion:  expr = expr (oper expr)*
 	while (true) {
 		const Token& token = input.read();
-		bool newlines2 = newlines;
-		newlines = false;
-		if (token == TOK_OPER || token == TOK_NAME) {
+		if (token != TOK_OPER && token != TOK_NAME && token!=TOK_LPAREN) {
+			// not an operator-like token
+			input.putBack();
+			break;
+		}
 		if (minPrec <= PREC_SEQ && token==_(";")) {
 			Token next = input.peek(1);
 			if (next == TOK_RPAREN || next == TOK_EOF) {
@@ -431,13 +431,10 @@ void parseOper(TokenIterator& input, Script& script, Precedence minPrec, Instruc
 			} else {
 				throw ScriptParseError(_("name"), input.peek().value);
 			}
-			} else {
-				input.putBack();
-				newlines = newlines2; // remember newlines
-				break; // unknown operator
-			}
-		} else if (token==TOK_LPAREN) {
-			if (minPrec <= PREC_FUN && token==_("(")) {
+		} else if (minPrec <= PREC_FUN && token==_("[")) { // get member by expr
+			parseOper(input, script, PREC_ALL, I_BINARY, I_MEMBER);
+			expectToken(input, _("]"));
+		} else if (minPrec <= PREC_FUN && token==_("(")) {
 			// function call, read arguments
 			vector<int> arguments;
 			Token t = input.peek();
@@ -466,39 +463,22 @@ void parseOper(TokenIterator& input, Script& script, Precedence minPrec, Instruc
 			FOR_EACH(arg,arguments) {
 				script.addInstruction(I_NOP, arg);
 			}
-			} else if (minPrec <= PREC_FUN && token==_("[")) { // get member by expr
-				parseOper(input, script, PREC_ALL, I_BINARY, I_MEMBER);
-				expectToken(input, _("]"));
 		} else if (minPrec <= PREC_STRING && token==_("\"{")) {
 			// for smart strings: "x" {{ e }} "y"
 			parseOper(input, script, PREC_ALL,  I_BINARY, I_ADD);	// e
 			expectToken(input, _("}\""));
 			parseOper(input, script, PREC_NONE, I_BINARY, I_ADD);	// y
-			} else {
+		} else if (minPrec <= PREC_NEWLINE && token.newline) {
+			// newline functions as ;
+			// only if we don't match another token!
 			input.putBack();
-				newlines = newlines2; // remember newlines
-				break; // unknown LPAREN, has to be {
-			}
-		} else if (token == TOK_NEWLINE) {
-			const Token& next = input.peek(1);
-			if (minPrec <= PREC_NEWLINE && (next == TOK_NAME || next == TOK_LPAREN)) {
-				// function as ;
 			script.addInstruction(I_POP);
 			parseOper(input, script, PREC_SET);
-			} else {
-				// skip newlines
-				newlines = true;
-			}
 		} else {
 			input.putBack();
-			newlines = newlines2; // remember newlines
 			break;
 		}
 	}
-	if (newlines) {
-		// we accidentally ate a newline, restore it
-		input.putBack();
-	}
 	// add closing instruction
 	if (closeWith != I_NOP) {
 		script.addInstruction(closeWith, closeWithData);