diff --git a/Builds/Tests/4.Assignment.uf b/Builds/Tests/4.Assignment.uf index bebb747..f1ce0bf 100644 --- a/Builds/Tests/4.Assignment.uf +++ b/Builds/Tests/4.Assignment.uf @@ -2,4 +2,10 @@ x = 42; // Chained -x = (y = 42); \ No newline at end of file +x = (y = 42); + +x + y; + + +x += y; +y += 5; \ No newline at end of file diff --git a/Editor/Lectures/Lecture.7.gd b/Editor/Lectures/Lecture.7.gd index 8d196cb..38c22d2 100644 --- a/Editor/Lectures/Lecture.7.gd +++ b/Editor/Lectures/Lecture.7.gd @@ -219,19 +219,6 @@ class Parser: return currToken - # Literal - # : NumericLiteral - # : StringLiteral - # ; - func parse_Literal(): - match NextToken.Type : - TokenType.Number: - return parse_NumericLiteral() - TokenType.String: - return parse_StringLiteral() - - assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken") - # NumericLiteral # : Number # ; @@ -255,7 +242,20 @@ class Parser: node.Value = Token.Value.substr( 1, Token.Value.length() - 2 ) return node - + + # Literal + # : NumericLiteral + # : StringLiteral + # ; + func parse_Literal(): + match NextToken.Type : + TokenType.Number: + return parse_NumericLiteral() + TokenType.String: + return parse_StringLiteral() + + assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken") + # ParenthesizedExpression # : ( Expression ) # ; @@ -434,16 +434,16 @@ func tout(text): const Tests = \ { - # MultiStatement = \ - # { - # Name = "Multi-Statement", - # File = "1.Multi-Statement.uf" - # }, - # BlockStatement = \ - # { - # Name = "Block Statement", - # File = "2.BlockStatement.uf" - # }, + MultiStatement = \ + { + Name = "Multi-Statement", + File = "1.Multi-Statement.uf" + }, + BlockStatement = \ + { + Name = "Block Statement", + File = "2.BlockStatement.uf" + }, BinaryExpression = \ { Name = "Binary Expression", diff --git a/Editor/Lectures/Lecture.8.gd b/Editor/Lectures/Lecture.8.gd new file mode 100644 index 0000000..4eee514 --- /dev/null +++ b/Editor/Lectures/Lecture.8.gd @@ -0,0 +1,563 @@ +extends Node + +# This closesly follows the source provided in the lectures. +# Later on after the lectures are complete or when I deem +# Necessary there will be heavy refactors. + +const TokenType = \ +{ + Program = "Program", + + # Comments + CommentLine = "CommentLine", + CommentMultiLine = "CommentMultiLine", + + # Formatting + Whitespace = "Whitespace", + + # Expressions + + ExpressionPStart = "Expresssion Parenthesis Start", + ExpressionPEnd = "Expression Parenthesis End", + + # Arithmetic + ComplexAssignment = "ComplexAssignment", + Assignment = "Assignment", + AdditiveOp = "AdditiveOperator", + MultiplicativeOp = "MultiplicativeOperator", + + # Statements + StatementEnd = "StatementEnd", + StmtBlockStart = "BlockStatementStart", + StmtBlockEnd = "BlockStatementEnd", + + # Literals + Number = "Number", + String = "String", + + # Symbols + Identifier = "Identifier" +} + +const TokenSpec = \ +{ + # Comments + TokenType.CommentLine : "^\/\/.*", + TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/", + + # Formatting + TokenType.Whitespace : "^\\s+", + + # Expressions + TokenType.ExpressionPStart : "^\\(", + TokenType.ExpressionPEnd : "^\\)", + + # Arithmetic + TokenType.ComplexAssignment : "^[*\\/\\+\\-]=", + TokenType.Assignment : "^=", + TokenType.AdditiveOp : "^[+\\-]", + TokenType.MultiplicativeOp : "^[*\\/]", + + # Literal + TokenType.Number : "\\d+", + TokenType.String : "^\"[^\"]*\"", + + # Statements + TokenType.StatementEnd : "^;", + TokenType.StmtBlockStart : "^{", + TokenType.StmtBlockEnd : "^}", + + # Symbols + TokenType.Identifier : "^\\w+" +} + +class Token: + var Type : String + var Value : String + + func to_Dictionary(): + var result = \ + { + Type = self.Type, + Value = self.Value + } + return result + +class Tokenizer: + var SrcTxt : String + var Cursor : int; + + # Sets up the tokenizer with the program source text. + func init(programSrcText): + SrcTxt = programSrcText + Cursor = 0 + + # Provides the next token in the source text. + func next_Token(): + if reached_EndOfTxt() == true : + return null + + var srcLeft = SrcTxt.substr(Cursor) + var regex = RegEx.new() + var token = Token.new() + + for type in TokenSpec : + regex.compile(TokenSpec[type]) + + var result = regex.search(srcLeft) + if result == null || result.get_start() != 0 : + continue + + # Skip Comments + if type == TokenType.CommentLine || type == TokenType.CommentMultiLine : + Cursor += result.get_string().length() + return next_Token() + + # Skip Whitespace + if type == TokenType.Whitespace : + var addVal = result.get_string().length() + Cursor += addVal + + return next_Token() + + token.Type = type + token.Value = result.get_string() + Cursor += ( result.get_string().length() ) + + return token + + var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}" + var assertStr = assertStrTmplt.format({"value" : Cursor}) + assert(true != true, assertStr) + return null + + func reached_EndOfTxt(): + return Cursor >= ( SrcTxt.length() ) + +var GTokenizer = Tokenizer.new() + + + +const AST_Format = \ +{ + Dictionary = "Dictionary", + SExpression = "S-Expression" +} + +const SyntaxNodeType = \ +{ + NumericLiteral = "NumericLiteral", + StringLiteral = "StringLiteral", + ExpressionStatement = "ExpressionStatement", + BlockStatement = "BlockStatement", + EmptyStatement = "EmptyStatement", + BinaryExpression = "BinaryExpression", + Identifier = "Identifier", + AssignmentExpression = "AssignmentExpression" +} + +class SyntaxNode: + var Type : String + var Value # Not specifing a type implicity declares a Variant type. + + func to_SExpression(): + var expression = [ Type ] + + if typeof(Value) == TYPE_ARRAY : + var array = [] + for entry in self.Value : + if typeof(entry) == TYPE_OBJECT : + array.append( entry.to_SExpression() ) + else : + array.append( entry ) + + expression.append(array) + return expression + + if typeof(Value) == TYPE_OBJECT : + var result = [ Type, Value.to_SExpression() ] + return result + + expression.append(Value) + return expression + + func to_Dictionary(): + if typeof(Value) == TYPE_ARRAY : + var array = [] + for entry in self.Value : + if typeof(entry) == TYPE_OBJECT : + array.append( entry.to_Dictionary() ) + else : + array.append( entry ) + + var result = \ + { + Type = self.Type, + Value = array + } + return result + + if typeof(Value) == TYPE_OBJECT : + var result = \ + { + Type = self.Type, + Value = self.Value.to_Dictionary() + } + return result + + var result = \ + { + Type = self.Type, + Value = self.Value + } + return result + +class Parser: + var TokenizerRef : Tokenizer + var NextToken : Token + + func is_Literal(): + return NextToken.Type == TokenType.Number || NextToken.Type == TokenType.String + + func eat(tokenType): + var currToken = self.NextToken + + assert(currToken != null, "eat: NextToken was null") + + var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}" + var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType}) + + assert(currToken.Type == tokenType, assertStr) + + NextToken = TokenizerRef.next_Token() + + return currToken + + # Literal + # : NumericLiteral + # : StringLiteral + # ; + func parse_Literal(): + match NextToken.Type : + TokenType.Number: + return parse_NumericLiteral() + TokenType.String: + return parse_StringLiteral() + + assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken") + + # NumericLiteral + # : Number + # ; + func parse_NumericLiteral(): + var Token = eat(TokenType.Number) + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.NumericLiteral + node.Value = int( Token.Value ) + + return node + + # StringLiteral + # : String + # ; + func parse_StringLiteral(): + var Token = eat(TokenType.String) + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.StringLiteral + node.Value = Token.Value.substr( 1, Token.Value.length() - 2 ) + + return node + + # ParenthesizedExpression + # : ( Expression ) + # ; + func parse_ParenthesizedExpression(): + eat(TokenType.ExpressionPStart) + + var expression = parse_Expression() + + eat(TokenType.ExpressionPEnd) + + return expression + + # MultiplicativeExpression + # : PrimaryExpression + # : MultiplicativeExpression MultiplicativeOp PrimaryExpression -> PrimaryExpression MultiplicativeOp ... Literal + # ; + func parse_MultiplicativeExpression(): + var \ + parseFn = FuncRef.new() + parseFn.set_instance(self) + parseFn.set_function("parse_PrimaryExpression") + + return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp) + + # AdditiveExpression + # : MultiplicativeExpression + # | AdditiveExpression AdditiveOp MultiplicativeExpression -> MultiplicativeExpression AdditiveOp ... Literal + # ; + func parse_AdditiveExpression(): + var \ + parseFn = FuncRef.new() + parseFn.set_instance(self) + parseFn.set_function("parse_MultiplicativeExpression") + + return parse_BinaryExpression(parseFn, TokenType.AdditiveOp) + + # BinaryExpression + # : MultiplicativeExpression + # | AdditiveExpression + # ; + func parse_BinaryExpression(parse_fn, operatorToken): + var left = parse_fn.call_func() + + while NextToken.Type == operatorToken: + var operator = eat(operatorToken) + var right = parse_fn.call_func() + + var \ + nestedNode = SyntaxNode.new() + nestedNode.Type = SyntaxNodeType.BinaryExpression + nestedNode.Value = [] + nestedNode.Value.append(operator.Value) + nestedNode.Value.append(left) + nestedNode.Value.append(right) + + left = nestedNode; + + return left + + # Identifier + # : IdentifierSymbol + # ; + func parse_Identifier(): + var name = eat(TokenType.Identifier).Value + + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.Identifier + node.Value = name + + return node + + # ResolvedSymbol + # : Identiifer + # ; + func parse_ResolvedSymbol(): + var resolvedSymbol = parse_Identifier() + + if resolvedSymbol.Type == SyntaxNodeType.Identifier : + return resolvedSymbol + + var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}" + var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type}) + + assert(true != true, assertStr) + + # PrimaryExpression + # : Literal + # | ParenthesizedExpression + # | ResolvedSymbol + # ; + func parse_PrimaryExpression(): + if is_Literal(): + return parse_Literal() + + match NextToken.Type: + TokenType.ExpressionPStart: + return parse_ParenthesizedExpression() + + return parse_ResolvedSymbol() + + # AssignmentExpression + # : AdditiveExpression + # | ResolvedSymbol AssignmentOperator AssignmetnExpression + # ; + func parse_AssignmentExpression(): + var left = parse_AdditiveExpression() + + if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment : + return left + + var assignmentOp; + + if NextToken.Type == TokenType.Assignment : + assignmentOp = eat(TokenType.Assignment) + elif NextToken.Type == TokenType.ComplexAssignment : + assignmentOp = eat(TokenType.ComplexAssignment) + + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.AssignmentExpression + node.Value = \ + [ + assignmentOp.Value, + left, + parse_AssignmentExpression() + ] + + return node + + # Expression + # : AssignmentExpression + # ; + func parse_Expression(): + return parse_AssignmentExpression() + + # EmptyStatement + # ; + func parse_EmptyStatement(): + eat(TokenType.StatementEnd) + + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.EmptyStatement + + return node + + # BlockStatement + # : { OptStatementList } + # ; + func parse_BlockStatement(): + eat(TokenType.StmtBlockStart) + + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.BlockStatement + + if NextToken.Type != TokenType.StmtBlockEnd : + node.Value = parse_StatementList(TokenType.StmtBlockEnd) + else : + node.Value = [] + + eat(TokenType.StmtBlockEnd) + + return node + + # ExpressionStatement + # : Expression + # ; + func parse_ExpressionStatement(): + var expression = parse_Expression() + eat(TokenType.StatementEnd) + + var \ + node = SyntaxNode.new() + node.Type = SyntaxNodeType.ExpressionStatement + node.Value = expression + + return expression + + # Statement + # : ExpressionStatement + # : BlockStatement + # : EmptyStatement + # ; + func parse_Statement(): + match NextToken.Type : + TokenType.StatementEnd : + return parse_EmptyStatement() + TokenType.StmtBlockStart : + return parse_BlockStatement() + + return parse_ExpressionStatement() + + # StatementList + # : Statement + # | StatementList Statement -> Statement ... + # ; + func parse_StatementList(endToken): + var statementList = [ parse_Statement() ] + + while NextToken != null && NextToken.Type != endToken : + statementList.append( parse_Statement() ) + + return statementList + + # Program + # : StatementList + # : Literal + # ; + func parse_Program(): + var \ + node = SyntaxNode.new() + node.Type = TokenType.Program + node.Value = parse_StatementList(null) + + return node + + # Parses the text program description into an AST. + func parse(TokenizerRef): + self.TokenizerRef = TokenizerRef + + NextToken = TokenizerRef.next_Token() + + return parse_Program() + +var GParser = Parser.new() + + + +onready var TextOut = GScene.get_node("TextOutput") + +func tout(text): + TextOut.insert_text_at_cursor(text) + +const Tests = \ +{ + MultiStatement = \ + { + Name = "Multi-Statement", + File = "1.Multi-Statement.uf" + }, + BlockStatement = \ + { + Name = "Block Statement", + File = "2.BlockStatement.uf" + }, + BinaryExpression = \ + { + Name = "Binary Expression", + File = "3.BinaryExpression.uf" + }, + Assignment = \ + { + Name = "Assignment", + File = "4.Assignment.uf" + } +} + +func test(entry): + var introMessage = "Testing: {Name}\n" + var introMessageFormatted = introMessage.format({"Name" : entry.Name}) + tout(introMessageFormatted) + + var path + if Engine.editor_hint : + path = "res://../Tests/{TestName}" + else : + path = "res://../Builds/Tests/{TestName}" + var pathFormatted = path.format({"TestName" : entry.File}) + + var \ + file = File.new() + file.open(pathFormatted, File.READ) + + var programDescription = file.get_as_text() + file.close() + + GTokenizer.init(programDescription) + var ast = GParser.parse(GTokenizer) + + var json = JSON.print(ast.to_SExpression(), '\t') + + tout(json + "\n") + tout("Passed!\n") + + +# Main Entry point. +func _ready(): + for Key in Tests : + test(Tests[Key]) diff --git a/Editor/project.godot b/Editor/project.godot index 5329c1f..cd62add 100644 --- a/Editor/project.godot +++ b/Editor/project.godot @@ -18,7 +18,7 @@ config/icon="res://Assets/Branding/RDP_Class_cover_small.png" [autoload] GScene="*res://Lectures/Lecture.tscn" -GScript="*res://Lectures/Lecture.7.gd" +GScript="*res://Lectures/Lecture.8.gd" [gui]