extends Node

# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.

const TokenType = \
{
	Program = "Program",

	# Comments
	CommentLine      = "CommentLine",
	CommentMultiLine = "CommentMultiLine",

	# Formatting
	Whitespace = "Whitespace",

	# Statements
	StatementEnd = "StatementEnd",

	# Literals
	Number = "Number",
	String = "String"
}

const TokenSpec = \
{
	TokenType.CommentLine      : "^//.*",
	TokenType.CommentMultiLine : "^/\\*[\\s\\S]*?\\*/",
	TokenType.Whitespace       : "^\\s+",
	TokenType.Number           : "^\\d+",
	TokenType.String           : "^\"[^\"]*\"",
	TokenType.StatementEnd     : "^;"
}

class Token:
	var Type  : String
	var Value : String

	func toDict():
		var result = \
		{
			Type  = self.Type,
			Value = self.Value
		}
		return result

class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text.
	func next_Token():
		if self.reached_EndOfTxt() == true :
			return null

		var srcLeft = self.SrcTxt.substr(Cursor)
		var regex   = RegEx.new()
		var token   = Token.new()

		for type in TokenSpec :
			regex.compile(TokenSpec[type])

			var result = regex.search(srcLeft)
			if result == null || result.get_start() != 0 :
				continue

			# Skip Comments
			if type == TokenType.CommentLine || type == TokenType.CommentMultiLine :
				self.Cursor += result.get_string().length()
				return next_Token()

			# Skip Whitespace
			if type == TokenType.Whitespace :
				var addVal = result.get_string().length()
				self.Cursor += addVal

				return next_Token()

			token.Type  = type
			token.Value = result.get_string()

			self.Cursor += ( result.get_string().length() )

			return token

		var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
		var assertStr      = assertStrTmplt.format({"value" : self.Cursor})
		assert(false, assertStr)
		return null

	func reached_EndOfTxt():
		return self.Cursor >= self.SrcTxt.length()

var GTokenizer = Tokenizer.new()
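
# Illustrative sketch (not part of the lecture source): dumps the token stream
# for an arbitrary example input. Comments and whitespace are consumed without
# producing tokens, and next_Token() returns null once the text is exhausted.
func demo_dump_tokens():
	GTokenizer.init("42; // the answer\n\"hello\";")

	var token = GTokenizer.next_Token()
	while token != null :
		print(token.toDict())
		token = GTokenizer.next_Token()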

const SyntaxNodeType = \
{
	NumericLiteral      = "NumericLiteral",
	StringLiteral       = "StringLiteral",
	ExpressionStatement = "ExpressionStatement"
}

class SyntaxNode:
	var Type  : String
	var Value # Not specifying a type implicitly declares a Variant type.

	func toDict():
		var ValueDict = self.Value

		# Nested nodes and node arrays are converted so JSON.print can serialize them.
		if typeof(Value) == TYPE_OBJECT :
			ValueDict = self.Value.toDict()

		if typeof(Value) == TYPE_ARRAY :
			var dict  = {}
			var index = 0
			for entry in self.Value :
				dict[index] = entry.toDict()
				index += 1

			ValueDict = dict

		var result = \
		{
			Type  = self.Type,
			Value = ValueDict
		}
		return result

class ProgramNode:
	var Type : String
	var Body : Object

	func toDict():
		var result = \
		{
			Type = self.Type,
			Body = self.Body.toDict()
		}
		return result

class Parser:
	var TokenizerRef : Tokenizer
	var NextToken    : Token

	func eat(tokenType):
		var currToken = self.NextToken

		assert(currToken != null, "eat: NextToken was null")

		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr      = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
		assert(currToken.Type == tokenType, assertStr)

		self.NextToken = self.TokenizerRef.next_Token()

		return currToken

	# Literal
	#	: NumericLiteral
	#	: StringLiteral
	#	;
	#
	func parse_Literal():
		match NextToken.Type :
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()

		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# NumericLiteral
	#	: Number
	#	;
	#
	func parse_NumericLiteral():
		var literalToken = eat(TokenType.Number)

		var \
		node       = SyntaxNode.new()
		node.Type  = SyntaxNodeType.NumericLiteral
		node.Value = int( literalToken.Value )

		return node

	# StringLiteral
	#	: String
	#	;
	#
	func parse_StringLiteral():
		var literalToken = eat(TokenType.String)

		var \
		node       = SyntaxNode.new()
		node.Type  = SyntaxNodeType.StringLiteral
		node.Value = literalToken.Value.substr( 1, literalToken.Value.length() - 2 )

		return node

	# Expression
	#	: Literal
	#	;
	#
	func parse_Expression():
		return parse_Literal()

	# ExpressionStatement
	#	: Expression
	#	;
	#
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)

		var \
		node       = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ExpressionStatement
		node.Value = expression

		return node

	# Statement
	#	: ExpressionStatement
	#	;
	#
	func parse_Statement():
		return parse_ExpressionStatement()

	# StatementList
	#	: Statement
	#	| StatementList Statement -> Statement ...
	#	;
	#
	func parse_StatementList():
		var statementList = [ parse_Statement() ]

		while NextToken != null :
			statementList.append( parse_Statement() )

		var \
		node       = SyntaxNode.new()
		node.Type  = "StatementList"
		node.Value = statementList

		return node

	# Program
	#	: StatementList
	#	;
	#
	func parse_Program():
		var \
		node      = ProgramNode.new()
		node.Type = TokenType.Program
		node.Body = parse_StatementList()

		return node

	# Parses the text program description into an AST.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		NextToken         = TokenizerRef.next_Token()

		return parse_Program()

var GParser = Parser.new()

const Tests = \
{
	MultiStatement = \
	{
		Name = "Multi-Statement",
		File = "1.Multi-Statement.uf"
	}
}

func test(entry):
	var introMessage          = "Testing: {Name}"
	var introMessageFormatted = introMessage.format({"Name" : entry.Name})
	print(introMessageFormatted)

	var path
	if Engine.is_editor_hint() :
		path = "res://Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"

	var pathFormatted = path.format({"TestName" : entry.File})

	var \
	file = File.new()
	file.open(pathFormatted, File.READ)

	var programDescription = file.get_as_text()
	file.close()

	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)

	var json = JSON.print(ast.toDict(), "\t")
	print(json)

	print("Passed!\n")

# Main Entry point.
func _ready():
	for Key in Tests :
		test(Tests[Key])
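
# Illustrative sketch (not part of the lecture source): parses a program given
# as an in-memory string instead of a test file and prints the resulting AST
# as JSON. The literal below is an arbitrary example; any text matching the
# grammar above works.
func demo_parse_from_string():
	GTokenizer.init("42;\n\"hello\";")

	var ast = GParser.parse(GTokenizer)

	# Program -> StatementList -> two ExpressionStatements holding a
	# NumericLiteral and a StringLiteral.
	print(JSON.print(ast.toDict(), "\t"))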