extends Node

# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.

# Names of every token / AST node kind produced by the tokenizer and parser.
const TokenType = \
{
	Program = "Program",

	# Comments
	CommentLine      = "CommentLine",
	CommentMultiLine = "CommentMultiLine",

	# Formatting
	Whitespace = "Whitespace",

	# Literals
	Number = "Number",
	String = "String"
}

# Regex pattern for each token type. Patterns are tried in declaration order
# against the text remaining after the cursor, so every pattern must be
# anchored with ^ to only match at the cursor position.
const TokenSpec = \
{
	TokenType.CommentLine      : "^\/\/.*",
	TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",

	TokenType.Whitespace : "^\\s+",

	# Anchored so digits inside a later string literal are not picked up
	# as a Number token (Number is tried before String).
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\""
}


# A single lexeme: its token type name and the exact text that was matched.
class Token:
	var Type  : String
	var Value : String

	# Returns the token as a plain dictionary with Type and Value keys.
	func toDict():
		var result = \
		{
			Type  = self.Type,
			Value = self.Value
		}
		return result


# Splits program source text into tokens, one per next_Token() call.
class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text.
	# Comments and whitespace are consumed and skipped.
	# Returns null once the end of the source text has been reached.
	# Asserts if the text at the cursor matches no pattern in TokenSpec.
	func next_Token():
		if reached_EndOfTxt():
			return null

		var srcLeft = SrcTxt.substr(Cursor)
		var regex   = RegEx.new()

		for tokenType in TokenSpec:
			regex.compile(TokenSpec[tokenType])

			var result = regex.search(srcLeft)
			if result == null:
				continue

			# Always advance past the whole match so the next call starts on
			# the first character that has not been consumed yet.
			var lexeme = result.get_string()
			Cursor += lexeme.length()

			# Skip comments and whitespace
			if tokenType == TokenType.CommentLine \
			|| tokenType == TokenType.CommentMultiLine \
			|| tokenType == TokenType.Whitespace:
				return next_Token()

			var token = Token.new()
			token.Type  = tokenType
			token.Value = lexeme
			return token

		var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
		var assertStr      = assertStrTmplt.format({"value" : Cursor})
		assert(false, assertStr)
		return null

	# True when the cursor has moved past the last character of the source text.
	func reached_EndOfTxt():
		return Cursor >= SrcTxt.length()

var GTokenizer = Tokenizer.new()


# AST node for a literal: Type names the node kind, Value holds the payload.
class SyntaxNode:
	var Type : String
	var Value # Not specifying a type implicitly declares a Variant type.

	# Returns the node as a plain dictionary with Type and Value keys.
	func toDict():
		var result = \
		{
			Type  = self.Type,
			Value = self.Value
		}
		return result


# Root AST node. Body is the node returned by Parser.parse_Literal().
class ProgramNode:
	var Type : String
	var Body : Object

	# Returns the node as a dictionary; Body is converted via its own toDict().
	func toDict():
		var result = \
		{
			Type = self.Type,
			Body = self.Body.toDict()
		}
		return result


# Parser that pulls tokens from a Tokenizer with a single token of lookahead.
class Parser:
	var TokenizerRef : Tokenizer
	var NextToken    : Token

	# Consumes the lookahead token, asserting that it is of the given type,
	# then loads the following token into NextToken.
	# Returns the consumed token.
	func eat(tokenType):
		var currToken = self.NextToken

		assert(currToken != null, "eat: NextToken was null")

		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr      = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})

		assert(currToken.Type == tokenType, assertStr)

		self.NextToken = self.TokenizerRef.next_Token()

		return currToken

	# Literal
	#	: NumericLiteral
	#	: StringLiteral
	#	;
	func parse_Literal():
		# An empty (or comment/whitespace-only) program leaves no lookahead token.
		assert(self.NextToken != null, "parse_Literal: NextToken was null")

		match self.NextToken.Type:
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()

		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# NumericLiteral
	#	: Number
	#	;
	#
	func parse_NumericLiteral():
		# Lower-case local so the Token class is not shadowed.
		var token = self.eat(TokenType.Number)
		var \
		node = SyntaxNode.new()
		node.Type  = TokenType.Number
		node.Value = int( token.Value )

		return node

	# StringLiteral
	#	: String
	#	;
	#
	func parse_StringLiteral():
		var token = self.eat(TokenType.String)
		var \
		node = SyntaxNode.new()
		node.Type  = TokenType.String
		# Strip the surrounding double quotes from the matched text.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )

		return node

	# Program
	#	: Literal
	#	;
	#
	func parse_Program():
		var \
		node = ProgramNode.new()
		node.Type = TokenType.Program
		node.Body = parse_Literal()

		return node

	# Parses the text program description into an AST.
	# TokenizerRef must already be initialised with the source text.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef

		NextToken = TokenizerRef.next_Token()

		return parse_Program()

var GParser = Parser.new()

# Source text of the program that test() parses.
var ProgramDescription : String

# Tokenizes and parses ProgramDescription, then prints the AST as
# tab-indented JSON.
func test():
	GTokenizer.init(ProgramDescription)

	var ast = GParser.parse(GTokenizer)

	print(JSON.print(ast.toDict(), "\t"))


# Main Entry point.
func _ready():
	# Programs to run through test(), in execution order.
	var programSources = \
	[
		# Numerical test
		"47",

		# String Test
		"\"hello\"",

		# Whitespace test
		" \"we got past whitespace\" ",

		# Comment Single Test
		"""
		// Testing a comment
		\"hello sir\"
		""",

		# Comment Multi-Line Test
		"""
		/**
		*
		* Testing a comment
		*/
		\"may I have some grapes\"
		""",

		# Multi-statement test
		"""
		// Testing a comment
		\"hello sir\";

		/**
		*
		* Testing a comment
		*/
		\"may I have some grapes\";
		"""
	]

	# test() reads its input from ProgramDescription, so assign before each run.
	for programSource in programSources:
		ProgramDescription = programSource
		test()


# Called every frame. 'delta' is the elapsed time since the previous frame.
#func _process(delta):
#	pass