Renamed BAPFS -> RDP, RDP completed.

This commit is contained in:
2022-07-14 17:12:25 -04:00
parent 0acd6ecbaf
commit d4ee6574b2
53 changed files with 1617 additions and 140 deletions

View File

@ -0,0 +1,61 @@
extends Node
# This closely follows the source provided in the lectures.
# After the lectures are complete, or when deemed necessary,
# there will be heavy refactors.
class SyntaxNode:
	# Minimal AST node: a type tag plus an integer payload.
	var Type  : String
	var Value : int

	# Serializes the node as a { Type, Value } dictionary.
	func Dictionary():
		return { Type = self.Type, Value = self.Value }
class LetterParser:
	# Program source text being parsed.
	var Str : String

	# NumericLiteral
	#     : NUMBER
	#     ;
	func NumericLiteral():
		var node = SyntaxNode.new()
		node.Type  = "NumericLiteral"
		node.Value = int(self.Str)
		return node

	# Parses the text program description into an AST.
	func Parse(programDescription):
		self.Str = programDescription
		return NumericLiteral()
var ProgramDescription = "7"
var LParser            = LetterParser.new()

# Note: _ready is being used for the Program func of the lectures.
# Main entry point.
#
# Program
#     : NumericLiteral
#     ;
func _ready():
	var ast = LParser.Parse(ProgramDescription)
	print(to_json(ast.Dictionary()))

# Called every frame. 'delta' is the elapsed time since the previous frame.
#func _process(delta):
#	pass

View File

@ -0,0 +1,702 @@
extends Node
# This closely follows the source provided in the lectures.
# After the lectures are complete, or when deemed necessary,
# there will be heavy refactors.
# Token classification labels. The values are only used as human-readable
# tags (diagnostics and the Program node type), never matched as source text.
const TokenType = {
	Program = "Program",

	# Comments
	CommentLine      = "CommentLine",
	CommentMultiLine = "CommentMultiLine",

	# Formatting
	Whitespace = "Whitespace",

	# Expressions
	# Fix: label was misspelled "ExpresssionParenthesisStart".
	ExpressionPStart = "ExpressionParenthesisStart",
	ExpressionPEnd   = "ExpressionParenthesisEnd",

	# Logical
	RelationalOp = "RelationalOperator",

	# Arithmetic
	ComplexAssignment = "ComplexAssignment",
	Assignment        = "Assignment",
	AdditiveOp        = "AdditiveOperator",
	MultiplicativeOp  = "MultiplicativeOperator",

	# Conditional
	Conditional_if   = "if Conditional",
	Conditional_else = "else Conditional",

	# Statements
	StatementEnd   = "StatementEnd",
	StmtBlockStart = "BlockStatementStart",
	StmtBlockEnd   = "BlockStatementEnd",
	CommaDelimiter = "CommaDelimiter",

	# Literals
	Number = "Number",
	String = "String",

	# Symbols
	VarDeclare = "Variable Declaration",
	Identifier = "Identifier"
}
# Regex spec for each token type, tried in insertion order against the text
# at the cursor. Every pattern is anchored with ^ so only a match at the
# cursor position counts.
const TokenSpec = {
	# Comments
	TokenType.CommentLine      : "^\\/\\/.*",
	TokenType.CommentMultiLine : "^\\/\\*[\\s\\S]*?\\*\\/",

	# Formatting
	TokenType.Whitespace : "^\\s+",

	# Expressions
	TokenType.ExpressionPStart : "^\\(",
	TokenType.ExpressionPEnd   : "^\\)",

	# Logical
	TokenType.RelationalOp : "^[>\\<]=?",

	# Arithmetic
	TokenType.ComplexAssignment : "^[*\\/\\+\\-]=",
	TokenType.Assignment        : "^=",
	TokenType.AdditiveOp        : "^[+\\-]",
	TokenType.MultiplicativeOp  : "^[*\\/]",

	# Literal
	# Fix: Number was the only unanchored pattern; without ^ it could match
	# digits later in the remaining text (the caller's get_start() != 0 check
	# masked this, but the spec is now consistent and cheaper to reject).
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\"",

	# Keywords must precede Identifier, whose ^\w+ would otherwise match them.
	TokenType.Conditional_if   : "^\\bif\\b",
	TokenType.Conditional_else : "^\\belse\\b",

	# Statements
	TokenType.StatementEnd   : "^;",
	TokenType.StmtBlockStart : "^{",
	TokenType.StmtBlockEnd   : "^}",
	TokenType.CommaDelimiter : "^,",

	# Symbols
	TokenType.VarDeclare : "^\\blet\\b",
	TokenType.Identifier : "^\\w+"
}
class Token:
	# One lexical token: its classification and the matched source text.
	var Type  : String
	var Value : String

	# Serializes the token as a { Type, Value } dictionary.
	func to_Dictionary():
		return { Type = self.Type, Value = self.Value }
class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text, or null once exhausted.
	# Fix: comments/whitespace are now skipped with a loop instead of
	# recursion, so comment- or whitespace-heavy input cannot grow the stack.
	func next_Token():
		var regex = RegEx.new()
		while !reached_EndOfTxt():
			var srcLeft = SrcTxt.substr(Cursor)
			var matched = false
			for type in TokenSpec:
				regex.compile(TokenSpec[type])
				var result = regex.search(srcLeft)
				# Only a match at the cursor position counts.
				if result == null || result.get_start() != 0:
					continue
				matched = true
				Cursor += result.get_string().length()
				# Comments and whitespace produce no token; rescan from the new cursor.
				if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace:
					break
				var token   = Token.new()
				token.Type  = type
				token.Value = result.get_string()
				return token
			if !matched:
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr      = assertStrTmplt.format({"value" : Cursor})
				assert(false, assertStr)
				return null
		return null

	func reached_EndOfTxt():
		return Cursor >= SrcTxt.length()
# Shared tokenizer instance used by the test harness below.
var GTokenizer = Tokenizer.new()
# Supported AST serialization formats (informational; selection is hard-coded in test()).
const AST_Format = \
{
Dictionary = "Dictionary",
SExpression = "S-Expression"
}
# Type tags assigned to SyntaxNode.Type by the Parser below.
const SyntaxNodeType = \
{
NumericLiteral = "NumericLiteral",
StringLiteral = "StringLiteral",
ExpressionStatement = "ExpressionStatement",
BlockStatement = "BlockStatement",
EmptyStatement = "EmptyStatement",
BinaryExpression = "BinaryExpression",
Identifier = "Identifier",
AssignmentExpression = "AssignmentExpression",
VariableStatement = "VariableStatement",
VariableDeclaration = "VariableDeclaration",
ConditionalStatement = "ConditionalStatement"
}
class SyntaxNode:
	var Type  : String
	var Value # Variant: leaf payload, a child SyntaxNode, or an Array of children.

	# Renders the node as a nested S-expression array: [ Type, ... ].
	func to_SExpression():
		var expression = [ Type ]
		match typeof(Value):
			TYPE_ARRAY:
				var children = []
				for entry in Value:
					if typeof(entry) == TYPE_OBJECT:
						children.append(entry.to_SExpression())
					else:
						children.append(entry)
				expression.append(children)
			TYPE_OBJECT:
				expression.append(Value.to_SExpression())
			_:
				expression.append(Value)
		return expression

	# Renders the node as a nested { Type, Value } dictionary.
	func to_Dictionary():
		var value
		match typeof(Value):
			TYPE_ARRAY:
				var children = []
				for entry in Value:
					if typeof(entry) == TYPE_OBJECT:
						children.append(entry.to_Dictionary())
					else:
						children.append(entry)
				value = children
			TYPE_OBJECT:
				value = Value.to_Dictionary()
			_:
				value = Value
		return { Type = self.Type, Value = value }
class Parser:
	# Recursive-descent parser over the Tokenizer's stream, one token of lookahead.
	var TokenizerRef : Tokenizer
	var NextToken    : Token

	# True when the lookahead is a literal (number or string) token.
	func is_Literal():
		return NextToken.Type == TokenType.Number || NextToken.Type == TokenType.String

	# Consumes the lookahead after asserting it is of tokenType,
	# advances the stream, and returns the consumed token.
	func eat(tokenType):
		var currToken = self.NextToken
		assert(currToken != null, "eat: NextToken was null")
		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
		assert(currToken.Type == tokenType, assertStr)
		NextToken = TokenizerRef.next_Token()
		return currToken

	# NumericLiteral
	#     : Number
	#     ;
	func parse_NumericLiteral():
		var token = eat(TokenType.Number)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )
		return node

	# StringLiteral
	#     : String
	#     ;
	func parse_StringLiteral():
		var token = eat(TokenType.String)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.StringLiteral
		# Strip the surrounding quotes captured by the tokenizer.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )
		return node

	# Literal
	#     : NumericLiteral
	#     | StringLiteral
	#     ;
	func parse_Literal():
		match NextToken.Type :
			TokenType.Number :
				return parse_NumericLiteral()
			TokenType.String :
				return parse_StringLiteral()
		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# ParenthesizedExpression
	#     : ( Expression )
	#     ;
	func parse_ParenthesizedExpression():
		eat(TokenType.ExpressionPStart)
		var expression = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		return expression

	# Relational Operators: >, >=, <, <=
	#
	# RelationalExpression
	#     : AdditiveExpression
	#     | AdditiveExpression RelationalOp RelationalExpression
	#     ;
	func parse_RelationalExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_AdditiveExpression")
		return parse_BinaryExpression(parseFn, TokenType.RelationalOp)

	# MultiplicativeExpression
	#     : PrimaryExpression
	#     | MultiplicativeExpression MultiplicativeOp PrimaryExpression
	#     ;
	func parse_MultiplicativeExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_PrimaryExpression")
		return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

	# AdditiveExpression
	#     : MultiplicativeExpression
	#     | AdditiveExpression AdditiveOp MultiplicativeExpression
	#     ;
	func parse_AdditiveExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_MultiplicativeExpression")
		return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

	# Left-folds a binary operator chain: operand (op operand)*.
	# Fix: guard the lookahead against null so end-of-input after an
	# operand no longer dereferences a null token.
	func parse_BinaryExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()
		while NextToken != null && NextToken.Type == operatorToken :
			var operator = eat(operatorToken)
			var right    = parse_fn.call_func()
			var nestedNode = SyntaxNode.new()
			nestedNode.Type  = SyntaxNodeType.BinaryExpression
			nestedNode.Value = [ operator.Value, left, right ]
			left = nestedNode
		return left

	# Identifier
	#     : IdentifierSymbol
	#     ;
	func parse_Identifier():
		var name = eat(TokenType.Identifier).Value
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.Identifier
		node.Value = name
		return node

	# ResolvedSymbol
	#     : Identifier
	#     ;
	func parse_ResolvedSymbol():
		var resolvedSymbol = parse_Identifier()
		if resolvedSymbol.Type == SyntaxNodeType.Identifier :
			return resolvedSymbol
		var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}"
		var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type})
		assert(false, assertStr)

	# PrimaryExpression
	#     : Literal
	#     | ParenthesizedExpression
	#     | ResolvedSymbol
	#     ;
	func parse_PrimaryExpression():
		if is_Literal() :
			return parse_Literal()
		match NextToken.Type :
			TokenType.ExpressionPStart :
				return parse_ParenthesizedExpression()
		return parse_ResolvedSymbol()

	# AssignmentExpression
	#     : RelationalExpression
	#     | ResolvedSymbol AssignmentOperator AssignmentExpression
	#     ;
	func parse_AssignmentExpression():
		var left = parse_RelationalExpression()
		# Fix: tolerate end-of-input after the left operand.
		if NextToken == null :
			return left
		if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment :
			return left
		var assignmentOp
		if NextToken.Type == TokenType.Assignment :
			assignmentOp = eat(TokenType.Assignment)
		elif NextToken.Type == TokenType.ComplexAssignment :
			assignmentOp = eat(TokenType.ComplexAssignment)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.AssignmentExpression
		# Right-associative: recurse for the right-hand side.
		node.Value = [ assignmentOp.Value, left, parse_AssignmentExpression() ]
		return node

	# Expression
	#     : AssignmentExpression
	#     ;
	func parse_Expression():
		return parse_AssignmentExpression()

	# EmptyStatement
	#     : ;
	#     ;
	func parse_EmptyStatement():
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type = SyntaxNodeType.EmptyStatement
		return node

	# If Statement
	#     : if ( Expression ) Statement
	#     | if ( Expression ) Statement else Statement
	#     ;
	func parse_If_Statement():
		eat(TokenType.Conditional_if)
		eat(TokenType.ExpressionPStart)
		var condition = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		var consequent  = parse_Statement()
		var alternative = null
		if NextToken != null && NextToken.Type == TokenType.Conditional_else :
			eat(TokenType.Conditional_else)
			alternative = parse_Statement()
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ConditionalStatement
		node.Value = [ condition, consequent, alternative ]
		return node

	# VariableInitializer
	#     : Assignment AssignmentExpression
	#     ;
	func parse_VariableInitializer():
		eat(TokenType.Assignment)
		return parse_AssignmentExpression()

	# VariableDeclaration
	#     : Identifier OptVariableInitializer
	#     ;
	func parse_VariableDeclaration():
		var identifier  = parse_Identifier()
		var initializer = null
		if NextToken != null && NextToken.Type != TokenType.StatementEnd && NextToken.Type != TokenType.CommaDelimiter :
			initializer = parse_VariableInitializer()
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.VariableDeclaration
		node.Value = [ identifier, initializer ]
		return node

	# VariableDeclarationList
	#     : VariableDeclaration
	#     | VariableDeclarationList , VariableDeclaration
	#     ;
	func parse_VariableDeclarationList():
		var declarations = [ parse_VariableDeclaration() ]
		while NextToken != null && NextToken.Type == TokenType.CommaDelimiter :
			eat(TokenType.CommaDelimiter)
			declarations.append(parse_VariableDeclaration())
		return declarations

	# VariableStatement
	#     : VarDeclare VariableDeclarationList StatementEnd
	#     ;
	func parse_VariableStatement():
		eat(TokenType.VarDeclare)
		var declarations = parse_VariableDeclarationList()
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.VariableStatement
		node.Value = declarations
		return node

	# BlockStatement
	#     : { OptStatementList }
	#     ;
	func parse_BlockStatement():
		eat(TokenType.StmtBlockStart)
		var node = SyntaxNode.new()
		node.Type = SyntaxNodeType.BlockStatement
		if NextToken.Type != TokenType.StmtBlockEnd :
			node.Value = parse_StatementList(TokenType.StmtBlockEnd)
		else :
			node.Value = []
		eat(TokenType.StmtBlockEnd)
		return node

	# ExpressionStatement
	#     : Expression StatementEnd
	#     ;
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ExpressionStatement
		node.Value = expression
		# Fix: return the ExpressionStatement node; the original returned the
		# bare expression, discarding the wrapper it had just constructed.
		return node

	# Statement
	#     : ExpressionStatement
	#     | BlockStatement
	#     | EmptyStatement
	#     | VariableStatement
	#     | If_Statement
	#     ;
	func parse_Statement():
		if NextToken == null :
			return null
		match NextToken.Type :
			TokenType.Conditional_if :
				return parse_If_Statement()
			TokenType.StatementEnd :
				return parse_EmptyStatement()
			TokenType.StmtBlockStart :
				return parse_BlockStatement()
			TokenType.VarDeclare :
				return parse_VariableStatement()
		return parse_ExpressionStatement()

	# StatementList
	#     : Statement
	#     | StatementList Statement
	#     ;
	func parse_StatementList(endToken):
		var statementList = [ parse_Statement() ]
		while NextToken != null && NextToken.Type != endToken :
			statementList.append( parse_Statement() )
		return statementList

	# Program
	#     : StatementList
	#     ;
	func parse_Program():
		var node = SyntaxNode.new()
		node.Type  = TokenType.Program
		node.Value = parse_StatementList(null)
		return node

	# Parses the token stream provided by TokenizerRef into an AST.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		NextToken = TokenizerRef.next_Token()
		return parse_Program()
# Shared parser instance used by the test harness.
var GParser = Parser.new()

# Output view for test results.
# NOTE(review): GScene is not defined in this file — presumably a project
# autoload providing the scene root; confirm.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output view.
func tout(text):
	TextOut.insert_text_at_cursor(text)
# Test registry: display name plus source-file name for each lexer/parser
# test case, keyed for iteration in _ready(). Files are resolved relative to
# the Tests directory in test().
const Tests = \
{
MultiStatement = \
{
Name = "Multi-Statement",
File = "1.Multi-Statement.uf"
},
BlockStatement = \
{
Name = "Block Statement",
File = "2.BlockStatement.uf"
},
BinaryExpression = \
{
Name = "Binary Expression",
File = "3.BinaryExpression.uf"
},
Assignment = \
{
Name = "Assignment",
File = "4.Assignment.uf"
},
VaraibleDeclaration = \
{
Name = "Variable Declaration",
File = "5.VariableDeclaration.uf"
},
Conditionals = \
{
Name = "Conditionals",
File = "6.Conditionals.uf"
},
Relations = \
{
Name = "Relations",
File = "7.Relations.uf"
}
}
# Runs a single test entry: loads its source file, tokenizes and parses it,
# and prints the AST as an S-expression to the output view.
func test(entry):
	var introMessage = "Testing: {Name}\n"
	var introMessageFormatted = introMessage.format({"Name" : entry.Name})
	tout(introMessageFormatted)

	# Editor runs resolve against the project tree; exported builds against
	# the build output tree.
	var path
	if Engine.editor_hint :
		path = "res://../Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})

	var file = File.new()
	# Fix: surface open failures instead of silently parsing an empty string.
	var error = file.open(pathFormatted, File.READ)
	assert(error == OK, "test: Failed to open test file: " + pathFormatted)
	var programDescription = file.get_as_text()
	file.close()

	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)
	var json = JSON.print(ast.to_SExpression(), '\t')
	tout(json + "\n")
	tout("Passed!\n")

# Main entry point: runs every registered test.
func _ready():
	for Key in Tests :
		test(Tests[Key])

View File

@ -0,0 +1,829 @@
extends Node
# This closely follows the source provided in the lectures.
# After the lectures are complete, or when deemed necessary,
# there will be heavy refactors.
# Token classification labels. The values are only used as human-readable
# tags (diagnostics and the Program node type), never matched as source text.
const TokenType = {
	Program = "Program",

	# Comments
	CommentLine      = "CommentLine",
	CommentMultiLine = "CommentMultiLine",

	# Formatting
	Whitespace = "Whitespace",

	# Expressions
	# Fix: label was misspelled "ExpresssionParenthesisStart".
	ExpressionPStart = "ExpressionParenthesisStart",
	ExpressionPEnd   = "ExpressionParenthesisEnd",

	# Logical
	RelationalOp = "RelationalOperator",
	EqualityOp   = "EqualityOperator",
	Logical_And  = "Logical_And_Op",
	Logical_Or   = "Logical_Or_Op",

	# Arithmetic
	ComplexAssignment = "ComplexAssignment",
	Assignment        = "Assignment",
	AdditiveOp        = "AdditiveOperator",
	MultiplicativeOp  = "MultiplicativeOperator",

	# Conditional
	Conditional_if   = "if Conditional",
	Conditional_else = "else Conditional",

	# Statements
	StatementEnd   = "StatementEnd",
	StmtBlockStart = "BlockStatementStart",
	StmtBlockEnd   = "BlockStatementEnd",
	CommaDelimiter = "CommaDelimiter",

	# Literals
	Number = "Number",
	String = "String",

	# Symbols
	Bool_true  = "Boolean True",
	Bool_false = "Boolean False",
	VarDeclare = "Variable Declaration",
	Identifier = "Identifier",
	NullValue  = "Null Value"
}
# Regex spec for each token type, tried in insertion order against the text
# at the cursor. Godot dictionaries iterate in insertion order, so keyword
# patterns must precede Identifier (^\w+), which would otherwise match them.
const TokenSpec = {
	# Comments
	TokenType.CommentLine      : "^\\/\\/.*",
	TokenType.CommentMultiLine : "^\\/\\*[\\s\\S]*?\\*\\/",

	# Formatting
	TokenType.Whitespace : "^\\s+",

	# Expressions
	TokenType.ExpressionPStart : "^\\(",
	TokenType.ExpressionPEnd   : "^\\)",

	# Logical
	TokenType.RelationalOp : "^[>\\<]=?",
	TokenType.EqualityOp   : "^[=!]=",
	TokenType.Logical_And  : "^&&",
	TokenType.Logical_Or   : "^\\|\\|",

	# Arithmetic
	TokenType.ComplexAssignment : "^[*\\/\\+\\-]=",
	TokenType.Assignment        : "^=",
	TokenType.AdditiveOp        : "^[+\\-]",
	TokenType.MultiplicativeOp  : "^[*\\/]",

	# Literal
	# Fix: Number was the only unanchored pattern; anchored for consistency.
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\"",
	TokenType.Conditional_if   : "^\\bif\\b",
	TokenType.Conditional_else : "^\\belse\\b",

	# Statements
	TokenType.StatementEnd   : "^;",
	TokenType.StmtBlockStart : "^{",
	TokenType.StmtBlockEnd   : "^}",
	TokenType.CommaDelimiter : "^,",

	# Symbols
	TokenType.Bool_true  : "^\\btrue\\b",
	TokenType.Bool_false : "^\\bfalse\\b",
	TokenType.VarDeclare : "^\\blet\\b",
	# Fix: NullValue was listed AFTER Identifier, so "null" in source text was
	# tokenized as an Identifier and parse_NullLiteral was unreachable. Keyword
	# specs must come before the catch-all Identifier spec.
	TokenType.NullValue  : "^\\bnull\\b",
	TokenType.Identifier : "^\\w+"
}
class Token:
	# One lexical token: its classification and the matched source text.
	var Type  : String
	var Value : String

	# Serializes the token as a { Type, Value } dictionary.
	func to_Dictionary():
		return { Type = self.Type, Value = self.Value }
class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text, or null once exhausted.
	# Fix: comments/whitespace are now skipped with a loop instead of
	# recursion, so comment- or whitespace-heavy input cannot grow the stack.
	func next_Token():
		var regex = RegEx.new()
		while !reached_EndOfTxt():
			var srcLeft = SrcTxt.substr(Cursor)
			var matched = false
			for type in TokenSpec:
				regex.compile(TokenSpec[type])
				var result = regex.search(srcLeft)
				# Only a match at the cursor position counts.
				if result == null || result.get_start() != 0:
					continue
				matched = true
				Cursor += result.get_string().length()
				# Comments and whitespace produce no token; rescan from the new cursor.
				if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace:
					break
				var token   = Token.new()
				token.Type  = type
				token.Value = result.get_string()
				return token
			if !matched:
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr      = assertStrTmplt.format({"value" : Cursor})
				assert(false, assertStr)
				return null
		return null

	func reached_EndOfTxt():
		return Cursor >= SrcTxt.length()
# Shared tokenizer instance used by the test harness below.
var GTokenizer = Tokenizer.new()
# Supported AST serialization formats (informational; selection is hard-coded in test()).
const AST_Format = \
{
Dictionary = "Dictionary",
SExpression = "S-Expression"
}
# Type tags assigned to SyntaxNode.Type by the Parser below.
const SyntaxNodeType = \
{
NumericLiteral = "NumericLiteral",
StringLiteral = "StringLiteral",
ExpressionStatement = "ExpressionStatement",
BlockStatement = "BlockStatement",
EmptyStatement = "EmptyStatement",
BinaryExpression = "BinaryExpression",
Identifier = "Identifier",
AssignmentExpression = "AssignmentExpression",
VariableStatement = "VariableStatement",
VariableDeclaration = "VariableDeclaration",
ConditionalStatement = "ConditionalStatement",
BooleanLiteral = "BooleanLiteral",
NullLiteral = "NullLiteral",
LogicalExpression = "LogicalExpression"
}
class SyntaxNode:
	var Type  : String
	var Value # Variant: leaf payload, a child SyntaxNode, or an Array of children.

	# Renders the node as a nested S-expression array: [ Type, ... ].
	func to_SExpression():
		var expression = [ Type ]
		match typeof(Value):
			TYPE_ARRAY:
				var children = []
				for entry in Value:
					if typeof(entry) == TYPE_OBJECT:
						children.append(entry.to_SExpression())
					else:
						children.append(entry)
				expression.append(children)
			TYPE_OBJECT:
				expression.append(Value.to_SExpression())
			_:
				expression.append(Value)
		return expression

	# Renders the node as a nested { Type, Value } dictionary.
	func to_Dictionary():
		var value
		match typeof(Value):
			TYPE_ARRAY:
				var children = []
				for entry in Value:
					if typeof(entry) == TYPE_OBJECT:
						children.append(entry.to_Dictionary())
					else:
						children.append(entry)
				value = children
			TYPE_OBJECT:
				value = Value.to_Dictionary()
			_:
				value = Value
		return { Type = self.Type, Value = value }
class Parser:
	# Recursive-descent parser over the Tokenizer's stream, one token of lookahead.
	var TokenizerRef : Tokenizer
	var NextToken    : Token

	# True when the lookahead is a literal token (number, string, bool, null).
	func is_Literal():
		return \
			NextToken.Type == TokenType.Number \
			|| NextToken.Type == TokenType.String \
			|| NextToken.Type == TokenType.Bool_true \
			|| NextToken.Type == TokenType.Bool_false \
			|| NextToken.Type == TokenType.NullValue

	# Consumes the lookahead after asserting it is of tokenType,
	# advances the stream, and returns the consumed token.
	func eat(tokenType):
		var currToken = self.NextToken
		assert(currToken != null, "eat: NextToken was null")
		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
		assert(currToken.Type == tokenType, assertStr)
		NextToken = TokenizerRef.next_Token()
		return currToken

	# NumericLiteral
	#     : Number
	#     ;
	func parse_NumericLiteral():
		var token = eat(TokenType.Number)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )
		return node

	# StringLiteral
	#     : String
	#     ;
	func parse_StringLiteral():
		var token = eat(TokenType.String)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.StringLiteral
		# Strip the surrounding quotes captured by the tokenizer.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )
		return node

	# BooleanLiteral
	#     : true
	#     | false
	#     ;
	func parse_BooleanLiteral(token):
		eat(token)
		var value
		if token == TokenType.Bool_true :
			value = true
		elif token == TokenType.Bool_false :
			value = false
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.BooleanLiteral
		node.Value = value
		return node

	# NullLiteral
	#     : null
	#     ;
	func parse_NullLiteral():
		# Fix: the original ate TokenType.NullLiteral, a key that does not
		# exist in TokenType (the token type is NullValue; NullLiteral is a
		# SyntaxNodeType) — a runtime error on any null literal.
		eat(TokenType.NullValue)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.NullLiteral
		node.Value = null
		return node

	# Literal
	#     : NumericLiteral
	#     | StringLiteral
	#     | BooleanLiteral
	#     | NullLiteral
	#     ;
	func parse_Literal():
		match NextToken.Type :
			TokenType.Number :
				return parse_NumericLiteral()
			TokenType.String :
				return parse_StringLiteral()
			TokenType.Bool_true :
				return parse_BooleanLiteral(TokenType.Bool_true)
			TokenType.Bool_false :
				return parse_BooleanLiteral(TokenType.Bool_false)
			TokenType.NullValue :
				return parse_NullLiteral()
		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# ParenthesizedExpression
	#     : ( Expression )
	#     ;
	func parse_ParenthesizedExpression():
		eat(TokenType.ExpressionPStart)
		var expression = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		return expression

	# Relational Operators: >, >=, <, <=
	#
	# RelationalExpression
	#     : AdditiveExpression
	#     | AdditiveExpression RelationalOp RelationalExpression
	#     ;
	func parse_RelationalExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_AdditiveExpression")
		return parse_BinaryExpression(parseFn, TokenType.RelationalOp)

	# Equality Operators: ==, !=
	#
	# EqualityExpression
	#     : RelationalExpression EqualityOp RelationalExpression
	#     | RelationalExpression
	#     ;
	func parse_EqualityExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_RelationalExpression")
		return parse_BinaryExpression(parseFn, TokenType.EqualityOp)

	# LogicalOrExpression
	#     : LogicalAndExpression Logical_Or LogicalOrExpression
	#     | LogicalAndExpression
	#     ;
	func parse_LogicalOrExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		# Fix: renamed misspelled "pasre_LogicalAndExpression" (definition and
		# this FuncRef string are both inside this class, so the rename is safe).
		parseFn.set_function("parse_LogicalAndExpression")
		return parse_LogicalExpression(parseFn, TokenType.Logical_Or)

	# LogicalAndExpression
	#     : EqualityExpression Logical_And LogicalAndExpression
	#     | EqualityExpression
	#     ;
	func parse_LogicalAndExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_EqualityExpression")
		return parse_LogicalExpression(parseFn, TokenType.Logical_And)

	# Left-folds a logical operator chain: operand (op operand)*.
	func parse_LogicalExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()
		while NextToken != null && NextToken.Type == operatorToken :
			var operator = eat(operatorToken).Value
			var right    = parse_fn.call_func()
			var nestedNode = SyntaxNode.new()
			nestedNode.Type  = SyntaxNodeType.LogicalExpression
			nestedNode.Value = [ operator, left, right ]
			left = nestedNode
		return left

	# MultiplicativeExpression
	#     : PrimaryExpression
	#     | MultiplicativeExpression MultiplicativeOp PrimaryExpression
	#     ;
	func parse_MultiplicativeExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_PrimaryExpression")
		return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

	# AdditiveExpression
	#     : MultiplicativeExpression
	#     | AdditiveExpression AdditiveOp MultiplicativeExpression
	#     ;
	func parse_AdditiveExpression():
		var parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_MultiplicativeExpression")
		return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

	# Left-folds a binary operator chain: operand (op operand)*.
	# Fix: guard the lookahead against null so end-of-input after an
	# operand no longer dereferences a null token.
	func parse_BinaryExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()
		while NextToken != null && NextToken.Type == operatorToken :
			var operator = eat(operatorToken)
			var right    = parse_fn.call_func()
			var nestedNode = SyntaxNode.new()
			nestedNode.Type  = SyntaxNodeType.BinaryExpression
			nestedNode.Value = [ operator.Value, left, right ]
			left = nestedNode
		return left

	# Identifier
	#     : IdentifierSymbol
	#     ;
	func parse_Identifier():
		var name = eat(TokenType.Identifier).Value
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.Identifier
		node.Value = name
		return node

	# ResolvedSymbol
	#     : Identifier
	#     ;
	func parse_ResolvedSymbol():
		var resolvedSymbol = parse_Identifier()
		if resolvedSymbol.Type == SyntaxNodeType.Identifier :
			return resolvedSymbol
		var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}"
		var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type})
		assert(false, assertStr)

	# PrimaryExpression
	#     : Literal
	#     | ParenthesizedExpression
	#     | ResolvedSymbol
	#     ;
	func parse_PrimaryExpression():
		if is_Literal() :
			return parse_Literal()
		match NextToken.Type :
			TokenType.ExpressionPStart :
				return parse_ParenthesizedExpression()
		return parse_ResolvedSymbol()

	# AssignmentExpression
	#     : LogicalOrExpression
	#     | ResolvedSymbol AssignmentOperator AssignmentExpression
	#     ;
	func parse_AssignmentExpression():
		var left = parse_LogicalOrExpression()
		# Fix: tolerate end-of-input after the left operand.
		if NextToken == null :
			return left
		if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment :
			return left
		var assignmentOp
		if NextToken.Type == TokenType.Assignment :
			assignmentOp = eat(TokenType.Assignment)
		elif NextToken.Type == TokenType.ComplexAssignment :
			assignmentOp = eat(TokenType.ComplexAssignment)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.AssignmentExpression
		# Right-associative: recurse for the right-hand side.
		node.Value = [ assignmentOp.Value, left, parse_AssignmentExpression() ]
		return node

	# Expression
	#     : AssignmentExpression
	#     ;
	func parse_Expression():
		return parse_AssignmentExpression()

	# EmptyStatement
	#     : ;
	#     ;
	func parse_EmptyStatement():
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type = SyntaxNodeType.EmptyStatement
		return node

	# If Statement
	#     : if ( Expression ) Statement
	#     | if ( Expression ) Statement else Statement
	#     ;
	func parse_If_Statement():
		eat(TokenType.Conditional_if)
		eat(TokenType.ExpressionPStart)
		var condition = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		var consequent  = parse_Statement()
		var alternative = null
		if NextToken != null && NextToken.Type == TokenType.Conditional_else :
			eat(TokenType.Conditional_else)
			alternative = parse_Statement()
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ConditionalStatement
		node.Value = [ condition, consequent, alternative ]
		return node

	# VariableInitializer
	#     : Assignment AssignmentExpression
	#     ;
	func parse_VariableInitializer():
		eat(TokenType.Assignment)
		return parse_AssignmentExpression()

	# VariableDeclaration
	#     : Identifier OptVariableInitializer
	#     ;
	func parse_VariableDeclaration():
		var identifier  = parse_Identifier()
		var initializer = null
		if NextToken != null && NextToken.Type != TokenType.StatementEnd && NextToken.Type != TokenType.CommaDelimiter :
			initializer = parse_VariableInitializer()
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.VariableDeclaration
		node.Value = [ identifier, initializer ]
		return node

	# VariableDeclarationList
	#     : VariableDeclaration
	#     | VariableDeclarationList , VariableDeclaration
	#     ;
	func parse_VariableDeclarationList():
		var declarations = [ parse_VariableDeclaration() ]
		while NextToken != null && NextToken.Type == TokenType.CommaDelimiter :
			eat(TokenType.CommaDelimiter)
			declarations.append(parse_VariableDeclaration())
		return declarations

	# VariableStatement
	#     : VarDeclare VariableDeclarationList StatementEnd
	#     ;
	func parse_VariableStatement():
		eat(TokenType.VarDeclare)
		var declarations = parse_VariableDeclarationList()
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.VariableStatement
		node.Value = declarations
		return node

	# BlockStatement
	#     : { OptStatementList }
	#     ;
	func parse_BlockStatement():
		eat(TokenType.StmtBlockStart)
		var node = SyntaxNode.new()
		node.Type = SyntaxNodeType.BlockStatement
		if NextToken.Type != TokenType.StmtBlockEnd :
			node.Value = parse_StatementList(TokenType.StmtBlockEnd)
		else :
			node.Value = []
		eat(TokenType.StmtBlockEnd)
		return node

	# ExpressionStatement
	#     : Expression StatementEnd
	#     ;
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)
		var node = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ExpressionStatement
		node.Value = expression
		# Fix: return the ExpressionStatement node; the original returned the
		# bare expression, discarding the wrapper it had just constructed.
		return node

	# Statement
	#     : ExpressionStatement
	#     | BlockStatement
	#     | EmptyStatement
	#     | VariableStatement
	#     | If_Statement
	#     ;
	func parse_Statement():
		if NextToken == null :
			return null
		match NextToken.Type :
			TokenType.Conditional_if :
				return parse_If_Statement()
			TokenType.StatementEnd :
				return parse_EmptyStatement()
			TokenType.StmtBlockStart :
				return parse_BlockStatement()
			TokenType.VarDeclare :
				return parse_VariableStatement()
		return parse_ExpressionStatement()

	# StatementList
	#     : Statement
	#     | StatementList Statement
	#     ;
	func parse_StatementList(endToken):
		var statementList = [ parse_Statement() ]
		while NextToken != null && NextToken.Type != endToken :
			statementList.append( parse_Statement() )
		return statementList

	# Program
	#     : StatementList
	#     ;
	func parse_Program():
		var node = SyntaxNode.new()
		node.Type  = TokenType.Program
		node.Value = parse_StatementList(null)
		return node

	# Parses the token stream provided by TokenizerRef into an AST.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		NextToken = TokenizerRef.next_Token()
		return parse_Program()
# Shared parser instance used by the test harness.
var GParser = Parser.new()

# Output view for test results.
# NOTE(review): GScene is not defined in this file — presumably a project
# autoload providing the scene root; confirm.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output view.
func tout(text):
	TextOut.insert_text_at_cursor(text)
const Tests = \
{
MultiStatement = \
{
Name = "Multi-Statement",
File = "1.Multi-Statement.uf"
},
BlockStatement = \
{
Name = "Block Statement",
File = "2.BlockStatement.uf"
},
BinaryExpression = \
{
Name = "Binary Expression",
File = "3.BinaryExpression.uf"
},
Assignment = \
{
Name = "Assignment",
File = "4.Assignment.uf"
},
VaraibleDeclaration = \
{
Name = "Variable Declaration",
File = "5.VariableDeclaration.uf"
},
Conditionals = \
{
Name = "Conditionals",
File = "6.Conditionals.uf"
},
Relations = \
{
Name = "Relations",
File = "7.Relations.uf"
},
Equality = \
{
Name = "Equality",
File = "8.Equality.uf"
},
Logical = \
{
Name = "Logical",
File = "9.Logical.uf"
}
}
func test(entry):
	# Runs one parser test: loads the test program from disk, tokenizes and
	# parses it, and prints the resulting AST as JSON to the output panel.
	var introMessage = "Testing: {Name}\n"
	var introMessageFormatted = introMessage.format({"Name" : entry.Name})
	tout(introMessageFormatted)
	# Test files live in a different location for editor runs vs. builds.
	var path
	if Engine.editor_hint :
		path = "res://../Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})
	var \
	file = File.new()
	file.open(pathFormatted, File.READ)
	var programDescription = file.get_as_text()
	file.close()
	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)
	var json = JSON.print(ast.to_Dictionary(), '\t')
	tout(json + "\n")
	# NOTE(review): "Passed!" is printed unconditionally; failures surface
	# only through assert() inside the parser — confirm that is intended.
	tout("Passed!\n")
# Main Entry point.
func _ready():
	# Main entry point: runs every registered parser test on scene ready.
	for Key in Tests :
		test(Tests[Key])

View File

@ -0,0 +1,916 @@
extends Node
# This closesly follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# Necessary there will be heavy refactors.
const TokenType = \
{
Program = "Program",
# Comments
CommentLine = "CommentLine",
CommentMultiLine = "CommentMultiLine",
# Formatting
Whitespace = "Whitespace",
# Expressions
ExpressionPStart = "ExpresssionParenthesisStart",
ExpressionPEnd = "ExpressionParenthesisEnd",
# Logical
RelationalOp = "RelationalOperator",
EqualityOp = "EqualityOperator",
Logical_And = "Logical_And_Op",
Logical_Or = "Logical_Or_Op",
Logical_Not = "Logical_Not_Op",
# Arithmetic
ComplexAssignment = "ComplexAssignment",
Assignment = "Assignment",
AdditiveOp = "AdditiveOperator",
MultiplicativeOp = "MultiplicativeOperator",
# Conditional
Conditional_if = "if Conditional",
Conditional_else = "else Conditional",
# Statements
StatementEnd = "StatementEnd",
StmtBlockStart = "BlockStatementStart",
StmtBlockEnd = "BlockStatementEnd",
CommaDelimiter = "CommaDelimiter",
# Literals
Number = "Number",
String = "String",
# Symbols
Bool_true = "Boolean True",
Bool_false = "Boolean False",
VarDeclare = "Variable Declaration",
Identifier = "Identifier",
NullValue = "Null Value"
}
# Maps each TokenType to the regex that recognizes it. The tokenizer tries
# these in insertion order and takes the first pattern matching at the cursor,
# so keyword specs MUST come before the catch-all Identifier spec.
const TokenSpec = \
{
	# Comments
	TokenType.CommentLine      : "^\\/\\/.*",
	TokenType.CommentMultiLine : "^\\/\\*[\\s\\S]*?\\*\\/",

	# Formatting
	TokenType.Whitespace : "^\\s+",

	# Expressions
	TokenType.ExpressionPStart : "^\\(",
	TokenType.ExpressionPEnd   : "^\\)",

	# Logical
	TokenType.RelationalOp : "^[>\\<]=?",
	TokenType.EqualityOp   : "^[=!]=",
	TokenType.Logical_And  : "^&&",
	TokenType.Logical_Or   : "^\\|\\|",
	TokenType.Logical_Not  : "^!",

	# Arithmetic
	TokenType.ComplexAssignment : "^[*\\/\\+\\-]=",
	TokenType.Assignment        : "^=",
	TokenType.AdditiveOp        : "^[+\\-]",
	TokenType.MultiplicativeOp  : "^[*\\/]",

	# Literals
	# BUG FIX: anchored Number like every other spec (was "\\d+", which relied
	# entirely on the tokenizer's get_start() == 0 rejection of later matches).
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\"",

	# Conditionals
	TokenType.Conditional_if   : "^\\bif\\b",
	TokenType.Conditional_else : "^\\belse\\b",

	# Statements
	TokenType.StatementEnd   : "^;",
	TokenType.StmtBlockStart : "^{",
	TokenType.StmtBlockEnd   : "^}",
	TokenType.CommaDelimiter : "^,",

	# Symbols
	TokenType.Bool_true  : "^\\btrue\\b",
	TokenType.Bool_false : "^\\bfalse\\b",
	# BUG FIX: NullValue moved BEFORE Identifier. It previously came after,
	# so "null" always matched the Identifier spec ("^\\w+") first and
	# NullValue tokens were never produced.
	TokenType.NullValue  : "^\\bnull\\b",
	TokenType.VarDeclare : "^\\blet\\b",
	TokenType.Identifier : "^\\w+"
}
class Token:
	# A single lexical token: its TokenType string and the matched source text.
	var Type : String
	var Value : String

	# Serializes the token to a Dictionary for JSON output.
	func to_Dictionary():
		var result = \
		{
			Type = self.Type,
			Value = self.Value
		}
		return result
class Tokenizer:
	# Regex-driven lexer: walks SrcTxt with a cursor and emits Token objects
	# by trying each pattern in TokenSpec at the cursor position.
	var SrcTxt : String
	var Cursor : int;

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text.
	# Returns null once the end of the text is reached; comments and
	# whitespace are consumed by recursing for the following token.
	func next_Token():
		if reached_EndOfTxt() == true :
			return null
		var srcLeft = SrcTxt.substr(Cursor)
		var regex = RegEx.new()
		var token = Token.new()
		for type in TokenSpec :
			regex.compile(TokenSpec[type])
			var result = regex.search(srcLeft)
			# Only accept a match that begins exactly at the cursor.
			if result == null || result.get_start() != 0 :
				continue
			# Skip Comments
			if type == TokenType.CommentLine || type == TokenType.CommentMultiLine :
				Cursor += result.get_string().length()
				return next_Token()
			# Skip Whitespace
			if type == TokenType.Whitespace :
				var addVal = result.get_string().length()
				Cursor += addVal
				return next_Token()
			token.Type = type
			token.Value = result.get_string()
			Cursor += ( result.get_string().length() )
			return token
		# No spec matched: halt with a diagnostic (assert(true != true) always fails).
		var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
		var assertStr = assertStrTmplt.format({"value" : Cursor})
		assert(true != true, assertStr)
		return null

	# True once the cursor has consumed the entire source text.
	func reached_EndOfTxt():
		return Cursor >= ( SrcTxt.length() )
var GTokenizer = Tokenizer.new()
const AST_Format = \
{
Dictionary = "Dictionary",
SExpression = "S-Expression"
}
const SyntaxNodeType = \
{
NumericLiteral = "NumericLiteral",
StringLiteral = "StringLiteral",
ExpressionStatement = "ExpressionStatement",
BlockStatement = "BlockStatement",
EmptyStatement = "EmptyStatement",
BinaryExpression = "BinaryExpression",
Identifier = "Identifier",
AssignmentExpression = "AssignmentExpression",
VariableStatement = "VariableStatement",
VariableDeclaration = "VariableDeclaration",
ConditionalStatement = "ConditionalStatement",
BooleanLiteral = "BooleanLiteral",
NullLiteral = "NullLiteral",
LogicalExpression = "LogicalExpression",
UnaryExpression = "UnaryExpression"
}
class SyntaxNode:
	# One node of the AST: a Type tag plus a Value that may be a scalar,
	# a child SyntaxNode, or an array of children/scalars.
	var Type : String
	var Value # Not specifying a type implicitly declares a Variant type.

	# Renders the node recursively as a Lisp-style S-expression array.
	func to_SExpression():
		var expression = [ Type ]
		if typeof(Value) == TYPE_ARRAY :
			var array = []
			for entry in self.Value :
				if typeof(entry) == TYPE_OBJECT :
					array.append( entry.to_SExpression() )
				else :
					array.append( entry )
			expression.append(array)
			return expression
		if typeof(Value) == TYPE_OBJECT :
			var result = [ Type, Value.to_SExpression() ]
			return result
		expression.append(Value)
		return expression

	# Renders the node recursively as a Dictionary for JSON serialization.
	func to_Dictionary():
		if typeof(Value) == TYPE_ARRAY :
			var array = []
			for entry in self.Value :
				if typeof(entry) == TYPE_OBJECT :
					array.append( entry.to_Dictionary() )
				else :
					array.append( entry )
			var result = \
			{
				Type = self.Type,
				Value = array
			}
			return result
		if typeof(Value) == TYPE_OBJECT :
			var result = \
			{
				Type = self.Type,
				Value = self.Value.to_Dictionary()
			}
			return result
		var result = \
		{
			Type = self.Type,
			Value = self.Value
		}
		return result
class Parser:
var TokenizerRef : Tokenizer
var NextToken : Token
# --------------------------------------------------------------------- HELPERS
# Gets the next token only if the current token is the specified intended token (tokenType)
func eat(tokenType):
	# Consumes the current lookahead token, asserting that it matches the
	# expected tokenType, then advances NextToken from the tokenizer.
	# Returns the consumed token.
	var currToken = self.NextToken
	assert(currToken != null, "eat: NextToken was null")
	var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
	var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
	assert(currToken.Type == tokenType, assertStr)
	NextToken = TokenizerRef.next_Token()
	return currToken
func is_Literal():
return \
NextToken.Type == TokenType.Number \
|| NextToken.Type == TokenType.String \
|| NextToken.Type == TokenType.Bool_true \
|| NextToken.Type == TokenType.Bool_false \
|| NextToken.Type == TokenType.NullValue
# BinaryExpression
# : MultiplicativeExpression
# | AdditiveExpression
# ;
func parse_BinaryExpression(parse_fn, operatorToken):
	# Left-folds a run of binary operators at one precedence level: each
	# iteration wraps the accumulated tree as the left operand of a new
	# BinaryExpression node. parse_fn is a FuncRef to the next-higher
	# precedence parser.
	var result = parse_fn.call_func()

	while NextToken.Type == operatorToken :
		var operatorValue = eat(operatorToken).Value
		var rightOperand  = parse_fn.call_func()

		var \
		wrapped       = SyntaxNode.new()
		wrapped.Type  = SyntaxNodeType.BinaryExpression
		wrapped.Value = [ operatorValue, result, rightOperand ]

		result = wrapped

	return result
# LogicalExpression
# : LogicalAndExpression
# | LogicalOrExpression
# ;
func parse_LogicalExpression(parse_fn, operatorToken):
	# Left-folds a run of logical operators (&& or ||) into nested
	# LogicalExpression nodes. parse_fn is a FuncRef to the next-higher
	# precedence parser.
	var result = parse_fn.call_func()

	while NextToken.Type == operatorToken :
		var operatorValue = eat(operatorToken).Value
		var rightOperand  = parse_fn.call_func()

		var \
		wrapped       = SyntaxNode.new()
		wrapped.Type  = SyntaxNodeType.LogicalExpression
		wrapped.Value = [ operatorValue, result, rightOperand ]

		result = wrapped

	return result
# ------------------------------------------------------------------ END HELPERS
# Parses the text program description into an AST.
func parse(TokenizerRef):
self.TokenizerRef = TokenizerRef
NextToken = TokenizerRef.next_Token()
return parse_Program()
# > parse
# Program
# : StatementList
# : Literal
# ;
func parse_Program():
var \
node = SyntaxNode.new()
node.Type = TokenType.Program
node.Value = parse_StatementList(null)
return node
# > Program
# > BlockStatement
# StatementList
# : Statement
# | StatementList Statement -> Statement ...
# ;
func parse_StatementList(endToken):
var statementList = [ parse_Statement() ]
while NextToken != null && NextToken.Type != endToken :
statementList.append( parse_Statement() )
return statementList
# > StatementList
# > If_Statement
# >
# Statement
# : ExpressionStatement
# | BlockStatement
# | EmptyStatement
# | VariableStatement
# | If_Statement
# ;
func parse_Statement():
if NextToken == null :
return null
match NextToken.Type :
TokenType.Conditional_if :
return parse_If_Statement()
TokenType.StatementEnd :
return parse_EmptyStatement()
TokenType.StmtBlockStart :
return parse_BlockStatement()
TokenType.VarDeclare :
return parse_VariableStatement()
return parse_ExpressionStatement()
# If Statement
# : if ( Expression ) Statement
# | if ( Expression ) Statement else Statement
# ;
func parse_If_Statement():
	# If Statement
	#	: if ( Expression ) Statement
	#	| if ( Expression ) Statement else Statement
	#	;
	eat(TokenType.Conditional_if)
	eat(TokenType.ExpressionPStart)
	var condition = parse_Expression()
	eat(TokenType.ExpressionPEnd)
	var consequent = parse_Statement()
	var alternative = null
	# The else-branch is optional; alternative stays null when absent.
	if NextToken != null && NextToken.Type == TokenType.Conditional_else :
		eat(TokenType.Conditional_else)
		alternative = parse_Statement()
	var \
	node = SyntaxNode.new()
	node.Type = SyntaxNodeType.ConditionalStatement
	# Value layout: [ condition, consequent, alternative ]
	node.Value = [ condition, consequent, alternative ]
	return node
# > Statement
# EmptyStatement
# ;
func parse_EmptyStatement():
eat(TokenType.StatementEnd)
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.EmptyStatement
return node
# > Statement
# BlockStatement
# : { OptStatementList }
# ;
func parse_BlockStatement():
eat(TokenType.StmtBlockStart)
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.BlockStatement
if NextToken.Type != TokenType.StmtBlockEnd :
node.Value = parse_StatementList(TokenType.StmtBlockEnd)
else :
node.Value = []
eat(TokenType.StmtBlockEnd)
return node
# > Statement
# VariableStatement
# : VarDeclare VariableDeclarationList StatementEnd
# ;
func parse_VariableStatement():
eat(TokenType.VarDeclare)
var declarations = parse_VariableDeclarationList()
eat(TokenType.StatementEnd)
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.VariableStatement
node.Value = declarations
return node
# > Statement
# ExpressionStatement
# : Expression
# ;
func parse_ExpressionStatement():
	# ExpressionStatement
	#	: Expression ;
	#	;
	# Parses one expression terminated by the statement-end token and wraps
	# it in an ExpressionStatement node.
	var expression = parse_Expression()
	eat(TokenType.StatementEnd)

	var \
	node       = SyntaxNode.new()
	node.Type  = SyntaxNodeType.ExpressionStatement
	node.Value = expression

	# BUG FIX: previously returned the bare expression, discarding the
	# ExpressionStatement node that was just constructed.
	return node
# > ExpressionStatement
# > If_Statement
# > PrimaryExpression
# Expression
# : AssignmentExpression
# ;
func parse_Expression():
return parse_AssignmentExpression()
# > VariableStatement
# VariableDeclarationList
# : VariableDeclaration
# | VariableDelcarationList , VariableDeclaration -> VariableDelcaration , ...
func parse_VariableDeclarationList():
var \
declarations = []
declarations.append(parse_VariableDeclaration())
while NextToken.Type == TokenType.CommaDelimiter :
eat(TokenType.CommaDelimiter)
declarations.append(parse_VariableDeclaration())
return declarations
# > VariableDeclarationList
# VariableDeclaration
# : Identifier OptVariableInitalizer
# ;
func parse_VariableDeclaration():
var identifier = parse_Identifier()
var initalizer
if NextToken.Type != TokenType.StatementEnd && NextToken.Type != TokenType.CommaDelimiter :
initalizer = parse_VariableInitializer()
else :
initalizer = null
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.VariableDeclaration
node.Value = [ identifier, initalizer ]
return node
# > VariableDeclaration
# VariableInitializer
# : Assignment AssignmentExpression
# ;
func parse_VariableInitializer():
eat(TokenType.Assignment)
return parse_AssignmentExpression()
# > Expression
# > VariableInitializer
# > AssignmentExpression
# AssignmentExpression
# : RelationalExpression
# | ResolvedSymbol AssignmentOperator AssignmetnExpression
# ;
func parse_AssignmentExpression():
	# AssignmentExpression
	#	: LogicalOrExpression
	#	| LeftHandSide AssignmentOperator AssignmentExpression
	#	;
	# Right-recursive, so assignment chains associate to the right.
	var left = parse_LogicalOrExpression()
	# No assignment operator follows: this was a plain expression.
	if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment :
		return left
	var assignmentOp;
	if NextToken.Type == TokenType.Assignment :
		assignmentOp = eat(TokenType.Assignment)
	elif NextToken.Type == TokenType.ComplexAssignment :
		assignmentOp = eat(TokenType.ComplexAssignment)
	# NOTE(review): left is not validated to be an assignable target here;
	# e.g. "1 = 2" would parse — confirm whether that is intended.
	var \
	node = SyntaxNode.new()
	node.Type = SyntaxNodeType.AssignmentExpression
	# Value layout: [ operator, target, value ]
	node.Value = \
	[
		assignmentOp.Value,
		left,
		parse_AssignmentExpression()
	]
	return node
# > VariableDeclaration
# > ParenthesizedExpression
# Identifier
# : IdentifierSymbol
# ;
func parse_Identifier():
var name = eat(TokenType.Identifier).Value
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.Identifier
node.Value = name
return node
# > AssignmentExpression
# Logical Or Expression
# : LogicalAndExpression Logical_Or LogicalOrExpression
# | LogicalOrExpression
# ;
func parse_LogicalOrExpression():
	# Logical Or Expression
	#	: LogicalAndExpression Logical_Or LogicalOrExpression
	#	| LogicalAndExpression
	#	;
	var \
	parseFn = FuncRef.new()
	parseFn.set_instance(self)
	parseFn.set_function("parse_LogicalAndExpression")

	return parse_LogicalExpression(parseFn, TokenType.Logical_Or)

# > LogicalOrExpression
# Logical And Expression
#	: EqualityExpression Logical_And LogicalAndExpression
#	| EqualityExpression
#	;
# BUG FIX: renamed from the misspelled "pasre_LogicalAndExpression" together
# with the FuncRef string in parse_LogicalOrExpression above (its only caller).
func parse_LogicalAndExpression():
	var \
	parseFn = FuncRef.new()
	parseFn.set_instance(self)
	parseFn.set_function("parse_EqualityExpression")

	return parse_LogicalExpression(parseFn, TokenType.Logical_And)
# Equality Operators: ==, !=
#
# > LogicalAndExpression
# EqualityExpression
# : RelationalExpression EqualityOp RelationalExpression
# | RelationalExpression
# ;
func parse_EqualityExpression():
var \
parseFn = FuncRef.new()
parseFn.set_instance(self)
parseFn.set_function("parse_RelationalExpression")
return parse_BinaryExpression(parseFn, TokenType.EqualityOp)
# Relational Operators: >, >=, <, <=
#
# > EqualityExpression
# Relational Expression
# : AdditiveExpression
# | AdditiveExpression RelationalOp RelationalExpression
# ;
func parse_RelationalExpression():
var \
parseFn = FuncRef.new()
parseFn.set_instance(self)
parseFn.set_function("parse_AdditiveExpression")
return parse_BinaryExpression(parseFn, TokenType.RelationalOp)
# > RelationalExpression
# AdditiveExpression
# : MultiplicativeExpression
# | AdditiveExpression AdditiveOp MultiplicativeExpression -> MultiplicativeExpression AdditiveOp ... Literal
# ;
func parse_AdditiveExpression():
var \
parseFn = FuncRef.new()
parseFn.set_instance(self)
parseFn.set_function("parse_MultiplicativeExpression")
return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)
# > AdditiveExpression
# MultiplicativeExpression
# : UnaryExpressioon
# : MultiplicativeExpression MultiplicativeOp UnaryExpression -> UnaryExpression MultiplicativeOp ... Literal
# ;
func parse_MultiplicativeExpression():
var \
parseFn = FuncRef.new()
parseFn.set_instance(self)
parseFn.set_function("parse_UnaryExpression")
return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)
# > MultiplicativeExpression
# > UnaryExpression
# UnaryExpression
# : ResolvedSymbol
# | AdditiveOp UnaryExpression
# | Logical_Not UnaryExpression
# ;
func parse_UnaryExpression():
var operator
match NextToken.Type:
TokenType.AdditiveOp:
operator = eat(TokenType.AdditiveOp).Value
TokenType.Logical_Not:
operator = eat(TokenType.Logical_Not).Value
if operator == null :
return parse_ResolvedSymbol()
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.UnaryExpression
node.Value = [ operator, parse_UnaryExpression() ]
return node;
# > UnaryExpression
# > PrimaryExpression
# ResolvedSymbol (LeftHandExpression)
# : PrimaryExpression
# ;
func parse_ResolvedSymbol():
return parse_PrimaryExpression()
# var resolvedSymbol = parse_Identifier()
# if resolvedSymbol.Type == SyntaxNodeType.Identifier :
# return resolvedSymbol
# var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}"
# var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type})
# assert(true != true, assertStr)
# > ResolvedSymbol
# PrimaryExpression
# : Literal
# | ParenthesizedExpression
# ;
func parse_PrimaryExpression():
	# PrimaryExpression
	#	: Literal
	#	| ParenthesizedExpression
	#	| Identifier
	#	;
	if is_Literal():
		return parse_Literal()
	match NextToken.Type:
		TokenType.ExpressionPStart:
			return parse_ParenthesizedExpression()
		TokenType.Identifier:
			var identifier = parse_Identifier()
			if identifier.Type == SyntaxNodeType.Identifier :
				return identifier
			# assert(true != true) always fails: unreachable identifier shape.
			var assertStrTmplt = "parse_PrimaryExpression: (Identifier) Unexpected symbol: {value}"
			var assertStr = assertStrTmplt.format({"value" : identifier.Type})
			assert(true != true, assertStr)
	# NOTE(review): when NextToken matches none of the cases above, this calls
	# parse_ResolvedSymbol(), which currently calls straight back into
	# parse_PrimaryExpression — a potential infinite recursion on unexpected
	# tokens. Confirm whether a hard assert was intended here instead.
	return parse_ResolvedSymbol()
# > PrimaryExpression
# Literal
# : NumericLiteral
# | StringLiteral
# | BooleanLiteral
# | NullLiteral
# ;
func parse_Literal():
match NextToken.Type :
TokenType.Number:
return parse_NumericLiteral()
TokenType.String:
return parse_StringLiteral()
TokenType.Bool_true:
return parse_BooleanLiteral(TokenType.Bool_true)
TokenType.Bool_false:
return parse_BooleanLiteral(TokenType.Bool_false)
TokenType.NullValue:
return parse_NullLiteral()
assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")
# > PrimaryExpression
# ParenthesizedExpression
# : ( Expression )
# ;
func parse_ParenthesizedExpression():
eat(TokenType.ExpressionPStart)
var expression = parse_Expression()
eat(TokenType.ExpressionPEnd)
return expression
# > Literal
# NumericLiteral
# : Number
# ;
func parse_NumericLiteral():
var Token = eat(TokenType.Number)
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.NumericLiteral
node.Value = int( Token.Value )
return node
# > Literal
# StringLiteral
# : String
# ;
func parse_StringLiteral():
var Token = eat(TokenType.String)
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.StringLiteral
node.Value = Token.Value.substr( 1, Token.Value.length() - 2 )
return node
# > Literal
# BooleanLiteral
# : true
# | false
# ;
func parse_BooleanLiteral(token):
eat(token)
var value
if (TokenType.Bool_true == token) :
value = true
elif (TokenType.Bool_false == token) :
value = false
var \
node = SyntaxNode.new()
node.Type = SyntaxNodeType.BooleanLiteral
node.Value = value
return node
# > Literal
# NullLiteral
# : null
# ;
func parse_NullLiteral():
	# NullLiteral
	#	: null
	#	;
	# BUG FIX: previously ate TokenType.NullLiteral, which is not a key of
	# the TokenType dictionary (the token is declared as NullValue) — that
	# lookup fails at runtime. Consume the NullValue token instead.
	eat(TokenType.NullValue)

	var \
	node       = SyntaxNode.new()
	node.Type  = SyntaxNodeType.NullLiteral
	node.Value = null

	return node
var GParser = Parser.new()
onready var TextOut = GScene.get_node("TextOutput")
func tout(text):
TextOut.insert_text_at_cursor(text)
const Tests = \
{
MultiStatement = \
{
Name = "Multi-Statement",
File = "1.Multi-Statement.uf"
},
BlockStatement = \
{
Name = "Block Statement",
File = "2.BlockStatement.uf"
},
BinaryExpression = \
{
Name = "Binary Expression",
File = "3.BinaryExpression.uf"
},
Assignment = \
{
Name = "Assignment",
File = "4.Assignment.uf"
},
VaraibleDeclaration = \
{
Name = "Variable Declaration",
File = "5.VariableDeclaration.uf"
},
Conditionals = \
{
Name = "Conditionals",
File = "6.Conditionals.uf"
},
Relations = \
{
Name = "Relations",
File = "7.Relations.uf"
},
Equality = \
{
Name = "Equality",
File = "8.Equality.uf"
},
Logical = \
{
Name = "Logical",
File = "9.Logical.uf"
},
Unary = \
{
Name = "Unary",
File = "10.Unary.uf"
}
}
func test(entry):
var introMessage = "Testing: {Name}\n"
var introMessageFormatted = introMessage.format({"Name" : entry.Name})
tout(introMessageFormatted)
var path
if Engine.editor_hint :
path = "res://../Tests/{TestName}"
else :
path = "res://../Builds/Tests/{TestName}"
var pathFormatted = path.format({"TestName" : entry.File})
var \
file = File.new()
file.open(pathFormatted, File.READ)
var programDescription = file.get_as_text()
file.close()
GTokenizer.init(programDescription)
var ast = GParser.parse(GTokenizer)
var json = JSON.print(ast.to_Dictionary(), '\t')
tout(json + "\n")
tout("Passed!\n")
# Main Entry point.
func _ready():
for Key in Tests :
test(Tests[Key])

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,210 @@
extends Node
# This closesly follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# Necessary there will be heavy refactors.
enum TokenTypes \
{
Token_Number,
Token_String
}
const StrTokenTypes = \
{
Token_Number = "Number",
Token_String = "String"
}
class Token:
var Type : String
var Value : String
func toDict():
var result = \
{
Type = self.Type,
Value = self.Value
}
return result
class Tokenizer:
	# Hand-rolled (non-regex) lexer from the early lecture iteration:
	# recognizes integer Number tokens and double-quoted String tokens.
	var SrcTxt : String
	var Cursor : int;

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text, or null when the text is
	# exhausted or the next character starts no known token.
	func next_Token():
		if self.reached_EndOfTxt() == true :
			return null

		# BUG FIX: the remainder was previously re-indexed with the absolute
		# Cursor (token[self.Cursor]) even though substr(Cursor) already
		# removed the consumed prefix, skipping characters once Cursor > 0.
		# Index the slice from a local offset of 0 instead.
		var srcLeft = self.SrcTxt.substr(Cursor)
		var pos     = 0

		# Numbers: a maximal run of digits.
		if srcLeft[pos].is_valid_integer() :
			var \
			numberTok       = Token.new()
			numberTok.Type  = "Number"
			numberTok.Value = ""

			while srcLeft.length() > pos && srcLeft[pos].is_valid_integer() :
				numberTok.Value += srcLeft[pos]
				pos += 1

			self.Cursor += pos
			return numberTok

		# Strings: everything from the opening quote through the closing quote.
		if srcLeft[pos] == '"' :
			var \
			stringTok       = Token.new()
			stringTok.Type  = "String"
			stringTok.Value = "\""
			pos += 1

			# BUG FIX: previously consumed to the very end of the source;
			# now stops at (and includes) the closing quote.
			while srcLeft.length() > pos :
				stringTok.Value += srcLeft[pos]
				pos += 1
				if srcLeft[pos - 1] == '"' :
					break

			self.Cursor += pos
			return stringTok

		return null

	# True once the cursor has consumed the entire source text.
	func reached_EndOfTxt():
		# BUG FIX: was length() - 1, which made the final character unreachable.
		return self.Cursor >= self.SrcTxt.length()
var GTokenizer = Tokenizer.new()
class SyntaxNode:
var Type : String
var Value # Not specifing a type implicity declares a Variant type.
func toDict():
var result = \
{
Type = self.Type,
Value = self.Value
}
return result
class ProgramNode:
var Type : String
var Body : Object
func toDict():
var result = \
{
Type = self.Type,
Body = self.Body.toDict()
}
return result
class Parser:
var TokenizerRef : Tokenizer
var NextToken : Token
func eat(tokenType):
var currToken = self.NextToken
assert(currToken != null, "eat: NextToken was null")
var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
assert(currToken.Type == tokenType, assertStr)
self.NextToken = self.TokenizerRef.next_Token()
return currToken
# Literal
# : NumericLiteral
# : StringLiteral
# ;
func parse_Literal():
match self.NextToken.Type :
"Number":
return parse_NumericLiteral()
"String":
return parse_StringLiteral()
assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")
# NumericLiteral
# : Number
# ;
#
func parse_NumericLiteral():
var Token = self.eat("Number")
var \
node = SyntaxNode.new()
node.Type = "NumericLiteral"
node.Value = int( Token.Value )
return node
# StringLiteral
# : String
# ;
#
func parse_StringLiteral():
var Token = self.eat("String")
var \
node = SyntaxNode.new()
node.Type = "StringLiteral"
node.Value = Token.Value.substr( 1, Token.Value.length() - 2 )
return node
# Program
# : Literal
# ;
#
func parse_Program():
var \
node = ProgramNode.new()
node.Type = "Program"
node.Body = parse_Literal()
return node
# Parses the text program description into an AST.
func parse(TokenizerRef):
self.TokenizerRef = TokenizerRef
NextToken = TokenizerRef.next_Token()
return parse_Program()
var GParser = Parser.new()
# Main Entry point.
func _ready():
# Numerical test
var ProgramDescription = "47"
GTokenizer.init(ProgramDescription)
var ast = GParser.parse(GTokenizer)
print(JSON.print(ast.toDict(), "\t"))
# String Test
ProgramDescription = "\"hello\""
GTokenizer.init(ProgramDescription)
ast = GParser.parse(GTokenizer)
print(JSON.print(ast.toDict(), "\t"))
# Called every frame. 'delta' is the elapsed time since the previous frame.
#func _process(delta):
# pass

View File

@ -0,0 +1,264 @@
extends Node
# This closesly follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# Necessary there will be heavy refactors.
const TokenType = \
{
Program = "Program",
# Comments
CommentLine = "CommentLine",
CommentMultiLine = "CommentMultiLine",
# Formatting
Whitespace = "Whitespace",
# Literals
Number = "Number",
String = "String"
}
const TokenSpec = \
{
TokenType.CommentLine : "^\/\/.*",
TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
TokenType.Whitespace : "^\\s+",
TokenType.Number : "\\d+",
TokenType.String : "^\"[^\"]*\""
}
class Token:
var Type : String
var Value : String
func toDict():
var result = \
{
Type = self.Type,
Value = self.Value
}
return result
class Tokenizer:
	# Regex-driven lexer: walks SrcTxt with a cursor and emits Token objects
	# by trying each pattern in TokenSpec at the cursor position.
	var SrcTxt : String
	var Cursor : int;

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text, or null at end of input.
	# Comments and whitespace are consumed by recursing for the next token.
	func next_Token():
		if self.reached_EndOfTxt() == true :
			return null

		var srcLeft = self.SrcTxt.substr(Cursor)
		var regex   = RegEx.new()
		var token   = Token.new()

		for type in TokenSpec :
			regex.compile(TokenSpec[type])

			var result = regex.search(srcLeft)
			# BUG FIX: also require the match to start at the cursor; an
			# unanchored spec (Number is "\\d+") could otherwise match later
			# in the text and emit a token out of order.
			if result == null || result.get_start() != 0 :
				continue

			# Skip Comments
			if type == TokenType.CommentLine || type == TokenType.CommentMultiLine :
				self.Cursor += result.get_string().length()
				return next_Token()

			# Skip Whitespace
			if type == TokenType.Whitespace :
				self.Cursor += result.get_string().length()
				return next_Token()

			token.Type = type
			token.Value = result.get_string()
			# BUG FIX: advance by the full lexeme length (was length() - 1,
			# which left the cursor on the last character of every token).
			self.Cursor += result.get_string().length()
			return token

		# No spec matched: halt with a diagnostic (assert(true != true) always fails).
		var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
		var assertStr = assertStrTmplt.format({"value" : self.Cursor})
		assert(true != true, assertStr)
		return null

	# True once the cursor has consumed the entire source text.
	func reached_EndOfTxt():
		# BUG FIX: was length() - 1, which made the final character unreachable.
		return self.Cursor >= self.SrcTxt.length()
var GTokenizer = Tokenizer.new()
class SyntaxNode:
var Type : String
var Value # Not specifing a type implicity declares a Variant type.
func toDict():
var result = \
{
Type = self.Type,
Value = self.Value
}
return result
class ProgramNode:
var Type : String
var Body : Object
func toDict():
var result = \
{
Type = self.Type,
Body = self.Body.toDict()
}
return result
class Parser:
var TokenizerRef : Tokenizer
var NextToken : Token
func eat(tokenType):
var currToken = self.NextToken
assert(currToken != null, "eat: NextToken was null")
var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
assert(currToken.Type == tokenType, assertStr)
self.NextToken = self.TokenizerRef.next_Token()
return currToken
# Literal
# : NumericLiteral
# : StringLiteral
# ;
func parse_Literal():
match self.NextToken.Type :
TokenType.Number:
return parse_NumericLiteral()
TokenType.String:
return parse_StringLiteral()
assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")
# NumericLiteral
# : Number
# ;
#
func parse_NumericLiteral():
var Token = self.eat(TokenType.Number)
var \
node = SyntaxNode.new()
node.Type = TokenType.Number
node.Value = int( Token.Value )
return node
# StringLiteral
# : String
# ;
#
func parse_StringLiteral():
var Token = self.eat(TokenType.String)
var \
node = SyntaxNode.new()
node.Type = TokenType.String
node.Value = Token.Value.substr( 1, Token.Value.length() - 2 )
return node
# Program
# : Literal
# ;
#
func parse_Program():
var \
node = ProgramNode.new()
node.Type = TokenType.Program
node.Body = parse_Literal()
return node
# Parses the text program description into an AST.
func parse(TokenizerRef):
self.TokenizerRef = TokenizerRef
NextToken = TokenizerRef.next_Token()
return parse_Program()
var GParser = Parser.new()
var ProgramDescription : String
func test():
GTokenizer.init(ProgramDescription)
var ast = GParser.parse(GTokenizer)
print(JSON.print(ast.toDict(), "\t"))
# Main Entry point.
func _ready():
# Numerical test
ProgramDescription = "47"
test()
# String Test
ProgramDescription = "\"hello\""
test()
# Whitespace test
ProgramDescription = " \"we got past whitespace\" "
test()
# Comment Single Test
ProgramDescription = \
"""
// Testing a comment
\"hello sir\"
"""
test()
# Comment Multi-Line Test
ProgramDescription = \
"""
/**
*
* Testing a comment
*/
\"may I have some grapes\"
"""
test()
# Multi-statement test
ProgramDescription = \
"""
// Testing a comment
\"hello sir\";
/**
*
* Testing a comment
*/
\"may I have some grapes\";
"""
test()
# Called every frame. 'delta' is the elapsed time since the previous frame.
#func _process(delta):
# pass

View File

@ -0,0 +1,311 @@
extends Node
# This closesly follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# Necessary there will be heavy refactors.
const TokenType = \
{
Program = "Program",
# Comments
CommentLine = "CommentLine",
CommentMultiLine = "CommentMultiLine",
# Formatting
Whitespace = "Whitespace",
# Statements
StatementEnd = "StatementEnd",
# Literals
Number = "Number",
String = "String"
}
const TokenSpec = \
{
TokenType.CommentLine : "^\/\/.*",
TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
TokenType.Whitespace : "^\\s+",
TokenType.Number : "\\d+",
TokenType.String : "^\"[^\"]*\"",
TokenType.StatementEnd : "^;"
}
class Token:
var Type : String
var Value : String
func toDict():
var result = \
{
Type = self.Type,
Value = self.Value
}
return result
class Tokenizer:
var SrcTxt : String
var Cursor : int;
# Sets up the tokenizer with the program source text.
func init(programSrcText):
SrcTxt = programSrcText
Cursor = 0
# Provides the next token in the source text.
func next_Token():
if self.reached_EndOfTxt() == true :
return null
var srcLeft = self.SrcTxt.substr(Cursor)
var regex = RegEx.new()
var token = Token.new()
for type in TokenSpec :
regex.compile(TokenSpec[type])
var result = regex.search(srcLeft)
if result == null || result.get_start() != 0 :
continue
# Skip Comments
if type == TokenType.CommentLine || type == TokenType.CommentMultiLine :
self.Cursor += result.get_string().length()
return next_Token()
# Skip Whitespace
if type == TokenType.Whitespace :
var addVal = result.get_string().length()
self.Cursor += addVal
return next_Token()
token.Type = type
token.Value = result.get_string()
self.Cursor += ( result.get_string().length() )
return token
var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
var assertStr = assertStrTmplt.format({"value" : self.Cursor})
assert(true != true, assertStr)
return null
func reached_EndOfTxt():
return self.Cursor >= ( self.SrcTxt.length() - 1 )
var GTokenizer = Tokenizer.new()
const SyntaxNodeType = \
{
NumericLiteral = "NumericLiteral",
StringLiteral = "StringLiteral",
ExpressionStatement = "ExpressionStatement"
}
class SyntaxNode:
var Type : String
var Value # Not specifing a type implicity declares a Variant type.
func toDict():
var ValueDict = self.Value
if typeof(Value) == TYPE_ARRAY :
var dict = {}
var index = 0
for entry in self.Value :
dict[index] = entry.toDict()
index += 1
ValueDict = dict
var result = \
{
Type = self.Type,
Value = ValueDict
}
return result
class ProgramNode:
    # Root AST node; Body holds the program's statement-list node.
    var Type : String
    var Body : Object

    # Serializes the program root (and its body, recursively) to a dictionary.
    func toDict():
        return { Type = self.Type, Body = self.Body.toDict() }
class Parser:
    var TokenizerRef : Tokenizer
    var NextToken    : Token

    # Verifies the lookahead token matches tokenType, consumes it, and
    # advances the lookahead. Returns the consumed token.
    func eat(tokenType):
        var currToken = self.NextToken
        assert(currToken != null, "eat: NextToken was null")

        var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
        var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
        assert(currToken.Type == tokenType, assertStr)

        self.NextToken = self.TokenizerRef.next_Token()
        return currToken

    # Literal
    #    : NumericLiteral
    #    : StringLiteral
    #    ;
    func parse_Literal():
        match NextToken.Type :
            TokenType.Number:
                return parse_NumericLiteral()
            TokenType.String:
                return parse_StringLiteral()
        assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

    # NumericLiteral
    #    : Number
    #    ;
    func parse_NumericLiteral():
        # Local renamed from "Token" to avoid shadowing the Token class.
        var token = eat(TokenType.Number)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.NumericLiteral
        node.Value = int( token.Value )
        return node

    # StringLiteral
    #    : String
    #    ;
    func parse_StringLiteral():
        var token = eat(TokenType.String)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.StringLiteral
        # Strip the surrounding quote characters.
        node.Value = token.Value.substr( 1, token.Value.length() - 2 )
        return node

    # Expression
    #    : Literal
    #    ;
    func parse_Expression():
        return parse_Literal()

    # ExpressionStatement
    #    : Expression ;
    #    ;
    func parse_ExpressionStatement():
        var expression = parse_Expression()
        eat(TokenType.StatementEnd)

        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.ExpressionStatement
        node.Value = expression
        # BUG FIX: previously returned the bare expression, discarding the
        # ExpressionStatement wrapper node constructed above.
        return node

    # Statement
    #    : ExpressionStatement
    #    ;
    func parse_Statement():
        return parse_ExpressionStatement()

    # StatementList
    #    : Statement
    #    | StatementList Statement -> Statement ...
    #    ;
    func parse_StatementList():
        var statementList = [ parse_Statement() ]
        while NextToken != null :
            statementList.append( parse_Statement() )

        var \
        node       = SyntaxNode.new()
        node.Type  = "StatementList"
        node.Value = statementList
        return node

    # Program
    #    : StatementList
    #    ;
    func parse_Program():
        var \
        node      = ProgramNode.new()
        node.Type = TokenType.Program
        node.Body = parse_StatementList()
        return node

    # Parses the text program description into an AST.
    func parse(TokenizerRef):
        self.TokenizerRef = TokenizerRef
        NextToken = TokenizerRef.next_Token()
        return parse_Program()
# Shared parser instance used by the test driver below.
var GParser = Parser.new()

# Test registry: display name plus the test-program file to parse.
const Tests = \
{
    MultiStatement = \
    {
        Name = "Multi-Statement",
        File = "1.Multi-Statement.uf"
    }
}
# Runs one test entry: reads its program file, parses it, and prints the AST.
func test(entry):
    var introMessage          = "Testing: {Name}"
    var introMessageFormatted = introMessage.format({"Name" : entry.Name})
    print(introMessageFormatted)

    # Test files live in a different location in-editor vs in exported builds.
    var path
    # BUG FIX: Engine.editor_hint is a property, not a method; calling it as
    # a function raises a script error at runtime.
    if Engine.editor_hint :
        path = "res://Tests/{TestName}"
    else :
        path = "res://../Builds/Tests/{TestName}"
    var pathFormatted = path.format({"TestName" : entry.File})

    var \
    file = File.new()
    file.open(pathFormatted, File.READ)
    var programDescription = file.get_as_text()
    file.close()

    GTokenizer.init(programDescription)
    var ast = GParser.parse(GTokenizer)
    # Reuse the serialized JSON instead of serializing the AST twice.
    var json = JSON.print(ast.toDict(), "\t")
    print(json)
    print("Passed!\n")
# Main entry point: runs every registered test case.
func _ready():
    for key in Tests :
        test( Tests[key] )

View File

@ -0,0 +1,377 @@
extends Node

# This closely follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# necessary there will be heavy refactors.

# Token categories recognized by the Tokenizer.
const TokenType = \
{
    Program = "Program",

    # Comments
    CommentLine = "CommentLine",
    CommentMultiLine = "CommentMultiLine",

    # Formatting
    Whitespace = "Whitespace",

    # Statements
    StatementEnd = "StatementEnd",
    StmtBlockStart = "BlockStatementStart",
    StmtBlockEnd = "BlockStatementEnd",

    # Literals
    Number = "Number",
    String = "String"
}

# Regex pattern for each token type; specs are tried in declaration order
# against the text at the cursor.
# NOTE(review): the Number pattern is the only one without a "^" anchor; the
# tokenizer compensates by requiring get_start() == 0 on the match.
const TokenSpec = \
{
    TokenType.CommentLine : "^\/\/.*",
    TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
    TokenType.Whitespace : "^\\s+",
    TokenType.Number : "\\d+",
    TokenType.String : "^\"[^\"]*\"",
    TokenType.StatementEnd : "^;",
    TokenType.StmtBlockStart : "^{",
    TokenType.StmtBlockEnd : "^}"
}
class Token:
    # One lexical unit: a category tag plus the exact source text matched.
    var Type  : String
    var Value : String

    # Plain-dictionary form of the token, used for JSON serialization.
    func toDict():
        return { Type = self.Type, Value = self.Value }
class Tokenizer:
    var SrcTxt : String
    var Cursor : int

    # Sets up the tokenizer with the program source text.
    func init(programSrcText):
        SrcTxt = programSrcText
        Cursor = 0

    # Provides the next token in the source text, or null once the whole
    # source has been consumed. Comments and whitespace are skipped rather
    # than emitted as tokens.
    func next_Token():
        if self.reached_EndOfTxt() :
            return null

        var srcLeft = self.SrcTxt.substr(Cursor)
        var regex   = RegEx.new()
        var token   = Token.new()

        for type in TokenSpec :
            regex.compile(TokenSpec[type])

            var result = regex.search(srcLeft)
            # A spec only applies when it matches right at the cursor.
            if result == null || result.get_start() != 0 :
                continue

            var matched = result.get_string()

            # Skip comments and whitespace: advance past them and recurse.
            if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace :
                self.Cursor += matched.length()
                return next_Token()

            token.Type   = type
            token.Value  = matched
            self.Cursor += matched.length()
            return token

        var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
        # Idiom fix: assert(false, ...) instead of assert(true != true, ...).
        assert(false, assertStrTmplt.format({"value" : self.Cursor}))
        return null

    # True once the cursor has passed the last character of the source.
    func reached_EndOfTxt():
        return self.Cursor >= ( self.SrcTxt.length() )
# Shared tokenizer instance used by the test driver below.
var GTokenizer = Tokenizer.new()

# AST node type tags produced by the Parser.
const SyntaxNodeType = \
{
    NumericLiteral = "NumericLiteral",
    StringLiteral = "StringLiteral",
    ExpressionStatement = "ExpressionStatement",
    BlockStatement = "BlockStatement",
    EmptyStatement = "EmptyStatement"
}
class SyntaxNode:
    # Generic AST node: a type tag plus a Variant payload.
    var Type : String
    var Value # Not specifying a type implicitly declares a Variant type.

    # Serializes this node to a plain dictionary. Arrays of child nodes
    # become an index-keyed dictionary; a single child node is serialized
    # recursively; plain values pass through unchanged.
    func toDict():
        if typeof(Value) == TYPE_ARRAY :
            var dict  = {}
            var index = 0
            for entry in self.Value :
                dict[index] = entry.toDict()
                index += 1
            return { Type = self.Type, Value = dict }

        if typeof(Value) == TYPE_OBJECT :
            # Fixed local-variable typo ("reuslt" -> inline return).
            return { Type = self.Type, Value = self.Value.toDict() }

        return { Type = self.Type, Value = self.Value }
class ProgramNode:
    # Root AST node; Body holds the program's statement-list node.
    var Type : String
    var Body : Object

    # Serializes the program root (and its body, recursively) to a dictionary.
    func toDict():
        return { Type = self.Type, Body = self.Body.toDict() }
class Parser:
    var TokenizerRef : Tokenizer
    var NextToken    : Token

    # Verifies the lookahead token matches tokenType, consumes it, and
    # advances the lookahead. Returns the consumed token.
    func eat(tokenType):
        var currToken = self.NextToken
        assert(currToken != null, "eat: NextToken was null")

        var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
        var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
        assert(currToken.Type == tokenType, assertStr)

        self.NextToken = self.TokenizerRef.next_Token()
        return currToken

    # Literal
    #    : NumericLiteral
    #    : StringLiteral
    #    ;
    func parse_Literal():
        match NextToken.Type :
            TokenType.Number:
                return parse_NumericLiteral()
            TokenType.String:
                return parse_StringLiteral()
        assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

    # NumericLiteral
    #    : Number
    #    ;
    func parse_NumericLiteral():
        # Local renamed from "Token" to avoid shadowing the Token class.
        var token = eat(TokenType.Number)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.NumericLiteral
        node.Value = int( token.Value )
        return node

    # StringLiteral
    #    : String
    #    ;
    func parse_StringLiteral():
        var token = eat(TokenType.String)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.StringLiteral
        # Strip the surrounding quote characters.
        node.Value = token.Value.substr( 1, token.Value.length() - 2 )
        return node

    # Expression
    #    : Literal
    #    ;
    func parse_Expression():
        return parse_Literal()

    # EmptyStatement
    #    : ;
    #    ;
    func parse_EmptyStatement():
        eat(TokenType.StatementEnd)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.EmptyStatement
        return node

    # BlockStatement
    #    : { OptStatementList }
    #    ;
    func parse_BlockStatement():
        eat(TokenType.StmtBlockStart)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.BlockStatement

        if NextToken.Type != TokenType.StmtBlockEnd :
            node.Value = parse_StatementList(TokenType.StmtBlockEnd)
        else :
            node.Value = []

        eat(TokenType.StmtBlockEnd)
        return node

    # ExpressionStatement
    #    : Expression ;
    #    ;
    func parse_ExpressionStatement():
        var expression = parse_Expression()
        eat(TokenType.StatementEnd)

        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.ExpressionStatement
        node.Value = expression
        # BUG FIX: previously returned the bare expression, discarding the
        # ExpressionStatement wrapper node constructed above.
        return node

    # Statement
    #    : ExpressionStatement
    #    : BlockStatement
    #    : EmptyStatement
    #    ;
    func parse_Statement():
        match NextToken.Type :
            TokenType.StatementEnd :
                return parse_EmptyStatement()
            TokenType.StmtBlockStart :
                return parse_BlockStatement()
        return parse_ExpressionStatement()

    # StatementList
    #    : Statement
    #    | StatementList Statement -> Statement ...
    #    ;
    # endToken: stop when the lookahead reaches this type (null = end of input).
    func parse_StatementList(endToken):
        var statementList = [ parse_Statement() ]
        while NextToken != null && NextToken.Type != endToken :
            statementList.append( parse_Statement() )

        var \
        node       = SyntaxNode.new()
        node.Type  = "StatementList"
        node.Value = statementList
        return node

    # Program
    #    : StatementList
    #    ;
    func parse_Program():
        var \
        node      = ProgramNode.new()
        node.Type = TokenType.Program
        node.Body = parse_StatementList(null)
        return node

    # Parses the text program description into an AST.
    func parse(TokenizerRef):
        self.TokenizerRef = TokenizerRef
        NextToken = TokenizerRef.next_Token()
        return parse_Program()
# Shared parser instance used by the test driver below.
var GParser = Parser.new()

# Output TextEdit node; GScene is expected to be provided elsewhere in the
# project (not defined in this file) — TODO confirm it is an autoload.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output panel at the cursor position.
func tout(text):
    TextOut.insert_text_at_cursor(text)

# Test registry: display name plus the test-program file to parse.
const Tests = \
{
    MultiStatement = \
    {
        Name = "Multi-Statement",
        File = "1.Multi-Statement.uf"
    },
    BlockStatement = \
    {
        Name = "Block Statement",
        File = "2.BlockStatement.uf"
    }
}
# Runs one test entry: reads its program file, parses it, and prints the AST.
func test(entry):
    # Announce which test entry is running.
    tout( "Testing: {Name}\n".format({"Name" : entry.Name}) )

    # Test files live in a different location in-editor vs in exported builds.
    var path
    if Engine.editor_hint :
        path = "res://../Tests/{TestName}"
    else :
        path = "res://../Builds/Tests/{TestName}"
    var pathFormatted = path.format({"TestName" : entry.File})

    # Read the entire test program's source text.
    var file = File.new()
    file.open(pathFormatted, File.READ)
    var programDescription = file.get_as_text()
    file.close()

    # Tokenize, parse, and dump the resulting AST as JSON.
    GTokenizer.init(programDescription)
    var ast  = GParser.parse(GTokenizer)
    var json = JSON.print(ast.toDict(), "\t")
    tout(json + "\n")
    tout("Passed!\n")
# Main entry point: runs every registered test case.
func _ready():
    for key in Tests :
        test( Tests[key] )

View File

@ -0,0 +1,386 @@
extends Node

# This closely follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# necessary there will be heavy refactors.

# Token categories recognized by the Tokenizer.
const TokenType = \
{
    Program = "Program",

    # Comments
    CommentLine = "CommentLine",
    CommentMultiLine = "CommentMultiLine",

    # Formatting
    Whitespace = "Whitespace",

    # Statements
    StatementEnd = "StatementEnd",
    StmtBlockStart = "BlockStatementStart",
    StmtBlockEnd = "BlockStatementEnd",

    # Literals
    Number = "Number",
    String = "String"
}

# Regex pattern for each token type; specs are tried in declaration order
# against the text at the cursor.
# NOTE(review): the Number pattern is the only one without a "^" anchor; the
# tokenizer compensates by requiring get_start() == 0 on the match.
const TokenSpec = \
{
    TokenType.CommentLine : "^\/\/.*",
    TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
    TokenType.Whitespace : "^\\s+",
    TokenType.Number : "\\d+",
    TokenType.String : "^\"[^\"]*\"",
    TokenType.StatementEnd : "^;",
    TokenType.StmtBlockStart : "^{",
    TokenType.StmtBlockEnd : "^}"
}
class Token:
    # One lexical unit: a category tag plus the exact source text matched.
    var Type  : String
    var Value : String

    # Plain-dictionary form of the token, used for serialization.
    func to_Dictionary():
        return { Type = self.Type, Value = self.Value }
class Tokenizer:
    var SrcTxt : String
    var Cursor : int

    # Sets up the tokenizer with the program source text.
    func init(programSrcText):
        SrcTxt = programSrcText
        Cursor = 0

    # Provides the next token in the source text, or null once the whole
    # source has been consumed. Comments and whitespace are skipped rather
    # than emitted as tokens.
    func next_Token():
        if self.reached_EndOfTxt() :
            return null

        var srcLeft = self.SrcTxt.substr(Cursor)
        var regex   = RegEx.new()
        var token   = Token.new()

        for type in TokenSpec :
            regex.compile(TokenSpec[type])

            var result = regex.search(srcLeft)
            # A spec only applies when it matches right at the cursor.
            if result == null || result.get_start() != 0 :
                continue

            var matched = result.get_string()

            # Skip comments and whitespace: advance past them and recurse.
            if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace :
                self.Cursor += matched.length()
                return next_Token()

            token.Type   = type
            token.Value  = matched
            self.Cursor += matched.length()
            return token

        var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
        # Idiom fix: assert(false, ...) instead of assert(true != true, ...).
        assert(false, assertStrTmplt.format({"value" : self.Cursor}))
        return null

    # True once the cursor has passed the last character of the source.
    func reached_EndOfTxt():
        return self.Cursor >= ( self.SrcTxt.length() )
# Shared tokenizer instance used by the test driver below.
var GTokenizer = Tokenizer.new()

# Supported AST serialization formats.
const AST_Format = \
{
    Dictionary = "Dictionary",
    SExpression = "S-Expression"
}

# AST node type tags produced by the Parser.
const SyntaxNodeType = \
{
    NumericLiteral = "NumericLiteral",
    StringLiteral = "StringLiteral",
    ExpressionStatement = "ExpressionStatement",
    BlockStatement = "BlockStatement",
    EmptyStatement = "EmptyStatement"
}
class SyntaxNode:
    # Generic AST node: a type tag plus a Variant payload.
    var Type : String
    var Value # Variant: plain value, child node, or array of child nodes.

    # Serializes this node as an S-expression: [ Type, payload ].
    func to_SExpression():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                children.append( entry.to_SExpression() )
            return [ Type, children ]

        if typeof(Value) == TYPE_OBJECT :
            return [ Type, Value.to_SExpression() ]

        return [ Type, Value ]

    # Serializes this node as a nested dictionary.
    func to_Dictionary():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                children.append( entry.to_Dictionary() )
            return { Type = self.Type, Value = children }

        if typeof(Value) == TYPE_OBJECT :
            return { Type = self.Type, Value = self.Value.to_Dictionary() }

        return { Type = self.Type, Value = self.Value }
class Parser:
    var TokenizerRef : Tokenizer
    var NextToken    : Token

    # Verifies the lookahead token matches tokenType, consumes it, and
    # advances the lookahead. Returns the consumed token.
    func eat(tokenType):
        var currToken = self.NextToken
        assert(currToken != null, "eat: NextToken was null")

        var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
        var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
        assert(currToken.Type == tokenType, assertStr)

        self.NextToken = self.TokenizerRef.next_Token()
        return currToken

    # Literal
    #    : NumericLiteral
    #    : StringLiteral
    #    ;
    func parse_Literal():
        match NextToken.Type :
            TokenType.Number:
                return parse_NumericLiteral()
            TokenType.String:
                return parse_StringLiteral()
        assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

    # NumericLiteral
    #    : Number
    #    ;
    func parse_NumericLiteral():
        # Local renamed from "Token" to avoid shadowing the Token class.
        var token = eat(TokenType.Number)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.NumericLiteral
        node.Value = int( token.Value )
        return node

    # StringLiteral
    #    : String
    #    ;
    func parse_StringLiteral():
        var token = eat(TokenType.String)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.StringLiteral
        # Strip the surrounding quote characters.
        node.Value = token.Value.substr( 1, token.Value.length() - 2 )
        return node

    # Expression
    #    : Literal
    #    ;
    func parse_Expression():
        return parse_Literal()

    # EmptyStatement
    #    : ;
    #    ;
    func parse_EmptyStatement():
        eat(TokenType.StatementEnd)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.EmptyStatement
        return node

    # BlockStatement
    #    : { OptStatementList }
    #    ;
    func parse_BlockStatement():
        eat(TokenType.StmtBlockStart)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.BlockStatement

        if NextToken.Type != TokenType.StmtBlockEnd :
            node.Value = parse_StatementList(TokenType.StmtBlockEnd)
        else :
            node.Value = []

        eat(TokenType.StmtBlockEnd)
        return node

    # ExpressionStatement
    #    : Expression ;
    #    ;
    func parse_ExpressionStatement():
        var expression = parse_Expression()
        eat(TokenType.StatementEnd)

        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.ExpressionStatement
        node.Value = expression
        # BUG FIX: previously returned the bare expression, discarding the
        # ExpressionStatement wrapper node constructed above.
        return node

    # Statement
    #    : ExpressionStatement
    #    : BlockStatement
    #    : EmptyStatement
    #    ;
    func parse_Statement():
        match NextToken.Type :
            TokenType.StatementEnd :
                return parse_EmptyStatement()
            TokenType.StmtBlockStart :
                return parse_BlockStatement()
        return parse_ExpressionStatement()

    # StatementList
    #    : Statement
    #    | StatementList Statement -> Statement ...
    #    ;
    # endToken: stop when the lookahead reaches this type (null = end of input).
    # Returns a plain array of statement nodes.
    func parse_StatementList(endToken):
        var statementList = [ parse_Statement() ]
        while NextToken != null && NextToken.Type != endToken :
            statementList.append( parse_Statement() )
        return statementList

    # Program
    #    : StatementList
    #    ;
    func parse_Program():
        var \
        node       = SyntaxNode.new()
        node.Type  = TokenType.Program
        node.Value = parse_StatementList(null)
        return node

    # Parses the text program description into an AST.
    func parse(TokenizerRef):
        self.TokenizerRef = TokenizerRef
        NextToken = TokenizerRef.next_Token()
        return parse_Program()
# Shared parser instance used by the test driver below.
var GParser = Parser.new()

# Output TextEdit node; GScene is expected to be provided elsewhere in the
# project (not defined in this file) — TODO confirm it is an autoload.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output panel at the cursor position.
func tout(text):
    TextOut.insert_text_at_cursor(text)

# Test registry: display name plus the test-program file to parse.
const Tests = \
{
    MultiStatement = \
    {
        Name = "Multi-Statement",
        File = "1.Multi-Statement.uf"
    },
    BlockStatement = \
    {
        Name = "Block Statement",
        File = "2.BlockStatement.uf"
    }
}
# Runs one test entry: reads its program file, parses it, and prints the AST.
func test(entry):
    # Announce which test entry is running.
    tout( "Testing: {Name}\n".format({"Name" : entry.Name}) )

    # Test files live in a different location in-editor vs in exported builds.
    var path
    if Engine.editor_hint :
        path = "res://../Tests/{TestName}"
    else :
        path = "res://../Builds/Tests/{TestName}"
    var pathFormatted = path.format({"TestName" : entry.File})

    # Read the entire test program's source text.
    var file = File.new()
    file.open(pathFormatted, File.READ)
    var programDescription = file.get_as_text()
    file.close()

    # Tokenize, parse, and dump the resulting AST as an S-expression.
    GTokenizer.init(programDescription)
    var ast  = GParser.parse(GTokenizer)
    var json = JSON.print(ast.to_SExpression(), "\t")
    tout(json + "\n")
    tout("Passed!\n")
# Main entry point: runs every registered test case.
func _ready():
    for key in Tests :
        test( Tests[key] )

View File

@ -0,0 +1,485 @@
extends Node

# This closely follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# necessary there will be heavy refactors.

# Token categories recognized by the Tokenizer.
const TokenType = \
{
    Program = "Program",

    # Comments
    CommentLine = "CommentLine",
    CommentMultiLine = "CommentMultiLine",

    # Formatting
    Whitespace = "Whitespace",

    # Expressions
    ExpressionPStart = "Expresssion Parenthesis Start",
    ExpressionPEnd = "Expression Parenthesis End",

    # Arithmetic
    AdditiveOp = "AdditiveOperator",
    MultiplicativeOp = "MultiplicativeOperator",

    # Statements
    StatementEnd = "StatementEnd",
    StmtBlockStart = "BlockStatementStart",
    StmtBlockEnd = "BlockStatementEnd",

    # Literals
    Number = "Number",
    String = "String"
}

# Regex pattern for each token type; specs are tried in declaration order
# against the text at the cursor.
# NOTE(review): the Number pattern is the only one without a "^" anchor; the
# tokenizer compensates by requiring get_start() == 0 on the match.
const TokenSpec = \
{
    # Comments
    TokenType.CommentLine : "^\/\/.*",
    TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",

    # Formatting
    TokenType.Whitespace : "^\\s+",

    # Expressions
    TokenType.ExpressionPStart : "^\\(",
    TokenType.ExpressionPEnd : "^\\)",

    # Arithmetic
    TokenType.AdditiveOp : "^[+\\-]",
    TokenType.MultiplicativeOp : "^[*\\/]",

    # Literal
    TokenType.Number : "\\d+",
    TokenType.String : "^\"[^\"]*\"",

    # Statements
    TokenType.StatementEnd : "^;",
    TokenType.StmtBlockStart : "^{",
    TokenType.StmtBlockEnd : "^}"
}
class Token:
    # One lexical unit: a category tag plus the exact source text matched.
    var Type  : String
    var Value : String

    # Plain-dictionary form of the token, used for serialization.
    func to_Dictionary():
        return { Type = self.Type, Value = self.Value }
class Tokenizer:
    var SrcTxt : String
    var Cursor : int

    # Sets up the tokenizer with the program source text.
    func init(programSrcText):
        SrcTxt = programSrcText
        Cursor = 0

    # Provides the next token in the source text, or null once the whole
    # source has been consumed. Comments and whitespace are skipped rather
    # than emitted as tokens.
    func next_Token():
        if reached_EndOfTxt() :
            return null

        var srcLeft = SrcTxt.substr(Cursor)
        var regex   = RegEx.new()
        var token   = Token.new()

        for type in TokenSpec :
            regex.compile(TokenSpec[type])

            var result = regex.search(srcLeft)
            # A spec only applies when it matches right at the cursor.
            if result == null || result.get_start() != 0 :
                continue

            var matched = result.get_string()

            # Skip comments and whitespace: advance past them and recurse.
            if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace :
                Cursor += matched.length()
                return next_Token()

            token.Type  = type
            token.Value = matched
            Cursor     += matched.length()
            return token

        var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
        # Idiom fix: assert(false, ...) instead of assert(true != true, ...).
        assert(false, assertStrTmplt.format({"value" : Cursor}))
        return null

    # True once the cursor has passed the last character of the source.
    func reached_EndOfTxt():
        return Cursor >= ( SrcTxt.length() )
# Shared tokenizer instance used by the test driver below.
var GTokenizer = Tokenizer.new()

# Supported AST serialization formats.
const AST_Format = \
{
    Dictionary = "Dictionary",
    SExpression = "S-Expression"
}

# AST node type tags produced by the Parser.
const SyntaxNodeType = \
{
    NumericLiteral = "NumericLiteral",
    StringLiteral = "StringLiteral",
    ExpressionStatement = "ExpressionStatement",
    BlockStatement = "BlockStatement",
    EmptyStatement = "EmptyStatement",
    BinaryExpression = "BinaryExpression",
#	MultiplicativeExpression = "MultiplicativeExpression"
}
class SyntaxNode:
    # Generic AST node: a type tag plus a Variant payload.
    var Type : String
    var Value # Variant: plain value, child node, or array of children/values.

    # Serializes this node as an S-expression: [ Type, payload ].
    func to_SExpression():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                # Entries may be nodes or plain values (e.g. operator strings).
                if typeof(entry) == TYPE_OBJECT :
                    children.append( entry.to_SExpression() )
                else :
                    children.append( entry )
            return [ Type, children ]

        if typeof(Value) == TYPE_OBJECT :
            return [ Type, Value.to_SExpression() ]

        return [ Type, Value ]

    # Serializes this node as a nested dictionary.
    func to_Dictionary():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                if typeof(entry) == TYPE_OBJECT :
                    children.append( entry.to_Dictionary() )
                else :
                    children.append( entry )
            return { Type = self.Type, Value = children }

        if typeof(Value) == TYPE_OBJECT :
            return { Type = self.Type, Value = self.Value.to_Dictionary() }

        return { Type = self.Type, Value = self.Value }
class Parser:
    var TokenizerRef : Tokenizer
    var NextToken    : Token

    # Verifies the lookahead token matches tokenType, consumes it, and
    # advances the lookahead. Returns the consumed token.
    func eat(tokenType):
        var currToken = self.NextToken
        assert(currToken != null, "eat: NextToken was null")

        var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
        var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
        assert(currToken.Type == tokenType, assertStr)

        NextToken = TokenizerRef.next_Token()
        return currToken

    # NumericLiteral
    #    : Number
    #    ;
    func parse_NumericLiteral():
        # Local renamed from "Token" to avoid shadowing the Token class.
        var token = eat(TokenType.Number)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.NumericLiteral
        node.Value = int( token.Value )
        return node

    # StringLiteral
    #    : String
    #    ;
    func parse_StringLiteral():
        var token = eat(TokenType.String)
        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.StringLiteral
        # Strip the surrounding quote characters.
        node.Value = token.Value.substr( 1, token.Value.length() - 2 )
        return node

    # Literal
    #    : NumericLiteral
    #    : StringLiteral
    #    ;
    func parse_Literal():
        match NextToken.Type :
            TokenType.Number:
                return parse_NumericLiteral()
            TokenType.String:
                return parse_StringLiteral()
        assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

    # ParenthesizedExpression
    #    : ( Expression )
    #    ;
    func parse_ParenthesizedExpression():
        eat(TokenType.ExpressionPStart)
        var expression = parse_Expression()
        eat(TokenType.ExpressionPEnd)
        return expression

    # PrimaryExpression
    #    : Literal
    #    | ParenthesizedExpression
    #    ;
    func parse_PrimaryExpression():
        match NextToken.Type:
            TokenType.ExpressionPStart:
                return parse_ParenthesizedExpression()
        return parse_Literal()

    # MultiplicativeExpression
    #    : PrimaryExpression
    #    : MultiplicativeExpression MultiplicativeOp PrimaryExpression
    #    ;
    func parse_MultiplicativeExpression():
        var \
        parseFn = FuncRef.new()
        parseFn.set_instance(self)
        parseFn.set_function("parse_PrimaryExpression")
        return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

    # AdditiveExpression
    #    : MultiplicativeExpression
    #    | AdditiveExpression AdditiveOp MultiplicativeExpression
    #    ;
    func parse_AdditiveExpression():
        var \
        parseFn = FuncRef.new()
        parseFn.set_instance(self)
        parseFn.set_function("parse_MultiplicativeExpression")
        return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

    # BinaryExpression: left-associative chain of operatorToken applications,
    # with operands parsed by parse_fn. Node Value is [ op, left, right ].
    func parse_BinaryExpression(parse_fn, operatorToken):
        var left = parse_fn.call_func()

        while NextToken.Type == operatorToken:
            var operator = eat(operatorToken)
            var right    = parse_fn.call_func()

            var \
            nestedNode       = SyntaxNode.new()
            nestedNode.Type  = SyntaxNodeType.BinaryExpression
            nestedNode.Value = []
            nestedNode.Value.append(operator.Value)
            nestedNode.Value.append(left)
            nestedNode.Value.append(right)

            left = nestedNode

        return left

    # Expression
    #    : AdditiveExpression
    #    ;
    func parse_Expression():
        return parse_AdditiveExpression()

    # EmptyStatement
    #    : ;
    #    ;
    func parse_EmptyStatement():
        eat(TokenType.StatementEnd)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.EmptyStatement
        return node

    # BlockStatement
    #    : { OptStatementList }
    #    ;
    func parse_BlockStatement():
        eat(TokenType.StmtBlockStart)
        var \
        node      = SyntaxNode.new()
        node.Type = SyntaxNodeType.BlockStatement

        if NextToken.Type != TokenType.StmtBlockEnd :
            node.Value = parse_StatementList(TokenType.StmtBlockEnd)
        else :
            node.Value = []

        eat(TokenType.StmtBlockEnd)
        return node

    # ExpressionStatement
    #    : Expression ;
    #    ;
    func parse_ExpressionStatement():
        var expression = parse_Expression()
        eat(TokenType.StatementEnd)

        var \
        node       = SyntaxNode.new()
        node.Type  = SyntaxNodeType.ExpressionStatement
        node.Value = expression
        # BUG FIX: previously returned the bare expression, discarding the
        # ExpressionStatement wrapper node constructed above.
        return node

    # Statement
    #    : ExpressionStatement
    #    : BlockStatement
    #    : EmptyStatement
    #    ;
    func parse_Statement():
        match NextToken.Type :
            TokenType.StatementEnd :
                return parse_EmptyStatement()
            TokenType.StmtBlockStart :
                return parse_BlockStatement()
        return parse_ExpressionStatement()

    # StatementList
    #    : Statement
    #    | StatementList Statement -> Statement ...
    #    ;
    # endToken: stop when the lookahead reaches this type (null = end of input).
    # Returns a plain array of statement nodes.
    func parse_StatementList(endToken):
        var statementList = [ parse_Statement() ]
        while NextToken != null && NextToken.Type != endToken :
            statementList.append( parse_Statement() )
        return statementList

    # Program
    #    : StatementList
    #    ;
    func parse_Program():
        var \
        node       = SyntaxNode.new()
        node.Type  = TokenType.Program
        node.Value = parse_StatementList(null)
        return node

    # Parses the text program description into an AST.
    func parse(TokenizerRef):
        self.TokenizerRef = TokenizerRef
        NextToken = TokenizerRef.next_Token()
        return parse_Program()
# Shared parser instance used by the test driver below.
var GParser = Parser.new()

# Output TextEdit node; GScene is expected to be provided elsewhere in the
# project (not defined in this file) — TODO confirm it is an autoload.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output panel at the cursor position.
func tout(text):
    TextOut.insert_text_at_cursor(text)

# Test registry: display name plus the test-program file to parse.
const Tests = \
{
    MultiStatement = \
    {
        Name = "Multi-Statement",
        File = "1.Multi-Statement.uf"
    },
    BlockStatement = \
    {
        Name = "Block Statement",
        File = "2.BlockStatement.uf"
    },
    BinaryExpression = \
    {
        Name = "Binary Expression",
        File = "3.BinaryExpression.uf"
    }
}
# Runs one test entry: reads its program file, parses it, and prints the AST.
func test(entry):
    # Announce which test entry is running.
    tout( "Testing: {Name}\n".format({"Name" : entry.Name}) )

    # Test files live in a different location in-editor vs in exported builds.
    var path
    if Engine.editor_hint :
        path = "res://../Tests/{TestName}"
    else :
        path = "res://../Builds/Tests/{TestName}"
    var pathFormatted = path.format({"TestName" : entry.File})

    # Read the entire test program's source text.
    var file = File.new()
    file.open(pathFormatted, File.READ)
    var programDescription = file.get_as_text()
    file.close()

    # Tokenize, parse, and dump the resulting AST as an S-expression.
    GTokenizer.init(programDescription)
    var ast  = GParser.parse(GTokenizer)
    var json = JSON.print(ast.to_SExpression(), "\t")
    tout(json + "\n")
    tout("Passed!\n")
# Main entry point: runs every registered test case.
func _ready():
    for key in Tests :
        test( Tests[key] )

View File

@ -0,0 +1,563 @@
extends Node

# This closely follows the source provided in the lectures.
# Later on after the lectures are complete or when I deem
# necessary there will be heavy refactors.

# Token categories recognized by the Tokenizer.
const TokenType = \
{
    Program = "Program",

    # Comments
    CommentLine = "CommentLine",
    CommentMultiLine = "CommentMultiLine",

    # Formatting
    Whitespace = "Whitespace",

    # Expressions
    ExpressionPStart = "Expresssion Parenthesis Start",
    ExpressionPEnd = "Expression Parenthesis End",

    # Arithmetic
    ComplexAssignment = "ComplexAssignment",
    Assignment = "Assignment",
    AdditiveOp = "AdditiveOperator",
    MultiplicativeOp = "MultiplicativeOperator",

    # Statements
    StatementEnd = "StatementEnd",
    StmtBlockStart = "BlockStatementStart",
    StmtBlockEnd = "BlockStatementEnd",

    # Literals
    Number = "Number",
    String = "String",

    # Symbols
    Identifier = "Identifier"
}

# Regex pattern for each token type; specs are tried in declaration order
# against the text at the cursor, so more specific specs (ComplexAssignment,
# Number) must precede the catch-all Identifier pattern.
# NOTE(review): the Number pattern lacks the "^" anchor the other specs use;
# the tokenizer compensates by requiring get_start() == 0 on the match.
const TokenSpec = \
{
    # Comments
    TokenType.CommentLine : "^\/\/.*",
    TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",

    # Formatting
    TokenType.Whitespace : "^\\s+",

    # Expressions
    TokenType.ExpressionPStart : "^\\(",
    TokenType.ExpressionPEnd : "^\\)",

    # Arithmetic
    TokenType.ComplexAssignment : "^[*\\/\\+\\-]=",
    TokenType.Assignment : "^=",
    TokenType.AdditiveOp : "^[+\\-]",
    TokenType.MultiplicativeOp : "^[*\\/]",

    # Literal
    TokenType.Number : "\\d+",
    TokenType.String : "^\"[^\"]*\"",

    # Statements
    TokenType.StatementEnd : "^;",
    TokenType.StmtBlockStart : "^{",
    TokenType.StmtBlockEnd : "^}",

    # Symbols
    TokenType.Identifier : "^\\w+"
}
class Token:
    # One lexical unit: a category tag plus the exact source text matched.
    var Type  : String
    var Value : String

    # Plain-dictionary form of the token, used for serialization.
    func to_Dictionary():
        return { Type = self.Type, Value = self.Value }
class Tokenizer:
    var SrcTxt : String
    var Cursor : int

    # Sets up the tokenizer with the program source text.
    func init(programSrcText):
        SrcTxt = programSrcText
        Cursor = 0

    # Provides the next token in the source text, or null once the whole
    # source has been consumed. Comments and whitespace are skipped rather
    # than emitted as tokens.
    func next_Token():
        if reached_EndOfTxt() :
            return null

        var srcLeft = SrcTxt.substr(Cursor)
        var regex   = RegEx.new()
        var token   = Token.new()

        for type in TokenSpec :
            regex.compile(TokenSpec[type])

            var result = regex.search(srcLeft)
            # A spec only applies when it matches right at the cursor.
            if result == null || result.get_start() != 0 :
                continue

            var matched = result.get_string()

            # Skip comments and whitespace: advance past them and recurse.
            if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace :
                Cursor += matched.length()
                return next_Token()

            token.Type  = type
            token.Value = matched
            Cursor     += matched.length()
            return token

        var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
        # Idiom fix: assert(false, ...) instead of assert(true != true, ...).
        assert(false, assertStrTmplt.format({"value" : Cursor}))
        return null

    # True once the cursor has passed the last character of the source.
    func reached_EndOfTxt():
        return Cursor >= ( SrcTxt.length() )
# Shared tokenizer instance used by the test driver below.
var GTokenizer = Tokenizer.new()

# Supported AST serialization formats.
const AST_Format = \
{
    Dictionary = "Dictionary",
    SExpression = "S-Expression"
}

# AST node type tags produced by the Parser.
const SyntaxNodeType = \
{
    NumericLiteral = "NumericLiteral",
    StringLiteral = "StringLiteral",
    ExpressionStatement = "ExpressionStatement",
    BlockStatement = "BlockStatement",
    EmptyStatement = "EmptyStatement",
    BinaryExpression = "BinaryExpression",
    Identifier = "Identifier",
    AssignmentExpression = "AssignmentExpression"
}
class SyntaxNode:
    # Generic AST node: a type tag plus a Variant payload.
    var Type : String
    var Value # Variant: plain value, child node, or array of children/values.

    # Serializes this node as an S-expression: [ Type, payload ].
    func to_SExpression():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                # Entries may be nodes or plain values (e.g. operator strings).
                if typeof(entry) == TYPE_OBJECT :
                    children.append( entry.to_SExpression() )
                else :
                    children.append( entry )
            return [ Type, children ]

        if typeof(Value) == TYPE_OBJECT :
            return [ Type, Value.to_SExpression() ]

        return [ Type, Value ]

    # Serializes this node as a nested dictionary.
    func to_Dictionary():
        if typeof(Value) == TYPE_ARRAY :
            var children = []
            for entry in self.Value :
                if typeof(entry) == TYPE_OBJECT :
                    children.append( entry.to_Dictionary() )
                else :
                    children.append( entry )
            return { Type = self.Type, Value = children }

        if typeof(Value) == TYPE_OBJECT :
            return { Type = self.Type, Value = self.Value.to_Dictionary() }

        return { Type = self.Type, Value = self.Value }
class Parser:
	# Recursive-descent parser. Consumes tokens from TokenizerRef using a
	# single token of lookahead (NextToken) and produces a SyntaxNode AST.
	var TokenizerRef : Tokenizer
	var NextToken : Token

	# True when the lookahead is a literal token (number or string).
	func is_Literal():
		return NextToken.Type == TokenType.Number || NextToken.Type == TokenType.String

	# Consumes and returns the lookahead token, asserting that it is of
	# tokenType, then advances the lookahead to the next token.
	func eat(tokenType):
		var currToken = self.NextToken
		assert(currToken != null, "eat: NextToken was null")
		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
		assert(currToken.Type == tokenType, assertStr)
		NextToken = TokenizerRef.next_Token()
		return currToken

	# Literal
	# : NumericLiteral
	# : StringLiteral
	# ;
	func parse_Literal():
		match NextToken.Type :
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()
		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# NumericLiteral
	# : Number
	# ;
	func parse_NumericLiteral():
		# Local renamed from "Token" to avoid shadowing the Token class.
		var token = eat(TokenType.Number)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )
		return node

	# StringLiteral
	# : String
	# ;
	func parse_StringLiteral():
		var token = eat(TokenType.String)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.StringLiteral
		# Strip the surrounding quote characters.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )
		return node

	# ParenthesizedExpression
	# : ( Expression )
	# ;
	func parse_ParenthesizedExpression():
		eat(TokenType.ExpressionPStart)
		var expression = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		return expression

	# MultiplicativeExpression
	# : PrimaryExpression
	# : MultiplicativeExpression MultiplicativeOp PrimaryExpression -> PrimaryExpression MultiplicativeOp ... Literal
	# ;
	func parse_MultiplicativeExpression():
		var \
		parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_PrimaryExpression")
		return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

	# AdditiveExpression
	# : MultiplicativeExpression
	# | AdditiveExpression AdditiveOp MultiplicativeExpression -> MultiplicativeExpression AdditiveOp ... Literal
	# ;
	func parse_AdditiveExpression():
		var \
		parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_MultiplicativeExpression")
		return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

	# BinaryExpression
	# : MultiplicativeExpression
	# | AdditiveExpression
	# ;
	# Left-associative fold: parse_fn parses each operand; operatorToken is
	# the token type that continues the chain.
	func parse_BinaryExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()
		while NextToken.Type == operatorToken:
			var operator = eat(operatorToken)
			var right = parse_fn.call_func()
			var \
			nestedNode = SyntaxNode.new()
			nestedNode.Type = SyntaxNodeType.BinaryExpression
			nestedNode.Value = []
			nestedNode.Value.append(operator.Value)
			nestedNode.Value.append(left)
			nestedNode.Value.append(right)
			left = nestedNode
		return left

	# Identifier
	# : IdentifierSymbol
	# ;
	func parse_Identifier():
		var name = eat(TokenType.Identifier).Value
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.Identifier
		node.Value = name
		return node

	# ResolvedSymbol
	# : Identifier
	# ;
	func parse_ResolvedSymbol():
		var resolvedSymbol = parse_Identifier()
		if resolvedSymbol.Type == SyntaxNodeType.Identifier :
			return resolvedSymbol
		var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}"
		var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type})
		# Idiom fix: assert(false, ...) rather than assert(true != true, ...).
		assert(false, assertStr)

	# PrimaryExpression
	# : Literal
	# | ParenthesizedExpression
	# | ResolvedSymbol
	# ;
	func parse_PrimaryExpression():
		if is_Literal():
			return parse_Literal()
		match NextToken.Type:
			TokenType.ExpressionPStart:
				return parse_ParenthesizedExpression()
		return parse_ResolvedSymbol()

	# AssignmentExpression
	# : AdditiveExpression
	# | ResolvedSymbol AssignmentOperator AssignmentExpression
	# ;
	func parse_AssignmentExpression():
		var left = parse_AdditiveExpression()
		if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment :
			return left
		var assignmentOp
		if NextToken.Type == TokenType.Assignment :
			assignmentOp = eat(TokenType.Assignment)
		elif NextToken.Type == TokenType.ComplexAssignment :
			assignmentOp = eat(TokenType.ComplexAssignment)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.AssignmentExpression
		node.Value = \
		[
			assignmentOp.Value,
			left,
			parse_AssignmentExpression()
		]
		return node

	# Expression
	# : AssignmentExpression
	# ;
	func parse_Expression():
		return parse_AssignmentExpression()

	# EmptyStatement
	# ;
	func parse_EmptyStatement():
		eat(TokenType.StatementEnd)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.EmptyStatement
		return node

	# BlockStatement
	# : { OptStatementList }
	# ;
	func parse_BlockStatement():
		eat(TokenType.StmtBlockStart)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.BlockStatement
		if NextToken.Type != TokenType.StmtBlockEnd :
			node.Value = parse_StatementList(TokenType.StmtBlockEnd)
		else :
			node.Value = []
		eat(TokenType.StmtBlockEnd)
		return node

	# ExpressionStatement
	# : Expression
	# ;
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.ExpressionStatement
		node.Value = expression
		# Bug fix: previously returned the bare expression, discarding the
		# ExpressionStatement node constructed above.
		return node

	# Statement
	# : ExpressionStatement
	# : BlockStatement
	# : EmptyStatement
	# ;
	func parse_Statement():
		match NextToken.Type :
			TokenType.StatementEnd :
				return parse_EmptyStatement()
			TokenType.StmtBlockStart :
				return parse_BlockStatement()
		return parse_ExpressionStatement()

	# StatementList
	# : Statement
	# | StatementList Statement -> Statement ...
	# ;
	# Accumulates statements until endToken is the lookahead (null = EOF).
	func parse_StatementList(endToken):
		var statementList = [ parse_Statement() ]
		while NextToken != null && NextToken.Type != endToken :
			statementList.append( parse_Statement() )
		return statementList

	# Program
	# : StatementList
	# : Literal
	# ;
	func parse_Program():
		var \
		node = SyntaxNode.new()
		node.Type = TokenType.Program
		node.Value = parse_StatementList(null)
		return node

	# Parses the text program description into an AST.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		NextToken = TokenizerRef.next_Token()
		return parse_Program()
# Shared parser instance used by the test harness below.
var GParser = Parser.new()

# Output console widget.
# NOTE(review): GScene is not defined in this file — presumably an autoload
# singleton providing the scene root; confirm.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output console at its cursor position.
func tout(text):
	TextOut.insert_text_at_cursor(text)
# Registry of parser test cases: a display Name plus the source File name,
# resolved against the Tests directory by test().
const Tests = \
{
	MultiStatement = \
	{
		Name = "Multi-Statement",
		File = "1.Multi-Statement.uf"
	},
	BlockStatement = \
	{
		Name = "Block Statement",
		File = "2.BlockStatement.uf"
	},
	BinaryExpression = \
	{
		Name = "Binary Expression",
		File = "3.BinaryExpression.uf"
	},
	Assignment = \
	{
		Name = "Assignment",
		File = "4.Assignment.uf"
	}
}
# Runs one parser test: reads entry.File from the Tests directory,
# tokenizes and parses it, and prints the resulting AST as an S-expression.
func test(entry):
	var introMessage = "Testing: {Name}\n"
	var introMessageFormatted = introMessage.format({"Name" : entry.Name})
	tout(introMessageFormatted)
	var path
	# Test files live in a different relative location in-editor vs. in a build.
	if Engine.editor_hint :
		path = "res://../Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})
	var \
	file = File.new()
	# Bug fix: the open error code was ignored, so a missing test file
	# surfaced later as a confusing tokenizer assert on empty text.
	var openResult = file.open(pathFormatted, File.READ)
	assert(openResult == OK, "test: Could not open test file: " + pathFormatted)
	var programDescription = file.get_as_text()
	file.close()
	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)
	var json = JSON.print(ast.to_SExpression(), '\t')
	tout(json + "\n")
	tout("Passed!\n")
# Main Entry point: runs every registered parser test in order.
func _ready():
	for entry in Tests.values() :
		test(entry)

View File

@ -0,0 +1,634 @@
extends Node
# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.
# Token type tags produced by the Tokenizer and matched on by the Parser.
# NOTE(review): the value "ExpresssionParenthesisStart" has a triple-s typo;
# it is only compared against itself, so it is harmless, but fix it in both
# parser files together if renamed.
const TokenType = \
{
	Program = "Program",
	# Comments
	CommentLine = "CommentLine",
	CommentMultiLine = "CommentMultiLine",
	# Formatting
	Whitespace = "Whitespace",
	# Expressions
	ExpressionPStart = "ExpresssionParenthesisStart",
	ExpressionPEnd = "ExpressionParenthesisEnd",
	# Arithmetic
	ComplexAssignment = "ComplexAssignment",
	Assignment = "Assignment",
	AdditiveOp = "AdditiveOperator",
	MultiplicativeOp = "MultiplicativeOperator",
	# Statements
	StatementEnd = "StatementEnd",
	StmtBlockStart = "BlockStatementStart",
	StmtBlockEnd = "BlockStatementEnd",
	CommaDelimiter = "CommaDelimiter",
	# Literals
	Number = "Number",
	String = "String",
	# Symbols
	VarDeclare = "Variable Declaration",
	Identifier = "Identifier"
}
# Regex spec for each token type. Order matters: specs are tried top to
# bottom, so Number must precede the catch-all Identifier. Every pattern is
# anchored with ^ so it can only match at the start of the remaining source.
const TokenSpec = \
{
	# Comments
	TokenType.CommentLine : "^\\/\\/.*",
	TokenType.CommentMultiLine : "^\\/\\*[\\s\\S]*?\\*\\/",
	# Formatting
	TokenType.Whitespace : "^\\s+",
	# Expressions
	TokenType.ExpressionPStart : "^\\(",
	TokenType.ExpressionPEnd : "^\\)",
	# Arithmetic
	TokenType.ComplexAssignment : "^[*\\/\\+\\-]=",
	TokenType.Assignment : "^=",
	TokenType.AdditiveOp : "^[+\\-]",
	TokenType.MultiplicativeOp : "^[*\\/]",
	# Literal
	# Consistency fix: anchored like every other spec. The previous
	# unanchored "\\d+" was only harmless because next_Token() rejects
	# matches whose get_start() != 0.
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\"",
	# Statements
	TokenType.StatementEnd : "^;",
	TokenType.StmtBlockStart : "^{",
	TokenType.StmtBlockEnd : "^}",
	TokenType.CommaDelimiter : "^,",
	# Symbols
	TokenType.VarDeclare : "^\\blet\\b",
	TokenType.Identifier : "^\\w+"
}
class Token:
	# A single lexed token: a TokenType tag and the matched source text.
	var Type : String
	var Value : String

	# Returns this token as a { Type, Value } dictionary.
	func to_Dictionary():
		return { Type = self.Type, Value = self.Value }
class Tokenizer:
	var SrcTxt : String # Full program source text.
	var Cursor : int    # Index of the next unconsumed character.

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text, or null at end of input.
	# Comments and whitespace are consumed and skipped.
	func next_Token():
		var regex = RegEx.new()
		# Bug fix: iterate instead of recursing per skipped token, so long
		# runs of comments/whitespace cannot grow the call stack.
		while reached_EndOfTxt() == false :
			var srcLeft = SrcTxt.substr(Cursor)
			var matched = false
			for type in TokenSpec :
				regex.compile(TokenSpec[type])
				var result = regex.search(srcLeft)
				# Only a match at the very start of the remaining text counts.
				if result == null || result.get_start() != 0 :
					continue
				matched = true
				Cursor += result.get_string().length()
				# Skip comments and whitespace, then try again.
				if type == TokenType.CommentLine || type == TokenType.CommentMultiLine || type == TokenType.Whitespace :
					break
				var \
				token = Token.new()
				token.Type = type
				token.Value = result.get_string()
				return token
			if matched == false :
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr = assertStrTmplt.format({"value" : Cursor})
				assert(false, assertStr)
				return null
		return null

	# True once every character of the source has been consumed.
	func reached_EndOfTxt():
		return Cursor >= ( SrcTxt.length() )
# Shared tokenizer instance used by the test harness below.
var GTokenizer = Tokenizer.new()

# Output-format selector tags for AST serialization.
# NOTE(review): AST_Format is not referenced in this file — presumably used
# by UI code elsewhere; confirm before removing.
const AST_Format = \
{
	Dictionary = "Dictionary",
	SExpression = "S-Expression"
}

# Node type tags assigned by the Parser when constructing SyntaxNodes.
const SyntaxNodeType = \
{
	NumericLiteral = "NumericLiteral",
	StringLiteral = "StringLiteral",
	ExpressionStatement = "ExpressionStatement",
	BlockStatement = "BlockStatement",
	EmptyStatement = "EmptyStatement",
	BinaryExpression = "BinaryExpression",
	Identifier = "Identifier",
	AssignmentExpression = "AssignmentExpression",
	VariableStatement = "VariableStatement",
	VariableDeclaration = "VariableDeclaration"
}
class SyntaxNode:
	# A single AST node. Type is the SyntaxNodeType tag; Value is the payload:
	# a scalar, a child SyntaxNode, or an Array of children and scalars.
	var Type : String
	var Value # Not specifing a type implicity declares a Variant type.

	# Serializes this node (recursively) as an S-expression: [ Type, payload ].
	func to_SExpression():
		match typeof(Value):
			TYPE_ARRAY:
				var converted = []
				for element in Value:
					if typeof(element) == TYPE_OBJECT:
						converted.append( element.to_SExpression() )
					else:
						converted.append( element )
				return [ Type, converted ]
			TYPE_OBJECT:
				return [ Type, Value.to_SExpression() ]
			_:
				return [ Type, Value ]

	# Serializes this node (recursively) as a { Type, Value } dictionary.
	func to_Dictionary():
		match typeof(Value):
			TYPE_ARRAY:
				var converted = []
				for element in Value:
					if typeof(element) == TYPE_OBJECT:
						converted.append( element.to_Dictionary() )
					else:
						converted.append( element )
				return { Type = self.Type, Value = converted }
			TYPE_OBJECT:
				return { Type = self.Type, Value = self.Value.to_Dictionary() }
			_:
				return { Type = self.Type, Value = self.Value }
class Parser:
	# Recursive-descent parser. Consumes tokens from TokenizerRef using a
	# single token of lookahead (NextToken) and produces a SyntaxNode AST.
	var TokenizerRef : Tokenizer
	var NextToken : Token

	# True when the lookahead is a literal token (number or string).
	func is_Literal():
		return NextToken.Type == TokenType.Number || NextToken.Type == TokenType.String

	# Consumes and returns the lookahead token, asserting that it is of
	# tokenType, then advances the lookahead to the next token.
	func eat(tokenType):
		var currToken = self.NextToken
		assert(currToken != null, "eat: NextToken was null")
		var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
		var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
		assert(currToken.Type == tokenType, assertStr)
		NextToken = TokenizerRef.next_Token()
		return currToken

	# Literal
	# : NumericLiteral
	# : StringLiteral
	# ;
	func parse_Literal():
		match NextToken.Type :
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()
		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# NumericLiteral
	# : Number
	# ;
	func parse_NumericLiteral():
		# Local renamed from "Token" to avoid shadowing the Token class.
		var token = eat(TokenType.Number)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )
		return node

	# StringLiteral
	# : String
	# ;
	func parse_StringLiteral():
		var token = eat(TokenType.String)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.StringLiteral
		# Strip the surrounding quote characters.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )
		return node

	# ParenthesizedExpression
	# : ( Expression )
	# ;
	func parse_ParenthesizedExpression():
		eat(TokenType.ExpressionPStart)
		var expression = parse_Expression()
		eat(TokenType.ExpressionPEnd)
		return expression

	# MultiplicativeExpression
	# : PrimaryExpression
	# : MultiplicativeExpression MultiplicativeOp PrimaryExpression -> PrimaryExpression MultiplicativeOp ... Literal
	# ;
	func parse_MultiplicativeExpression():
		var \
		parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_PrimaryExpression")
		return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

	# AdditiveExpression
	# : MultiplicativeExpression
	# | AdditiveExpression AdditiveOp MultiplicativeExpression -> MultiplicativeExpression AdditiveOp ... Literal
	# ;
	func parse_AdditiveExpression():
		var \
		parseFn = FuncRef.new()
		parseFn.set_instance(self)
		parseFn.set_function("parse_MultiplicativeExpression")
		return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

	# BinaryExpression
	# : MultiplicativeExpression
	# | AdditiveExpression
	# ;
	# Left-associative fold: parse_fn parses each operand; operatorToken is
	# the token type that continues the chain.
	func parse_BinaryExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()
		while NextToken.Type == operatorToken:
			var operator = eat(operatorToken)
			var right = parse_fn.call_func()
			var \
			nestedNode = SyntaxNode.new()
			nestedNode.Type = SyntaxNodeType.BinaryExpression
			nestedNode.Value = []
			nestedNode.Value.append(operator.Value)
			nestedNode.Value.append(left)
			nestedNode.Value.append(right)
			left = nestedNode
		return left

	# Identifier
	# : IdentifierSymbol
	# ;
	func parse_Identifier():
		var name = eat(TokenType.Identifier).Value
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.Identifier
		node.Value = name
		return node

	# ResolvedSymbol
	# : Identifier
	# ;
	func parse_ResolvedSymbol():
		var resolvedSymbol = parse_Identifier()
		if resolvedSymbol.Type == SyntaxNodeType.Identifier :
			return resolvedSymbol
		var assertStrTmplt = "parse_ResolvedSymbol: Unexpected symbol: {value}"
		var assertStr = assertStrTmplt.format({"value" : resolvedSymbol.Type})
		# Idiom fix: assert(false, ...) rather than assert(true != true, ...).
		assert(false, assertStr)

	# PrimaryExpression
	# : Literal
	# | ParenthesizedExpression
	# | ResolvedSymbol
	# ;
	func parse_PrimaryExpression():
		if is_Literal():
			return parse_Literal()
		match NextToken.Type:
			TokenType.ExpressionPStart:
				return parse_ParenthesizedExpression()
		return parse_ResolvedSymbol()

	# AssignmentExpression
	# : AdditiveExpression
	# | ResolvedSymbol AssignmentOperator AssignmentExpression
	# ;
	func parse_AssignmentExpression():
		var left = parse_AdditiveExpression()
		if NextToken.Type != TokenType.Assignment && NextToken.Type != TokenType.ComplexAssignment :
			return left
		var assignmentOp
		if NextToken.Type == TokenType.Assignment :
			assignmentOp = eat(TokenType.Assignment)
		elif NextToken.Type == TokenType.ComplexAssignment :
			assignmentOp = eat(TokenType.ComplexAssignment)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.AssignmentExpression
		node.Value = \
		[
			assignmentOp.Value,
			left,
			parse_AssignmentExpression()
		]
		return node

	# Expression
	# : AssignmentExpression
	# ;
	func parse_Expression():
		return parse_AssignmentExpression()

	# EmptyStatement
	# ;
	func parse_EmptyStatement():
		eat(TokenType.StatementEnd)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.EmptyStatement
		return node

	# VariableInitializer
	# : Assignment AssignmentExpression
	# ;
	func parse_VariableInitializer():
		eat(TokenType.Assignment)
		return parse_AssignmentExpression()

	# VariableDeclaration
	# : Identifier OptVariableInitializer
	# ;
	func parse_VariableDeclaration():
		var identifier = parse_Identifier()
		# The initializer is optional; null marks a bare declaration.
		var initializer
		if NextToken.Type != TokenType.StatementEnd && NextToken.Type != TokenType.CommaDelimiter :
			initializer = parse_VariableInitializer()
		else :
			initializer = null
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.VariableDeclaration
		node.Value = [ identifier, initializer ]
		return node

	# VariableDeclarationList
	# : VariableDeclaration
	# | VariableDeclarationList , VariableDeclaration -> VariableDeclaration , ...
	# ;
	func parse_VariableDeclarationList():
		var \
		declarations = []
		declarations.append(parse_VariableDeclaration())
		while NextToken.Type == TokenType.CommaDelimiter :
			eat(TokenType.CommaDelimiter)
			declarations.append(parse_VariableDeclaration())
		return declarations

	# VariableStatement
	# : VarDeclare VariableDeclarationList StatementEnd
	# ;
	func parse_VariableStatement():
		eat(TokenType.VarDeclare)
		var declarations = parse_VariableDeclarationList()
		eat(TokenType.StatementEnd)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.VariableStatement
		node.Value = declarations
		return node

	# BlockStatement
	# : { OptStatementList }
	# ;
	func parse_BlockStatement():
		eat(TokenType.StmtBlockStart)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.BlockStatement
		if NextToken.Type != TokenType.StmtBlockEnd :
			node.Value = parse_StatementList(TokenType.StmtBlockEnd)
		else :
			node.Value = []
		eat(TokenType.StmtBlockEnd)
		return node

	# ExpressionStatement
	# : Expression
	# ;
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.ExpressionStatement
		node.Value = expression
		# Bug fix: previously returned the bare expression, discarding the
		# ExpressionStatement node constructed above.
		return node

	# Statement
	# : ExpressionStatement
	# | BlockStatement
	# | EmptyStatement
	# | VariableStatement
	# ;
	func parse_Statement():
		match NextToken.Type :
			TokenType.StatementEnd :
				return parse_EmptyStatement()
			TokenType.StmtBlockStart :
				return parse_BlockStatement()
			TokenType.VarDeclare :
				return parse_VariableStatement()
		return parse_ExpressionStatement()

	# StatementList
	# : Statement
	# | StatementList Statement -> Statement ...
	# ;
	# Accumulates statements until endToken is the lookahead (null = EOF).
	func parse_StatementList(endToken):
		var statementList = [ parse_Statement() ]
		while NextToken != null && NextToken.Type != endToken :
			statementList.append( parse_Statement() )
		return statementList

	# Program
	# : StatementList
	# : Literal
	# ;
	func parse_Program():
		var \
		node = SyntaxNode.new()
		node.Type = TokenType.Program
		node.Value = parse_StatementList(null)
		return node

	# Parses the text program description into an AST.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		NextToken = TokenizerRef.next_Token()
		return parse_Program()
# Shared parser instance used by the test harness below.
var GParser = Parser.new()

# Output console widget.
# NOTE(review): GScene is not defined in this file — presumably an autoload
# singleton providing the scene root; confirm.
onready var TextOut = GScene.get_node("TextOutput")

# Appends text to the output console at its cursor position.
func tout(text):
	TextOut.insert_text_at_cursor(text)
# Registry of parser test cases: a display Name plus the source File name,
# resolved against the Tests directory by test().
const Tests = \
{
	MultiStatement = \
	{
		Name = "Multi-Statement",
		File = "1.Multi-Statement.uf"
	},
	BlockStatement = \
	{
		Name = "Block Statement",
		File = "2.BlockStatement.uf"
	},
	BinaryExpression = \
	{
		Name = "Binary Expression",
		File = "3.BinaryExpression.uf"
	},
	Assignment = \
	{
		Name = "Assignment",
		File = "4.Assignment.uf"
	},
	# NOTE(review): key "VaraibleDeclaration" is misspelled; harmless since
	# only the entry values are iterated, but worth renaming.
	VaraibleDeclaration = \
	{
		Name = "Variable Declaration",
		File = "5.VariableDeclaration.uf"
	}
}
# Runs one parser test: reads entry.File from the Tests directory,
# tokenizes and parses it, and prints the resulting AST as an S-expression.
func test(entry):
	var introMessage = "Testing: {Name}\n"
	var introMessageFormatted = introMessage.format({"Name" : entry.Name})
	tout(introMessageFormatted)
	var path
	# Test files live in a different relative location in-editor vs. in a build.
	if Engine.editor_hint :
		path = "res://../Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})
	var \
	file = File.new()
	# Bug fix: the open error code was ignored, so a missing test file
	# surfaced later as a confusing tokenizer assert on empty text.
	var openResult = file.open(pathFormatted, File.READ)
	assert(openResult == OK, "test: Could not open test file: " + pathFormatted)
	var programDescription = file.get_as_text()
	file.close()
	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)
	var json = JSON.print(ast.to_SExpression(), '\t')
	tout(json + "\n")
	tout("Passed!\n")
# Main Entry point: runs every registered parser test in order.
func _ready():
	for entry in Tests.values() :
		test(entry)

View File

@ -0,0 +1,92 @@
Following the first lecture of "Building a Parser from scratch"
By Dmitry Soshnikov.
Lecture 1:
Phases:
Data - Text Content
Processor - Tokenizer
Data - Tokens
Processor - Parser
Data - AST
Example of syntaxes :
S-Expression :
(class Point
(begin
(def constructor (self x y)
(begin
(set (prop self x) x)
(set (prop self y) y)
)
)
(def calc (self)
(+ (prop self x)
(prop self y)
)
)
)
)
(var p (new Point 10 20))
((prop p calc) p)
User Syntax :
class Point
{
def constructor( x, y )
{
this.x = x;
this.y = y;
}
def calc() {
return this.x + this.y;
}
}
let
p = new Point(10, 20);
p.calc();
Tokenizer - Lexical Analysis : Uses Regular Expressions (Optimal)
Parser - Syntactic Analysis : Uses Backus-Naur Form
Backus-Naur Example :
Program
: StatementList
;
StatementList
: BlockStatement
| IfStatement
| FunctionDeclaration
...
;
FunctionDeclaration
: def Identifier ( Arguments ) BlockStatement
;
Hand-written parsers :
Use recursive descent.
Automatically generated
All kinds of stuff...