mirror of
https://github.com/Ed94/LangStudies.git
synced 2024-11-10 04:14:53 -08:00
486 lines
10 KiB
GDScript
486 lines
10 KiB
GDScript
extends Node
|
|
|
|
# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.
|
|
|
|
# Names of every token kind the tokenizer can emit (plus "Program", which the
# parser uses as the type of the root AST node). Each value is the string that
# ends up in Token.Type and in parser error messages.
const TokenType = {
	"Program" : "Program",

	# Comments (recognized by the tokenizer, never handed to the parser)
	"CommentLine"      : "CommentLine",
	"CommentMultiLine" : "CommentMultiLine",

	# Formatting (also skipped by the tokenizer)
	"Whitespace" : "Whitespace",

	# Expression grouping
	"ExpressionPStart" : "Expresssion Parenthesis Start",
	"ExpressionPEnd"   : "Expression Parenthesis End",

	# Arithmetic operators
	"AdditiveOp"       : "AdditiveOperator",
	"MultiplicativeOp" : "MultiplicativeOperator",

	# Statement punctuation
	"StatementEnd"   : "StatementEnd",
	"StmtBlockStart" : "BlockStatementStart",
	"StmtBlockEnd"   : "BlockStatementEnd",

	# Literals
	"Number" : "Number",
	"String" : "String"
}
|
|
|
|
# Maps each token type to the regular expression that recognizes it.
#
# The tokenizer walks these entries in declaration order and uses the first
# pattern that matches at the cursor, so the order is significant: both comment
# patterns must stay ahead of MultiplicativeOp, otherwise the leading "/" of a
# comment would be tokenized as a division operator.
#
# Every pattern is anchored with "^" so a failed attempt stops at the cursor
# instead of scanning the rest of the source text for a later occurrence.
const TokenSpec = {
	# Comments
	TokenType.CommentLine      : "^\/\/.*",
	TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",

	# Formatting
	TokenType.Whitespace : "^\\s+",

	# Expressions
	TokenType.ExpressionPStart : "^\\(",
	TokenType.ExpressionPEnd   : "^\\)",

	# Arithmetic
	TokenType.AdditiveOp       : "^[+\\-]",
	TokenType.MultiplicativeOp : "^[*\\/]",

	# Literal
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\"",

	# Statements
	TokenType.StatementEnd   : "^;",
	TokenType.StmtBlockStart : "^{",
	TokenType.StmtBlockEnd   : "^}"
}
|
|
|
|
# A single lexeme produced by the tokenizer.
class Token:
	# One of the TokenType values.
	var Type : String
	# The exact source text that was matched.
	var Value : String

	# Returns a plain Dictionary copy of this token with "Type" and "Value" keys.
	func to_Dictionary():
		return { "Type" : Type, "Value" : Value }
|
|
|
|
# Splits program source text into Token objects, one per next_Token() call.
class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# [type, RegEx] pairs in TokenSpec order, compiled on first use so the
	# patterns are not recompiled for every token.
	var _compiledSpec : Array = []

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Compiles every TokenSpec pattern once, preserving declaration order
	# (the order decides which pattern wins when several could match).
	func _compile_Spec():
		for type in TokenSpec :
			var regex = RegEx.new()
			regex.compile(TokenSpec[type])
			_compiledSpec.append([ type, regex ])

	# Provides the next token in the source text.
	#
	# Whitespace and comments are consumed silently. Returns null when the end
	# of the text is reached, or (after asserting) when no pattern matches at
	# the cursor.
	func next_Token():
		if _compiledSpec.empty() :
			_compile_Spec()

		# Loop rather than recurse while skipping, so the call stack does not
		# grow with the number of consecutive comments / whitespace runs.
		while ! reached_EndOfTxt() :
			var srcLeft = SrcTxt.substr(Cursor)
			var matched = null
			var matchedType = ""

			for entry in _compiledSpec :
				var result = entry[1].search(srcLeft)

				# Only a match that begins exactly at the cursor counts.
				if result == null || result.get_start() != 0 :
					continue

				matched = result
				matchedType = entry[0]
				break

			if matched == null :
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr = assertStrTmplt.format({"value" : Cursor})
				assert(false, assertStr)
				return null

			var text = matched.get_string()
			Cursor += text.length()

			# Comments and whitespace never reach the parser.
			if matchedType == TokenType.CommentLine || matchedType == TokenType.CommentMultiLine :
				continue
			if matchedType == TokenType.Whitespace :
				continue

			var token = Token.new()
			token.Type = matchedType
			token.Value = text
			return token

		return null

	# True once the cursor has moved past the last character of the source.
	func reached_EndOfTxt():
		return Cursor >= ( SrcTxt.length() )
|
|
|
|
# Shared tokenizer instance; test() re-initializes it with each program's source text.
var GTokenizer = Tokenizer.new()
|
|
|
|
|
|
|
|
# Names of the output shapes an AST can be converted to
# (see SyntaxNode.to_Dictionary / SyntaxNode.to_SExpression).
const AST_Format = {
	"Dictionary"  : "Dictionary",
	"SExpression" : "S-Expression"
}
|
|
|
|
# Type tags assigned to SyntaxNode.Type by the parser.
const SyntaxNodeType = {
	"NumericLiteral"      : "NumericLiteral",
	"StringLiteral"       : "StringLiteral",
	"ExpressionStatement" : "ExpressionStatement",
	"BlockStatement"      : "BlockStatement",
	"EmptyStatement"      : "EmptyStatement",
	"BinaryExpression"    : "BinaryExpression"
	# "MultiplicativeExpression" : "MultiplicativeExpression"
}
|
|
|
|
# One node of the abstract syntax tree.
class SyntaxNode:
	var Type : String
	# Left untyped (Variant) on purpose: holds a scalar, a child node,
	# or an Array mixing scalars and child nodes.
	var Value

	# Converts this node (recursively) into nested arrays of the form
	# [ Type, payload ]. Child nodes inside Value are converted too;
	# anything that is not an object is passed through untouched.
	func to_SExpression():
		var kind = typeof(Value)
		var payload

		if kind == TYPE_ARRAY :
			payload = []
			for item in Value :
				if typeof(item) == TYPE_OBJECT :
					payload.append( item.to_SExpression() )
				else :
					payload.append( item )
		elif kind == TYPE_OBJECT :
			payload = Value.to_SExpression()
		else :
			payload = Value

		return [ Type, payload ]

	# Converts this node (recursively) into a Dictionary with "Type" and
	# "Value" keys, applying the same rules as to_SExpression() to Value.
	func to_Dictionary():
		var kind = typeof(Value)
		var payload

		if kind == TYPE_ARRAY :
			payload = []
			for item in Value :
				if typeof(item) == TYPE_OBJECT :
					payload.append( item.to_Dictionary() )
				else :
					payload.append( item )
		elif kind == TYPE_OBJECT :
			payload = Value.to_Dictionary()
		else :
			payload = Value

		return { "Type" : Type, "Value" : payload }
|
|
|
|
# Recursive-descent parser: pulls tokens from a Tokenizer one at a time
# (single-token lookahead in NextToken) and builds a SyntaxNode tree.
class Parser:
	var TokenizerRef : Tokenizer
	var NextToken : Token

	# Type of the lookahead token, or "" once the tokenizer is exhausted.
	# Lets the parse functions inspect the lookahead without dereferencing
	# a null NextToken at the end of the input.
	func _peek_Type():
		if NextToken == null :
			return ""
		return NextToken.Type

	# Consumes the lookahead token, asserting that it has the expected type,
	# then advances the lookahead. Returns the consumed token.
	func eat(tokenType):
		var currToken = self.NextToken

		assert(currToken != null, "eat: NextToken was null")

		# The message is only formatted when the check actually fails.
		if currToken.Type != tokenType :
			var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
			var assertStr = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
			assert(false, assertStr)

		NextToken = TokenizerRef.next_Token()

		return currToken

	# NumericLiteral
	# : Number
	# ;
	func parse_NumericLiteral():
		var token = eat(TokenType.Number)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )

		return node

	# StringLiteral
	# : String
	# ;
	func parse_StringLiteral():
		var token = eat(TokenType.String)
		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.StringLiteral
		# Drop the surrounding double quotes.
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )

		return node

	# Literal
	# : NumericLiteral
	# : StringLiteral
	# ;
	func parse_Literal():
		match _peek_Type() :
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()

		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")
		return null

	# ParenthesizedExpression
	# : ( Expression )
	# ;
	func parse_ParenthesizedExpression():
		eat(TokenType.ExpressionPStart)

		var expression = parse_Expression()

		eat(TokenType.ExpressionPEnd)

		return expression

	# PrimaryExpression
	# : Literal
	# | ParenthesizedExpression
	# ;
	func parse_PrimaryExpression():
		match _peek_Type() :
			TokenType.ExpressionPStart:
				return parse_ParenthesizedExpression()

		return parse_Literal()

	# MultiplicativeExpression
	# : PrimaryExpression
	# : MultiplicativeExpression MultiplicativeOp PrimaryExpression -> PrimaryExpression MultiplicativeOp ... Literal
	# ;
	func parse_MultiplicativeExpression():
		var parseFn = funcref(self, "parse_PrimaryExpression")

		return parse_BinaryExpression(parseFn, TokenType.MultiplicativeOp)

	# AdditiveExpression
	# : MultiplicativeExpression
	# | AdditiveExpression AdditiveOp MultiplicativeExpression -> MultiplicativeExpression AdditiveOp ... Literal
	# ;
	func parse_AdditiveExpression():
		var parseFn = funcref(self, "parse_MultiplicativeExpression")

		return parse_BinaryExpression(parseFn, TokenType.AdditiveOp)

	# BinaryExpression
	# : MultiplicativeExpression
	# | AdditiveExpression
	# ;
	#
	# Generic left-associative binary-operator loop. parse_fn parses one
	# operand; operatorToken is the token type of the operator at this
	# precedence level. Each node's Value is [ operator text, left, right ].
	func parse_BinaryExpression(parse_fn, operatorToken):
		var left = parse_fn.call_func()

		while _peek_Type() == operatorToken :
			var opToken = eat(operatorToken)
			var right = parse_fn.call_func()

			var \
			nestedNode = SyntaxNode.new()
			nestedNode.Type = SyntaxNodeType.BinaryExpression
			nestedNode.Value = [ opToken.Value, left, right ]

			# The node built so far becomes the left operand of the next one.
			left = nestedNode

		return left

	# Expression
	# : Literal
	# : AdditiveExpression
	# ;
	func parse_Expression():
		return parse_AdditiveExpression()

	# EmptyStatement
	# ;
	func parse_EmptyStatement():
		eat(TokenType.StatementEnd)

		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.EmptyStatement

		return node

	# BlockStatement
	# : { OptStatementList }
	# ;
	func parse_BlockStatement():
		eat(TokenType.StmtBlockStart)

		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.BlockStatement
		# Yields an empty array for "{}"; an unterminated block is reported
		# by the eat() below.
		node.Value = parse_StatementList(TokenType.StmtBlockEnd)

		eat(TokenType.StmtBlockEnd)

		return node

	# ExpressionStatement
	# : Expression
	# ;
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)

		var \
		node = SyntaxNode.new()
		node.Type = SyntaxNodeType.ExpressionStatement
		node.Value = expression

		# Return the wrapping statement node, not the bare expression.
		return node

	# Statement
	# : ExpressionStatement
	# : BlockStatement
	# : EmptyStatement
	# ;
	func parse_Statement():
		match _peek_Type() :
			TokenType.StatementEnd :
				return parse_EmptyStatement()
			TokenType.StmtBlockStart :
				return parse_BlockStatement()

		return parse_ExpressionStatement()

	# StatementList
	# : Statement
	# | StatementList Statement -> Statement ...
	# ;
	#
	# Collects statements until the input is exhausted or the lookahead has
	# type endToken (pass null to read to the end of the input). Returns an
	# empty array when there is nothing to parse.
	func parse_StatementList(endToken):
		var statementList = []

		while NextToken != null && NextToken.Type != endToken :
			statementList.append( parse_Statement() )

		return statementList

	# Program
	# : StatementList
	# : Literal
	# ;
	func parse_Program():
		var \
		node = SyntaxNode.new()
		node.Type = TokenType.Program
		node.Value = parse_StatementList(null)

		return node

	# Parses the text program description into an AST.
	# TokenizerRef must already have been initialized with the source text.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef

		# Prime the single-token lookahead.
		NextToken = TokenizerRef.next_Token()

		return parse_Program()
|
|
|
|
# Shared parser instance; test() uses it to build an AST from GTokenizer's tokens.
var GParser = Parser.new()
|
|
|
|
|
|
|
|
# Node that tout() writes to; looked up once this node is ready.
# NOTE(review): GScene is not defined in this file — presumably an autoload singleton; confirm.
onready var TextOut = GScene.get_node("TextOutput")
|
|
|
|
# Writes text into the TextOut node at its current cursor position.
func tout(text):
	TextOut.insert_text_at_cursor(text)
|
|
|
|
# Test programs run by _ready(): a display name plus the file name that
# test() substitutes into its tests-directory path.
const Tests = {
	"MultiStatement" : {
		"Name" : "Multi-Statement",
		"File" : "1.Multi-Statement.uf"
	},
	"BlockStatement" : {
		"Name" : "Block Statement",
		"File" : "2.BlockStatement.uf"
	},
	"BinaryExpression" : {
		"Name" : "Binary Expression",
		"File" : "3.BinaryExpression.uf"
	}
}
|
|
|
|
# Runs one entry of Tests: loads the program file, parses it and prints the
# resulting AST as an S-expression (JSON, tab-indented) through tout().
#
# entry must provide Name (display name) and File (file name inside the
# tests directory). If the file cannot be opened, a failure line is printed
# and the test is abandoned instead of parsing empty text.
func test(entry):
	var introMessage = "Testing: {Name}\n"
	tout(introMessage.format({"Name" : entry.Name}))

	# The tests directory differs between editor runs and other builds.
	var path
	if Engine.editor_hint :
		path = "res://../Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})

	var file = File.new()
	var openError = file.open(pathFormatted, File.READ)
	if openError != OK :
		var failMessage = "Failed: could not open {path} (error {code})\n"
		tout(failMessage.format({"path" : pathFormatted, "code" : openError}))
		return

	var programDescription = file.get_as_text()
	file.close()

	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)

	var json = JSON.print(ast.to_SExpression(), '\t')

	tout(json + "\n")
	tout("Passed!\n")
|
|
|
|
|
|
# Main entry point: runs every registered test, in declaration order.
func _ready():
	for entry in Tests.values() :
		test(entry)
|