mirror of
https://github.com/Ed94/LangStudies.git
synced 2025-01-22 12:33:45 -08:00
312 lines
6.3 KiB
GDScript
312 lines
6.3 KiB
GDScript
# Root script: extends Node so the `_ready()` callback defined in this file
# serves as the entry point that runs the parser tests.
extends Node
|
|
|
|
# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.
|
|
|
|
# Type-name constants. Every key maps to an identical string so a type stays
# readable when printed or serialized. `Program` tags the root node built by
# the parser; the remaining entries name the lexical token categories.
const TokenType = {
	"Program" : "Program",

	# Comments
	"CommentLine"      : "CommentLine",
	"CommentMultiLine" : "CommentMultiLine",

	# Formatting
	"Whitespace" : "Whitespace",

	# Statements
	"StatementEnd" : "StatementEnd",

	# Literals
	"Number" : "Number",
	"String" : "String"
}
|
|
|
|
# Maps each token type to the regular expression that recognizes it.
#
# The tokenizer walks this table in declaration order and takes the first
# pattern that matches at the start of the remaining source text, so the
# order of the entries is significant.
#
# Every pattern is anchored with `^`. The Number pattern previously was not,
# which made the regex engine scan the whole remaining text for digits even
# though the tokenizer discards any match that does not start at offset 0.
const TokenSpec = {
	TokenType.CommentLine      : "^\/\/.*",
	TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
	TokenType.Whitespace       : "^\\s+",
	TokenType.Number           : "^\\d+",
	TokenType.String           : "^\"[^\"]*\"",
	TokenType.StatementEnd     : "^;"
}
|
|
|
|
# A single lexical token: its category plus the source text that was matched.
class Token:
	var Type  : String  # Token category; the tokenizer stores a TokenSpec key here.
	var Value : String  # The matched source text.

	# Returns this token as a Dictionary with "Type" and "Value" keys.
	func toDict():
		return { "Type" : Type, "Value" : Value }
|
|
|
|
# Splits program source text into Tokens on demand, one per next_Token() call.
class Tokenizer:
	var SrcTxt : String  # Full program source text.
	var Cursor : int     # Index into SrcTxt of the next unread character.

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text.
	#
	# Comments and whitespace are consumed silently. Returns null once the end
	# of the text is reached. If no TokenSpec pattern matches at the cursor, an
	# assertion fires (debug builds) and null is returned.
	func next_Token():
		var regex = RegEx.new()

		# Loop instead of recursing so long runs of comments/whitespace do not
		# deepen the call stack.
		while not reached_EndOfTxt() :
			var srcLeft     = SrcTxt.substr(Cursor)
			var matchedType = null
			var matchedText = ""

			# First pattern (in TokenSpec order) that matches at offset 0 wins.
			for type in TokenSpec :
				regex.compile(TokenSpec[type])

				var result = regex.search(srcLeft)
				if result != null && result.get_start() == 0 :
					matchedType = type
					matchedText = result.get_string()
					break

			if matchedType == null :
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr      = assertStrTmplt.format({"value" : Cursor})
				assert(false, assertStr)
				return null

			Cursor += matchedText.length()

			# Skip comments and whitespace: advance and look for the next token.
			if matchedType == TokenType.CommentLine \
			|| matchedType == TokenType.CommentMultiLine \
			|| matchedType == TokenType.Whitespace :
				continue

			var token = Token.new()
			token.Type  = matchedType
			token.Value = matchedText
			return token

		return null

	# True once every character of SrcTxt has been consumed.
	#
	# Compares against the full length: the previous `length() - 1` bound
	# stopped one character early, dropping a final token (e.g. the `;` in
	# `42;` when the text has no trailing newline).
	func reached_EndOfTxt():
		return Cursor >= SrcTxt.length()
|
|
|
|
# Script-wide tokenizer instance; test() re-initializes it with each test's
# source text before handing it to the parser.
var GTokenizer = Tokenizer.new()
|
|
|
|
|
|
# Names of the AST node kinds the parser assigns to SyntaxNode.Type.
# Each key maps to an identical string so serialized output stays readable.
const SyntaxNodeType = {
	"NumericLiteral"      : "NumericLiteral",
	"StringLiteral"       : "StringLiteral",
	"ExpressionStatement" : "ExpressionStatement"
}
|
|
|
|
# Generic AST node. `Type` names the node kind and `Value` carries its payload:
# a literal value, a single child node, or an Array of child nodes.
class SyntaxNode:
	var Type : String
	var Value # Not specifying a type implicitly declares a Variant type.

	# Converts this node, and any node(s) held in Value, into plain
	# Dictionaries so the tree can be printed as JSON.
	#
	# - Array Value: becomes a Dictionary keyed by element index, each element
	#   converted through its own toDict().
	# - Object Value exposing toDict() (a nested node): converted recursively.
	# - Anything else: stored as-is.
	func toDict():
		var valueDict = Value

		if typeof(Value) == TYPE_ARRAY :
			var dict = {}
			for index in range(Value.size()) :
				dict[index] = Value[index].toDict()
			valueDict = dict
		elif typeof(Value) == TYPE_OBJECT && Value.has_method("toDict") :
			# Needed for wrapper nodes such as ExpressionStatement, whose Value
			# is itself a SyntaxNode.
			valueDict = Value.toDict()

		return { "Type" : Type, "Value" : valueDict }

# Root of the AST. `Body` is the node returned by Parser.parse_StatementList().
class ProgramNode:
	var Type : String
	var Body : Object

	# Returns the program as a Dictionary with "Type" and "Body" keys; Body is
	# converted through its own toDict().
	func toDict():
		return { "Type" : Type, "Body" : Body.toDict() }

# Recursive-descent parser with one token of lookahead (NextToken).
class Parser:
	var TokenizerRef : Tokenizer
	var NextToken    : Token

	# Consumes the lookahead token, asserting that it is of `tokenType`, then
	# pulls the following token from the tokenizer.
	# Returns the consumed token.
	func eat(tokenType):
		var currToken = NextToken

		assert(currToken != null, "eat: NextToken was null")

		# Only build the error message when the token is actually wrong.
		if currToken.Type != tokenType :
			var assertStrTmplt = "eat: Unexpected token: {value}, expected: {type}"
			var assertStr      = assertStrTmplt.format({"value" : currToken.Value, "type" : tokenType})
			assert(false, assertStr)

		NextToken = TokenizerRef.next_Token()

		return currToken

	# Literal
	#	: NumericLiteral
	#	: StringLiteral
	#	;
	#
	# Dispatches on the lookahead token type. Asserts, then returns null, when
	# the lookahead is not a literal.
	func parse_Literal():
		assert(NextToken != null, "parse_Literal: NextToken was null")

		match NextToken.Type :
			TokenType.Number:
				return parse_NumericLiteral()
			TokenType.String:
				return parse_StringLiteral()

		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")
		return null

	# NumericLiteral
	#	: Number
	#	;
	#
	# Value is the token text converted with int().
	func parse_NumericLiteral():
		var token = eat(TokenType.Number)

		var node   = SyntaxNode.new()
		node.Type  = SyntaxNodeType.NumericLiteral
		node.Value = int( token.Value )

		return node

	# StringLiteral
	#	: String
	#	;
	#
	# Value is the token text with the first and last character (the
	# surrounding quotes) removed.
	func parse_StringLiteral():
		var token = eat(TokenType.String)

		var node   = SyntaxNode.new()
		node.Type  = SyntaxNodeType.StringLiteral
		node.Value = token.Value.substr( 1, token.Value.length() - 2 )

		return node

	# Expression
	#	: Literal
	#	;
	#
	func parse_Expression():
		return parse_Literal()

	# ExpressionStatement
	#	: Expression
	#	;
	#
	# Parses an expression followed by a StatementEnd token and returns the
	# ExpressionStatement node wrapping that expression. (Previously the node
	# was built but the bare expression was returned instead.)
	func parse_ExpressionStatement():
		var expression = parse_Expression()
		eat(TokenType.StatementEnd)

		var node   = SyntaxNode.new()
		node.Type  = SyntaxNodeType.ExpressionStatement
		node.Value = expression

		return node

	# Statement
	#	: ExpressionStatement
	#	;
	#
	func parse_Statement():
		return parse_ExpressionStatement()

	# StatementList
	#	: Statement
	#	| StatementList Statement -> Statement ...
	#	;
	#
	# Parses statements until the tokenizer runs out of tokens. An empty
	# program yields a StatementList with an empty Value rather than
	# dereferencing a null lookahead token.
	func parse_StatementList():
		var statementList = []

		while NextToken != null :
			statementList.append( parse_Statement() )

		var node   = SyntaxNode.new()
		node.Type  = "StatementList"
		node.Value = statementList

		return node

	# Program
	#	: StatementList
	#	;
	#
	func parse_Program():
		var node  = ProgramNode.new()
		node.Type = TokenType.Program
		node.Body = parse_StatementList()

		return node

	# Parses the text program description into an AST.
	# `TokenizerRef` must already be initialized with the source text; the
	# first token is fetched here to prime the lookahead.
	# Returns the ProgramNode at the root of the tree.
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef

		NextToken = TokenizerRef.next_Token()

		return parse_Program()
|
|
|
|
# Script-wide parser instance used by test() to build an AST for each test.
var GParser = Parser.new()
|
|
|
|
# Test registry. Each entry supplies a display `Name` and the `File` name of
# the program source that test() loads and parses.
const Tests = {
	"MultiStatement" : {
		"Name" : "Multi-Statement",
		"File" : "1.Multi-Statement.uf"
	}
}
|
|
|
|
# Runs one entry of `Tests`: loads the entry's source file, tokenizes and
# parses it, and prints the resulting AST as tab-indented JSON.
#
# entry: Dictionary with `Name` (display name) and `File` (file name under
#        the tests directory).
#
# If the file cannot be opened, an error is printed and the test is
# abandoned instead of parsing empty text and reporting success.
func test(entry):
	print("Testing: {Name}".format({"Name" : entry.Name}))

	# The tests directory differs between an editor run and a built run.
	# `is_editor_hint()` is the getter for Engine's `editor_hint` property;
	# the property itself is not callable.
	var path
	if Engine.is_editor_hint() :
		path = "res://Tests/{TestName}"
	else :
		path = "res://../Builds/Tests/{TestName}"
	var pathFormatted = path.format({"TestName" : entry.File})

	var file       = File.new()
	var openResult = file.open(pathFormatted, File.READ)
	if openResult != OK :
		printerr("test: Failed to open {Path} (error code {Code})".format({"Path" : pathFormatted, "Code" : openResult}))
		return

	var programDescription = file.get_as_text()
	file.close()

	GTokenizer.init(programDescription)
	var ast = GParser.parse(GTokenizer)

	# Serialize once and print; the result is not needed afterwards.
	print(JSON.print(ast.toDict(), "\t"))
	print("Passed!\n")
|
|
|
|
|
|
# Main Entry point.
|
|
# Main entry point: runs every test registered in `Tests`, in declaration order.
func _ready():
	for entry in Tests.values() :
		test(entry)
|