LangStudies/Editor/Lectures/Lecture.3..gd

265 lines
5.1 KiB
GDScript3
Raw Normal View History

2022-07-06 14:16:06 -04:00
extends Node
# This closely follows the source provided in the lectures.
# Later on, after the lectures are complete or when I deem it
# necessary, there will be heavy refactors.
# Names of the node and token kinds used by the tokenizer and parser.
# Every key maps to a string with the same spelling as the key itself.
const TokenType = {
	"Program" : "Program",

	# Comments
	"CommentLine" : "CommentLine",
	"CommentMultiLine" : "CommentMultiLine",

	# Formatting
	"Whitespace" : "Whitespace",

	# Literals
	"Number" : "Number",
	"String" : "String"
}
# Regular expression used to recognise each token kind.
# The tokenizer runs these against the text remaining after its cursor, so
# every pattern is anchored with '^' to only match at the current position.
# (Number previously lacked the anchor, which let digits found later in the
# text, e.g. inside a string literal, be reported as the next token.)
const TokenSpec = \
{
	TokenType.CommentLine : "^\/\/.*",
	TokenType.CommentMultiLine : "^\/\\*[\\s\\S]*?\\*\/",
	TokenType.Whitespace : "^\\s+",
	TokenType.Number : "^\\d+",
	TokenType.String : "^\"[^\"]*\""
}
# A single token: the kind that matched and the text that was matched.
class Token:
	var Type : String
	var Value : String

	# Returns the token's fields as a Dictionary with "Type" and "Value" keys.
	func toDict():
		return { "Type" : self.Type, "Value" : self.Value }
# Splits program source text into tokens, handing out one token per call to
# next_Token(). Comments and whitespace are consumed but never returned.
class Tokenizer:
	var SrcTxt : String
	var Cursor : int

	# Sets up the tokenizer with the program source text.
	func init(programSrcText):
		SrcTxt = programSrcText
		Cursor = 0

	# Provides the next token in the source text.
	# Returns a Token, or null once the end of the text has been reached.
	# Asserts (and returns null when asserts are disabled) if the text at the
	# cursor does not match any pattern in TokenSpec.
	func next_Token():
		var regex = RegEx.new()

		# Loop instead of recursing so long runs of comments / whitespace
		# cannot grow the call stack.
		while !self.reached_EndOfTxt():
			var srcLeft = self.SrcTxt.substr(self.Cursor)
			var matchedType = null
			var lexeme = ""

			for type in TokenSpec:
				regex.compile(TokenSpec[type])
				var result = regex.search(srcLeft)
				if result == null:
					continue
				# Only accept a match that starts exactly at the cursor and
				# consumes at least one character; anything else would skip
				# over unread text or stall the cursor.
				if result.get_start() != 0 || result.get_string().empty():
					continue

				matchedType = type
				lexeme = result.get_string()
				break

			if matchedType == null:
				var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value}"
				var assertStr = assertStrTmplt.format({"value" : self.Cursor})
				assert(false, assertStr)
				return null

			# Advance past the whole lexeme. The previous "length - 1" left the
			# cursor on the token's last character, so any text following a
			# token (trailing whitespace, a newline, ...) was mis-tokenized.
			self.Cursor += lexeme.length()

			# Skip Comments and Whitespace
			if matchedType == TokenType.CommentLine \
			|| matchedType == TokenType.CommentMultiLine \
			|| matchedType == TokenType.Whitespace :
				continue

			var token = Token.new()
			token.Type = matchedType
			token.Value = lexeme
			return token

		return null

	# True once the cursor has moved past the last character of the source text.
	func reached_EndOfTxt():
		return self.Cursor >= self.SrcTxt.length()
# Tokenizer instance that test() initialises and passes to the parser.
var GTokenizer = Tokenizer.new()
# AST node holding a kind name and a single value.
class SyntaxNode:
	var Type : String
	var Value # Not specifying a type implicitly declares a Variant type.

	# Returns the node's fields as a Dictionary with "Type" and "Value" keys.
	func toDict():
		return { "Type" : self.Type, "Value" : self.Value }
# Root AST node: a kind name plus the node that makes up the program body.
class ProgramNode:
	var Type : String
	var Body : Object

	# Returns a Dictionary with "Type" and "Body" keys; "Body" holds whatever
	# the body object's own toDict() returns.
	func toDict():
		var bodyDict = self.Body.toDict()
		return { "Type" : self.Type, "Body" : bodyDict }
# Recursive descent parser that builds an AST from the tokens of a Tokenizer.
# NextToken always holds the one token of lookahead the parse_* functions
# inspect before deciding what to consume.
class Parser:
	var TokenizerRef : Tokenizer
	var NextToken : Token

	# Consumes the lookahead token, asserting that it is of kind tokenType,
	# then pulls the following token from the tokenizer into NextToken.
	# Returns the token that was consumed.
	func eat(tokenType):
		var consumed = self.NextToken
		assert(consumed != null, "eat: NextToken was null")

		var messageTemplate = "eat: Unexpected token: {value}, expected: {type}"
		var message = messageTemplate.format({"value" : consumed.Value, "type" : tokenType})
		assert(consumed.Type == tokenType, message)

		self.NextToken = self.TokenizerRef.next_Token()
		return consumed

	# Literal
	#	: NumericLiteral
	#	: StringLiteral
	#	;
	func parse_Literal():
		var kind = self.NextToken.Type

		if kind == TokenType.Number :
			return parse_NumericLiteral()
		if kind == TokenType.String :
			return parse_StringLiteral()

		assert(false, "parse_Literal: Was not able to detect valid literal type from NextToken")

	# NumericLiteral
	#	: Number
	#	;
	#
	func parse_NumericLiteral():
		var numberToken = self.eat(TokenType.Number)

		var node = SyntaxNode.new()
		node.Type = TokenType.Number
		node.Value = int( numberToken.Value )
		return node

	# StringLiteral
	#	: String
	#	;
	#
	func parse_StringLiteral():
		var stringToken = self.eat(TokenType.String)
		var quoted = stringToken.Value

		var node = SyntaxNode.new()
		node.Type = TokenType.String
		# Drop the first and last character (the surrounding quotes).
		node.Value = quoted.substr( 1, quoted.length() - 2 )
		return node

	# Program
	#	: Literal
	#	;
	#
	func parse_Program():
		var body = parse_Literal()

		var node = ProgramNode.new()
		node.Type = TokenType.Program
		node.Body = body
		return node

	# Parses the text program description into an AST.
	# Stores the given tokenizer, primes the lookahead with its first token
	# and returns the node produced by parse_Program().
	func parse(TokenizerRef):
		self.TokenizerRef = TokenizerRef
		self.NextToken = TokenizerRef.next_Token()
		return parse_Program()
# Parser instance that test() uses to build the AST.
var GParser = Parser.new()
# Source text of the program that test() tokenizes and parses; assigned in _ready().
var ProgramDescription : String
# Parses the current ProgramDescription and prints the resulting AST as
# tab-indented JSON.
func test():
	GTokenizer.init(ProgramDescription)

	var tree = GParser.parse(GTokenizer).toDict()
	var json = JSON.print(tree, "\t")
	print(json)
2022-07-06 14:16:06 -04:00
# Stores the given source text in ProgramDescription and runs test() on it.
func _run_sample(source):
	ProgramDescription = source
	test()

# Main Entry point.
# Runs each sample program through test(), one after another.
func _ready():
	# Numerical test
	_run_sample("47")

	# String Test
	_run_sample("\"hello\"")

	# Whitespace test
	_run_sample(" \"we got past whitespace\" ")

	# Comment Single Test
	_run_sample("""
// Testing a comment
\"hello sir\"
""")

	# Comment Multi-Line Test
	_run_sample("""
/**
*
* Testing a comment
*/
\"may I have some grapes\"
""")

	# Multi-statement test
	_run_sample("""
// Testing a comment
\"hello sir\";
/**
*
* Testing a comment
*/
\"may I have some grapes\";
""")
# Called every frame. 'delta' is the elapsed time since the previous frame.
#func _process(delta):
# pass