mirror of
https://github.com/Ed94/LangStudies.git
synced 2024-11-10 12:24:53 -08:00
231 lines
4.9 KiB
GDScript3
231 lines
4.9 KiB
GDScript3
|
extends Object
|
||
|
|
||
|
|
||
|
class_name Lexer
|
||
|
|
||
|
|
||
|
const TokenType : Dictionary = \
|
||
|
{
|
||
|
# Comments
|
||
|
cmt_SL = "Comment Single Line",
|
||
|
cmt_ML = "Comment Multi-Line",
|
||
|
|
||
|
# Formatting
|
||
|
fmt_S = "Formatting String",
|
||
|
|
||
|
# Delimiters
|
||
|
delim_Comma = "Comma Delimiter",
|
||
|
delim_SMR = "Symbol Member Resolution",
|
||
|
|
||
|
# Statements
|
||
|
def_End = "Statement End",
|
||
|
def_BStart = "Block Start",
|
||
|
def_BEnd = "Block End",
|
||
|
def_Var = "Variable Declaration",
|
||
|
def_Class = "Class",
|
||
|
|
||
|
# Iteration
|
||
|
def_While = "While",
|
||
|
def_Do = "Do-While",
|
||
|
def_For = "For",
|
||
|
|
||
|
# Procedures
|
||
|
def_Proc = "Procedure Declaration",
|
||
|
def_Return = "Return",
|
||
|
|
||
|
# Conditional
|
||
|
def_If = "If Statement",
|
||
|
def_Else = "Else Statement",
|
||
|
|
||
|
# Expressions
|
||
|
expr_PStart = "Parenthesis Start",
|
||
|
expr_PEnd = "Parenthesis End",
|
||
|
expr_SBStart = "Bracket Start",
|
||
|
expr_SBEnd = "Bracket End",
|
||
|
expr_New = "New Expression",
|
||
|
expr_Super = "Super Expression",
|
||
|
expr_Extends = "Class Extension",
|
||
|
|
||
|
# Operators
|
||
|
|
||
|
# Logical
|
||
|
op_Relational = "Relational",
|
||
|
op_Equality = "Equality",
|
||
|
op_LAnd = "Logical And",
|
||
|
op_LOr = "Logical Or",
|
||
|
op_LNot = "Logical Not",
|
||
|
|
||
|
# Arithmetic
|
||
|
op_CAssign = "ComplexAssignment",
|
||
|
op_Assign = "Assignment",
|
||
|
op_Additive = "AdditiveOperator",
|
||
|
op_Multiplicative = "MultiplicativeOperator",
|
||
|
|
||
|
# Literals
|
||
|
literal_BTrue = "True",
|
||
|
literal_BFalse = "False",
|
||
|
literal_Number = "Number",
|
||
|
literal_String = "String",
|
||
|
literal_Null = "Null Value",
|
||
|
|
||
|
# Symbols
|
||
|
sym_This = "This Reference",
|
||
|
sym_Identifier = "User Identifier",
|
||
|
}
|
||
|
|
||
|
const Spec : Dictionary = \
|
||
|
{
|
||
|
# Comments
|
||
|
TokenType.cmt_SL : "^\\/\\/.*",
|
||
|
TokenType.cmt_ML : "^\\/\\*[\\s\\S]*?\\*\\/",
|
||
|
|
||
|
# Formatting
|
||
|
TokenType.fmt_S : "^\\s+",
|
||
|
|
||
|
# Delimiters
|
||
|
TokenType.delim_Comma : "^,",
|
||
|
TokenType.delim_SMR : "^\\.",
|
||
|
|
||
|
# Statements
|
||
|
TokenType.def_End : "^;",
|
||
|
TokenType.def_BStart : "^{",
|
||
|
TokenType.def_BEnd : "^}",
|
||
|
TokenType.def_Var : "^\\blet\\b",
|
||
|
TokenType.def_Class : "^\\bclass\\b",
|
||
|
|
||
|
# Iteration
|
||
|
TokenType.def_While : "^\\bwhile\\b",
|
||
|
TokenType.def_Do : "^\\bdo\\b",
|
||
|
TokenType.def_For : "^\\bfor\\b",
|
||
|
|
||
|
# Procedures
|
||
|
TokenType.def_Proc : "^\\bdef\\b",
|
||
|
TokenType.def_Return : "^\\breturn\\b",
|
||
|
|
||
|
# Conditional
|
||
|
TokenType.def_If : "^\\bif\\b",
|
||
|
TokenType.def_Else : "^\\belse\\b",
|
||
|
|
||
|
# Expressions
|
||
|
TokenType.expr_PStart : "^\\(",
|
||
|
TokenType.expr_PEnd : "^\\)",
|
||
|
TokenType.expr_SBStart : "^\\[",
|
||
|
TokenType.expr_SBEnd : "^\\]",
|
||
|
TokenType.expr_New : "^\\bnew\\b",
|
||
|
TokenType.expr_Super : "^\\bsuper\\b",
|
||
|
TokenType.expr_Extends : "^\\bextends\\b",
|
||
|
|
||
|
#Operators
|
||
|
|
||
|
# Logical
|
||
|
TokenType.op_Relational : "^[>\\<]=?",
|
||
|
TokenType.op_Equality : "^[=!]=",
|
||
|
TokenType.op_LAnd : "^&&",
|
||
|
TokenType.op_LOr : "^\\|\\|",
|
||
|
TokenType.op_LNot : "^!",
|
||
|
|
||
|
# Arithmetic
|
||
|
TokenType.op_CAssign : "^[*\\/\\+\\-]=",
|
||
|
TokenType.op_Assign : "^=",
|
||
|
TokenType.op_Additive : "^[+\\-]",
|
||
|
TokenType.op_Multiplicative : "^[*\\/]",
|
||
|
|
||
|
# Literals
|
||
|
TokenType.literal_BTrue : "^\\btrue\\b",
|
||
|
TokenType.literal_BFalse : "^\\bfalse\\b",
|
||
|
TokenType.literal_Number : "^\\d+",
|
||
|
TokenType.literal_String : "^\"[^\"]*\"",
|
||
|
TokenType.literal_Null : "^\\bnull\\b",
|
||
|
|
||
|
# Symbols
|
||
|
TokenType.sym_This : "^\\bthis\\b",
|
||
|
TokenType.sym_Identifier : "^\\w+"
|
||
|
}
|
||
|
|
||
|
|
||
|
class Token:
|
||
|
var Type : String
|
||
|
var Value : String
|
||
|
|
||
|
|
||
|
var SourceText : String
|
||
|
var Cursor : int
|
||
|
var SpecRegex : Dictionary
|
||
|
var Tokens : Array
|
||
|
var TokenIndex : int = 0
|
||
|
|
||
|
|
||
|
func compile_regex():
|
||
|
for type in TokenType.values() :
|
||
|
var \
|
||
|
regex = RegEx.new()
|
||
|
regex.compile( Spec[type] )
|
||
|
|
||
|
SpecRegex[type] = regex
|
||
|
# SpecRegex[type].compile( Spec[type] )
|
||
|
|
||
|
func init(programSrcText):
|
||
|
SourceText = programSrcText
|
||
|
Cursor = 0
|
||
|
TokenIndex = 0
|
||
|
|
||
|
if SpecRegex.size() == 0 :
|
||
|
compile_regex()
|
||
|
|
||
|
tokenize()
|
||
|
|
||
|
func next_Token():
|
||
|
|
||
|
var nextToken = null
|
||
|
|
||
|
if Tokens.size() > TokenIndex :
|
||
|
nextToken = Tokens[TokenIndex]
|
||
|
TokenIndex += 1
|
||
|
|
||
|
return nextToken
|
||
|
|
||
|
func reached_EndOfText():
|
||
|
return Cursor >= SourceText.length()
|
||
|
|
||
|
func tokenize():
|
||
|
Tokens.clear()
|
||
|
|
||
|
while reached_EndOfText() == false :
|
||
|
var srcLeft = SourceText.substr(Cursor)
|
||
|
var token = Token.new()
|
||
|
|
||
|
var error = true
|
||
|
for type in TokenType.values() :
|
||
|
var result = SpecRegex[type].search( srcLeft )
|
||
|
if result == null || result.get_start() != 0 :
|
||
|
continue
|
||
|
|
||
|
# Skip Comments
|
||
|
if type == TokenType.cmt_SL || type == TokenType.cmt_ML :
|
||
|
Cursor += result.get_string().length()
|
||
|
error = false
|
||
|
break
|
||
|
|
||
|
# Skip Whitespace
|
||
|
if type == TokenType.fmt_S :
|
||
|
var addVal = result.get_string().length()
|
||
|
|
||
|
Cursor += addVal
|
||
|
error = false
|
||
|
break
|
||
|
|
||
|
token.Type = type
|
||
|
token.Value = result.get_string()
|
||
|
Cursor += ( result.get_string().length() )
|
||
|
|
||
|
Tokens.append( token )
|
||
|
|
||
|
error = false
|
||
|
break;
|
||
|
|
||
|
if error :
|
||
|
var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
|
||
|
var assertStr = assertStrTmplt.format({"value" : Cursor, "txt" : srcLeft})
|
||
|
assert(true != true, assertStr)
|
||
|
return
|