mirror of
				https://github.com/Ed94/LangStudies.git
				synced 2025-10-31 06:50:56 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			1070 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			GDScript
		
	
	
	
	
	
			
		
		
	
	
			1070 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			GDScript
		
	
	
	
	
	
| extends Object
 | |
| 
 | |
| 
 | |
| # Simple Regular Expressions
 | |
| # This is a "high-level" language and transpiler for regex
 | |
| # That makes it easier to write out and read
 | |
| # than the original notation or syntax.
 | |
| # 
 | |
| # The main interface function is transpile( <string> )
 | |
| # Which can take any valid string from gdscript.
 | |
| 
 | |
| 
 | |
| # Lexer
 | |
| 
 | |
# Token categories produced by the lexer.
# Each value is the label stored in Token.Type and is also the key used to look
# up the matching pattern in Spec. tokenize() tries the categories in the order
# they are listed here (it iterates TokenType.values()).
const TokenType : Dictionary = \
{
	# Whitespace between tokens; matched and skipped, never stored as a token.
	fmt_S = "Formatting",
	
	# `start` / `end` keywords.
	str_start = "String Start",
	str_end   = "String End",
	
	# Backslash-escaped parentheses, i.e. literal "(" and ")".
	glyph_bPOpen  = "\\(",
	glyph_bPClose = "\\)",

	# Unescaped parentheses used for grouping and operator arguments.
	expr_PStart = "Parenthesis Start",
	expr_PEnd   = "Parenthesis End",

	# `-` range separator and the keyword glyph classes.
	glyph_between = "Glyphs Between",
	glyph_digit   = "Digit",
	glyph_inline  = "inline",
	glyph_space   = "Space",
	glyph_word    = "Word",
	glyph_ws      = "Whitespace",

	# Backslash-escaped punctuation that would otherwise be an operator.
	glyph_dash    = "-",
	glyph_dot     = ". dot",
	glyph_excla   = "! Mark",
	glyph_vertS   = "\\|",
	glyph_dQuote  = "\"",

	# Operators.
	op_lazy   = "Lazy Operator",
	op_look   = "Lookahead",
	op_not    = "Not Operator",
	op_repeat = "Repeating Operator",
	op_union  = "Union Operator",

	# `backref` and `set` keywords.
	ref     = "Backreference Group",
	set     = "Set",

	# Double-quoted string literal.
	string = "String",
	
	# Fallback: any single non-whitespace character.
	glyph = "Glyph",
}
 | |
| 
 | |
# Regular expression source for every token category, keyed by TokenType value.
# Every pattern is anchored with `^` because tokenize() matches against the
# remaining source text starting at the cursor.
# NOTE: the order of entries here does not decide matching priority; tokenize()
# walks TokenType.values(), so priority follows the order of TokenType.
const Spec : Dictionary = \
{
	TokenType.fmt_S : "^\\s",
	
	TokenType.str_start : "^\\bstart\\b",
	TokenType.str_end   : "^\\bend\\b",

	# A double quote, any run of non-quote characters, and a closing double quote.
	TokenType.string : "^\"[^\"]*\"",
	
	# A backslash followed by a parenthesis.
	TokenType.glyph_bPOpen  : "^\\\\\\(",
	TokenType.glyph_bPClose : "^\\\\\\)",

	TokenType.expr_PStart : "^\\(",
	TokenType.expr_PEnd   : "^\\)",

	TokenType.glyph_between : "^\\-",
	TokenType.glyph_digit   : "^\\bdigit\\b",
	TokenType.glyph_inline  : "^\\binline\\b",
	TokenType.glyph_space   : "^\\bspace\\b",
	TokenType.glyph_word    : "^\\bword\\b",
	TokenType.glyph_ws      : "^\\bwhitespace\\b",

	# Operators written with a leading dot: `.lazy`, `.repeat`.
	TokenType.op_lazy   : "^\\.\\blazy\\b",
	TokenType.op_repeat : "^\\.\\brepeat\\b",

	# A backslash followed by the punctuation character.
	TokenType.glyph_dash    : "^\\\\\\-",
	TokenType.glyph_dot     : "^\\\\\\.",
	TokenType.glyph_excla   : "^\\\\\\!",
	TokenType.glyph_vertS   : "^\\\\\\|",
	TokenType.glyph_dQuote  : "^\\\\\"",

	TokenType.op_look   : "^\\blook\\b",
	TokenType.op_not    : "^\\!",
	TokenType.op_union  : "^\\|",

	TokenType.ref       : "^\\bbackref\\b",
	TokenType.set       : "^\\bset\\b",

	# Any single non-whitespace character.
	TokenType.glyph     : "^[^\\s]"
}
 | |
| 
 | |
| 
 | |
# A single lexeme produced by tokenize().
class Token:
	# One of the TokenType values.
	var Type  : String
	# The exact source text that matched the token's pattern.
	var Value : String
 | |
| 
 | |
| 
 | |
# Text currently being tokenized (set by init()).
var SourceText : String
# Offset into SourceText of the first character not yet tokenized.
var Cursor     : int
# Compiled RegEx objects keyed by TokenType value (filled by compile_regex()).
var SpecRegex  : Dictionary
# Tokens produced by tokenize(), in source order.
var Tokens     : Array
# Index into Tokens of the token next_Token() will hand out next.
var TokenIndex : int = 0
 | |
| 
 | |
| 
 | |
# Compiles the pattern of every token category in Spec and caches the resulting
# RegEx objects in SpecRegex, keyed by the TokenType value.
func compile_regex():
	for type in TokenType.values() :
		var regex  = RegEx.new()
		var status = regex.compile( Spec[type] )

		# A pattern that fails to compile would otherwise only show up later as a
		# token category that silently never matches.
		assert(status == OK, "compile_regex: Invalid pattern for token type: " + type)

		SpecRegex[type] = regex
 | |
| 
 | |
# Resets the lexer state for `programSrcText` and tokenizes it.
# The token patterns are compiled on the first call only; later calls reuse
# the cached RegEx objects in SpecRegex.
func init(programSrcText):
	if SpecRegex.size() == 0 :
		compile_regex()

	TokenIndex = 0
	Cursor     = 0
	SourceText = programSrcText

	tokenize()
 | |
| 
 | |
# Returns the token at TokenIndex and advances the index.
# Returns null once every token has been handed out.
func next_Token():
	if TokenIndex >= Tokens.size() :
		return null

	var current = Tokens[TokenIndex]
	TokenIndex += 1

	return current
 | |
| 
 | |
# True when the cursor has moved past the last character of SourceText.
func reached_EndOfText():
	var textLength = SourceText.length()
	return textLength <= Cursor
 | |
| 
 | |
# Splits SourceText into tokens and stores them in Tokens.
# At each cursor position the token categories are tried in TokenType order and
# the first pattern that matches at offset 0 of the remaining text wins.
# Whitespace (fmt_S) advances the cursor without producing a token.
# If no pattern matches, an assertion reports the position and tokenizing stops.
func tokenize():
	Tokens.clear()

	while not reached_EndOfText() :
		var remaining = SourceText.substr(Cursor)
		var matched   = false

		for type in TokenType.values() :
			var hit = SpecRegex[type].search( remaining )
			if hit == null || hit.get_start() != 0 :
				continue

			var lexeme = hit.get_string()

			Cursor += lexeme.length()
			matched = true

			# Whitespace only moves the cursor; everything else becomes a token.
			if type != TokenType.fmt_S :
				var token   = Token.new()
				token.Type  = type
				token.Value = lexeme

				Tokens.append( token )

			break

		if not matched :
			var assertStrTmplt = "next_Token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
			var assertStr      = assertStrTmplt.format({"value" : Cursor, "txt" : remaining})
			assert(false, assertStr)
			return
 | |
| 
 | |
| # End : Lexer
 | |
| 
 | |
| 
 | |
| 
 | |
| # Parser
 | |
| 
 | |
# A node of the parsed expression tree.
class ASTNode:
	# One of the NodeType values.
	var Type  : String
	# Left untyped (Variant): the parser stores a String, another ASTNode,
	# an Array of nodes, or leaves it null depending on the node type.
	var Value

	# Serializes every entry of `array`, recursing into nested arrays.
	# Objects are serialized by binding `fn_objSerializer` (a FuncRef with its
	# function name already set) to the object and calling it; any other value
	# is copied as is.
	func array_Serialize(array, fn_objSerializer) :
		var serialized = []

		for item in array :
			var kind = typeof(item)

			if kind == TYPE_OBJECT :
				fn_objSerializer.set_instance(item)
				serialized.append( fn_objSerializer.call_func() )
			elif kind == TYPE_ARRAY :
				serialized.append( array_Serialize( item, fn_objSerializer ) )
			else :
				serialized.append( item )

		return serialized

	# Returns the node as a nested array of the form [ Type, Value ], where
	# child nodes are converted the same way.
	func to_SExpression():
		var kind = typeof(Value)

		if kind == TYPE_ARRAY :
			var serializer = FuncRef.new()
			serializer.set_function("to_SExpression")

			return [ Type, array_Serialize( Value, serializer ) ]

		if kind == TYPE_OBJECT :
			return [ Type, Value.to_SExpression() ]

		return [ Type, Value ]

	# Returns the node as a Dictionary with the keys "Type" and "Value", where
	# child nodes are converted the same way.
	func to_Dictionary():
		var payload = Value
		var kind    = typeof(Value)

		if kind == TYPE_ARRAY :
			var serializer = FuncRef.new()
			serializer.set_function("to_Dictionary")

			payload = array_Serialize( Value, serializer )
		elif kind == TYPE_OBJECT :
			payload = Value.to_Dictionary()

		return { "Type" : Type, "Value" : payload }
 | |
| 
 | |
# Labels stored in ASTNode.Type by the parser and matched on by the transpiler.
const NodeType = \
{
	# A sequence of nodes.
	expression = "Expression",

	# Structural nodes and operators.
	between = "Glyphs Between Set",
	capture = "Capture Group",
	lazy    = "Lazy",
	look    = "Lookahead",
	op_not  = "Not Operator",
	ref     = "Backreference Group",
	repeat  = "Repeat",
	set     = "Set",
	union   = "Union",

	# Leaf nodes.
	digit         = "Digit",
	inline        = "Any Inline",
	space         = "Space",
	word          = "Word",
	whitespace    = "Whitespace",
	string        = "String",
	str_start     = "String Start",
	str_end       = "String End",

	glyph = "Glyph",
}
 | |
| 
 | |
| 
 | |
# Parser lookahead: the token eat() will consume next.
# It is null once next_Token() has run out of tokens.
var NextToken   : Token
 | |
| 
 | |
| # --------------------------------------------------------------------- HELPERS
 | |
| 
 | |
# Consumes the lookahead token, which must be of type `tokenType`, and loads
# the following token into NextToken.
# Asserts when there is no lookahead token or when its type differs.
# Returns the consumed token.
func eat(tokenType):
	var consumed = NextToken

	assert(consumed != null, "eat: NextToken was null")

	var details   = {"value" : consumed.Value, "type" : tokenType}
	var assertStr = "eat: Unexpected token: {value}, expected: {type}".format(details)

	assert(consumed.Type == tokenType, assertStr)

	NextToken = next_Token()

	return consumed
 | |
| 
 | |
# True when `glyph.Type` is one of the glyph categories listed below.
# `glyph` defaults to the lookahead token; any object with a `Type` property
# can be passed (parse_Set passes an ASTNode).
# Note that TokenType.glyph_space is not part of the list.
func is_Glyph(glyph = NextToken) :
	var glyphTypes = \
	[
		TokenType.glyph,
		TokenType.glyph_digit,
		TokenType.glyph_inline,
		TokenType.glyph_word,
		TokenType.glyph_ws,
		TokenType.glyph_dash,
		TokenType.glyph_dot,
		TokenType.glyph_excla,
		TokenType.glyph_vertS,
		TokenType.glyph_bPOpen,
		TokenType.glyph_bPClose,
		TokenType.glyph_dQuote,
	]

	return glyphTypes.has(glyph.Type)
 | |
| 
 | |
# True when the lookahead token is a glyph (see is_Glyph) or a string literal.
func is_GlyphOrStr() :
	if is_Glyph() :
		return true

	return NextToken.Type == TokenType.string
 | |
| 	
 | |
# True when the lookahead token's text is exactly two characters long and
# starts with a backslash.
# NOTE(review): the previous form listed the digits "0".."9" as separate match
# branches that each used `continue`, which in a GDScript match falls through
# to the following patterns and therefore always reached the wildcard branch.
# The second character never influenced the result. Confirm whether only
# backslash + digit was meant to qualify.
func is_GroupToken() :
	var text = NextToken.Value

	return text.length() == 2 && text[0] == "\\"
 | |
| 	
 | |
# True when the lookahead token's text starts with a digit.
func is_Number() :
	var digitRegex = RegEx.new()
	digitRegex.compile("^\\d")

	var found = digitRegex.search(NextToken.Value)

	return found != null
 | |
| 	
 | |
# True when the lookahead token's text is one of the regex metacharacters
# ^ $ * [ ] ? which parse_Glyph() must escape to keep them literal.
# Always returns a bool (the fall-through case used to return null).
func is_RegExToken() :
	match NextToken.Value :
		"^", "$", "*", "[", "]", "?" :
			return true

	return false
 | |
| 
 | |
| # --------------------------------------------------------------------- HELPERS
 | |
| 
 | |
#   > Union
# Union
#   : Expression | Union
#   | Expression
#   ;
# Parses one expression and, when it is followed by `|`, the alternatives after
# it. Returns the plain expression node when there is no `|`, otherwise a Union
# node whose Value is [ left expression, rest of the union ].
# `endToken` is forwarded to parse_Expression().
func parse_OpUnion(endToken):
	var left = parse_Expression(endToken)

	var hasAlternative = NextToken != null && NextToken.Type == TokenType.op_union
	if not hasAlternative :
		return left

	eat(TokenType.op_union)

	var unionNode   = ASTNode.new()
	unionNode.Type  = NodeType.union
	unionNode.Value = [ left, parse_OpUnion(endToken) ]

	return unionNode
 | |
| 
 | |
#   > Union
# Expression
#   : EVERYTHING (Almost)
#   ;
# Parses a run of tokens into an Expression node whose Value is the array of
# parsed child nodes. Parsing stops at the end of the tokens, at a `|`, or at a
# token of type `endToken` (pass null for no end token).
# A token that no rule here can start (for example a bare `-`, a `.lazy` that
# does not follow `.repeat(...)`, or a `)` without a matching `(`) raises an
# assertion and ends the expression; previously such a token was never consumed
# and the loop never terminated.
func parse_Expression(endToken):
	var \
	node       = ASTNode.new()
	node.Type  = NodeType.expression
	node.Value = []

	while NextToken != null && NextToken.Type != TokenType.op_union :
		if endToken != null && NextToken.Type == endToken :
			break

		match NextToken.Type :
			TokenType.str_start :
				node.Value.append( parse_StrStart() )

			TokenType.str_end :
				node.Value.append( parse_StrEnd() )

			TokenType.expr_PStart :
				node.Value.append( parse_CaptureGroup() )

			TokenType.glyph :
				node.Value.append( parse_Glyph() )

			TokenType.glyph_digit :
				node.Value.append( parse_GlyphDigit() )

			TokenType.glyph_inline :
				node.Value.append( parse_GlyphInline() )

			TokenType.glyph_space :
				node.Value.append( parse_GlyphSpace() )

			TokenType.glyph_word :
				node.Value.append( parse_GlyphWord() )

			TokenType.glyph_ws :
				node.Value.append( parse_GlyphWhitespace() )


			TokenType.glyph_dash :
				node.Value.append( parse_GlyphDash() )

			TokenType.glyph_dot :
				node.Value.append( parse_GlyphDot() )

			TokenType.glyph_excla :
				node.Value.append( parse_GlyphExclamation() )

			TokenType.glyph_vertS :
				node.Value.append( parse_GlyphVertS() )

			TokenType.glyph_bPOpen :
				node.Value.append( parse_Glyph_bPOpen() )

			TokenType.glyph_bPClose :
				node.Value.append( parse_Glyph_bPClose() )

			TokenType.glyph_dQuote :
				node.Value.append( parse_Glyph_DQuote() )


			TokenType.op_look :
				node.Value.append( parse_OpLook() )

			TokenType.op_not :
				node.Value.append( parse_OpNot() )

			TokenType.op_repeat:
				node.Value.append( parse_OpRepeat() )

			TokenType.ref :
				node.Value.append( parse_Backreference() )

			TokenType.set :
				node.Value.append( parse_Set() )

			TokenType.string :
				node.Value.append( parse_String() )

			_ :
				# Nothing above consumes this token, so looping again would see
				# the same token forever. Report it and stop the expression.
				var assertStrTmplt = "parse_Expression: Unexpected token: {value}"
				var assertStr      = assertStrTmplt.format({"value" : NextToken.Value})
				assert(false, assertStr)
				return node

	return node
 | |
| 
 | |
#   > Expression
# Consumes a `start` token and returns a String Start node (Value is left unset).
func parse_StrStart():
	eat(TokenType.str_start)

	var startNode  = ASTNode.new()
	startNode.Type = NodeType.str_start

	return startNode
 | |
| 
 | |
#   > Expression
# Consumes an `end` token and returns a String End node (Value is left unset).
func parse_StrEnd():
	eat(TokenType.str_end)

	var endNode  = ASTNode.new()
	endNode.Type = NodeType.str_end

	return endNode
 | |
| 
 | |
#   > Expression
# Between
#   : glyph
#   | glyph - glyph
#   ;
# Parses one glyph and, when a `-` follows, a range.
# Returns the glyph node itself when no `-` follows (or the input ends), and
# otherwise a Between node whose Value holds the lower bound and, if a glyph
# follows the `-`, the upper bound. The upper bound may be missing
# (for example `1-`), in which case Value has a single entry.
# `quantifier` is forwarded to parse_Glyph() so that plain glyphs are read as a
# run of characters (multi-digit numbers) instead of a single character.
# The `digit` and `word` classes are not accepted as the first glyph; the result
# is null when the lookahead token is none of the types handled below.
func parse_Between(quantifier : bool = false):
	var glyph

	match NextToken.Type :
		TokenType.glyph :
			glyph = parse_Glyph(quantifier)
		TokenType.glyph_inline :
			glyph = parse_GlyphInline()
		TokenType.glyph_ws :
			glyph = parse_GlyphWhitespace()
		TokenType.glyph_dash :
			glyph = parse_GlyphDash()
		TokenType.glyph_dot :
			glyph = parse_GlyphDot()
		TokenType.glyph_excla :
			glyph = parse_GlyphExclamation()
		TokenType.glyph_vertS :
			glyph = parse_GlyphVertS()
		TokenType.glyph_bPOpen :
			glyph = parse_Glyph_bPOpen()
		TokenType.glyph_bPClose :
			glyph = parse_Glyph_bPClose()
		TokenType.glyph_dQuote :
			glyph = parse_Glyph_DQuote()

	# NextToken is null when the glyph was the last token of the input.
	if NextToken == null || NextToken.Type != TokenType.glyph_between :
		return glyph

	eat(TokenType.glyph_between)

	var \
	node       = ASTNode.new()
	node.Type  = NodeType.between
	node.Value = [ glyph ]

	# The upper bound is optional; guard against the `-` being the last token.
	if NextToken != null && is_Glyph() :
		node.Value.append( parse_Glyph(quantifier) )

	return node
 | |
| 
 | |
#   > Expression
# CaptureGroup
#   : ( OpUnion )
#   ;
# Parses a parenthesized union and returns a Capture Group node whose Value is
# the parsed union / expression node.
func parse_CaptureGroup():
	eat(TokenType.expr_PStart)

	var groupNode  = ASTNode.new()
	groupNode.Type = NodeType.capture

	var inner = parse_OpUnion(TokenType.expr_PEnd)
	groupNode.Value = inner

	eat(TokenType.expr_PEnd)

	return groupNode
 | |
| 
 | |
#   > Expression
#   > Between
# Glyph
#   : glyph
#   ;
# Builds a Glyph node from plain glyph tokens, escaping characters that are
# special in the generated regex ("/" and the characters is_RegExToken()
# recognizes).
# With `numerical` false exactly one glyph token is consumed; with `numerical`
# true every consecutive glyph token is consumed and concatenated, which is how
# multi-digit numbers are read.
# Returns a node with an empty Value when the lookahead token is not a glyph.
func parse_Glyph(numerical = false):
	var \
	node       = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = ""

	# NextToken becomes null after the last token is eaten, so it must be
	# checked before its Type is read on every pass.
	while NextToken != null && NextToken.Type == TokenType.glyph :
		if NextToken.Value == "/" :
			node.Value += "\\/"
		elif is_RegExToken() :
			node.Value += "\\" + NextToken.Value
		elif is_GroupToken() :
			node.Value += "\\\\" + NextToken.Value[1]
		else :
			node.Value += NextToken.Value

		eat(TokenType.glyph)

		if numerical == false :
			break

	return node
 | |
| 
 | |
# Consumes a `digit` token and returns a Digit node whose Value is the regex
# class \d.
func parse_GlyphDigit():
	eat(TokenType.glyph_digit)

	var digitNode   = ASTNode.new()
	digitNode.Value = "\\d"
	digitNode.Type  = NodeType.digit

	return digitNode
 | |
| 
 | |
# Consumes an `inline` token and returns an Any Inline node whose Value is the
# regex wildcard ".".
func parse_GlyphInline():
	eat(TokenType.glyph_inline)

	var inlineNode   = ASTNode.new()
	inlineNode.Value = "."
	inlineNode.Type  = NodeType.inline

	return inlineNode
 | |
| 	
 | |
# Parses `space` or `space( N )` and returns a Space node whose Value is a run
# of space characters.
# A bare `space` yields one space. With a count, N further spaces are appended
# to that first one.
# NOTE(review): `space(N)` therefore produces N + 1 spaces; confirm that this is
# the intended meaning of the count.
func parse_GlyphSpace():
	eat(TokenType.glyph_space)

	var \
	node       = ASTNode.new()
	node.Type  = NodeType.space
	node.Value = " "

	# `space` may be the last token of the input, leaving NextToken null.
	if NextToken != null && NextToken.Type == TokenType.expr_PStart :
		eat(TokenType.expr_PStart)

		var numGlyph = parse_Glyph(true)
		for _n in range(int(numGlyph.Value)) :
			node.Value += " "

		eat(TokenType.expr_PEnd)

	return node
 | |
| 
 | |
# Consumes a `word` token and returns a Word node whose Value is the regex
# class \w.
func parse_GlyphWord():
	eat(TokenType.glyph_word)

	var wordNode   = ASTNode.new()
	wordNode.Value = "\\w"
	wordNode.Type  = NodeType.word

	return wordNode
 | |
| 
 | |
# Consumes a `whitespace` token and returns a Whitespace node whose Value is
# the regex class \s.
func parse_GlyphWhitespace():
	eat(TokenType.glyph_ws)

	var wsNode   = ASTNode.new()
	wsNode.Value = "\\s"
	wsNode.Type  = NodeType.whitespace

	return wsNode
 | |
| 
 | |
# Consumes an escaped dash token (`\-`) and returns a Glyph node whose Value
# is "-".
func parse_GlyphDash():
	eat(TokenType.glyph_dash)

	var dashNode   = ASTNode.new()
	dashNode.Value = "-"
	dashNode.Type  = NodeType.glyph

	return dashNode
 | |
| 
 | |
# Consumes an escaped dot token (`\.`) and returns a Glyph node whose Value is
# the escaped regex dot.
func parse_GlyphDot():
	eat(TokenType.glyph_dot)

	var dotNode   = ASTNode.new()
	dotNode.Value = "\\."
	dotNode.Type  = NodeType.glyph

	return dotNode
 | |
| 
 | |
# Consumes an escaped exclamation mark token (`\!`) and returns a Glyph node
# whose Value is "!".
func parse_GlyphExclamation():
	eat(TokenType.glyph_excla)

	var exclaNode   = ASTNode.new()
	exclaNode.Value = "!"
	exclaNode.Type  = NodeType.glyph

	return exclaNode
 | |
| 
 | |
# Consumes an escaped vertical bar token (`\|`) and returns a Glyph node whose
# Value is the escaped regex bar.
func parse_GlyphVertS():
	eat(TokenType.glyph_vertS)

	var barNode   = ASTNode.new()
	barNode.Value = "\\|"
	barNode.Type  = NodeType.glyph

	return barNode
 | |
| 
 | |
# Consumes an escaped opening parenthesis token (`\(`) and returns a Glyph
# node whose Value is the escaped regex parenthesis.
func parse_Glyph_bPOpen():
	eat(TokenType.glyph_bPOpen)

	var openNode   = ASTNode.new()
	openNode.Value = "\\("
	openNode.Type  = NodeType.glyph

	return openNode
 | |
| 
 | |
# Consumes an escaped closing parenthesis token (`\)`) and returns a Glyph
# node whose Value is the escaped regex parenthesis.
func parse_Glyph_bPClose():
	eat(TokenType.glyph_bPClose)

	var closeNode   = ASTNode.new()
	closeNode.Value = "\\)"
	closeNode.Type  = NodeType.glyph

	return closeNode
 | |
| 
 | |
# Consumes an escaped double quote token (`\"`) and returns a Glyph node whose
# Value is a double quote.
func parse_Glyph_DQuote():
	eat(TokenType.glyph_dQuote)

	var quoteNode   = ASTNode.new()
	quoteNode.Value = "\""
	quoteNode.Type  = NodeType.glyph

	return quoteNode
 | |
| 
 | |
#   > Expression
#   : .lazy
#   ;
# Consumes a `.lazy` token and returns a Lazy node (Value is left unset).
func parse_OpLazy():
	eat(TokenType.op_lazy)

	var lazyNode  = ASTNode.new()
	lazyNode.Type = NodeType.lazy

	return lazyNode
 | |
| 
 | |
#   > Expression
#   > OpNot
# Look
#   : look ( Expression )
#   ;
# Consumes `look` and the parenthesized group after it. Returns a Lookahead
# node whose Value is the Capture Group node of that group.
func parse_OpLook():
	eat(TokenType.op_look)

	var group = parse_CaptureGroup()

	var lookNode   = ASTNode.new()
	lookNode.Type  = NodeType.look
	lookNode.Value = group

	return lookNode
 | |
| 
 | |
#   > Expression
# OpNot
#   : !
#   | CaptureGroup
#   | GlyphDigit
#   | GlyphWord
#   | GlyphWhitespace
#   | OpLook
#   | String
#   | Set
#   ;
# Consumes `!` and the operand it negates. Returns a Not Operator node whose
# Value is the operand's node.
# Asserts when `!` is the last token or is followed by something it cannot
# negate; in that case Value is left null, exactly as before, but the problem
# is now reported here instead of as a null access further down the line.
func parse_OpNot():
	eat(TokenType.op_not)

	var \
	node       = ASTNode.new()
	node.Type  = NodeType.op_not

	assert(NextToken != null, "parse_OpNot: Expected an operand after ! but reached the end of the expression")
	if NextToken == null :
		return node

	match NextToken.Type:
		TokenType.expr_PStart:
			node.Value = parse_CaptureGroup()

		TokenType.glyph_digit:
			node.Value = parse_GlyphDigit()

		TokenType.glyph_word:
			node.Value = parse_GlyphWord()

		TokenType.glyph_ws:
			node.Value = parse_GlyphWhitespace()

		TokenType.op_look:
			node.Value = parse_OpLook()

		TokenType.string:
			node.Value = parse_String()

		TokenType.set:
			node.Value = parse_Set()

		_:
			var assertStrTmplt = "parse_OpNot: The ! operator cannot be applied to: {value}"
			var assertStr      = assertStrTmplt.format({"value" : NextToken.Value})
			assert(false, assertStr)

	return node
 | |
| 
 | |
#   > Expression
# OpRepeat
#   : .repeat ( opt# optBetween opt# ) opt.lazy
#   ;
# Parses `.repeat( ... )` with an optional trailing `.lazy`.
# Returns a Repeat node whose Value is [ range, lazy ]: `range` is whatever
# parse_Between(true) returned and `lazy` is a Lazy node or null.
func parse_OpRepeat():
	eat(TokenType.op_repeat)

	var repeatNode  = ASTNode.new()
	repeatNode.Type = NodeType.repeat

	eat(TokenType.expr_PStart)
	var bounds = parse_Between(true)
	eat(TokenType.expr_PEnd)

	var lazyNode = null
	if NextToken != null && NextToken.Type == TokenType.op_lazy :
		lazyNode = parse_OpLazy()

	repeatNode.Value = [ bounds, lazyNode ]

	return repeatNode
 | |
| 
 | |
# Parses `backref( g )` where `g` is a single glyph token.
# Returns a Backreference Group node whose Value is the text of that glyph.
# Asserts when the token inside the parentheses is not a plain glyph.
func parse_Backreference():
	eat(TokenType.ref)

	var refNode  = ASTNode.new()
	refNode.Type = NodeType.ref

	eat(TokenType.expr_PStart)

	var details   = {"value" : NextToken.Value}
	var assertStr = "Error when parsing a backreference expression: Expected digit but got: {value}".format(details)

	assert(NextToken.Type == TokenType.glyph, assertStr)

	refNode.Value = NextToken.Value
	eat(TokenType.glyph)

	eat(TokenType.expr_PEnd)

	return refNode
 | |
| 
 | |
# Parses `set( ... )` and returns a Set node whose Value is the array of its
# members. A member is either the result of parse_Between() (a glyph or a
# range) or a Not Operator node whose operand passes is_Glyph().
# Any other `!` operand raises the "Bad ! operator in set." assertion.
func parse_Set():
	eat(TokenType.set)

	var setNode   = ASTNode.new()
	setNode.Type  = NodeType.set
	setNode.Value = []

	eat(TokenType.expr_PStart)

	while is_Glyph() || NextToken.Type == TokenType.op_not :
		var isNegation = NextToken.Type == TokenType.op_not

		if isNegation :
			var negated = parse_OpNot()
			if is_Glyph(negated.Value) :
				setNode.Value.append( negated )
				continue

			assert(false, "Bad ! operator in set.")

		# Also reached after the assertion above when assertions are disabled.
		setNode.Value.append( parse_Between() )

	eat(TokenType.expr_PEnd)

	return setNode
 | |
| 
 | |
# Consumes a string literal token and returns a String node whose Value is the
# text between the quotes.
func parse_String():
	# A string token always has the shape "..." with no quote in between (see
	# the string pattern in Spec), so dropping the first and last character
	# yields its contents.
	var quoted   = NextToken.Value
	var contents = quoted.substr(1, quoted.length() - 2)

	var stringNode   = ASTNode.new()
	stringNode.Type  = NodeType.string
	stringNode.Value = contents

	eat(TokenType.string)

	return stringNode
 | |
| 
 | |
| # End: Parser
 | |
| 
 | |
| 
 | |
| # Transpiling
 | |
| 
 | |
# Root of the tree built by the most recent transpile() call.
var ExprAST     : ASTNode
# Not assigned anywhere in the visible part of this file.
var RegexResult : String
 | |
| 
 | |
# Main entry point: converts `expression`, written in the simple regex
# language, into a regular expression string.
# Tokenizes the text, parses it into ExprAST and returns the transpiled result.
func transpile(expression : String):
	init( expression )

	# Prime the parser's lookahead with the first token.
	NextToken = next_Token()

	var root = parse_OpUnion(null)
	ExprAST  = root

	return transiple_Union(ExprAST)
 | |
| 
 | |
# Transpiles a Union or Expression node into regex text.
# For an Expression node every child is emitted in order. For a Union node the
# left expression is emitted, followed by "|" and the transpiled right side.
# Children of a type not listed below contribute nothing.
func transiple_Union(node : ASTNode):
	var isUnion = node.Type == NodeType.union
	var entries = node.Value

	if isUnion :
		entries = node.Value[0].Value

	var output = ""

	for entry in entries :
		match entry.Type :
			NodeType.str_start:
				output += "^"
			NodeType.str_end:
				output += "$"

			NodeType.capture:
				output += transpile_CaptureGroup(entry, false)
			NodeType.look:
				output += transpile_LookAhead(entry, false)
			NodeType.ref:
				output += transpile_Backreference(entry)
			NodeType.repeat:
				output += transpile_Repeat(entry)
			NodeType.set:
				output += transpile_Set(entry, false)

			# Leaf nodes already carry their regex text in Value.
			NodeType.glyph, NodeType.inline, NodeType.digit, NodeType.space, NodeType.word, NodeType.whitespace:
				output += entry.Value

			NodeType.string:
				output += transpile_String(entry, false)

			NodeType.op_not:
				output += transpile_OpNot(entry)

	if isUnion && node.Value[1] != null :
		output += "|" + transiple_Union(node.Value[1])

	return output
 | |
| 
 | |
# Transpiles a Capture Group node. The group opens with "(" normally and with
# "(?:" when `negate` is true; the content is the transpiled inner union.
func transpile_CaptureGroup(node : ASTNode, negate : bool):
	var opener = "("
	if negate :
		opener = "(?:"

	return opener + transiple_Union(node.Value) + ")"
 | |
| 
 | |
# Transpiles a Lookahead node. The group opens with "(?=" normally and with
# "(?!" when `negate` is true. node.Value is the Capture Group node, so the
# content is taken from that group's own Value.
func transpile_LookAhead(node : ASTNode, negate : bool):
	var opener = "(?="
	if negate :
		opener = "(?!"

	var inner = node.Value.Value

	return opener + transiple_Union(inner) + ")"
 | |
| 
 | |
# Transpiles a Backreference Group node into a backslash followed by the
# node's Value.
func transpile_Backreference(node : ASTNode):
	return "\\" + node.Value
 | |
| 
 | |
# Transpiles a Repeat node (Value is [ range, lazy ]) into a regex quantifier.
#   lower bound only:  0 -> "*",  1 -> "+",  n -> "{n,}"
#   both bounds:       0 and 1 -> "?",  otherwise "{a,b}"
#   single count n (range is not a Between node): "{n}"
# A "?" is appended when the lazy entry is not null.
func transpile_Repeat(node : ASTNode):
	var bounds     = node.Value[0]
	var lazy       = node.Value[1]
	var quantifier = ""

	if bounds.Type != NodeType.between :
		quantifier += "{" + bounds.Value + "}"
	else :
		var limits = bounds.Value

		if limits.size() == 1 :
			var lower = limits[0].Value

			if lower == "0" :
				quantifier += "*"
			elif lower == "1" :
				quantifier += "+"
			else :
				quantifier += "{" + lower + "," + "}"
		elif limits.size() == 2 :
			if limits[0].Value == "0" && limits[1].Value == "1" :
				quantifier += "?"
			else :
				quantifier += "{" + limits[0].Value + "," + limits[1].Value + "}"

	if lazy != null :
		quantifier += "?"

	return quantifier
 | |
| 
 | |
# Transpiles a Set node into a regex character class: "[...]" normally and
# "[^...]" when `negate` is true.
# Members are emitted in order: Not Operator members through transpile_OpNot(),
# ranges as "a-b", and every other member as its Value.
func transpile_Set(node : ASTNode, negate : bool):
	var result = ""

	if negate :
		result += "[^"
	else :
		result += "["

	for entry in node.Value :
		if entry.Type == NodeType.op_not :
			result += transpile_OpNot(entry)
		elif entry.Type == NodeType.between :
			result += entry.Value[0].Value
			result += "-"
			# parse_Between() leaves out the upper bound when no glyph follows
			# the dash (e.g. `set(a-)`); emit the dash alone in that case
			# instead of indexing past the end of the array.
			if entry.Value.size() > 1 :
				result += entry.Value[1].Value
		else :
			result += entry.Value

	result += "]"

	return result
 | |
| 
 | |
# Transpiles a String node: the node's Value wrapped on both sides in \b, or in
# \B when `negate` is true.
func transpile_String(node : ASTNode, negate : bool):
	var boundary = "\\b"
	if negate :
		boundary = "\\B"

	return boundary + node.Value + boundary
 | |
| 
 | |
# Transpiles a Not Operator node by emitting the negated form of its operand:
# capture group -> "(?:...)", digit -> \D, word -> \W, whitespace -> \S,
# lookahead -> "(?!...)", string -> wrapped in \B, set -> "[^...]".
# Returns an empty string for any other operand type.
func transpile_OpNot(node : ASTNode):
	var operand = node.Value

	match operand.Type :
		NodeType.capture:
			return "" + transpile_CaptureGroup(operand, true)
		NodeType.digit:
			return "\\D"
		NodeType.word:
			return "\\W"
		NodeType.whitespace:
			return "\\S"
		NodeType.look:
			return "" + transpile_LookAhead(operand, true)
		NodeType.string:
			return "" + transpile_String(operand, true)
		NodeType.set:
			return "" + transpile_Set(operand, true)

	return ""
 | |
| 
 |