LangStudies/App/RegM/Scripts/SRegex.gd

1041 lines
20 KiB
GDScript3
Raw Normal View History

2022-07-17 00:09:42 -07:00
extends Object
# Simple Regular Expressions
# This is a "high-level" language and transpiler for regex
# That makes it easier to write out and read
# than the original notation or syntax.
#
# The main interface function is transpile( <string> )
# Which can take any valid string from gdscript.
# Lexer
# Token categories recognised by the lexer, each mapped to a readable name.
# The readable name doubles as the key into Spec and SpecRegex.
# Entry order is significant: tokenize() walks TokenType.values() and accepts
# the first spec that matches, so do not reorder entries.
const TokenType : Dictionary = \
{
	# Formatting and string anchors
	fmt_S     = "Formatting",
	str_start = "String Start",
	str_end   = "String End",

	# Escaped parentheses and grouping parentheses
	glyph_bPOpen  = "\\(",
	glyph_bPClose = "\\)",
	expr_PStart   = "Parenthesis Start",
	expr_PEnd     = "Parenthesis End",

	# Range separator and character class keywords
	glyph_between = "Glyphs Between",
	glyph_digit   = "Digit",
	glyph_inline  = "inline",
	glyph_word    = "Word",
	glyph_ws      = "Whitespace",

	# Escaped punctuation
	glyph_dash   = "-",
	glyph_dot    = ". dot",
	glyph_excla  = "! Mark",
	glyph_vertS  = "\\|",
	glyph_dQuote = "\"",

	# Operators
	op_lazy   = "Lazy Operator",
	op_look   = "Lookahead",
	op_not    = "Not Operator",
	op_repeat = "Repeating Operator",
	op_union  = "Union Operator",

	# Compound constructs and the catch-all glyph
	ref    = "Backreference Group",
	set    = "Set",
	string = "String",
	glyph  = "Glyph",
}
# Regex source for every token type, keyed by the TokenType value.
# Every pattern is anchored with ^ so it can only match at the start of the
# remaining source text.
const Spec : Dictionary = \
{
	# Formatting and string anchors
	TokenType.fmt_S     : "^\\s",
	TokenType.str_start : "^\\bstart\\b",
	TokenType.str_end   : "^\\bend\\b",

	# Quoted string literal
	TokenType.string : "^\"[^\"]*\"",

	# Escaped parentheses: \( and \)
	TokenType.glyph_bPOpen  : "^\\\\\\(",
	TokenType.glyph_bPClose : "^\\\\\\)",

	# Grouping parentheses
	TokenType.expr_PStart : "^\\(",
	TokenType.expr_PEnd   : "^\\)",

	# Range separator and character class keywords
	TokenType.glyph_between : "^\\-",
	TokenType.glyph_digit   : "^\\bdigit\\b",
	TokenType.glyph_inline  : "^\\binline\\b",
	TokenType.glyph_word    : "^\\bword\\b",
	TokenType.glyph_ws      : "^\\bwhitespace\\b",

	# Postfix operators: .lazy and .repeat
	TokenType.op_lazy   : "^\\.\\blazy\\b",
	TokenType.op_repeat : "^\\.\\brepeat\\b",

	# Escaped punctuation: \- \. \! \| \"
	TokenType.glyph_dash   : "^\\\\\\-",
	TokenType.glyph_dot    : "^\\\\\\.",
	TokenType.glyph_excla  : "^\\\\\\!",
	TokenType.glyph_vertS  : "^\\\\\\|",
	TokenType.glyph_dQuote : "^\\\\\"",

	# Prefix and infix operators
	TokenType.op_look  : "^\\blook\\b",
	TokenType.op_not   : "^\\!",
	TokenType.op_union : "^\\|",

	# Compound constructs
	TokenType.ref : "^\\bbackref\\b",
	TokenType.set : "^\\bset\\b",

	# Any single non-whitespace character
	TokenType.glyph : "^[^\\s]"
}
# One lexeme emitted by tokenize().
class Token:
	# The TokenType value of the spec that matched.
	var Type  : String
	# The exact source text that was matched.
	var Value : String
# Lexer state.
# Text being tokenized; assigned by init().
var SourceText : String
# Offset into SourceText of the first character not yet tokenized.
var Cursor : int
# Compiled RegEx per token type; filled by compile_regex().
var SpecRegex : Dictionary
# Tokens produced by tokenize(), in source order.
var Tokens : Array
# Index into Tokens of the token next_Token() will hand out next.
var TokenIndex : int = 0
# Compiles every pattern in Spec and stores the resulting RegEx objects in
# SpecRegex, keyed by token type.
func compile_regex():
	for type in TokenType.values() :
		var regex  = RegEx.new()
		var status = regex.compile( Spec[type] )

		# RegEx.compile reports a malformed pattern through its return code.
		# Surface it here rather than failing later inside tokenize().
		assert(status == OK, "compile_regex: Invalid spec pattern for token type: " + type)

		SpecRegex[type] = regex
# Resets the lexer for a new source text and tokenizes it.
# The spec patterns are compiled on the first call only.
func init(programSrcText):
	SourceText = programSrcText
	Cursor     = 0
	TokenIndex = 0

	if SpecRegex.empty() :
		compile_regex()

	tokenize()
# Returns the next unread token and advances TokenIndex.
# Returns null once every token has been handed out.
func next_Token():
	if TokenIndex >= Tokens.size() :
		return null

	var token = Tokens[TokenIndex]
	TokenIndex += 1
	return token
# True once Cursor has moved past the last character of SourceText.
func reached_EndOfText():
	return SourceText.length() <= Cursor
# Splits SourceText into Tokens.
# At each position the specs are tried in TokenType order and the first one
# that matches at the start of the remaining text wins. Formatting
# (whitespace) matches advance the cursor but emit no token. If nothing
# matches, an assertion is raised and tokenizing stops.
func tokenize():
	Tokens.clear()

	while ! reached_EndOfText() :
		var remaining = SourceText.substr(Cursor)
		var matched   = false

		for type in TokenType.values() :
			var found = SpecRegex[type].search( remaining )
			if found == null || found.get_start() != 0 :
				continue

			var lexeme = found.get_string()
			Cursor += lexeme.length()
			matched = true

			# Whitespace is skipped, everything else becomes a token.
			if type != TokenType.fmt_S :
				var token = Token.new()
				token.Type  = type
				token.Value = lexeme
				Tokens.append( token )

			break

		if ! matched :
			var assertStrTmplt = "next_Token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
			var assertStr      = assertStrTmplt.format({"value" : Cursor, "txt" : remaining})
			assert(false, assertStr)
			return
# End : Lexer
# Parser
# A node of the parsed expression tree.
class ASTNode:
	# One of the NodeType values.
	var Type : String
	# Left untyped (Variant) on purpose: holds a String, another ASTNode,
	# an Array of nodes, or null depending on the node type.
	var Value

	# Returns a copy of `array` in which nested arrays are serialized
	# recursively and objects are replaced by the result of calling
	# fn_objSerializer (a FuncRef naming the method) on them.
	# Any other entry is copied through unchanged.
	func array_Serialize(array, fn_objSerializer) :
		var serialized = []

		for item in array :
			match typeof(item) :
				TYPE_ARRAY :
					serialized.append( array_Serialize( item, fn_objSerializer ) )
				TYPE_OBJECT :
					fn_objSerializer.set_instance(item)
					serialized.append( fn_objSerializer.call_func() )
				_ :
					serialized.append( item )

		return serialized

	# Returns this node as a nested array of the form [ Type, Value ],
	# with child nodes converted the same way.
	func to_SExpression():
		var payload = Value

		match typeof(Value) :
			TYPE_ARRAY :
				var serializer = FuncRef.new()
				serializer.set_function("to_SExpression")
				payload = array_Serialize( Value, serializer )
			TYPE_OBJECT :
				payload = Value.to_SExpression()

		return [ Type, payload ]

	# Returns this node as a Dictionary with the keys Type and Value,
	# with child nodes converted the same way.
	func to_Dictionary():
		var payload = Value

		match typeof(Value) :
			TYPE_ARRAY :
				var serializer = FuncRef.new()
				serializer.set_function("to_Dictionary")
				payload = array_Serialize( Value, serializer )
			TYPE_OBJECT :
				payload = Value.to_Dictionary()

		return \
		{
			Type  = self.Type,
			Value = payload
		}
# AST node categories, each mapped to a readable name stored in ASTNode.Type.
const NodeType = \
{
	# Structure
	expression = "Expression",
	between    = "Glyphs Between Set",
	capture    = "Capture Group",

	# Operators and compound constructs
	lazy   = "Lazy",
	look   = "Lookahead",
	op_not = "Not Operator",
	ref    = "Backreference Group",
	repeat = "Repeat",
	set    = "Set",
	union  = "Union",

	# Character classes
	digit      = "Digit",
	inline     = "Any Inline",
	word       = "Word",
	whitespace = "Whitespace",

	# Literals and anchors
	string    = "String",
	str_start = "String Start",
	str_end   = "String End",
	glyph     = "Glyph",
}
# Parser lookahead: the token about to be consumed, or null once the token
# list is exhausted.
var NextToken : Token
# --------------------------------------------------------------------- HELPERS
# Gets the next token only if the current token is the specified intended token (tokenType)
# Consumes the lookahead token, asserting that it has the given type, and
# loads the following token into NextToken.
# Returns the token that was consumed.
func eat(tokenType):
	var consumed = NextToken

	assert(consumed != null, "eat: NextToken was null")

	var assertStr = "eat: Unexpected token: {value}, expected: {type}".format({"value" : consumed.Value, "type" : tokenType})
	assert(consumed.Type == tokenType, assertStr)

	NextToken = next_Token()
	return consumed
# True when the Type of `glyph` (the lookahead token by default) is one of
# the glyph token types.
func is_Glyph(glyph = NextToken) :
	var glyphTypes = \
	[
		TokenType.glyph,
		TokenType.glyph_digit,
		TokenType.glyph_inline,
		TokenType.glyph_word,
		TokenType.glyph_ws,
		TokenType.glyph_dash,
		TokenType.glyph_dot,
		TokenType.glyph_excla,
		TokenType.glyph_vertS,
		TokenType.glyph_bPOpen,
		TokenType.glyph_bPClose,
		TokenType.glyph_dQuote,
	]

	return glyph.Type in glyphTypes
2022-07-17 00:09:42 -07:00
# True when the lookahead token is a string literal or any glyph token.
func is_GlyphOrStr() :
	if NextToken.Type == TokenType.string :
		return true

	return is_Glyph()
# True when the lookahead token's text is a backslash followed by one digit,
# i.e. text that a regex engine would read as a group reference.
#
# The previous match statement let every digit branch fall through to a
# wildcard that returned true, so any two character text starting with a
# backslash was accepted, digit or not.
func is_GroupToken() :
	var text = NextToken.Value

	if text.length() != 2 || text[0] != "\\" :
		return false

	return "0123456789".find( text[1] ) != -1
# True when the lookahead token's text is one of the regex metacharacters
# that parse_Glyph has to escape: ^ $ * [ ] ?
# Always returns a bool; the previous version fell out with a bare `return`
# (null) for every other character.
func is_RegExToken() :
	return NextToken.Value in [ "^", "$", "*", "[", "]", "?" ]
2022-07-17 00:09:42 -07:00
# --------------------------------------------------------------------- HELPERS
# > Union
# Union
# : expression | expression ..
# | expression
# ;
# Parses one expression and, when a | follows, the alternatives after it.
# Returns the expression node itself when there is no |, otherwise a Union
# node whose Value is [ left expression, parsed remainder ].
# endToken is passed on to parse_Expression as its stop token.
func parse_OpUnion(endToken):
	var left = parse_Expression(endToken)

	var hasAlternative = NextToken != null && NextToken.Type == TokenType.op_union
	if ! hasAlternative :
		return left

	eat(TokenType.op_union)

	var node = ASTNode.new()
	node.Type  = NodeType.union
	node.Value = [ left, parse_OpUnion(endToken) ]

	return node
2022-07-17 00:09:42 -07:00
# > Union
# Expression
# : EVERYTHING (Almost)
# ;
# Parses a sequence of terms into an Expression node whose Value is the
# array of parsed child nodes.
# Parsing stops at the end of the token list, at a | token, or at a token of
# type endToken (pass null for no stop token).
# A token that cannot start a term raises an assertion and ends the
# expression early.
func parse_Expression(endToken):
	var \
	node       = ASTNode.new()
	node.Type  = NodeType.expression
	node.Value = []

	while NextToken != null && NextToken.Type != TokenType.op_union :
		if endToken != null && NextToken.Type == endToken :
			break

		match NextToken.Type :
			TokenType.str_start :
				node.Value.append( parse_StrStart() )

			TokenType.str_end :
				node.Value.append( parse_StrEnd() )

			TokenType.expr_PStart :
				node.Value.append( parse_CaptureGroup() )

			TokenType.glyph :
				node.Value.append( parse_Glyph() )

			TokenType.glyph_digit :
				node.Value.append( parse_GlyphDigit() )

			TokenType.glyph_inline :
				node.Value.append( parse_GlyphInline() )

			TokenType.glyph_word :
				node.Value.append( parse_GlyphWord() )

			TokenType.glyph_ws :
				node.Value.append( parse_GlyphWhitespace() )

			TokenType.glyph_dash :
				node.Value.append( parse_GlyphDash() )

			TokenType.glyph_dot :
				node.Value.append( parse_GlyphDot() )

			TokenType.glyph_excla :
				node.Value.append( parse_GlyphExclamation() )

			TokenType.glyph_vertS :
				node.Value.append( parse_GlyphVertS() )

			TokenType.glyph_bPOpen :
				node.Value.append( parse_Glyph_bPOpen() )

			TokenType.glyph_bPClose :
				node.Value.append( parse_Glyph_bPClose() )

			TokenType.glyph_dQuote :
				node.Value.append( parse_Glyph_DQuote() )

			TokenType.op_look :
				node.Value.append( parse_OpLook() )

			TokenType.op_not :
				node.Value.append( parse_OpNot() )

			TokenType.op_repeat:
				node.Value.append( parse_OpRepeat() )

			TokenType.ref :
				node.Value.append( parse_Backreference() )

			TokenType.set :
				node.Value.append( parse_Set() )

			TokenType.string :
				node.Value.append( parse_String() )

			_ :
				# No branch above consumes this token (for example a bare "-",
				# a ".lazy" without a repeat, or an unbalanced ")"). Without
				# this branch the loop would never advance and would spin
				# forever on the same token.
				var assertStrTmplt = "parse_Expression: Unexpected token: {value}"
				var assertStr      = assertStrTmplt.format({"value" : NextToken.Value})
				assert(false, assertStr)
				return node

	return node
2022-07-17 00:09:42 -07:00
# > Expression
# Consumes a `start` token and returns a String Start node (no Value).
func parse_StrStart():
	var node  = ASTNode.new()
	node.Type = NodeType.str_start

	eat(TokenType.str_start)
	return node
2022-07-17 00:09:42 -07:00
# > Expression
# Consumes an `end` token and returns a String End node (no Value).
func parse_StrEnd():
	var node  = ASTNode.new()
	node.Type = NodeType.str_end

	eat(TokenType.str_end)
	return node
2022-07-17 00:09:42 -07:00
# > Expression
# Between
# : glyph
# | glyph - glyph
# ;
# Parses either a single glyph or a range written as `glyph - glyph`.
# Returns the glyph node itself when no - follows it. Otherwise returns a
# Glyphs Between Set node whose Value holds the lower glyph and, when one is
# present, the upper glyph. An open-ended range such as `1-` therefore has a
# Value of size 1.
#
# Changes from the previous version: the lookahead is checked for null before
# it is dereferenced, and the upper bound is parsed by its own token type
# instead of always going through parse_Glyph, which asserted in eat() for
# escaped glyphs such as \- or \.
func parse_Between():
	var glyph = parse_BetweenGlyph()

	# NextToken is null when the source text ends right after the glyph.
	if NextToken == null || NextToken.Type != TokenType.glyph_between :
		return glyph

	var \
	node       = ASTNode.new()
	node.Type  = NodeType.between
	node.Value = [ glyph ]

	eat(TokenType.glyph_between)

	if NextToken != null && is_Glyph() :
		node.Value.append( parse_BetweenGlyph() )

	return node

# Parses the lookahead token with the glyph parser matching its type.
# Returns null when the lookahead is missing or is not a glyph token.
func parse_BetweenGlyph():
	if NextToken == null :
		return null

	match NextToken.Type :
		TokenType.glyph :
			return parse_Glyph()

		TokenType.glyph_digit :
			return parse_GlyphDigit()

		TokenType.glyph_inline :
			return parse_GlyphInline()

		TokenType.glyph_word :
			return parse_GlyphWord()

		TokenType.glyph_ws :
			return parse_GlyphWhitespace()

		TokenType.glyph_dash :
			return parse_GlyphDash()

		TokenType.glyph_dot :
			return parse_GlyphDot()

		TokenType.glyph_excla :
			return parse_GlyphExclamation()

		TokenType.glyph_vertS :
			return parse_GlyphVertS()

		TokenType.glyph_bPOpen :
			return parse_Glyph_bPOpen()

		TokenType.glyph_bPClose :
			return parse_Glyph_bPClose()

		TokenType.glyph_dQuote :
			return parse_Glyph_DQuote()

	return null
2022-07-17 00:09:42 -07:00
# > Expression
# CaptureGroup
# : ( OpUnion )
# ;
# Parses `( OpUnion )` and returns a Capture Group node whose Value is the
# parsed content between the parentheses.
func parse_CaptureGroup():
	eat(TokenType.expr_PStart)
	var inner = parse_OpUnion(TokenType.expr_PEnd)
	eat(TokenType.expr_PEnd)

	var node   = ASTNode.new()
	node.Type  = NodeType.capture
	node.Value = inner

	return node
2022-07-17 00:09:42 -07:00
# > Expression
# > Between
# Glyph
# : glyph
# ;
# Consumes a plain glyph token and returns a Glyph node holding its regex
# text. "/" and the characters accepted by is_RegExToken() are prefixed with
# a backslash; text accepted by is_GroupToken() becomes two backslashes plus
# its second character; anything else is used as written.
func parse_Glyph():
	var text    = NextToken.Value
	var escaped = text

	if text == "/" :
		escaped = "\\/"
	elif is_RegExToken() :
		escaped = "\\" + text
	elif is_GroupToken() :
		escaped = "\\\\" + text[1]

	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = escaped

	eat(TokenType.glyph)
	return node
2022-07-17 00:09:42 -07:00
# Consumes a `digit` token and returns a Digit node holding \d.
func parse_GlyphDigit():
	var node   = ASTNode.new()
	node.Type  = NodeType.digit
	node.Value = "\\d"

	eat(TokenType.glyph_digit)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an `inline` token and returns an Any Inline node holding "."
func parse_GlyphInline():
	var node   = ASTNode.new()
	node.Type  = NodeType.inline
	node.Value = "."

	eat(TokenType.glyph_inline)
	return node
2022-07-17 00:09:42 -07:00
# Consumes a `word` token and returns a Word node holding \w.
func parse_GlyphWord():
	var node   = ASTNode.new()
	node.Type  = NodeType.word
	node.Value = "\\w"

	eat(TokenType.glyph_word)
	return node
2022-07-17 00:09:42 -07:00
# Consumes a `whitespace` token and returns a Whitespace node holding \s.
func parse_GlyphWhitespace():
	var node   = ASTNode.new()
	node.Type  = NodeType.whitespace
	node.Value = "\\s"

	eat(TokenType.glyph_ws)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped dash token (\-) and returns a Glyph node holding "-".
func parse_GlyphDash():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "-"

	eat(TokenType.glyph_dash)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped dot token (\.) and returns a Glyph node holding \.
func parse_GlyphDot():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "\\."

	eat(TokenType.glyph_dot)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped exclamation token (\!) and returns a Glyph node
# holding "!".
func parse_GlyphExclamation():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "!"

	eat(TokenType.glyph_excla)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped vertical bar token (\|) and returns a Glyph node
# holding \|
func parse_GlyphVertS():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "\\|"

	eat(TokenType.glyph_vertS)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped open parenthesis token and returns a Glyph node
# holding \(
func parse_Glyph_bPOpen():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "\\("

	eat(TokenType.glyph_bPOpen)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped close parenthesis token and returns a Glyph node
# holding \)
func parse_Glyph_bPClose():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "\\)"

	eat(TokenType.glyph_bPClose)
	return node
2022-07-17 00:09:42 -07:00
# Consumes an escaped double quote token and returns a Glyph node holding a
# double quote character.
func parse_Glyph_DQuote():
	var node   = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = "\""

	eat(TokenType.glyph_dQuote)
	return node
2022-07-17 00:09:42 -07:00
# > Expression
# : .lazy
# ;
# Consumes a `.lazy` token and returns a Lazy node (no Value).
func parse_OpLazy():
	var node  = ASTNode.new()
	node.Type = NodeType.lazy

	eat(TokenType.op_lazy)
	return node
2022-07-17 00:09:42 -07:00
# > Expression
# > OpNot
# Look
# : look ( Expression )
# ;
# Parses `look ( ... )` and returns a Lookahead node whose Value is the
# Capture Group node parsed from the parenthesised part.
func parse_OpLook():
	eat(TokenType.op_look)
	var group = parse_CaptureGroup()

	var node   = ASTNode.new()
	node.Type  = NodeType.look
	node.Value = group

	return node
2022-07-17 00:09:42 -07:00
# > Expression
# OpNot
# : !
# | CaptureGroup
# | GlyphDigit
# | GlyphWord
# | GlyphWhitespace
# | OpLook
# | String
# | Set
# ;
# Parses `!` followed by its operand and returns a Not Operator node whose
# Value is the parsed operand node.
# Supported operands: capture group, digit, word, whitespace, look, string
# and set.
#
# An unsupported or missing operand now raises an assertion here. Previously
# the node was returned silently with a null Value.
func parse_OpNot():
	eat(TokenType.op_not)

	var \
	node      = ASTNode.new()
	node.Type = NodeType.op_not

	# The ! was the last token of the source text.
	assert(NextToken != null, "parse_OpNot: Expected an operand after ! but reached the end of the expression")
	if NextToken == null :
		return node

	match NextToken.Type:
		TokenType.expr_PStart:
			node.Value = parse_CaptureGroup()

		TokenType.glyph_digit:
			node.Value = parse_GlyphDigit()

		TokenType.glyph_word:
			node.Value = parse_GlyphWord()

		TokenType.glyph_ws:
			node.Value = parse_GlyphWhitespace()

		TokenType.op_look:
			node.Value = parse_OpLook()

		TokenType.string:
			node.Value = parse_String()

		TokenType.set:
			node.Value = parse_Set()

		_ :
			# The offending token is left unconsumed for the caller.
			var assertStrTmplt = "parse_OpNot: The ! operator cannot be applied to: {value}"
			var assertStr      = assertStrTmplt.format({"value" : NextToken.Value})
			assert(false, assertStr)

	return node
2022-07-17 00:09:42 -07:00
# > Expression
# OpRepeat
# : .repeat ( opt# optBetween opt# ) opt.lazy
# ;
# Parses `.repeat ( Between )` with an optional trailing `.lazy`.
# Returns a Repeat node whose Value is [ range node, lazy node or null ].
func parse_OpRepeat():
	eat(TokenType.op_repeat)

	eat(TokenType.expr_PStart)
	var vrange = parse_Between()
	eat(TokenType.expr_PEnd)

	var lazy = null
	if NextToken != null && NextToken.Type == TokenType.op_lazy :
		lazy = parse_OpLazy()

	var node   = ASTNode.new()
	node.Type  = NodeType.repeat
	node.Value = [ vrange, lazy ]

	return node
2022-07-17 00:09:42 -07:00
# Parses `backref ( digit )` and returns a Backreference Group node whose
# Value is the digit text.
#
# The assertion now checks that the glyph really is a digit, as its message
# has always claimed, and a missing token is reported before NextToken is
# dereferenced to build that message.
func parse_Backreference():
	eat(TokenType.ref)

	var \
	node      = ASTNode.new()
	node.Type = NodeType.ref

	eat(TokenType.expr_PStart)

	assert(NextToken != null, "Error when parsing a backreference expression: Expected digit but reached the end of the expression")

	var assertStrTmplt = "Error when parsing a backreference expression: Expected digit but got: {value}"
	var assertStr      = assertStrTmplt.format({"value" : NextToken.Value})

	var isDigit = NextToken.Type == TokenType.glyph && "0123456789".find( NextToken.Value ) != -1
	assert(isDigit, assertStr)

	node.Value = NextToken.Value

	eat(TokenType.glyph)
	eat(TokenType.expr_PEnd)

	return node
2022-07-17 00:09:42 -07:00
# Parses `set ( ... )` and returns a Set node whose Value is an array of
# glyph nodes, range nodes (from parse_Between) and negated glyph nodes.
#
# Changes from the previous version: the loop stops at the end of the token
# list instead of dereferencing a null lookahead, a `!` without an operand no
# longer dereferences null, and a rejected `!` entry is skipped instead of
# falling through into parse_Between.
func parse_Set():
	eat(TokenType.set)

	var \
	node       = ASTNode.new()
	node.Type  = NodeType.set
	node.Value = []

	eat(TokenType.expr_PStart)

	while NextToken != null && ( is_Glyph() || NextToken.Type == TokenType.op_not ) :
		if NextToken.Type == TokenType.op_not :
			var possibleGlyph = parse_OpNot()

			# is_Glyph() compares the operand node's Type against the
			# TokenType names; this accepts Digit, Word and Whitespace
			# because NodeType and TokenType use the same text for them.
			if possibleGlyph.Value != null && is_Glyph(possibleGlyph.Value) :
				node.Value.append( possibleGlyph )
				continue

			assert(true == false, "Bad ! operator in set.")
			continue

		node.Value.append( parse_Between() )

	eat(TokenType.expr_PEnd)

	return node
2022-07-17 00:09:42 -07:00
# Consumes a string token and returns a String node whose Value is the token
# text without its surrounding double quotes.
func parse_String():
	var quoted = NextToken.Value

	var node   = ASTNode.new()
	node.Type  = NodeType.string
	# The string spec only matches "<no quotes inside>", so dropping the
	# first and last character removes exactly the two delimiters.
	node.Value = quoted.substr(1, quoted.length() - 2)

	eat(TokenType.string)
	return node
2022-07-17 00:09:42 -07:00
# End: Parser
# Transpiling
# Root of the tree produced by the most recent transpile() call.
var ExprAST : ASTNode
# NOTE(review): not read or written by any function visible in this file.
var RegexResult : String
# Main entry point: converts a simple-regex expression into regex text.
# Tokenizes the expression, parses it into ExprAST and returns the string
# generated from that tree.
func transpile(expression : String):
	# Lex the whole expression, then prime the parser's lookahead.
	init( expression )
	NextToken = next_Token()

	ExprAST = parse_OpUnion(null)
	return transiple_Union(ExprAST)
2022-07-17 00:09:42 -07:00
# Generates regex text for an Expression node or a Union node.
# For a Union node the left expression is emitted first, then "|" and the
# text generated recursively for the right-hand side.
func transiple_Union(node : ASTNode):
	var isUnion = node.Type == NodeType.union

	# A Union node keeps its left expression in Value[0]; an Expression node
	# keeps its terms directly in Value.
	var terms = node.Value
	if isUnion :
		terms = node.Value[0].Value

	var result = ""

	for entry in terms :
		match entry.Type :
			NodeType.str_start:
				result += "^"
			NodeType.str_end:
				result += "$"

			# These nodes already carry their final regex text.
			NodeType.glyph, NodeType.inline, NodeType.digit, NodeType.word, NodeType.whitespace:
				result += entry.Value

			NodeType.capture:
				result += transpile_CaptureGroup(entry, false)
			NodeType.look:
				result += transpile_LookAhead(entry, false)
			NodeType.ref:
				result += transpile_Backreference(entry)
			NodeType.repeat:
				result += transpile_Repeat(entry)
			NodeType.set:
				result += transpile_Set(entry, false)
			NodeType.string:
				result += transpile_String(entry, false)
			NodeType.op_not:
				result += transpile_OpNot(entry)

	if isUnion && node.Value[1] != null :
		result += "|"
		result += transiple_Union(node.Value[1])

	return result
2022-07-17 00:09:42 -07:00
# Generates a parenthesised group from a Capture Group node.
# With negate set the group is opened with "(?:" instead of "(".
func transpile_CaptureGroup(node : ASTNode, negate : bool):
	var opener = "(?:" if negate else "("

	return opener + transiple_Union(node.Value) + ")"
2022-07-17 00:09:42 -07:00
# Generates a lookahead from a Lookahead node: "(?=" ... ")", or
# "(?!" ... ")" when negate is set.
func transpile_LookAhead(node : ASTNode, negate : bool):
	var opener = "(?!" if negate else "(?="

	# node.Value is the Capture Group node; its Value is the content to emit.
	return opener + transiple_Union(node.Value.Value) + ")"
2022-07-17 00:09:42 -07:00
# Generates a backreference: a backslash followed by the node's Value.
func transpile_Backreference(node : ASTNode):
	return "\\" + node.Value
# Generates the quantifier for a Repeat node.
#   single glyph n  ->  {n}
#   0-              ->  *
#   1-              ->  +
#   n-              ->  {n,}
#   0-1             ->  ?
#   n-m             ->  {n,m}
# A trailing "?" is appended when the repeat is marked lazy.
func transpile_Repeat(node : ASTNode):
	var vrange = node.Value[0]
	var lazy   = node.Value[1]
	var result = ""

	if vrange.Type != NodeType.between :
		result += "{" + vrange.Value[0] + "}"
	else :
		var bounds = vrange.Value

		match bounds.size() :
			1 :
				# Open-ended range: only a lower bound was given.
				var lower = bounds[0].Value
				if lower == "0" :
					result += "*"
				elif lower == "1" :
					result += "+"
				else :
					result += "{" + lower + "," + "}"
			2 :
				var lower = bounds[0].Value
				var upper = bounds[1].Value
				if lower == "0" && upper == "1" :
					result += "?"
				else :
					result += "{" + lower + "," + upper + "}"

	if lazy != null :
		result += "?"

	return result
2022-07-17 00:09:42 -07:00
# Generates a character class from a Set node: "[" ... "]", or "[^" ... "]"
# when negate is set.
# Negated entries go through transpile_OpNot, ranges are emitted as
# lower-upper, and any other entry contributes its Value as is.
#
# A range without an upper bound (written `a-`) has only one element in its
# Value. It is now emitted as the glyph followed by an escaped dash instead
# of indexing a second element that does not exist.
func transpile_Set(node : ASTNode, negate : bool):
	var result = ""

	if negate :
		result += "[^"
	else :
		result += "["

	for entry in node.Value :
		if entry.Type == NodeType.op_not :
			result += transpile_OpNot(entry)
		elif entry.Type == NodeType.between :
			result += entry.Value[0].Value
			if entry.Value.size() > 1 :
				result += "-"
				result += entry.Value[1].Value
			else :
				# Escaped so the dash cannot form a range with whatever
				# entry follows it inside the class.
				result += "\\-"
		else :
			result += entry.Value

	result += "]"

	return result
2022-07-17 00:09:42 -07:00
# Generates a String node's text wrapped in \b on both sides, or in \B on
# both sides when negate is set.
func transpile_String(node : ASTNode, negate : bool):
	var boundary = "\\B" if negate else "\\b"

	return boundary + node.Value + boundary
2022-07-17 00:09:42 -07:00
# Generates the negated form of the operand held by a Not Operator node.
# Digit, Word and Whitespace become \D, \W and \S; capture groups,
# lookaheads, strings and sets are generated with their negate flag set.
# Any other operand type yields an empty string.
func transpile_OpNot(node : ASTNode):
	var operand = node.Value

	match operand.Type :
		NodeType.digit:
			return "\\D"
		NodeType.word:
			return "\\W"
		NodeType.whitespace:
			return "\\S"
		NodeType.capture:
			return "" + transpile_CaptureGroup(operand, true)
		NodeType.look:
			return "" + transpile_LookAhead(operand, true)
		NodeType.string:
			return "" + transpile_String(operand, true)
		NodeType.set:
			return "" + transpile_Set(operand, true)

	return ""
2022-07-17 00:09:42 -07:00