SRegEx works!!!!

It's not a full-fledged transpiler, but it works at least on RDP's lexer. I can expand it on demand.
This commit is contained in:
Edward R. Gonzalez 2022-07-17 07:32:57 -04:00
parent 5ae405e284
commit 17c3b8fe36
6 changed files with 653 additions and 490 deletions

View File

@ -1,5 +1,7 @@
extends Object extends Object
var SRegEx = preload("res://RegM/Scripts/SRegex.gd").new()
class_name Lexer class_name Lexer
@ -118,17 +120,17 @@ const Spec : Dictionary = \
#Operators #Operators
# Logical # Logical
TokenType.op_Relational : "^[>\\<]=?", TokenType.op_Relational : "^[><]=?",
TokenType.op_Equality : "^[=!]=", TokenType.op_Equality : "^[=!]=",
TokenType.op_LAnd : "^&&", TokenType.op_LAnd : "^&&",
TokenType.op_LOr : "^\\|\\|", TokenType.op_LOr : "^\\|\\|",
TokenType.op_LNot : "^!", TokenType.op_LNot : "^!",
# Arithmetic # Arithmetic
TokenType.op_CAssign : "^[*\\/\\+\\-]=", TokenType.op_CAssign : "^[\\*\\/+-]=",
TokenType.op_Assign : "^=", TokenType.op_Assign : "^=",
TokenType.op_Additive : "^[+\\-]", TokenType.op_Additive : "^[+-]",
TokenType.op_Multiplicative : "^[*\\/]", TokenType.op_Multiplicative : "^[\\*\\/]",
# Literals # Literals
TokenType.literal_BTrue : "^\\btrue\\b", TokenType.literal_BTrue : "^\\btrue\\b",
@ -142,11 +144,11 @@ const Spec : Dictionary = \
TokenType.sym_Identifier : "^\\w+" TokenType.sym_Identifier : "^\\w+"
} }
const SSpec : Dictonary = const SSpec : Dictionary = \
{ {
# Comments # Comments
TokenType.cmt_SL : "start // inline.repeat()", TokenType.cmt_SL : "start // inline.repeat(0-)",
TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat.lazy */", TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat(0-).lazy */",
# Formatting # Formatting
TokenType.fmt_S : "start whitespace.repeat(1-)", TokenType.fmt_S : "start whitespace.repeat(1-)",
@ -176,8 +178,8 @@ const SSpec : Dictonary =
TokenType.def_Else : "start \"else\"", TokenType.def_Else : "start \"else\"",
# Expressions # Expressions
TokenType.expr_PStart : "start \(", TokenType.expr_PStart : "start \\(",
TokenType.expr_PEnd : "start \)", TokenType.expr_PEnd : "start \\)",
TokenType.expr_SBStart : "start [", TokenType.expr_SBStart : "start [",
TokenType.expr_SBEnd : "start ]", TokenType.expr_SBEnd : "start ]",
TokenType.expr_New : "start \"new\"", TokenType.expr_New : "start \"new\"",
@ -190,20 +192,20 @@ const SSpec : Dictonary =
TokenType.op_Relational : "start set(> <) =.repeat(0-1)", TokenType.op_Relational : "start set(> <) =.repeat(0-1)",
TokenType.op_Equality : "start set(= \\!) =", TokenType.op_Equality : "start set(= \\!) =",
TokenType.op_LAnd : "start &&", TokenType.op_LAnd : "start &&",
TokenType.op_LOr : "start \\\| \\\|", TokenType.op_LOr : "start \\| \\|",
TokenType.op_LNot : "start \\\!", TokenType.op_LNot : "start \\!",
# Arithmetic # Arithmetic
TokenType.op_CAssign : "start set(* / + -) =", TokenType.op_CAssign : "start set(* / + \\-) =",
TokenType.op_Assign : "start =", TokenType.op_Assign : "start =",
TokenType.op_Additive : "start set(+ -)", TokenType.op_Additive : "start set(+ \\-)",
TokenType.op_Multiplicative : "start set(* /)", TokenType.op_Multiplicative : "start set(* /)",
# Literals # Literals
TokenType.literal_BTrue : "start \"true\"", TokenType.literal_BTrue : "start \"true\"",
TokenType.literal_BFalse : "start \"false\"", TokenType.literal_BFalse : "start \"false\"",
TokenType.literal_Number : "start digit.repeat(1-)", TokenType.literal_Number : "start digit.repeat(1-)",
TokenType.literal_String : "start \\\" !set( \\\" ).repeat(1-) \\\" ", TokenType.literal_String : "start \\\" !set( \\\" ).repeat(0-) \\\"",
TokenType.literal_Null : "start \"null\"", TokenType.literal_Null : "start \"null\"",
# Symbols # Symbols
@ -227,10 +229,15 @@ func compile_regex():
for type in TokenType.values() : for type in TokenType.values() :
var \ var \
regex = RegEx.new() regex = RegEx.new()
regex.compile( Spec[type] )
var original = Spec[type]
var transpiled = SRegEx.transpile(SSpec[type])
assert(transpiled == original, "transpiled did not match original")
regex.compile( transpiled )
SpecRegex[type] = regex SpecRegex[type] = regex
# SpecRegex[type].compile( Spec[type] )
func init(programSrcText): func init(programSrcText):
SourceText = programSrcText SourceText = programSrcText

View File

@ -1,7 +1,7 @@
## Concatenation ## Concatenation
Regex : `/^AB$/` Regex : `/^AB$/`
Pseudo: `start str(AB) end` Pseudo: `start AB end`
Machine: Machine:
``` ```
@ -13,7 +13,7 @@ Submachine_A --epsilon--> Submachine_B
## Union ## Union
Regex : `/^A|B$/` Regex : `/^A|B$/`
Pseudo: `start glyph(A) | glyph(B) end` Pseudo: `start A | B end`
Machine: Machine:
``` ```
@ -27,7 +27,7 @@ Machine:
## Kleene Closure ## Kleene Closure
Regex : `/^A*$/` Regex : `/^A*$/`
Pseudo: `start glyph(A).repeating end` Pseudo: `start A.repeat(0-) end`
Machine: Machine:
``` ```

View File

@ -0,0 +1,30 @@
# Complex Machines
Ex:
RegEx : `/xy*|z/`
SRegEx: `x y.repeat(0-) | z`
## Decomposition
### Stage 1: Union
```
->o.start (o)
\epsilon-> o --xy*-> o -epsilon-->/
\epsilon-> o --z---> o -epsilon->/
```
### Stage 2: Concatenation
```
->o.start (o)
\epsilon -> o --x--> o -epsilon-> o --y* -epsilon->/
\epsilon -> o --z--> o -epsilon------------------>/
```
### Stage 3: Kleene Closure
```
|<------------<|
->epsi -> o -x-> o -epsi-> o -epsi-> o -y-> -epsi-> o ->epsi->|
| |>---------------------->| /
->o.start (o)
\epsi -> o -z-> o -epsi------------------------------------>/
```

View File

@ -0,0 +1,11 @@
# Syntactic Sugar
Ex:
RegEx : `/a+|[0-3]/`
SRegEx: `a.repeat(1-) | set(0-3)`
`A+` === `AA*` === `A.repeat(1-)` === `AA.repeat(0-)`
`A?` === `A|ε` === `A.repeat(0-1)`
`[0-9]` === `0|1|2|3|4|5|6|7|8|9` === `set(0-9)`

View File

@ -96,8 +96,6 @@ func union_pair(a : NFA, b : NFA):
return NFA.new(start, accepting) return NFA.new(start, accepting)
func test(): func test():
var state_1 = State.new(false) var state_1 = State.new(false)
var state_2 = State.new(true) var state_2 = State.new(true)

View File

@ -12,27 +12,30 @@ extends Object
# Lexer # Lexer
const TokenType = \ const TokenType : Dictionary = \
{ {
fmt_S = "Formatting", fmt_S = "Formatting",
str_start = "String Start",
str_end = "String End",
glyph_bPOpen = "\\(",
glyph_bPClose = "\\)",
expr_PStart = "Parenthesis Start", expr_PStart = "Parenthesis Start",
expr_PEnd = "Parenthesis End", expr_PEnd = "Parenthesis End",
glyph = "Glyph",
glyph_between = "Glyphs Between", glyph_between = "Glyphs Between",
glyph_digit = "Digit", glyph_digit = "Digit",
glyph_inline = "inline", glyph_inline = "inline",
glyph_word = "Word", glyph_word = "Word",
glyph_ws = "Whitespace", glyph_ws = "Whitespace",
glyph_dash = "-" glyph_dash = "-",
glyph_dot = ". dot", glyph_dot = ". dot",
glyph_excla = "! Mark", glyph_excla = "! Mark",
glyph_vertS = "|", glyph_vertS = "\\|",
glyph_bPOpen = "(", glyph_dQuote = "\"",
glyph_bPClose = ")",
glyph_dQuote = "\""
op_lazy = "Lazy Operator", op_lazy = "Lazy Operator",
op_look = "Lookahead", op_look = "Lookahead",
@ -43,47 +46,49 @@ const TokenType = \
ref = "Backreference Group", ref = "Backreference Group",
set = "Set", set = "Set",
str_start = "String Start", string = "String",
str_end = "String End",
string = "String" glyph = "Glyph",
} }
const TokenSpec = \ const Spec : Dictionary = \
{ {
TokenType.fmt_S = "^\\s", TokenType.fmt_S : "^\\s",
TokenType.string = "^\"[^\"]*\"", TokenType.str_start : "^\\bstart\\b",
TokenType.str_end : "^\\bend\\b",
TokenType.expr_PStart = "^\\(", TokenType.string : "^\"[^\"]*\"",
TokenType.expr_PEnd = "^\\)",
TokenType.glyph_between = "^\\-" TokenType.glyph_bPOpen : "^\\\\\\(",
TokenType.glyph_digit = "^\\bdigit\\b", TokenType.glyph_bPClose : "^\\\\\\)",
TokenType.glyph_inline = "^\\binline\\b",
TokenType.glyph_word = "^\\bword\\b",
TokenType.glyph_ws = "^\\bwhitespace\\b",
TokenType.op_lazy = "^\\b.lazy\\b", TokenType.expr_PStart : "^\\(",
TokenType.op_repeat = "^\\b\\.repeat\\b", TokenType.expr_PEnd : "^\\)",
TokenType.glyph_dash = "^\\\-" TokenType.glyph_between : "^\\-",
TokenType.glyph_dot = "^\\\.", TokenType.glyph_digit : "^\\bdigit\\b",
TokenType.glyph_excla = "^\\\!", TokenType.glyph_inline : "^\\binline\\b",
TokenType.glyph_vertS = "^\\\|", TokenType.glyph_word : "^\\bword\\b",
TokenType.glyph_bPOpen = "^\\\(", TokenType.glyph_ws : "^\\bwhitespace\\b",
TokenType.glyph_bPClose = "^\\\)",
TokenType.glpyh_dQuote = "^\\\"",
TokenType.op_look = "^\\blook\\b", TokenType.op_lazy : "^\\.\\blazy\\b",
TokenType.op_not = "^\\!", TokenType.op_repeat : "^\\.\\brepeat\\b",
TokenType.op_union = "^\\|",
TokenType.ref = "^\\bbackref\\b", TokenType.glyph_dash : "^\\\\\\-",
TokenType.set = "^\\bset\\b", TokenType.glyph_dot : "^\\\\\\.",
TokenType.str_start = "^\\bstart\\b", TokenType.glyph_excla : "^\\\\\\!",
TokenType.str_end = "^\\bend\\b", TokenType.glyph_vertS : "^\\\\\\|",
TokenType.glyph_dQuote : "^\\\\\"",
TokenType.glyph = "^[\\w\\d]" TokenType.op_look : "^\\blook\\b",
TokenType.op_not : "^\\!",
TokenType.op_union : "^\\|",
TokenType.ref : "^\\bbackref\\b",
TokenType.set : "^\\bset\\b",
TokenType.glyph : "^[^\\s]"
} }
@ -103,6 +108,7 @@ func compile_regex():
for type in TokenType.values() : for type in TokenType.values() :
var \ var \
regex = RegEx.new() regex = RegEx.new()
var _spec = Spec[type]
regex.compile( Spec[type] ) regex.compile( Spec[type] )
SpecRegex[type] = regex SpecRegex[type] = regex
@ -161,7 +167,7 @@ func tokenize():
break; break;
if error : if error :
var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}" var assertStrTmplt = "next_Token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
var assertStr = assertStrTmplt.format({"value" : Cursor, "txt" : srcLeft}) var assertStr = assertStrTmplt.format({"value" : Cursor, "txt" : srcLeft})
assert(true != true, assertStr) assert(true != true, assertStr)
return return
@ -245,23 +251,23 @@ const NodeType = \
{ {
expression = "Expression", expression = "Expression",
between = "Glyphs Between Set" between = "Glyphs Between Set",
capture = "Capture Group", capture = "Capture Group",
lazy = "Lazy", lazy = "Lazy",
look = "Lookahead", look = "Lookahead",
op_not = "Not Operator",
ref = "Backreference Group", ref = "Backreference Group",
repeat = "Repeat", repeat = "Repeat",
set = "Set", set = "Set",
union = "Union", union = "Union",
inline = "Inline",
digit = "Digit", digit = "Digit",
inline = "Any Inline" inline = "Any Inline",
word = "Word", word = "Word",
whitespace = "Whitespace", whitespace = "Whitespace",
string = "String" string = "String",
strStart = "String Start", str_start = "String Start",
strEnd = "String End", str_end = "String End",
glyph = "Glyph", glyph = "Glyph",
} }
@ -286,19 +292,30 @@ func eat(tokenType):
return currToken return currToken
func is_Glyph() : func is_Glyph(glyph = NextToken) :
match NextToken: match glyph.Type:
TokenType.glyph : TokenType.glyph :
return true
TokenType.glyph_digit : TokenType.glyph_digit :
return true
TokenType.glyph_inline : TokenType.glyph_inline :
return true
TokenType.glyph_word : TokenType.glyph_word :
return true
TokenType.glyph_ws : TokenType.glyph_ws :
return true
TokenType.glyph_dash : TokenType.glyph_dash :
return true
TokenType.glyph_dot : TokenType.glyph_dot :
return true
TokenType.glyph_excla : TokenType.glyph_excla :
return true
TokenType.glyph_vertS : TokenType.glyph_vertS :
return true
TokenType.glyph_bPOpen : TokenType.glyph_bPOpen :
return true
TokenType.glyph_bPClose : TokenType.glyph_bPClose :
return true
TokenType.glyph_dQuote : TokenType.glyph_dQuote :
return true return true
@ -307,6 +324,39 @@ func is_Glyph() :
func is_GlyphOrStr() : func is_GlyphOrStr() :
return is_Glyph() || NextToken.Type == TokenType.string return is_Glyph() || NextToken.Type == TokenType.string
# Inspects NextToken.Value and reports whether it looks like a two-character
# escape: exactly two characters long with a backslash as the first one.
# The second character is then run through the match below.
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the branches below is presumed (if > match > cases) - confirm.
func is_GroupToken() :
# Only values of the form "\" + one character reach the match.
if NextToken.Value.length() == 2 && NextToken.Value[0] == "\\" :
match NextToken.Value[1] :
# NOTE(review): in Godot 3.x, `continue` inside a match branch presumably
# leaves the branch and keeps testing the patterns that follow, which would
# route every digit to the wildcard branch - verify this is the intent.
"0" : continue
"1" : continue
"2" : continue
"3" : continue
"4" : continue
"5" : continue
"6" : continue
"7" : continue
"8" : continue
"9" : continue
# Wildcard branch: the only place this function returns true.
_:
return true
# Fallback result when no branch above returned.
return false
# Reports whether NextToken.Value is one of the six characters this check
# recognises: ^ $ * [ ] ?
# Returns true for those values and false for anything else.
func is_RegExToken() :
	match NextToken.Value :
		# All six characters share one branch; patterns are comma separated.
		"^", "$", "*", "[", "]", "?" :
			return true

	# Explicit boolean instead of a bare `return` (which yields null), so the
	# result type matches is_GroupToken(). Truthiness for callers is unchanged.
	return false
# --------------------------------------------------------------------- HELPERS # --------------------------------------------------------------------- HELPERS
# > Union # > Union
@ -314,10 +364,10 @@ func is_GlyphOrStr() :
# : expression | expression .. # : expression | expression ..
# | expression # | expression
# ; # ;
func parse_OpUnion(): func parse_OpUnion(endToken : Token):
var expression = parse_Expression(TokenType.union) var expression = parse_Expression(endToken)
if NextToken.Type != TokenType.union : if NextToken == null || NextToken.Type != TokenType.union :
return expression return expression
eat(TokenType.op_union) eat(TokenType.op_union)
@ -325,7 +375,7 @@ func parse_OpUnion():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.union node.Type = NodeType.union
node.Value = [ expression, parse_union() ] node.Value = [ expression, parse_OpUnion(endToken) ]
return node return node
@ -333,14 +383,18 @@ func parse_OpUnion():
# Expression # Expression
# : EVERYTHING (Almost) # : EVERYTHING (Almost)
# ; # ;
func parse_Expression(end_token : Token): func parse_Expression(endToken : Token):
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.Expression node.Type = NodeType.expression
node.Value = [] node.Value = []
while NextToken != null && NextToken.Type != end_token : var sentinel = endToken != null
match NextToken.Type if sentinel :
sentinel = NextToken.Type == endToken.Type
while NextToken != null && !sentinel :
match NextToken.Type :
TokenType.str_start : TokenType.str_start :
node.Value.append( parse_StrStart() ) node.Value.append( parse_StrStart() )
@ -414,7 +468,7 @@ func parse_StrStart():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.strStart node.Type = NodeType.str_start
return node return node
@ -424,7 +478,7 @@ func parse_StrEnd():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.strEnd node.Type = NodeType.str_end
return node return node
@ -434,9 +488,46 @@ func parse_StrEnd():
# | glyph - glyph # | glyph - glyph
# ; # ;
func parse_Between(): func parse_Between():
var glyph = parse_Glyph() var glyph
if NextToken.Type != TokenType.between : match NextToken.Type :
TokenType.glyph :
glyph = parse_Glyph()
TokenType.glyph_digit :
glyph = parse_GlyphDigit()
TokenType.glyph_inline :
glyph = parse_GlyphInline()
TokenType.glyph_word :
glyph = parse_GlyphWord()
TokenType.glyph_ws :
glyph = parse_GlyphWhitespace()
TokenType.glyph_dash :
glyph = parse_GlyphDash()
TokenType.glyph_dot :
glyph = parse_GlyphDot()
TokenType.glyph_excla :
glyph = parse_GlyphExclamation()
TokenType.glyph_vertS :
glyph = parse_GlyphVertS()
TokenType.glyph_bPOpen :
glyph = parse_Glyph_bPOpen()
TokenType.glyph_bPClose :
glyph = parse_Glyph_bPClose()
TokenType.glyph_dQuote :
glyph = parse_Glyph_DQuote()
if NextToken.Type != TokenType.glyph_between :
return glyph return glyph
var \ var \
@ -449,7 +540,7 @@ func parse_Between():
if NextToken.Type == TokenType.glyph_between: if NextToken.Type == TokenType.glyph_between:
eat(TokenType.glyph_between) eat(TokenType.glyph_between)
if is_Glyph() if is_Glyph() :
node.Value.append( parse_Glyph() ) node.Value.append( parse_Glyph() )
return node return node
@ -464,7 +555,7 @@ func parse_CaptureGroup():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.capture node.Type = NodeType.capture
node.Value = parse_union(TokenType.expr_PEnd) node.Value = parse_OpUnion(TokenType.expr_PEnd)
eat(TokenType.expr_PEnd) eat(TokenType.expr_PEnd)
@ -476,13 +567,21 @@ func parse_CaptureGroup():
# : glyph # : glyph
# ; # ;
func parse_Glyph(): func parse_Glyph():
eat(TokenType.glyph)
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.glyph node.Type = NodeType.glyph
if NextToken.Value == "/" :
node.Value = "\\/"
elif is_RegExToken() :
node.Value = "\\" + NextToken.Value
elif is_GroupToken() :
node.Value = "\\\\" + NextToken.Value[1]
else :
node.Value = NextToken.Value node.Value = NextToken.Value
eat(TokenType.glyph)
return node return node
func parse_GlyphDigit(): func parse_GlyphDigit():
@ -501,7 +600,7 @@ func parse_GlyphInline():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.inline node.Type = NodeType.inline
node.Value = "\." node.Value = "."
return node return node
@ -550,8 +649,8 @@ func parse_GlyphExclamation():
var \ var \
node = ASTNode.new() node = ASTNode.new()
ndoe.Type = NodeType.glyph node.Type = NodeType.glyph
node.Value = "\\!" node.Value = "!"
return node return node
@ -591,7 +690,7 @@ func parse_Glyph_DQuote():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.glyph node.Type = NodeType.glyph
node.Value = "\\\"" node.Value = "\""
return node return node
@ -636,7 +735,7 @@ func parse_OpNot():
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.op_Not node.Type = NodeType.op_not
match NextToken.Type: match NextToken.Type:
TokenType.expr_PStart: TokenType.expr_PStart:
@ -651,7 +750,7 @@ func parse_OpNot():
TokenType.glyph_ws: TokenType.glyph_ws:
node.Value = parse_GlyphWhitespace() node.Value = parse_GlyphWhitespace()
TokenType.look: TokenType.op_look:
node.Value = parse_OpLook() node.Value = parse_OpLook()
TokenType.string: TokenType.string:
@ -673,19 +772,19 @@ func parse_OpRepeat():
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.repeat node.Type = NodeType.repeat
var range = null var vrange = null
var lazy = null var lazy = null
eat(TokenType.expr_PStart) eat(TokenType.expr_PStart)
range = parse_Between() vrange = parse_Between()
eat(TokenType.expr_PEnd) eat(TokenType.expr_PEnd)
if NextToken.Type == TokenType.lazy : if NextToken && NextToken.Type == TokenType.op_lazy :
lazy = parse_OpLazy(); lazy = parse_OpLazy();
node.Value = [ range, lazy ] node.Value = [ vrange, lazy ]
return node return node
@ -699,7 +798,7 @@ func parse_Backreference():
eat(TokenType.expr_PStart) eat(TokenType.expr_PStart)
var assertStrTmplt = "Error when parsing a backreference expression: Expected digit but got: {value}" var assertStrTmplt = "Error when parsing a backreference expression: Expected digit but got: {value}"
var assertStr = assertStrTmplt.format({"value" : NextToken.Value) var assertStr = assertStrTmplt.format({"value" : NextToken.Value})
assert(NextToken.Type == TokenType.glyph_digit, assertStr) assert(NextToken.Type == TokenType.glyph_digit, assertStr)
node.Value = NextToken.Value node.Value = NextToken.Value
@ -718,7 +817,15 @@ func parse_Set():
eat(TokenType.expr_PStart) eat(TokenType.expr_PStart)
while is_Glyph() : while is_Glyph() || NextToken.Type == TokenType.op_not :
if NextToken.Type == TokenType.op_not :
var possibleGlyph = parse_OpNot()
if is_Glyph(possibleGlyph.Value) :
node.Value.append( possibleGlyph )
continue
assert(true == false, "Bad ! operator in set.")
node.Value.append( parse_Between() ) node.Value.append( parse_Between() )
eat(TokenType.expr_PEnd) eat(TokenType.expr_PEnd)
@ -726,12 +833,19 @@ func parse_Set():
return node return node
func parse_String(): func parse_String():
var string = ""
var index = 1
while NextToken.Value[index] != "\"" :
string += NextToken.Value[index]
index += 1
var \ var \
node = ASTNode.new() node = ASTNode.new()
node.Type = NodeType.string node.Type = NodeType.string
node.Value = NextToken.Value node.Value = string
eat(TokenType.str) eat(TokenType.string)
return node return node
@ -746,17 +860,20 @@ var RegexResult : String
func transpile(expression : String): func transpile(expression : String):
init( expression ) init( expression )
NextToken = next_token() NextToken = next_Token()
ExprAST = parse_union() ExprAST = parse_OpUnion(null)
return transiple_Union(ExprAST) return transiple_Union(ExprAST)
func transiple_Union(node : ASTNode): func transiple_Union(node : ASTNode):
var result = String var result = ""
var expressionLeft = node.Value[0] var expressionLeft = node.Value
for entry in expressionLeft if node.Type == NodeType.union :
match entry : expressionLeft = node.Value[0]
for entry in expressionLeft :
match entry.Type :
NodeType.str_start: NodeType.str_start:
result += "^" result += "^"
NodeType.str_end: NodeType.str_end:
@ -775,13 +892,13 @@ func transiple_Union(node : ASTNode):
NodeType.glyph: NodeType.glyph:
result += entry.Value result += entry.Value
NodeType.glyph_inline: NodeType.inline:
result += entry.Value result += entry.Value
NodeType.glyph_digit: NodeType.digit:
result += entry.Value result += entry.Value
NodeType.glyph_word: NodeType.word:
result += entry.Value result += entry.Value
NodeType.glyph_ws: NodeType.whitespace:
result += entry.Value result += entry.Value
NodeType.string: NodeType.string:
@ -791,21 +908,12 @@ func transiple_Union(node : ASTNode):
result += transpile_OpNot(entry) result += transpile_OpNot(entry)
if node.Value[1] != null : if node.Type == NodeType.union && node.Value[1] != null :
result += "|" result += "|"
result += transiple_Union(node.Value[1]) result += transiple_Union(node.Value[1])
return result return result
func transpile_Between(node : ASTNode):
var \
result : "["
result += node.Value[0]
result += node.Value[1]
result += "]"
return result
func transpile_CaptureGroup(node : ASTNode, negate : bool): func transpile_CaptureGroup(node : ASTNode, negate : bool):
var result = "" var result = ""
@ -830,6 +938,8 @@ func transpile_LookAhead(node : ASTNode, negate : bool):
result += transiple_Union(node.Value) result += transiple_Union(node.Value)
result += ")" result += ")"
return result
func transpile_Backreference(node : ASTNode): func transpile_Backreference(node : ASTNode):
var \ var \
result = "\\" result = "\\"
@ -837,31 +947,31 @@ func transpile_Backreference(node : ASTNode):
return result return result
func transpile_Repeat(node : ASTNode) func transpile_Repeat(node : ASTNode):
var result = "" var result = ""
var range = node.Value[0] var vrange = node.Value[0]
var lazy = node.Value[1] var lazy = node.Value[1]
if range.Type == NodeType.between : if vrange.Type == NodeType.between :
if range.Value.length() == 1 : if vrange.Value.size() == 1 :
if range.Value[0] == "0" : if vrange.Value[0].Value == "0" :
result += "*" result += "*"
if range.Value[0] == "1" : if vrange.Value[0].Value == "1" :
result += "+" result += "+"
if range.Value.length() == 2 : if vrange.Value.size() == 2 :
if range.Vlaue[0] == "0" && range.Value[1] == "1" : if vrange.Value[0].Value == "0" && vrange.Value[1].Value == "1" :
result += "?" result += "?"
else : else :
result += "{" + range.Value[0] + "," + range.Value[1] + "}" result += "{" + vrange.Value[0].Value[0] + "," + vrange.Value[0].Value[1] + "}"
else : else :
result += "{" + range.Value[0] + "}" result += "{" + vrange.Value[0] + "}"
if lazy != null : if lazy != null :
result += "?" result += "?"
return result return result
func transpile_Set(node : ASTNode, negate : bool) func transpile_Set(node : ASTNode, negate : bool):
var result = "" var result = ""
if negate : if negate :
@ -870,6 +980,13 @@ func transpile_Set(node : ASTNode, negate : bool)
result += "[" result += "["
for entry in node.Value : for entry in node.Value :
if entry.Type == NodeType.op_not :
result += transpile_OpNot(entry)
elif entry.Type == NodeType.between :
result += entry.Value[0]
result += "-"
result += entry.Value[1]
else :
result += entry.Value result += entry.Value
result += "]" result += "]"
@ -898,18 +1015,18 @@ func transpile_OpNot(node : ASTNode):
var entry = node.Value var entry = node.Value
match entry : match entry.Type :
NodeType.capture: NodeType.capture:
result += transpile_CaptureGroup(entry, true) result += transpile_CaptureGroup(entry, true)
NodeType.glyph_digit: NodeType.digit:
result += "\\D" result += "\\D"
NodeType.glyph_word: NodeType.word:
result += "\\W" result += "\\W"
NodeType.glyph_ws: NodeType.whitespace:
result += "\\S" result += "\\S"
NodeType.glyph_look: NodeType.look:
result += transpile_LookAhead(entry, true) result += transpile_LookAhead(entry, true)
NodType.string: NodeType.string:
result += transpile_String(entry, true) result += transpile_String(entry, true)
NodeType.set: NodeType.set:
result += transpile_Set(entry, true) result += transpile_Set(entry, true)