SRegEx works!!!!

It's not a full-fledged transpiler, but it works at least on RDP's lexer. I can expand it on demand.
This commit is contained in:
Edward R. Gonzalez 2022-07-17 07:32:57 -04:00
parent 5ae405e284
commit 17c3b8fe36
6 changed files with 653 additions and 490 deletions

View File

@ -1,5 +1,7 @@
extends Object extends Object
var SRegEx = preload("res://RegM/Scripts/SRegex.gd").new()
class_name Lexer class_name Lexer
@ -118,17 +120,17 @@ const Spec : Dictionary = \
#Operators #Operators
# Logical # Logical
TokenType.op_Relational : "^[>\\<]=?", TokenType.op_Relational : "^[><]=?",
TokenType.op_Equality : "^[=!]=", TokenType.op_Equality : "^[=!]=",
TokenType.op_LAnd : "^&&", TokenType.op_LAnd : "^&&",
TokenType.op_LOr : "^\\|\\|", TokenType.op_LOr : "^\\|\\|",
TokenType.op_LNot : "^!", TokenType.op_LNot : "^!",
# Arithmetic # Arithmetic
TokenType.op_CAssign : "^[*\\/\\+\\-]=", TokenType.op_CAssign : "^[\\*\\/+-]=",
TokenType.op_Assign : "^=", TokenType.op_Assign : "^=",
TokenType.op_Additive : "^[+\\-]", TokenType.op_Additive : "^[+-]",
TokenType.op_Multiplicative : "^[*\\/]", TokenType.op_Multiplicative : "^[\\*\\/]",
# Literals # Literals
TokenType.literal_BTrue : "^\\btrue\\b", TokenType.literal_BTrue : "^\\btrue\\b",
@ -142,11 +144,11 @@ const Spec : Dictionary = \
TokenType.sym_Identifier : "^\\w+" TokenType.sym_Identifier : "^\\w+"
} }
const SSpec : Dictonary = const SSpec : Dictionary = \
{ {
# Comments # Comments
TokenType.cmt_SL : "start // inline.repeat()", TokenType.cmt_SL : "start // inline.repeat(0-)",
TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat.lazy */", TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat(0-).lazy */",
# Formatting # Formatting
TokenType.fmt_S : "start whitespace.repeat(1-)", TokenType.fmt_S : "start whitespace.repeat(1-)",
@ -176,8 +178,8 @@ const SSpec : Dictonary =
TokenType.def_Else : "start \"else\"", TokenType.def_Else : "start \"else\"",
# Expressions # Expressions
TokenType.expr_PStart : "start \(", TokenType.expr_PStart : "start \\(",
TokenType.expr_PEnd : "start \)", TokenType.expr_PEnd : "start \\)",
TokenType.expr_SBStart : "start [", TokenType.expr_SBStart : "start [",
TokenType.expr_SBEnd : "start ]", TokenType.expr_SBEnd : "start ]",
TokenType.expr_New : "start \"new\"", TokenType.expr_New : "start \"new\"",
@ -190,20 +192,20 @@ const SSpec : Dictonary =
TokenType.op_Relational : "start set(> <) =.repeat(0-1)", TokenType.op_Relational : "start set(> <) =.repeat(0-1)",
TokenType.op_Equality : "start set(= \\!) =", TokenType.op_Equality : "start set(= \\!) =",
TokenType.op_LAnd : "start &&", TokenType.op_LAnd : "start &&",
TokenType.op_LOr : "start \\\| \\\|", TokenType.op_LOr : "start \\| \\|",
TokenType.op_LNot : "start \\\!", TokenType.op_LNot : "start \\!",
# Arithmetic # Arithmetic
TokenType.op_CAssign : "start set(* / + -) =", TokenType.op_CAssign : "start set(* / + \\-) =",
TokenType.op_Assign : "start =", TokenType.op_Assign : "start =",
TokenType.op_Additive : "start set(+ -)", TokenType.op_Additive : "start set(+ \\-)",
TokenType.op_Multiplicative : "start set(* /)", TokenType.op_Multiplicative : "start set(* /)",
# Literals # Literals
TokenType.literal_BTrue : "start \"true\"", TokenType.literal_BTrue : "start \"true\"",
TokenType.literal_BFalse : "start \"false\"", TokenType.literal_BFalse : "start \"false\"",
TokenType.literal_Number : "start digit.repeat(1-)", TokenType.literal_Number : "start digit.repeat(1-)",
TokenType.literal_String : "start \\\" !set( \\\" ).repeat(1-) \\\" ", TokenType.literal_String : "start \\\" !set( \\\" ).repeat(0-) \\\"",
TokenType.literal_Null : "start \"null\"", TokenType.literal_Null : "start \"null\"",
# Symbols # Symbols
@ -227,10 +229,15 @@ func compile_regex():
for type in TokenType.values() : for type in TokenType.values() :
var \ var \
regex = RegEx.new() regex = RegEx.new()
regex.compile( Spec[type] )
var original = Spec[type]
var transpiled = SRegEx.transpile(SSpec[type])
assert(transpiled == original, "transpiled did not match original")
regex.compile( transpiled )
SpecRegex[type] = regex SpecRegex[type] = regex
# SpecRegex[type].compile( Spec[type] )
func init(programSrcText): func init(programSrcText):
SourceText = programSrcText SourceText = programSrcText

View File

@ -1,7 +1,7 @@
## Concatenation ## Concatenation
Regex : `/^AB$/` Regex : `/^AB$/`
Pseudo: `start str(AB) end` Pseudo: `start AB end`
Machine: Machine:
``` ```
@ -13,7 +13,7 @@ Submachine_A --epsilon--> Submachine_B
## Union ## Union
Regex : `/^A|B$/` Regex : `/^A|B$/`
Pseudo: `start glyph(A) | glyph(B) end` Pseudo: `start A | B end`
Machine: Machine:
``` ```
@ -27,7 +27,7 @@ Machine:
## Kleene Closure ## Kleene Closure
Regex : `/^A*$/` Regex : `/^A*$/`
Pseudo: `start glyph(A).repeating end` Pseudo: `start A.repeat(0-) end`
Machine: Machine:
``` ```

View File

@ -0,0 +1,30 @@
# Complex Machines
Ex:
RegEx : `/xy*|z/`
SRegEx: `x y.repeat(0-) | z`
## Decomposition
### Stage 1: Union
```
->o.start (o)
\epsilon-> o --xy*-> o -epsilon-->/
\epsilon-> o --z---> o -epsilon->/
```
### Stage 2: Concatenation
```
->o.start (o)
\epsilon -> o --x--> o -epsilon-> o --y* -epsilon->/
\epsilon -> o --z--> o -epsilon------------------>/
```
### Stage 3: Kleene Closure
```
|<------------<|
->epsi -> o -x-> o -epsi-> o -epsi-> o -y-> -epsi-> o ->epsi->|
| |>---------------------->| /
->o.start (o)
\epsi -> o -z-> o -epsi------------------------------------>/
```

View File

@ -0,0 +1,11 @@
# Syntactic Sugar
Ex:
RegEx : `/a+|[0-3]/`
SRegEx: `a.repeat(1-) | set(0-3)`
`A+` === `AA*` === `A.repeat(1-)` === `A A.repeat(0-)`
`A?` === `A|ε` === `A.repeat(0-1)`
`[0-9]` === `0|1|2|3|4|5|6|7|8|9` === `set(0-9)`

View File

@ -96,8 +96,6 @@ func union_pair(a : NFA, b : NFA):
return NFA.new(start, accepting) return NFA.new(start, accepting)
func test(): func test():
var state_1 = State.new(false) var state_1 = State.new(false)
var state_2 = State.new(true) var state_2 = State.new(true)

File diff suppressed because it is too large Load Diff