mirror of
https://github.com/Ed94/LangStudies.git
synced 2025-01-22 04:23:45 -08:00
SRegEx works!!!!
It's not a full-fledged transpiler, but it works at least on RDP's lexer. I can expand it on demand.
This commit is contained in:
parent
5ae405e284
commit
17c3b8fe36
@ -1,5 +1,7 @@
|
|||||||
extends Object
|
extends Object
|
||||||
|
|
||||||
|
var SRegEx = preload("res://RegM/Scripts/SRegex.gd").new()
|
||||||
|
|
||||||
|
|
||||||
class_name Lexer
|
class_name Lexer
|
||||||
|
|
||||||
@ -118,17 +120,17 @@ const Spec : Dictionary = \
|
|||||||
#Operators
|
#Operators
|
||||||
|
|
||||||
# Logical
|
# Logical
|
||||||
TokenType.op_Relational : "^[>\\<]=?",
|
TokenType.op_Relational : "^[><]=?",
|
||||||
TokenType.op_Equality : "^[=!]=",
|
TokenType.op_Equality : "^[=!]=",
|
||||||
TokenType.op_LAnd : "^&&",
|
TokenType.op_LAnd : "^&&",
|
||||||
TokenType.op_LOr : "^\\|\\|",
|
TokenType.op_LOr : "^\\|\\|",
|
||||||
TokenType.op_LNot : "^!",
|
TokenType.op_LNot : "^!",
|
||||||
|
|
||||||
# Arithmetic
|
# Arithmetic
|
||||||
TokenType.op_CAssign : "^[*\\/\\+\\-]=",
|
TokenType.op_CAssign : "^[\\*\\/+-]=",
|
||||||
TokenType.op_Assign : "^=",
|
TokenType.op_Assign : "^=",
|
||||||
TokenType.op_Additive : "^[+\\-]",
|
TokenType.op_Additive : "^[+-]",
|
||||||
TokenType.op_Multiplicative : "^[*\\/]",
|
TokenType.op_Multiplicative : "^[\\*\\/]",
|
||||||
|
|
||||||
# Literals
|
# Literals
|
||||||
TokenType.literal_BTrue : "^\\btrue\\b",
|
TokenType.literal_BTrue : "^\\btrue\\b",
|
||||||
@ -142,11 +144,11 @@ const Spec : Dictionary = \
|
|||||||
TokenType.sym_Identifier : "^\\w+"
|
TokenType.sym_Identifier : "^\\w+"
|
||||||
}
|
}
|
||||||
|
|
||||||
const SSpec : Dictonary =
|
const SSpec : Dictionary = \
|
||||||
{
|
{
|
||||||
# Comments
|
# Comments
|
||||||
TokenType.cmt_SL : "start // inline.repeat()",
|
TokenType.cmt_SL : "start // inline.repeat(0-)",
|
||||||
TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat.lazy */",
|
TokenType.cmt_ML : "start /* set(whitespace !whitespace).repeat(0-).lazy */",
|
||||||
|
|
||||||
# Formatting
|
# Formatting
|
||||||
TokenType.fmt_S : "start whitespace.repeat(1-)",
|
TokenType.fmt_S : "start whitespace.repeat(1-)",
|
||||||
@ -176,8 +178,8 @@ const SSpec : Dictonary =
|
|||||||
TokenType.def_Else : "start \"else\"",
|
TokenType.def_Else : "start \"else\"",
|
||||||
|
|
||||||
# Expressions
|
# Expressions
|
||||||
TokenType.expr_PStart : "start \(",
|
TokenType.expr_PStart : "start \\(",
|
||||||
TokenType.expr_PEnd : "start \)",
|
TokenType.expr_PEnd : "start \\)",
|
||||||
TokenType.expr_SBStart : "start [",
|
TokenType.expr_SBStart : "start [",
|
||||||
TokenType.expr_SBEnd : "start ]",
|
TokenType.expr_SBEnd : "start ]",
|
||||||
TokenType.expr_New : "start \"new\"",
|
TokenType.expr_New : "start \"new\"",
|
||||||
@ -190,20 +192,20 @@ const SSpec : Dictonary =
|
|||||||
TokenType.op_Relational : "start set(> <) =.repeat(0-1)",
|
TokenType.op_Relational : "start set(> <) =.repeat(0-1)",
|
||||||
TokenType.op_Equality : "start set(= \\!) =",
|
TokenType.op_Equality : "start set(= \\!) =",
|
||||||
TokenType.op_LAnd : "start &&",
|
TokenType.op_LAnd : "start &&",
|
||||||
TokenType.op_LOr : "start \\\| \\\|",
|
TokenType.op_LOr : "start \\| \\|",
|
||||||
TokenType.op_LNot : "start \\\!",
|
TokenType.op_LNot : "start \\!",
|
||||||
|
|
||||||
# Arithmetic
|
# Arithmetic
|
||||||
TokenType.op_CAssign : "start set(* / + -) =",
|
TokenType.op_CAssign : "start set(* / + \\-) =",
|
||||||
TokenType.op_Assign : "start =",
|
TokenType.op_Assign : "start =",
|
||||||
TokenType.op_Additive : "start set(+ -)",
|
TokenType.op_Additive : "start set(+ \\-)",
|
||||||
TokenType.op_Multiplicative : "start set(* /)",
|
TokenType.op_Multiplicative : "start set(* /)",
|
||||||
|
|
||||||
# Literals
|
# Literals
|
||||||
TokenType.literal_BTrue : "start \"true\"",
|
TokenType.literal_BTrue : "start \"true\"",
|
||||||
TokenType.literal_BFalse : "start \"false\"",
|
TokenType.literal_BFalse : "start \"false\"",
|
||||||
TokenType.literal_Number : "start digit.repeat(1-)",
|
TokenType.literal_Number : "start digit.repeat(1-)",
|
||||||
TokenType.literal_String : "start \\\" !set( \\\" ).repeat(1-) \\\" ",
|
TokenType.literal_String : "start \\\" !set( \\\" ).repeat(0-) \\\"",
|
||||||
TokenType.literal_Null : "start \"null\"",
|
TokenType.literal_Null : "start \"null\"",
|
||||||
|
|
||||||
# Symbols
|
# Symbols
|
||||||
@ -227,10 +229,15 @@ func compile_regex():
|
|||||||
for type in TokenType.values() :
|
for type in TokenType.values() :
|
||||||
var \
|
var \
|
||||||
regex = RegEx.new()
|
regex = RegEx.new()
|
||||||
regex.compile( Spec[type] )
|
|
||||||
|
var original = Spec[type]
|
||||||
|
var transpiled = SRegEx.transpile(SSpec[type])
|
||||||
|
|
||||||
|
assert(transpiled == original, "transpiled did not match original")
|
||||||
|
|
||||||
|
regex.compile( transpiled )
|
||||||
|
|
||||||
SpecRegex[type] = regex
|
SpecRegex[type] = regex
|
||||||
# SpecRegex[type].compile( Spec[type] )
|
|
||||||
|
|
||||||
func init(programSrcText):
|
func init(programSrcText):
|
||||||
SourceText = programSrcText
|
SourceText = programSrcText
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
## Concatenation
|
## Concatenation
|
||||||
|
|
||||||
Regex : `/^AB$/`
|
Regex : `/^AB$/`
|
||||||
Psuedo: `start str(AB) end`
|
Pseudo: `start AB end`
|
||||||
|
|
||||||
Machine:
|
Machine:
|
||||||
```
|
```
|
||||||
@ -13,7 +13,7 @@ Submachine_A --epsilon--> Submachine_B
|
|||||||
## Union
|
## Union
|
||||||
|
|
||||||
Regex : `/^A|B$/`
|
Regex : `/^A|B$/`
|
||||||
Psuedo: `start glyph(A) | glyph(B) end`
|
Pseudo: `start A | B end`
|
||||||
|
|
||||||
Machine:
|
Machine:
|
||||||
```
|
```
|
||||||
@ -27,7 +27,7 @@ Machine:
|
|||||||
## Kleene Closure
|
## Kleene Closure
|
||||||
|
|
||||||
Regex : `/^A*$/`
|
Regex : `/^A*$/`
|
||||||
Psuedo: `start glyph(A).repeating end`
|
Pseudo: `start A.repeat(0-) end`
|
||||||
|
|
||||||
Machine:
|
Machine:
|
||||||
```
|
```
|
||||||
|
30
App/RegM/Lectures/Lecture.8.Notes.md
Normal file
30
App/RegM/Lectures/Lecture.8.Notes.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Complex Machines
|
||||||
|
|
||||||
|
Ex:
|
||||||
|
|
||||||
|
RegEx : `/xy*|z/`
|
||||||
|
SRegEx: `x y.repeat(0-) | z`
|
||||||
|
|
||||||
|
## Decomposition
|
||||||
|
|
||||||
|
### Stage 1: Union
|
||||||
|
```
|
||||||
|
->o.start (o)
|
||||||
|
\epsilon-> o --xy*-> o -epsilon-->/
|
||||||
|
\epsilon-> o --z---> o -epsilon->/
|
||||||
|
```
|
||||||
|
### Stage 2: Concatenation
|
||||||
|
```
|
||||||
|
->o.start (o)
|
||||||
|
\epsilon -> o --x--> o -epsilon-> o --y* -epsilon->/
|
||||||
|
\epsilon -> o --z--> o -epsilon------------------>/
|
||||||
|
```
|
||||||
|
### Stage 3: Kleene Closure
|
||||||
|
```
|
||||||
|
|<------------<|
|
||||||
|
->epsi -> o -x-> o -epsi-> o -epsi-> o -y-> -epsi-> o ->epsi->|
|
||||||
|
| |>---------------------->| /
|
||||||
|
->o.start (o)
|
||||||
|
\epsi -> o -z-> o -epsi------------------------------------>/
|
||||||
|
```
|
||||||
|
|
11
App/RegM/Lectures/Lecture.9.Notes.md
Normal file
11
App/RegM/Lectures/Lecture.9.Notes.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Syntactic Sugar
|
||||||
|
|
||||||
|
Ex:
|
||||||
|
|
||||||
|
RegEx : `/a+|[0-3]/`
|
||||||
|
SRegEx: `a.repeat(1-) | set(0-3)`
|
||||||
|
|
||||||
|
`A+` === `AA*` === `A.repeat(1-)` === `AA.repeat(0-)`
|
||||||
|
`A?` === `A|ε` === `A.repeat(0-1)`
|
||||||
|
|
||||||
|
`[0-9]` === `0|1|2|3|4|5|6|7|8|9` === `set(0-9)`
|
@ -96,8 +96,6 @@ func union_pair(a : NFA, b : NFA):
|
|||||||
|
|
||||||
return NFA.new(start, accepting)
|
return NFA.new(start, accepting)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func test():
|
func test():
|
||||||
var state_1 = State.new(false)
|
var state_1 = State.new(false)
|
||||||
var state_2 = State.new(true)
|
var state_2 = State.new(true)
|
||||||
|
@ -12,27 +12,30 @@ extends Object
|
|||||||
|
|
||||||
# Lexer
|
# Lexer
|
||||||
|
|
||||||
const TokenType = \
|
const TokenType : Dictionary = \
|
||||||
{
|
{
|
||||||
fmt_S = "Formatting",
|
fmt_S = "Formatting",
|
||||||
|
|
||||||
|
str_start = "String Start",
|
||||||
|
str_end = "String End",
|
||||||
|
|
||||||
|
glyph_bPOpen = "\\(",
|
||||||
|
glyph_bPClose = "\\)",
|
||||||
|
|
||||||
expr_PStart = "Parenthesis Start",
|
expr_PStart = "Parenthesis Start",
|
||||||
expr_PEnd = "Parenthesis End",
|
expr_PEnd = "Parenthesis End",
|
||||||
|
|
||||||
glyph = "Glyph",
|
|
||||||
glyph_between = "Glyphs Between",
|
glyph_between = "Glyphs Between",
|
||||||
glyph_digit = "Digit",
|
glyph_digit = "Digit",
|
||||||
glyph_inline = "inline",
|
glyph_inline = "inline",
|
||||||
glyph_word = "Word",
|
glyph_word = "Word",
|
||||||
glyph_ws = "Whitespace",
|
glyph_ws = "Whitespace",
|
||||||
|
|
||||||
glyph_dash = "-"
|
glyph_dash = "-",
|
||||||
glyph_dot = ". dot",
|
glyph_dot = ". dot",
|
||||||
glyph_excla = "! Mark",
|
glyph_excla = "! Mark",
|
||||||
glyph_vertS = "|",
|
glyph_vertS = "\\|",
|
||||||
glyph_bPOpen = "(",
|
glyph_dQuote = "\"",
|
||||||
glyph_bPClose = ")",
|
|
||||||
glyph_dQuote = "\""
|
|
||||||
|
|
||||||
op_lazy = "Lazy Operator",
|
op_lazy = "Lazy Operator",
|
||||||
op_look = "Lookahead",
|
op_look = "Lookahead",
|
||||||
@ -43,47 +46,49 @@ const TokenType = \
|
|||||||
ref = "Backreference Group",
|
ref = "Backreference Group",
|
||||||
set = "Set",
|
set = "Set",
|
||||||
|
|
||||||
str_start = "String Start",
|
string = "String",
|
||||||
str_end = "String End",
|
|
||||||
string = "String"
|
glyph = "Glyph",
|
||||||
}
|
}
|
||||||
|
|
||||||
const TokenSpec = \
|
const Spec : Dictionary = \
|
||||||
{
|
{
|
||||||
TokenType.fmt_S = "^\\s",
|
TokenType.fmt_S : "^\\s",
|
||||||
|
|
||||||
TokenType.string = "^\"[^\"]*\"",
|
TokenType.str_start : "^\\bstart\\b",
|
||||||
|
TokenType.str_end : "^\\bend\\b",
|
||||||
|
|
||||||
TokenType.expr_PStart = "^\\(",
|
TokenType.string : "^\"[^\"]*\"",
|
||||||
TokenType.expr_PEnd = "^\\)",
|
|
||||||
|
|
||||||
TokenType.glyph_between = "^\\-"
|
TokenType.glyph_bPOpen : "^\\\\\\(",
|
||||||
TokenType.glyph_digit = "^\\bdigit\\b",
|
TokenType.glyph_bPClose : "^\\\\\\)",
|
||||||
TokenType.glyph_inline = "^\\binline\\b",
|
|
||||||
TokenType.glyph_word = "^\\bword\\b",
|
|
||||||
TokenType.glyph_ws = "^\\bwhitespace\\b",
|
|
||||||
|
|
||||||
TokenType.op_lazy = "^\\b.lazy\\b",
|
TokenType.expr_PStart : "^\\(",
|
||||||
TokenType.op_repeat = "^\\b\\.repeat\\b",
|
TokenType.expr_PEnd : "^\\)",
|
||||||
|
|
||||||
TokenType.glyph_dash = "^\\\-"
|
TokenType.glyph_between : "^\\-",
|
||||||
TokenType.glyph_dot = "^\\\.",
|
TokenType.glyph_digit : "^\\bdigit\\b",
|
||||||
TokenType.glyph_excla = "^\\\!",
|
TokenType.glyph_inline : "^\\binline\\b",
|
||||||
TokenType.glyph_vertS = "^\\\|",
|
TokenType.glyph_word : "^\\bword\\b",
|
||||||
TokenType.glyph_bPOpen = "^\\\(",
|
TokenType.glyph_ws : "^\\bwhitespace\\b",
|
||||||
TokenType.glyph_bPClose = "^\\\)",
|
|
||||||
TokenType.glpyh_dQuote = "^\\\"",
|
|
||||||
|
|
||||||
TokenType.op_look = "^\\blook\\b",
|
TokenType.op_lazy : "^\\.\\blazy\\b",
|
||||||
TokenType.op_not = "^\\!",
|
TokenType.op_repeat : "^\\.\\brepeat\\b",
|
||||||
TokenType.op_union = "^\\|",
|
|
||||||
|
|
||||||
TokenType.ref = "^\\bbackref\\b",
|
TokenType.glyph_dash : "^\\\\\\-",
|
||||||
TokenType.set = "^\\bset\\b",
|
TokenType.glyph_dot : "^\\\\\\.",
|
||||||
TokenType.str_start = "^\\bstart\\b",
|
TokenType.glyph_excla : "^\\\\\\!",
|
||||||
TokenType.str_end = "^\\bend\\b",
|
TokenType.glyph_vertS : "^\\\\\\|",
|
||||||
|
TokenType.glyph_dQuote : "^\\\\\"",
|
||||||
|
|
||||||
TokenType.glyph = "^[\\w\\d]"
|
TokenType.op_look : "^\\blook\\b",
|
||||||
|
TokenType.op_not : "^\\!",
|
||||||
|
TokenType.op_union : "^\\|",
|
||||||
|
|
||||||
|
TokenType.ref : "^\\bbackref\\b",
|
||||||
|
TokenType.set : "^\\bset\\b",
|
||||||
|
|
||||||
|
TokenType.glyph : "^[^\\s]"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -103,6 +108,7 @@ func compile_regex():
|
|||||||
for type in TokenType.values() :
|
for type in TokenType.values() :
|
||||||
var \
|
var \
|
||||||
regex = RegEx.new()
|
regex = RegEx.new()
|
||||||
|
var _spec = Spec[type]
|
||||||
regex.compile( Spec[type] )
|
regex.compile( Spec[type] )
|
||||||
|
|
||||||
SpecRegex[type] = regex
|
SpecRegex[type] = regex
|
||||||
@ -161,7 +167,7 @@ func tokenize():
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
if error :
|
if error :
|
||||||
var assertStrTmplt = "next_token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
|
var assertStrTmplt = "next_Token: Source text not understood by tokenizer at Cursor pos: {value} -: {txt}"
|
||||||
var assertStr = assertStrTmplt.format({"value" : Cursor, "txt" : srcLeft})
|
var assertStr = assertStrTmplt.format({"value" : Cursor, "txt" : srcLeft})
|
||||||
assert(true != true, assertStr)
|
assert(true != true, assertStr)
|
||||||
return
|
return
|
||||||
@ -245,23 +251,23 @@ const NodeType = \
|
|||||||
{
|
{
|
||||||
expression = "Expression",
|
expression = "Expression",
|
||||||
|
|
||||||
between = "Glyphs Between Set"
|
between = "Glyphs Between Set",
|
||||||
capture = "Capture Group",
|
capture = "Capture Group",
|
||||||
lazy = "Lazy",
|
lazy = "Lazy",
|
||||||
look = "Lookahead",
|
look = "Lookahead",
|
||||||
|
op_not = "Not Operator",
|
||||||
ref = "Backreference Group",
|
ref = "Backreference Group",
|
||||||
repeat = "Repeat",
|
repeat = "Repeat",
|
||||||
set = "Set",
|
set = "Set",
|
||||||
union = "Union",
|
union = "Union",
|
||||||
|
|
||||||
inline = "Inline",
|
|
||||||
digit = "Digit",
|
digit = "Digit",
|
||||||
inline = "Any Inline"
|
inline = "Any Inline",
|
||||||
word = "Word",
|
word = "Word",
|
||||||
whitespace = "Whitespace",
|
whitespace = "Whitespace",
|
||||||
string = "String"
|
string = "String",
|
||||||
strStart = "String Start",
|
str_start = "String Start",
|
||||||
strEnd = "String End",
|
str_end = "String End",
|
||||||
|
|
||||||
glyph = "Glyph",
|
glyph = "Glyph",
|
||||||
}
|
}
|
||||||
@ -286,19 +292,30 @@ func eat(tokenType):
|
|||||||
|
|
||||||
return currToken
|
return currToken
|
||||||
|
|
||||||
func is_Glyph() :
|
func is_Glyph(glyph = NextToken) :
|
||||||
match NextToken:
|
match glyph.Type:
|
||||||
TokenType.glyph:
|
TokenType.glyph :
|
||||||
TokenType.glyph_digit:
|
return true
|
||||||
TokenType.glyph_inline:
|
TokenType.glyph_digit :
|
||||||
TokenType.glyph_word:
|
return true
|
||||||
TokenType.glyph_ws:
|
TokenType.glyph_inline :
|
||||||
|
return true
|
||||||
|
TokenType.glyph_word :
|
||||||
|
return true
|
||||||
|
TokenType.glyph_ws :
|
||||||
|
return true
|
||||||
TokenType.glyph_dash :
|
TokenType.glyph_dash :
|
||||||
|
return true
|
||||||
TokenType.glyph_dot :
|
TokenType.glyph_dot :
|
||||||
|
return true
|
||||||
TokenType.glyph_excla :
|
TokenType.glyph_excla :
|
||||||
|
return true
|
||||||
TokenType.glyph_vertS :
|
TokenType.glyph_vertS :
|
||||||
|
return true
|
||||||
TokenType.glyph_bPOpen :
|
TokenType.glyph_bPOpen :
|
||||||
|
return true
|
||||||
TokenType.glyph_bPClose :
|
TokenType.glyph_bPClose :
|
||||||
|
return true
|
||||||
TokenType.glyph_dQuote :
|
TokenType.glyph_dQuote :
|
||||||
return true
|
return true
|
||||||
|
|
||||||
@ -307,6 +324,39 @@ func is_Glyph() :
|
|||||||
func is_GlyphOrStr() :
|
func is_GlyphOrStr() :
|
||||||
return is_Glyph() || NextToken.Type == TokenType.string
|
return is_Glyph() || NextToken.Type == TokenType.string
|
||||||
|
|
||||||
|
func is_GroupToken() :
|
||||||
|
if NextToken.Value.length() == 2 && NextToken.Value[0] == "\\" :
|
||||||
|
match NextToken.Value[1] :
|
||||||
|
"0" : continue
|
||||||
|
"1" : continue
|
||||||
|
"2" : continue
|
||||||
|
"3" : continue
|
||||||
|
"4" : continue
|
||||||
|
"5" : continue
|
||||||
|
"6" : continue
|
||||||
|
"7" : continue
|
||||||
|
"8" : continue
|
||||||
|
"9" : continue
|
||||||
|
_:
|
||||||
|
return true
|
||||||
|
return false
|
||||||
|
|
||||||
|
func is_RegExToken() :
|
||||||
|
match NextToken.Value :
|
||||||
|
"^" :
|
||||||
|
return true
|
||||||
|
"$" :
|
||||||
|
return true
|
||||||
|
"*" :
|
||||||
|
return true
|
||||||
|
"[" :
|
||||||
|
return true
|
||||||
|
"]" :
|
||||||
|
return true
|
||||||
|
"?" :
|
||||||
|
return true
|
||||||
|
return
|
||||||
|
|
||||||
# --------------------------------------------------------------------- HELPERS
|
# --------------------------------------------------------------------- HELPERS
|
||||||
|
|
||||||
# > Union
|
# > Union
|
||||||
@ -314,10 +364,10 @@ func is_GlyphOrStr() :
|
|||||||
# : expression | expression ..
|
# : expression | expression ..
|
||||||
# | expression
|
# | expression
|
||||||
# ;
|
# ;
|
||||||
func parse_OpUnion():
|
func parse_OpUnion(endToken : Token):
|
||||||
var expression = parse_Expression(TokenType.union)
|
var expression = parse_Expression(endToken)
|
||||||
|
|
||||||
if NextToken.Type != TokenType.union :
|
if NextToken == null || NextToken.Type != TokenType.union :
|
||||||
return expression
|
return expression
|
||||||
|
|
||||||
eat(TokenType.op_union)
|
eat(TokenType.op_union)
|
||||||
@ -325,7 +375,7 @@ func parse_OpUnion():
|
|||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.union
|
node.Type = NodeType.union
|
||||||
node.Value = [ expression, parse_union() ]
|
node.Value = [ expression, parse_OpUnion(endToken) ]
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -333,14 +383,18 @@ func parse_OpUnion():
|
|||||||
# Expression
|
# Expression
|
||||||
# : EVERYTHING (Almost)
|
# : EVERYTHING (Almost)
|
||||||
# ;
|
# ;
|
||||||
func parse_Expression(end_token : Token):
|
func parse_Expression(endToken : Token):
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.Expression
|
node.Type = NodeType.expression
|
||||||
node.Value = []
|
node.Value = []
|
||||||
|
|
||||||
while NextToken != null && NextToken.Type != end_token :
|
var sentinel = endToken != null
|
||||||
match NextToken.Type
|
if sentinel :
|
||||||
|
sentinel = NextToken.Type == endToken.Type
|
||||||
|
|
||||||
|
while NextToken != null && !sentinel :
|
||||||
|
match NextToken.Type :
|
||||||
TokenType.str_start :
|
TokenType.str_start :
|
||||||
node.Value.append( parse_StrStart() )
|
node.Value.append( parse_StrStart() )
|
||||||
|
|
||||||
@ -414,7 +468,7 @@ func parse_StrStart():
|
|||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.strStart
|
node.Type = NodeType.str_start
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -424,7 +478,7 @@ func parse_StrEnd():
|
|||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.strEnd
|
node.Type = NodeType.str_end
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -434,9 +488,46 @@ func parse_StrEnd():
|
|||||||
# | glyph - glyph
|
# | glyph - glyph
|
||||||
# ;
|
# ;
|
||||||
func parse_Between():
|
func parse_Between():
|
||||||
var glyph = parse_Glyph()
|
var glyph
|
||||||
|
|
||||||
if NextToken.Type != TokenType.between :
|
match NextToken.Type :
|
||||||
|
TokenType.glyph :
|
||||||
|
glyph = parse_Glyph()
|
||||||
|
|
||||||
|
TokenType.glyph_digit :
|
||||||
|
glyph = parse_GlyphDigit()
|
||||||
|
|
||||||
|
TokenType.glyph_inline :
|
||||||
|
glyph = parse_GlyphInline()
|
||||||
|
|
||||||
|
TokenType.glyph_word :
|
||||||
|
glyph = parse_GlyphWord()
|
||||||
|
|
||||||
|
TokenType.glyph_ws :
|
||||||
|
glyph = parse_GlyphWhitespace()
|
||||||
|
|
||||||
|
TokenType.glyph_dash :
|
||||||
|
glyph = parse_GlyphDash()
|
||||||
|
|
||||||
|
TokenType.glyph_dot :
|
||||||
|
glyph = parse_GlyphDot()
|
||||||
|
|
||||||
|
TokenType.glyph_excla :
|
||||||
|
glyph = parse_GlyphExclamation()
|
||||||
|
|
||||||
|
TokenType.glyph_vertS :
|
||||||
|
glyph = parse_GlyphVertS()
|
||||||
|
|
||||||
|
TokenType.glyph_bPOpen :
|
||||||
|
glyph = parse_Glyph_bPOpen()
|
||||||
|
|
||||||
|
TokenType.glyph_bPClose :
|
||||||
|
glyph = parse_Glyph_bPClose()
|
||||||
|
|
||||||
|
TokenType.glyph_dQuote :
|
||||||
|
glyph = parse_Glyph_DQuote()
|
||||||
|
|
||||||
|
if NextToken.Type != TokenType.glyph_between :
|
||||||
return glyph
|
return glyph
|
||||||
|
|
||||||
var \
|
var \
|
||||||
@ -449,7 +540,7 @@ func parse_Between():
|
|||||||
if NextToken.Type == TokenType.glyph_between:
|
if NextToken.Type == TokenType.glyph_between:
|
||||||
eat(TokenType.glyph_between)
|
eat(TokenType.glyph_between)
|
||||||
|
|
||||||
if is_Glyph()
|
if is_Glyph() :
|
||||||
node.Value.append( parse_Glyph() )
|
node.Value.append( parse_Glyph() )
|
||||||
|
|
||||||
return node
|
return node
|
||||||
@ -464,7 +555,7 @@ func parse_CaptureGroup():
|
|||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.capture
|
node.Type = NodeType.capture
|
||||||
node.Value = parse_union(TokenType.expr_PEnd)
|
node.Value = parse_OpUnion(TokenType.expr_PEnd)
|
||||||
|
|
||||||
eat(TokenType.expr_PEnd)
|
eat(TokenType.expr_PEnd)
|
||||||
|
|
||||||
@ -476,13 +567,21 @@ func parse_CaptureGroup():
|
|||||||
# : glyph
|
# : glyph
|
||||||
# ;
|
# ;
|
||||||
func parse_Glyph():
|
func parse_Glyph():
|
||||||
eat(TokenType.glyph)
|
|
||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.glyph
|
node.Type = NodeType.glyph
|
||||||
|
|
||||||
|
if NextToken.Value == "/" :
|
||||||
|
node.Value = "\\/"
|
||||||
|
elif is_RegExToken() :
|
||||||
|
node.Value = "\\" + NextToken.Value
|
||||||
|
elif is_GroupToken() :
|
||||||
|
node.Value = "\\\\" + NextToken.Value[1]
|
||||||
|
else :
|
||||||
node.Value = NextToken.Value
|
node.Value = NextToken.Value
|
||||||
|
|
||||||
|
eat(TokenType.glyph)
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
func parse_GlyphDigit():
|
func parse_GlyphDigit():
|
||||||
@ -501,7 +600,7 @@ func parse_GlyphInline():
|
|||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.inline
|
node.Type = NodeType.inline
|
||||||
node.Value = "\."
|
node.Value = "."
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -550,8 +649,8 @@ func parse_GlyphExclamation():
|
|||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
ndoe.Type = NodeType.glyph
|
node.Type = NodeType.glyph
|
||||||
node.Value = "\\!"
|
node.Value = "!"
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -591,7 +690,7 @@ func parse_Glyph_DQuote():
|
|||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.glyph
|
node.Type = NodeType.glyph
|
||||||
node.Value = "\\\""
|
node.Value = "\""
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -636,7 +735,7 @@ func parse_OpNot():
|
|||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.op_Not
|
node.Type = NodeType.op_not
|
||||||
|
|
||||||
match NextToken.Type:
|
match NextToken.Type:
|
||||||
TokenType.expr_PStart:
|
TokenType.expr_PStart:
|
||||||
@ -651,7 +750,7 @@ func parse_OpNot():
|
|||||||
TokenType.glyph_ws:
|
TokenType.glyph_ws:
|
||||||
node.Value = parse_GlyphWhitespace()
|
node.Value = parse_GlyphWhitespace()
|
||||||
|
|
||||||
TokenType.look:
|
TokenType.op_look:
|
||||||
node.Value = parse_OpLook()
|
node.Value = parse_OpLook()
|
||||||
|
|
||||||
TokenType.string:
|
TokenType.string:
|
||||||
@ -673,19 +772,19 @@ func parse_OpRepeat():
|
|||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.repeat
|
node.Type = NodeType.repeat
|
||||||
|
|
||||||
var range = null
|
var vrange = null
|
||||||
var lazy = null
|
var lazy = null
|
||||||
|
|
||||||
eat(TokenType.expr_PStart)
|
eat(TokenType.expr_PStart)
|
||||||
|
|
||||||
range = parse_Between()
|
vrange = parse_Between()
|
||||||
|
|
||||||
eat(TokenType.expr_PEnd)
|
eat(TokenType.expr_PEnd)
|
||||||
|
|
||||||
if NextToken.Type == TokenType.lazy :
|
if NextToken && NextToken.Type == TokenType.op_lazy :
|
||||||
lazy = parse_OpLazy();
|
lazy = parse_OpLazy();
|
||||||
|
|
||||||
node.Value = [ range, lazy ]
|
node.Value = [ vrange, lazy ]
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -699,7 +798,7 @@ func parse_Backreference():
|
|||||||
eat(TokenType.expr_PStart)
|
eat(TokenType.expr_PStart)
|
||||||
|
|
||||||
var assertStrTmplt = "Error when parsing a backreference expression: Expected digit but got: {value}"
|
var assertStrTmplt = "Error when parsing a backreference expression: Expected digit but got: {value}"
|
||||||
var assertStr = assertStrTmplt.format({"value" : NextToken.Value)
|
var assertStr = assertStrTmplt.format({"value" : NextToken.Value})
|
||||||
|
|
||||||
assert(NextToken.Type == TokenType.glyph_digit, assertStr)
|
assert(NextToken.Type == TokenType.glyph_digit, assertStr)
|
||||||
node.Value = NextToken.Value
|
node.Value = NextToken.Value
|
||||||
@ -718,7 +817,15 @@ func parse_Set():
|
|||||||
|
|
||||||
eat(TokenType.expr_PStart)
|
eat(TokenType.expr_PStart)
|
||||||
|
|
||||||
while is_Glyph() :
|
while is_Glyph() || NextToken.Type == TokenType.op_not :
|
||||||
|
if NextToken.Type == TokenType.op_not :
|
||||||
|
var possibleGlyph = parse_OpNot()
|
||||||
|
if is_Glyph(possibleGlyph.Value) :
|
||||||
|
node.Value.append( possibleGlyph )
|
||||||
|
continue
|
||||||
|
|
||||||
|
assert(true == false, "Bad ! operator in set.")
|
||||||
|
|
||||||
node.Value.append( parse_Between() )
|
node.Value.append( parse_Between() )
|
||||||
|
|
||||||
eat(TokenType.expr_PEnd)
|
eat(TokenType.expr_PEnd)
|
||||||
@ -726,12 +833,19 @@ func parse_Set():
|
|||||||
return node
|
return node
|
||||||
|
|
||||||
func parse_String():
|
func parse_String():
|
||||||
|
var string = ""
|
||||||
|
|
||||||
|
var index = 1
|
||||||
|
while NextToken.Value[index] != "\"" :
|
||||||
|
string += NextToken.Value[index]
|
||||||
|
index += 1
|
||||||
|
|
||||||
var \
|
var \
|
||||||
node = ASTNode.new()
|
node = ASTNode.new()
|
||||||
node.Type = NodeType.string
|
node.Type = NodeType.string
|
||||||
node.Value = NextToken.Value
|
node.Value = string
|
||||||
|
|
||||||
eat(TokenType.str)
|
eat(TokenType.string)
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
@ -746,17 +860,20 @@ var RegexResult : String
|
|||||||
func transpile(expression : String):
|
func transpile(expression : String):
|
||||||
init( expression )
|
init( expression )
|
||||||
|
|
||||||
NextToken = next_token()
|
NextToken = next_Token()
|
||||||
ExprAST = parse_union()
|
ExprAST = parse_OpUnion(null)
|
||||||
|
|
||||||
return transiple_Union(ExprAST)
|
return transiple_Union(ExprAST)
|
||||||
|
|
||||||
func transiple_Union(node : ASTNode):
|
func transiple_Union(node : ASTNode):
|
||||||
var result = String
|
var result = ""
|
||||||
var expressionLeft = node.Value[0]
|
var expressionLeft = node.Value
|
||||||
|
|
||||||
for entry in expressionLeft
|
if node.Type == NodeType.union :
|
||||||
match entry :
|
expressionLeft = node.Value[0]
|
||||||
|
|
||||||
|
for entry in expressionLeft :
|
||||||
|
match entry.Type :
|
||||||
NodeType.str_start:
|
NodeType.str_start:
|
||||||
result += "^"
|
result += "^"
|
||||||
NodeType.str_end:
|
NodeType.str_end:
|
||||||
@ -775,13 +892,13 @@ func transiple_Union(node : ASTNode):
|
|||||||
|
|
||||||
NodeType.glyph:
|
NodeType.glyph:
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
NodeType.glyph_inline:
|
NodeType.inline:
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
NodeType.glyph_digit:
|
NodeType.digit:
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
NodeType.glyph_word:
|
NodeType.word:
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
NodeType.glyph_ws:
|
NodeType.whitespace:
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
|
|
||||||
NodeType.string:
|
NodeType.string:
|
||||||
@ -791,21 +908,12 @@ func transiple_Union(node : ASTNode):
|
|||||||
result += transpile_OpNot(entry)
|
result += transpile_OpNot(entry)
|
||||||
|
|
||||||
|
|
||||||
if node.Value[1] != null :
|
if node.Type == NodeType.union && node.Value[1] != null :
|
||||||
result += "|"
|
result += "|"
|
||||||
result += transiple_Union(node.Value[1])
|
result += transiple_Union(node.Value[1])
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
func transpile_Between(node : ASTNode):
|
|
||||||
var \
|
|
||||||
result : "["
|
|
||||||
result += node.Value[0]
|
|
||||||
result += node.Value[1]
|
|
||||||
result += "]"
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
func transpile_CaptureGroup(node : ASTNode, negate : bool):
|
func transpile_CaptureGroup(node : ASTNode, negate : bool):
|
||||||
var result = ""
|
var result = ""
|
||||||
|
|
||||||
@ -830,6 +938,8 @@ func transpile_LookAhead(node : ASTNode, negate : bool):
|
|||||||
result += transiple_Union(node.Value)
|
result += transiple_Union(node.Value)
|
||||||
result += ")"
|
result += ")"
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
func transpile_Backreference(node : ASTNode):
|
func transpile_Backreference(node : ASTNode):
|
||||||
var \
|
var \
|
||||||
result = "\\"
|
result = "\\"
|
||||||
@ -837,31 +947,31 @@ func transpile_Backreference(node : ASTNode):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
func transpile_Repeat(node : ASTNode)
|
func transpile_Repeat(node : ASTNode):
|
||||||
var result = ""
|
var result = ""
|
||||||
var range = node.Value[0]
|
var vrange = node.Value[0]
|
||||||
var lazy = node.Value[1]
|
var lazy = node.Value[1]
|
||||||
|
|
||||||
if range.Type == NodeType.between :
|
if vrange.Type == NodeType.between :
|
||||||
if range.Value.length() == 1 :
|
if vrange.Value.size() == 1 :
|
||||||
if range.Value[0] == "0" :
|
if vrange.Value[0].Value == "0" :
|
||||||
result += "*"
|
result += "*"
|
||||||
if range.Value[0] == "1" :
|
if vrange.Value[0].Value == "1" :
|
||||||
result += "+"
|
result += "+"
|
||||||
if range.Value.length() == 2 :
|
if vrange.Value.size() == 2 :
|
||||||
if range.Vlaue[0] == "0" && range.Value[1] == "1" :
|
if vrange.Value[0].Value == "0" && vrange.Value[1].Value == "1" :
|
||||||
result += "?"
|
result += "?"
|
||||||
else :
|
else :
|
||||||
result += "{" + range.Value[0] + "," + range.Value[1] + "}"
|
result += "{" + vrange.Value[0].Value[0] + "," + vrange.Value[0].Value[1] + "}"
|
||||||
else :
|
else :
|
||||||
result += "{" + range.Value[0] + "}"
|
result += "{" + vrange.Value[0] + "}"
|
||||||
|
|
||||||
if lazy != null :
|
if lazy != null :
|
||||||
result += "?"
|
result += "?"
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
func transpile_Set(node : ASTNode, negate : bool)
|
func transpile_Set(node : ASTNode, negate : bool):
|
||||||
var result = ""
|
var result = ""
|
||||||
|
|
||||||
if negate :
|
if negate :
|
||||||
@ -870,6 +980,13 @@ func transpile_Set(node : ASTNode, negate : bool)
|
|||||||
result += "["
|
result += "["
|
||||||
|
|
||||||
for entry in node.Value :
|
for entry in node.Value :
|
||||||
|
if entry.Type == NodeType.op_not :
|
||||||
|
result += transpile_OpNot(entry)
|
||||||
|
elif entry.Type == NodeType.between :
|
||||||
|
result += entry.Value[0]
|
||||||
|
result += "-"
|
||||||
|
result += entry.Value[1]
|
||||||
|
else :
|
||||||
result += entry.Value
|
result += entry.Value
|
||||||
|
|
||||||
result += "]"
|
result += "]"
|
||||||
@ -898,18 +1015,18 @@ func transpile_OpNot(node : ASTNode):
|
|||||||
|
|
||||||
var entry = node.Value
|
var entry = node.Value
|
||||||
|
|
||||||
match entry :
|
match entry.Type :
|
||||||
NodeType.capture:
|
NodeType.capture:
|
||||||
result += transpile_CaptureGroup(entry, true)
|
result += transpile_CaptureGroup(entry, true)
|
||||||
NodeType.glyph_digit:
|
NodeType.digit:
|
||||||
result += "\\D"
|
result += "\\D"
|
||||||
NodeType.glyph_word:
|
NodeType.word:
|
||||||
result += "\\W"
|
result += "\\W"
|
||||||
NodeType.glyph_ws:
|
NodeType.whitespace:
|
||||||
result += "\\S"
|
result += "\\S"
|
||||||
NodeType.glyph_look:
|
NodeType.look:
|
||||||
result += transpile_LookAhead(entry, true)
|
result += transpile_LookAhead(entry, true)
|
||||||
NodType.string:
|
NodeType.string:
|
||||||
result += transpile_String(entry, true)
|
result += transpile_String(entry, true)
|
||||||
NodeType.set:
|
NodeType.set:
|
||||||
result += transpile_Set(entry, true)
|
result += transpile_Set(entry, true)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user