Fixes, also added support for spaces. (RegM)

This commit is contained in:
Edward R. Gonzalez 2022-07-17 11:04:02 -04:00
parent 2041732e28
commit 31f1ae9b8f

View File

@ -28,6 +28,7 @@ const TokenType : Dictionary = \
glyph_between = "Glyphs Between", glyph_between = "Glyphs Between",
glyph_digit = "Digit", glyph_digit = "Digit",
glyph_inline = "inline", glyph_inline = "inline",
glyph_space = "Space",
glyph_word = "Word", glyph_word = "Word",
glyph_ws = "Whitespace", glyph_ws = "Whitespace",
@ -69,6 +70,7 @@ const Spec : Dictionary = \
TokenType.glyph_between : "^\\-", TokenType.glyph_between : "^\\-",
TokenType.glyph_digit : "^\\bdigit\\b", TokenType.glyph_digit : "^\\bdigit\\b",
TokenType.glyph_inline : "^\\binline\\b", TokenType.glyph_inline : "^\\binline\\b",
TokenType.glyph_space : "^\\bspace\\b",
TokenType.glyph_word : "^\\bword\\b", TokenType.glyph_word : "^\\bword\\b",
TokenType.glyph_ws : "^\\bwhitespace\\b", TokenType.glyph_ws : "^\\bwhitespace\\b",
@ -263,6 +265,7 @@ const NodeType = \
digit = "Digit", digit = "Digit",
inline = "Any Inline", inline = "Any Inline",
space = "Space",
word = "Word", word = "Word",
whitespace = "Whitespace", whitespace = "Whitespace",
string = "String", string = "String",
@ -341,6 +344,13 @@ func is_GroupToken() :
return true return true
return false return false
# Reports whether the value of the upcoming token starts with a decimal digit.
# Returns true when the pattern "^\d" matches NextToken.Value, false otherwise.
func is_Number() :
	var digitPattern = RegEx.new()
	digitPattern.compile("^\\d")

	# RegEx.search() yields null when there is no match.
	var found = digitPattern.search(NextToken.Value)
	if found == null :
		return false

	return true
func is_RegExToken() : func is_RegExToken() :
match NextToken.Value : match NextToken.Value :
"^" : "^" :
@ -411,6 +421,9 @@ func parse_Expression(endToken):
TokenType.glyph_inline : TokenType.glyph_inline :
node.Value.append( parse_GlyphInline() ) node.Value.append( parse_GlyphInline() )
TokenType.glyph_space :
node.Value.append( parse_GlyphSpace() )
TokenType.glyph_word : TokenType.glyph_word :
node.Value.append( parse_GlyphWord() ) node.Value.append( parse_GlyphWord() )
@ -486,43 +499,32 @@ func parse_StrEnd():
# : glyph # : glyph
# | glyph - glyph # | glyph - glyph
# ; # ;
func parse_Between(): func parse_Between(quantifier : bool = false):
var glyph var glyph
match NextToken.Type : match NextToken.Type :
TokenType.glyph : TokenType.glyph :
glyph = parse_Glyph() glyph = parse_Glyph(quantifier)
# TokenType.glyph_digit :
TokenType.glyph_digit : # glyph = parse_GlyphDigit()
glyph = parse_GlyphDigit()
TokenType.glyph_inline : TokenType.glyph_inline :
glyph = parse_GlyphInline() glyph = parse_GlyphInline()
# TokenType.glyph_word :
TokenType.glyph_word : # glyph = parse_GlyphWord()
glyph = parse_GlyphWord()
TokenType.glyph_ws : TokenType.glyph_ws :
glyph = parse_GlyphWhitespace() glyph = parse_GlyphWhitespace()
TokenType.glyph_dash : TokenType.glyph_dash :
glyph = parse_GlyphDash() glyph = parse_GlyphDash()
TokenType.glyph_dot : TokenType.glyph_dot :
glyph = parse_GlyphDot() glyph = parse_GlyphDot()
TokenType.glyph_excla : TokenType.glyph_excla :
glyph = parse_GlyphExclamation() glyph = parse_GlyphExclamation()
TokenType.glyph_vertS : TokenType.glyph_vertS :
glyph = parse_GlyphVertS() glyph = parse_GlyphVertS()
TokenType.glyph_bPOpen : TokenType.glyph_bPOpen :
glyph = parse_Glyph_bPOpen() glyph = parse_Glyph_bPOpen()
TokenType.glyph_bPClose : TokenType.glyph_bPClose :
glyph = parse_Glyph_bPClose() glyph = parse_Glyph_bPClose()
TokenType.glyph_dQuote : TokenType.glyph_dQuote :
glyph = parse_Glyph_DQuote() glyph = parse_Glyph_DQuote()
@ -540,7 +542,7 @@ func parse_Between():
eat(TokenType.glyph_between) eat(TokenType.glyph_between)
if is_Glyph() : if is_Glyph() :
node.Value.append( parse_Glyph() ) node.Value.append( parse_Glyph(quantifier) )
return node return node
@ -565,21 +567,27 @@ func parse_CaptureGroup():
# Glyph # Glyph
# : glyph # : glyph
# ; # ;
# Parses one or more glyph tokens into a single glyph node.
#
# numerical : when false (the default) exactly one glyph token is consumed;
#             when true, consecutive glyph tokens are concatenated into one
#             Value, which lets callers read a multi-token number.
#
# Returns an ASTNode of type NodeType.glyph. Value holds the text of the
# consumed glyphs with escaping applied, or "" if the next token is not a glyph.
func parse_Glyph(numerical = false):
	var node = ASTNode.new()
	node.Type  = NodeType.glyph
	node.Value = ""

	# In numerical mode the condition is re-evaluated after eat(); NextToken is
	# null-checked elsewhere in this parser (see parse_OpRepeat), so guard here
	# too instead of dereferencing a null token at the end of the input.
	while NextToken != null && NextToken.Type == TokenType.glyph :
		if NextToken.Value == "/" :
			# Forward slash is emitted escaped.
			node.Value += "\\/"
		elif is_RegExToken() :
			# Characters flagged by is_RegExToken() get a single backslash prefix.
			node.Value += "\\" + NextToken.Value
		elif is_GroupToken() :
			# Group tokens emit an escaped backslash followed by their second character.
			node.Value += "\\\\" + NextToken.Value[1]
		else :
			node.Value += NextToken.Value

		eat(TokenType.glyph)

		# Non-numerical callers want exactly one glyph.
		if numerical == false :
			break

	return node
@ -602,6 +610,25 @@ func parse_GlyphInline():
node.Value = "." node.Value = "."
return node return node
# Parses the `space` keyword, optionally followed by a parenthesised count,
# e.g. `space` or `space(3)`.
#
# Returns an ASTNode of type NodeType.space whose Value is the literal run of
# space characters: a single space for the bare keyword, or exactly as many
# spaces as the count requests when one is supplied.
func parse_GlyphSpace():
	eat(TokenType.glyph_space)

	var node = ASTNode.new()
	node.Type  = NodeType.space
	node.Value = " "

	# NextToken is null-checked elsewhere in this parser (see parse_OpRepeat),
	# so guard here as well: `space` may be the last token consumed.
	if NextToken != null && NextToken.Type == TokenType.expr_PStart :
		eat(TokenType.expr_PStart)

		# Numerical mode concatenates consecutive glyph tokens, so a
		# multi-token count is read back as one string.
		var numGlyph = parse_Glyph(true)
		var count    = int(numGlyph.Value)

		# Rebuild the run from empty so that `space(n)` yields n spaces;
		# appending to the default single space would produce n + 1.
		node.Value = ""
		for n in range(count) :
			node.Value += " "

		eat(TokenType.expr_PEnd)

	return node
func parse_GlyphWord(): func parse_GlyphWord():
eat(TokenType.glyph_word) eat(TokenType.glyph_word)
@ -778,8 +805,8 @@ func parse_OpRepeat():
eat(TokenType.expr_PStart) eat(TokenType.expr_PStart)
vrange = parse_Between() vrange = parse_Between(true)
eat(TokenType.expr_PEnd) eat(TokenType.expr_PEnd)
if NextToken && NextToken.Type == TokenType.op_lazy : if NextToken && NextToken.Type == TokenType.op_lazy :
@ -884,7 +911,7 @@ func transiple_Union(node : ASTNode):
NodeType.capture: NodeType.capture:
result += transpile_CaptureGroup(entry, false) result += transpile_CaptureGroup(entry, false)
NodeType.look: NodeType.look:
result += transpile_LookAhead(entry, false) result += transpile_LookAhead(entry, false)
NodeType.ref: NodeType.ref:
result += transpile_Backreference(entry) result += transpile_Backreference(entry)
@ -899,6 +926,8 @@ func transiple_Union(node : ASTNode):
result += entry.Value result += entry.Value
NodeType.digit: NodeType.digit:
result += entry.Value result += entry.Value
NodeType.space:
result += entry.Value
NodeType.word: NodeType.word:
result += entry.Value result += entry.Value
NodeType.whitespace: NodeType.whitespace:
@ -969,7 +998,7 @@ func transpile_Repeat(node : ASTNode):
else : else :
result += "{" + vrange.Value[0].Value + "," + vrange.Value[1].Value + "}" result += "{" + vrange.Value[0].Value + "," + vrange.Value[1].Value + "}"
else : else :
result += "{" + vrange.Value[0] + "}" result += "{" + vrange.Value + "}"
if lazy != null : if lazy != null :
result += "?" result += "?"