Remove Global RegEx flag, default to unanchored patterns

This commit is contained in:
Feoramund
2025-05-24 06:02:50 -04:00
parent fedb9efb41
commit 37d6491300
5 changed files with 27 additions and 28 deletions
-3
View File
@@ -15,8 +15,6 @@ MAX_PROGRAM_SIZE :: int(max(i16))
MAX_CLASSES :: int(max(u8))
Flag :: enum u8 {
// Global: try to match the pattern anywhere in the string.
Global,
// Multiline: treat `^` and `$` as if they also match newlines.
Multiline,
// Case Insensitive: treat `a-z` as if it was also `A-Z`.
@@ -36,7 +34,6 @@ Flags :: bit_set[Flag; u8]
@(rodata)
Flag_To_Letter := #sparse[Flag]u8 {
.Global = 'g',
.Multiline = 'm',
.Case_Insensitive = 'i',
.Ignore_Whitespace = 'x',
+9 -5
View File
@@ -401,7 +401,7 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
pc_open := 0
add_global: if .Global in flags {
optimize_opening: {
// Check if the opening to the pattern is predictable.
// If so, use one of the optimized Wait opcodes.
iter := virtual_machine.Opcode_Iterator{ code[:], 0 }
@@ -412,7 +412,7 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
pc_open += size_of(Opcode)
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
pc_open += size_of(u8)
break add_global
break optimize_opening
case .Rune:
operand := intrinsics.unaligned_load(cast(^rune)&code[pc+1])
@@ -420,24 +420,28 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
pc_open += size_of(Opcode)
inject_raw(&code, pc_open, operand)
pc_open += size_of(rune)
break add_global
break optimize_opening
case .Rune_Class:
inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class)
pc_open += size_of(Opcode)
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
pc_open += size_of(u8)
break add_global
break optimize_opening
case .Rune_Class_Negated:
inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class_Negated)
pc_open += size_of(Opcode)
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
pc_open += size_of(u8)
break add_global
break optimize_opening
case .Save:
continue
case .Assert_Start:
break optimize_opening
case:
break seek_loop
}
-4
View File
@@ -167,7 +167,6 @@ to escape the delimiter if found in the middle of the string.
All runes after the closing delimiter will be parsed as flags:
- 'g': Global
- 'm': Multiline
- 'i': Case_Insensitive
- 'x': Ignore_Whitespace
@@ -244,7 +243,6 @@ create_by_user :: proc(
// to `end` here.
for r in pattern[start + end:] {
switch r {
case 'g': flags += { .Global }
case 'm': flags += { .Multiline }
case 'i': flags += { .Case_Insensitive }
case 'x': flags += { .Ignore_Whitespace }
@@ -283,8 +281,6 @@ create_iterator :: proc(
permanent_allocator := context.allocator,
temporary_allocator := context.temp_allocator,
) -> (result: Match_Iterator, err: Error) {
flags := flags
flags += {.Global} // We're iterating over a string, so the next match could start anywhere
if .Multiline in flags {
return {}, .Unsupported_Flag
@@ -103,9 +103,11 @@ expensive_for_backtrackers :: proc(t: ^testing.T) {
@test
global_capture_end_word :: proc(t: ^testing.T) {
// NOTE: The previous behavior of `.Global`, which was to automatically
// insert `.*?` at the start of the pattern, is now the default.
EXPR :: `Hellope World!`
rex, err := regex.create(EXPR, { .Global })
rex, err := regex.create(EXPR, { /*.Global*/ })
if !testing.expect_value(t, err, nil) {
return
}
@@ -145,7 +147,7 @@ global_capture_end_word_unicode :: proc(t: ^testing.T) {
EXPR :: `こにちは`
needle := string(EXPR)
rex, err := regex.create(EXPR, { .Global, .Unicode })
rex, err := regex.create(EXPR, { /*.Global,*/ .Unicode })
if !testing.expect_value(t, err, nil) {
return
}
@@ -185,7 +187,7 @@ global_capture_end_word_unicode :: proc(t: ^testing.T) {
alternations :: proc(t: ^testing.T) {
EXPR :: `a(?:bb|cc|dd|ee|ff)`
rex, err := regex.create(EXPR, { .No_Capture, .Global })
rex, err := regex.create(EXPR, { .No_Capture, /*.Global*/ })
if !testing.expect_value(t, err, nil) {
return
}
@@ -219,7 +221,7 @@ classes :: proc(t: ^testing.T) {
EXPR :: `[\w\d]+`
NEEDLE :: "0123456789abcdef"
rex, err := regex.create(EXPR, { .Global })
rex, err := regex.create(EXPR, { /*.Global*/ })
if !testing.expect_value(t, err, nil) {
return
}
+12 -12
View File
@@ -51,13 +51,13 @@ check_expression_with_flags :: proc(t: ^testing.T, pattern: string, flags: regex
}
check_expression :: proc(t: ^testing.T, pattern, haystack: string, needles: ..string, extra_flags := regex.Flags{}, loc := #caller_location) {
check_expression_with_flags(t, pattern, { .Global } + extra_flags,
check_expression_with_flags(t, pattern, extra_flags,
haystack, ..needles, loc = loc)
check_expression_with_flags(t, pattern, { .Global, .No_Optimization } + extra_flags,
check_expression_with_flags(t, pattern, { .No_Optimization } + extra_flags,
haystack, ..needles, loc = loc)
check_expression_with_flags(t, pattern, { .Global, .Unicode } + extra_flags,
check_expression_with_flags(t, pattern, { .Unicode } + extra_flags,
haystack, ..needles, loc = loc)
check_expression_with_flags(t, pattern, { .Global, .Unicode, .No_Optimization } + extra_flags,
check_expression_with_flags(t, pattern, { .Unicode, .No_Optimization } + extra_flags,
haystack, ..needles, loc = loc)
}
@@ -516,7 +516,7 @@ test_pos_index_explicitly :: proc(t: ^testing.T) {
STR :: "This is an island."
EXPR :: `\bis\b`
rex, err := regex.create(EXPR, { .Global })
rex, err := regex.create(EXPR)
if !testing.expect_value(t, err, nil) {
return
}
@@ -642,9 +642,9 @@ test_unicode_explicitly :: proc(t: ^testing.T) {
}
{
EXPR :: "こにちは!"
check_expression_with_flags(t, EXPR, { .Global, .Unicode },
check_expression_with_flags(t, EXPR, { .Unicode },
"Hello こにちは!", "こにちは!")
check_expression_with_flags(t, EXPR, { .Global, .Unicode, .No_Optimization },
check_expression_with_flags(t, EXPR, { .Unicode, .No_Optimization },
"Hello こにちは!", "こにちは!")
}
}
@@ -901,12 +901,12 @@ test_everything_at_once :: proc(t: ^testing.T) {
@test
test_creation_from_user_string :: proc(t: ^testing.T) {
{
USER_EXPR :: `/^hellope$/gmixun-`
USER_EXPR :: `/^hellope$/mixun-`
STR :: "hellope"
rex, err := regex.create_by_user(USER_EXPR)
defer regex.destroy(rex)
testing.expect_value(t, err, nil)
testing.expect_value(t, rex.flags, regex.Flags{ .Global, .Multiline, .Case_Insensitive, .Ignore_Whitespace, .Unicode, .No_Capture, .No_Optimization })
testing.expect_value(t, rex.flags, regex.Flags{ .Multiline, .Case_Insensitive, .Ignore_Whitespace, .Unicode, .No_Capture, .No_Optimization })
_, ok := regex.match(rex, STR)
testing.expectf(t, ok, "expected user-provided RegEx %v to match %q", rex, STR)
@@ -1102,14 +1102,14 @@ Iterator_Test :: struct {
iterator_vectors := []Iterator_Test{
{
`xxab32ab52xx`, `(ab\d{1})`, {}, // {.Global} implicitly added by the iterator
`xxab32ab52xx`, `(ab\d{1})`, {},
{
{pos = {{2, 5}, {2, 5}}, groups = {"ab3", "ab3"}},
{pos = {{6, 9}, {6, 9}}, groups = {"ab5", "ab5"}},
},
},
{
`xxfoobarxfoobarxx`, `f(o)ob(ar)`, {.Global},
`xxfoobarxfoobarxx`, `f(o)ob(ar)`, {},
{
{pos = {{2, 8}, {3, 4}, {6, 8}}, groups = {"foobar", "o", "ar"}},
{pos = {{9, 15}, {10, 11}, {13, 15}}, groups = {"foobar", "o", "ar"}},
@@ -1135,4 +1135,4 @@ test_match_iterator :: proc(t: ^testing.T) {
}
testing.expect_value(t, it.idx, len(test.expected))
}
}
}