mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-13 01:21:38 -07:00
Remove Global RegEx flag, default to unanchored patterns
This commit is contained in:
@@ -15,8 +15,6 @@ MAX_PROGRAM_SIZE :: int(max(i16))
|
||||
MAX_CLASSES :: int(max(u8))
|
||||
|
||||
Flag :: enum u8 {
|
||||
// Global: try to match the pattern anywhere in the string.
|
||||
Global,
|
||||
// Multiline: treat `^` and `$` as if they also match newlines.
|
||||
Multiline,
|
||||
// Case Insensitive: treat `a-z` as if it was also `A-Z`.
|
||||
@@ -36,7 +34,6 @@ Flags :: bit_set[Flag; u8]
|
||||
|
||||
@(rodata)
|
||||
Flag_To_Letter := #sparse[Flag]u8 {
|
||||
.Global = 'g',
|
||||
.Multiline = 'm',
|
||||
.Case_Insensitive = 'i',
|
||||
.Ignore_Whitespace = 'x',
|
||||
|
||||
@@ -401,7 +401,7 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
|
||||
|
||||
pc_open := 0
|
||||
|
||||
add_global: if .Global in flags {
|
||||
optimize_opening: {
|
||||
// Check if the opening to the pattern is predictable.
|
||||
// If so, use one of the optimized Wait opcodes.
|
||||
iter := virtual_machine.Opcode_Iterator{ code[:], 0 }
|
||||
@@ -412,7 +412,7 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
|
||||
pc_open += size_of(Opcode)
|
||||
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
|
||||
pc_open += size_of(u8)
|
||||
break add_global
|
||||
break optimize_opening
|
||||
|
||||
case .Rune:
|
||||
operand := intrinsics.unaligned_load(cast(^rune)&code[pc+1])
|
||||
@@ -420,24 +420,28 @@ compile :: proc(tree: Node, flags: common.Flags) -> (code: Program, class_data:
|
||||
pc_open += size_of(Opcode)
|
||||
inject_raw(&code, pc_open, operand)
|
||||
pc_open += size_of(rune)
|
||||
break add_global
|
||||
break optimize_opening
|
||||
|
||||
case .Rune_Class:
|
||||
inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class)
|
||||
pc_open += size_of(Opcode)
|
||||
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
|
||||
pc_open += size_of(u8)
|
||||
break add_global
|
||||
break optimize_opening
|
||||
|
||||
case .Rune_Class_Negated:
|
||||
inject_at(&code, pc_open, Opcode.Wait_For_Rune_Class_Negated)
|
||||
pc_open += size_of(Opcode)
|
||||
inject_at(&code, pc_open, Opcode(code[pc + size_of(Opcode) + pc_open]))
|
||||
pc_open += size_of(u8)
|
||||
break add_global
|
||||
break optimize_opening
|
||||
|
||||
case .Save:
|
||||
continue
|
||||
|
||||
case .Assert_Start:
|
||||
break optimize_opening
|
||||
|
||||
case:
|
||||
break seek_loop
|
||||
}
|
||||
|
||||
@@ -167,7 +167,6 @@ to escape the delimiter if found in the middle of the string.
|
||||
|
||||
All runes after the closing delimiter will be parsed as flags:
|
||||
|
||||
- 'g': Global
|
||||
- 'm': Multiline
|
||||
- 'i': Case_Insensitive
|
||||
- 'x': Ignore_Whitespace
|
||||
@@ -244,7 +243,6 @@ create_by_user :: proc(
|
||||
// to `end` here.
|
||||
for r in pattern[start + end:] {
|
||||
switch r {
|
||||
case 'g': flags += { .Global }
|
||||
case 'm': flags += { .Multiline }
|
||||
case 'i': flags += { .Case_Insensitive }
|
||||
case 'x': flags += { .Ignore_Whitespace }
|
||||
@@ -283,8 +281,6 @@ create_iterator :: proc(
|
||||
permanent_allocator := context.allocator,
|
||||
temporary_allocator := context.temp_allocator,
|
||||
) -> (result: Match_Iterator, err: Error) {
|
||||
flags := flags
|
||||
flags += {.Global} // We're iterating over a string, so the next match could start anywhere
|
||||
|
||||
if .Multiline in flags {
|
||||
return {}, .Unsupported_Flag
|
||||
|
||||
@@ -103,9 +103,11 @@ expensive_for_backtrackers :: proc(t: ^testing.T) {
|
||||
|
||||
@test
|
||||
global_capture_end_word :: proc(t: ^testing.T) {
|
||||
// NOTE: The previous behavior of `.Global`, which was to automatically
|
||||
// insert `.*?` at the start of the pattern, is now default.
|
||||
EXPR :: `Hellope World!`
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global })
|
||||
rex, err := regex.create(EXPR, { /*.Global*/ })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
@@ -145,7 +147,7 @@ global_capture_end_word_unicode :: proc(t: ^testing.T) {
|
||||
EXPR :: `こにちは`
|
||||
needle := string(EXPR)
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global, .Unicode })
|
||||
rex, err := regex.create(EXPR, { /*.Global,*/ .Unicode })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
@@ -185,7 +187,7 @@ global_capture_end_word_unicode :: proc(t: ^testing.T) {
|
||||
alternations :: proc(t: ^testing.T) {
|
||||
EXPR :: `a(?:bb|cc|dd|ee|ff)`
|
||||
|
||||
rex, err := regex.create(EXPR, { .No_Capture, .Global })
|
||||
rex, err := regex.create(EXPR, { .No_Capture, /*.Global*/ })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
@@ -219,7 +221,7 @@ classes :: proc(t: ^testing.T) {
|
||||
EXPR :: `[\w\d]+`
|
||||
NEEDLE :: "0123456789abcdef"
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global })
|
||||
rex, err := regex.create(EXPR, { /*.Global*/ })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -51,13 +51,13 @@ check_expression_with_flags :: proc(t: ^testing.T, pattern: string, flags: regex
|
||||
}
|
||||
|
||||
// check_expression runs `pattern` against `haystack` by calling
// `check_expression_with_flags` once per flag combination, forwarding
// `needles` and `loc` unchanged each time. The combinations are the caller's
// `extra_flags` on their own, with `.No_Optimization`, with `.Unicode`, and
// with both `.Unicode` and `.No_Optimization`.
//
// NOTE(review): this span is a rendered diff, so the first line of each call
// appears twice: the variant that also sets `.Global`, followed by the variant
// without it. Presumably only the variant without `.Global` remains after the
// commit ("Remove Global RegEx flag") — confirm against the real file.
check_expression :: proc(t: ^testing.T, pattern, haystack: string, needles: ..string, extra_flags := regex.Flags{}, loc := #caller_location) {
|
||||
check_expression_with_flags(t, pattern, { .Global } + extra_flags,
|
||||
check_expression_with_flags(t, pattern, extra_flags,
|
||||
haystack, ..needles, loc = loc)
|
||||
check_expression_with_flags(t, pattern, { .Global, .No_Optimization } + extra_flags,
|
||||
check_expression_with_flags(t, pattern, { .No_Optimization } + extra_flags,
|
||||
haystack, ..needles, loc = loc)
|
||||
check_expression_with_flags(t, pattern, { .Global, .Unicode } + extra_flags,
|
||||
check_expression_with_flags(t, pattern, { .Unicode } + extra_flags,
|
||||
haystack, ..needles, loc = loc)
|
||||
check_expression_with_flags(t, pattern, { .Global, .Unicode, .No_Optimization } + extra_flags,
|
||||
check_expression_with_flags(t, pattern, { .Unicode, .No_Optimization } + extra_flags,
|
||||
haystack, ..needles, loc = loc)
|
||||
}
|
||||
|
||||
@@ -516,7 +516,7 @@ test_pos_index_explicitly :: proc(t: ^testing.T) {
|
||||
STR :: "This is an island."
|
||||
EXPR :: `\bis\b`
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global })
|
||||
rex, err := regex.create(EXPR)
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
@@ -642,9 +642,9 @@ test_unicode_explicitly :: proc(t: ^testing.T) {
|
||||
}
|
||||
{
|
||||
EXPR :: "こにちは!"
|
||||
check_expression_with_flags(t, EXPR, { .Global, .Unicode },
|
||||
check_expression_with_flags(t, EXPR, { .Unicode },
|
||||
"Hello こにちは!", "こにちは!")
|
||||
check_expression_with_flags(t, EXPR, { .Global, .Unicode, .No_Optimization },
|
||||
check_expression_with_flags(t, EXPR, { .Unicode, .No_Optimization },
|
||||
"Hello こにちは!", "こにちは!")
|
||||
}
|
||||
}
|
||||
@@ -901,12 +901,12 @@ test_everything_at_once :: proc(t: ^testing.T) {
|
||||
@test
|
||||
test_creation_from_user_string :: proc(t: ^testing.T) {
|
||||
{
|
||||
USER_EXPR :: `/^hellope$/gmixun-`
|
||||
USER_EXPR :: `/^hellope$/mixun-`
|
||||
STR :: "hellope"
|
||||
rex, err := regex.create_by_user(USER_EXPR)
|
||||
defer regex.destroy(rex)
|
||||
testing.expect_value(t, err, nil)
|
||||
testing.expect_value(t, rex.flags, regex.Flags{ .Global, .Multiline, .Case_Insensitive, .Ignore_Whitespace, .Unicode, .No_Capture, .No_Optimization })
|
||||
testing.expect_value(t, rex.flags, regex.Flags{ .Multiline, .Case_Insensitive, .Ignore_Whitespace, .Unicode, .No_Capture, .No_Optimization })
|
||||
|
||||
_, ok := regex.match(rex, STR)
|
||||
testing.expectf(t, ok, "expected user-provided RegEx %v to match %q", rex, STR)
|
||||
@@ -1102,14 +1102,14 @@ Iterator_Test :: struct {
|
||||
|
||||
iterator_vectors := []Iterator_Test{
|
||||
{
|
||||
`xxab32ab52xx`, `(ab\d{1})`, {}, // {.Global} implicitly added by the iterator
|
||||
`xxab32ab52xx`, `(ab\d{1})`, {},
|
||||
{
|
||||
{pos = {{2, 5}, {2, 5}}, groups = {"ab3", "ab3"}},
|
||||
{pos = {{6, 9}, {6, 9}}, groups = {"ab5", "ab5"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
`xxfoobarxfoobarxx`, `f(o)ob(ar)`, {.Global},
|
||||
`xxfoobarxfoobarxx`, `f(o)ob(ar)`, {},
|
||||
{
|
||||
{pos = {{2, 8}, {3, 4}, {6, 8}}, groups = {"foobar", "o", "ar"}},
|
||||
{pos = {{9, 15}, {10, 11}, {13, 15}}, groups = {"foobar", "o", "ar"}},
|
||||
@@ -1135,4 +1135,4 @@ test_match_iterator :: proc(t: ^testing.T) {
|
||||
}
|
||||
testing.expect_value(t, it.idx, len(test.expected))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user