From bd3596f01261ab8f3238553fbfe5616a5441e1c0 Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 30 Nov 2022 05:03:36 +0100 Subject: [PATCH] create lua strlib text package and tests --- core/text/lua/strlib.odin | 725 ++++++++++++++++++++ tests/core/Makefile | 5 +- tests/core/build.bat | 5 + tests/core/text/lua/test_core_text_lua.odin | 325 +++++++++ 4 files changed, 1059 insertions(+), 1 deletion(-) create mode 100644 core/text/lua/strlib.odin create mode 100644 tests/core/text/lua/test_core_text_lua.odin diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin new file mode 100644 index 000000000..a97ccdc8d --- /dev/null +++ b/core/text/lua/strlib.odin @@ -0,0 +1,725 @@ +package strlib + +import "core:strings" + +MAXCAPTURES :: 32 + +Capture :: struct { + init: int, + len: int, +} + +Match :: struct { + start, end: int, +} + +Error :: enum { + OK, + OOB, + Invalid_Capture_Index, + Invalid_Pattern_Capture, + Unfinished_Capture, +} + +L_ESC :: '%' +CAP_POSITION :: -2 +CAP_UNFINISHED :: -1 +INVALID :: -1 + +MatchState :: struct { + src: string, + pattern: string, + level: int, + capture: [MAXCAPTURES]Capture, +} + +match_class :: proc(c: u8, cl: u8) -> (res: bool) { + switch tolower(cl) { + case 'a': res = isalpha(c) + case 'c': res = iscntrl(c) + case 'd': res = isdigit(c) + case 'g': res = isgraph(c) + case 'l': res = islower(c) + case 'p': res = ispunct(c) + case 's': res = isspace(c) + case 'u': res = isupper(c) + case 'w': res = isalnum(c) + case 'x': res = isxdigit(c) + case: return cl == c + } + + return islower(cl) ? res : !res +} + +isalpha :: proc(c: u8) -> bool { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') +} + +isdigit :: proc(c: u8) -> bool { + return '0' <= c && c <= '9' +} + +isalnum :: proc(c: u8) -> bool { + return isalpha(c) || isdigit(c) +} + +iscntrl :: proc(c: u8) -> bool { + return c <= '\007' || (c >= '\010' && c <= '\017') || (c >= '\020' && c <= '\027') || (c >= '\030' && c <= '\037') || c == '\177' +} + +islower :: proc(c: u8) -> bool { + return c >= 'a' && c <= 'z' +} + +isupper :: proc(c: u8) -> bool { + return c >= 'A' && c <= 'Z' +} + +isgraph :: proc(c: u8) -> bool { + return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} + +ispunct :: proc(c: u8) -> bool { + return (c >= '{' && c <= '~') || (c == '`') || (c >= '[' && c <= '_') || (c == '@') || (c >= ':' && c <= '?') || (c >= '(' && c <= '/') || (c >= '!' && c <= '\'') +} + +isxdigit :: proc(c: u8) -> bool { + return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} + +isspace :: proc(c: u8) -> bool { + return c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || c == ' ' +} + +// ascii safe +tolower :: proc(c: u8) -> u8 { + if c >= 65 && c <= 90 { // upper case + return c + 32 + } + + return c +} + +check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) { + l := int(l - '1') + + if l < 0 || l >= ms.level || ms.capture[l].len == CAP_UNFINISHED { + return 0, .Invalid_Capture_Index + } + + return l, .OK +} + +capture_to_close :: proc(ms: ^MatchState) -> (int, Error) { + level := ms.level - 1 + + for level >= 0 { + if ms.capture[level].len == CAP_UNFINISHED { + return level, .OK + } + + level -= 1 + } + + return 0, .Invalid_Pattern_Capture +} + +classend :: proc(ms: ^MatchState, p: int) -> (int, Error) { + ch := ms.pattern[p] + p := p + 1 + + switch ch { + case L_ESC: { + // if > 0 { + // fmt.eprintln("ERR classend: not enough pattern length") + // return nil + // } + + return p + 1, .OK + } + + case '[': { + if ms.pattern[p] == '^' { + p += 1 + } + + // TODO double check + for { + ch := ms.pattern[p] + + if ch == L_ESC && p <= len(ms.pattern) { + // skip escapes like '%' + p += 1 + } + + if ms.pattern[p] == ']' { + break + } + + p += 1 + } + + return p + 1, .OK + } + + case: { + return p, .OK + } + } +} + +matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool { + sig := true + p := p + + if ms.pattern[p + 1] == '^' { + p += 1 + sig = false + } + + p += 1 + + // while inside of class range + for p < ec { + ch := ms.pattern[p] + + if ms.pattern[p] == L_ESC { + p += 1 + + if match_class(c, ms.pattern[p]) { + return sig + } + } else if ms.pattern[p + 1] == '-' && p + 2 < len(ms.pattern) { + // e.g. [a-z] check + if ms.pattern[p] <= c && c <= ms.pattern[p + 2] { + return sig + } + + p += 2 + } else if ms.pattern[p] == c { + return sig + } + + p += 1 + } + + return !sig +} + +singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool { + if s >= len(ms.src) { + return false + } + + switch ms.pattern[p] { + case '.': return true + case L_ESC: return match_class(ms.src[s], ms.pattern[p + 1]) + case '[': return matchbracketclass(ms, ms.src[s], p, ep - 1) + case: return ms.src[s] == ms.pattern[p] + } +} + +// matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) { +// s_begin := s +// s := s + 1 +// cont := 0 + +// begin := ms.pattern[p] +// end := ms.pattern[p + 1] +// print("BALANCED between", rune(begin), "AND", rune(end)) + +// for s < len(ms.src) { +// ch := ms.src[s] +// print("\t", rune(ch)) + +// if ch == end { +// cont -= 1 +// print("END", cont) + +// if cont == 0 { +// print("BALANCED RET", s + 1, len(ms.src), ms.src[s_begin:s + 1]) +// return s + 1 +// } +// } else if ch == begin { +// cont += 1 +// print("BEGIN", cont) +// } + +// s += 1 +// } + +// print("OUT OF BALANCE", cont) +// // out of balance +// return 0, . +// } + +max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { + i := 0 + for singlematch(ms, s + i, p, ep) { + i += 1 + } + + for i >= 0 { + result := match(ms, s + i, ep + 1) or_return + + if result != INVALID { + // print("SET", result) + return result, .OK + } + + i -= 1 + } + + return INVALID, .OK +} + +min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { + s := s + + for { + result := match(ms, s, ep + 1) or_return + + if result != INVALID { + return result, .OK + } else if singlematch(ms, s, p, ep) { + s += 1 + } else { + return INVALID, .OK + } + } +} + +start_capture :: proc(ms: ^MatchState, s, p, what: int) -> (res: int, err: Error) { + level := ms.level + + ms.capture[level].init = s + ms.capture[level].len = what + ms.level += 1 + + res = match(ms, s, p) or_return + if res == INVALID { + ms.level -= 1 + } + return +} + +end_capture :: proc(ms: ^MatchState, s, p: int) -> (res: int, err: Error) { + l := capture_to_close(ms) or_return + + // TODO double check, could do string as int index + ms.capture[l].len = s - ms.capture[l].init + + res = match(ms, s, p) or_return + if res == INVALID { + ms.capture[l].len = CAP_UNFINISHED + } + return +} + +match_capture :: proc(ms: ^MatchState, s: int, char: rune) -> (res: int, err: Error) { + index := check_capture(ms, char) or_return + length := ms.capture[index].len + + if len(ms.src) - s >= length { + return s + length, .OK + } + + return INVALID, .OK +} + +match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { + s := s + p := p + + if p == len(ms.pattern) { + return s, .OK + } + + switch ms.pattern[p] { + case '(': { + if ms.pattern[p + 1] == ')' { + s = start_capture(ms, s, p + 2, CAP_POSITION) or_return + } else { + s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return + } + } + + case ')': { + s = end_capture(ms, s, p + 1) or_return + } + + case '$': { + if p + 1 != len(ms.pattern) { + return match_default(ms, s, p) + } + + if len(ms.src) != s { + s = INVALID + } + } + + case L_ESC: { + // stop short patterns like "%" only + if p + 1 >= len(ms.pattern) { + err = .OOB + return + } + + switch ms.pattern[p + 1] { + // balanced string + case 'b': { + // res := matchbalance(ms, s, p + 2) + + // if data, ok := res.?; ok { + // // s = data + // // eg after %b() + // // print("SUCCESS") + // return patt_match(ms, s, p + 4) + // } + + } + + // frontier + case 'f': { + // p += 2 + + // if ms.pattern[p] != '[' { + // print("missing '[' after %f in pattern") + // return nil + // } + + // ep := classend(ms, p).? + // previous := 0 if s == 0 else s - 1 + + // if !matchbracketclass(ms, ms.src[previous], p, ep - 1) && + // matchbracketclass(ms, ms.src[s], p, ep) { + // return patt_match(ms, s, ep) + // } + + // return nil + } + + // capture group + case '0'..<'9': { + s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return + + if s != INVALID { + return match(ms, s, p + 2) + } + } + + case: { + return match_default(ms, s, p) + } + } + } + + case: { + return match_default(ms, s, p) + // print("PATT DEF", rune(ms.src[s]), rune(ms.pattern[p])) + } + } + + return s, .OK +} + +match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { + s := s + ep := classend(ms, p) or_return + // ch := s < len(ms.src) ? rune(ms.src[s]) : 0 + + if !singlematch(ms, s, p, ep) { + epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 + // print("+++", rune(epc)) + + if epc == '*' || epc == '?' || epc == '-' { + return match(ms, s, ep + 1) + } else { + s = INVALID + } + } else { + epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 + // print("~~~", ch, rune(epc)) + + switch epc { + case '?': { + result := match(ms, s + 1, ep + 1) or_return + + if result != INVALID { + s = result + } else { + return match(ms, s, ep + 1) + } + } + + case '+': { + s = max_expand(ms, s + 1, p, ep) or_return + } + + case '*': { + s = max_expand(ms, s, p, ep) or_return + } + + case '-': { + s = min_expand(ms, s, p, ep) or_return + } + + case: { + return match(ms, s + 1, ep) + } + } + } + + return s, .OK +} + +push_onecapture :: proc( + ms: ^MatchState, + i: int, + s: int, + e: int, + matches: []Match, +) -> (err: Error) { + if i >= ms.level { + if i == 0 { + matches[0] = { 0, e - s } + } else { + err = .Invalid_Capture_Index + } + } else { + init := ms.capture[i].init + length := ms.capture[i].len + + switch length { + case CAP_UNFINISHED: { + err = .Unfinished_Capture + } + + case CAP_POSITION: { + matches[i] = { init - 1, init - 1 } + } + + case: { + matches[i] = { init, init + length } + } + } + } + + return +} + +push_captures :: proc( + ms: ^MatchState, + s: int, + e: int, + matches: []Match, +) -> (nlevels: int, err: Error) { + nlevels = 1 if ms.level == 0 && s != -1 else ms.level + + for i in 0.. int { + for i in 0.. int { + l1 := len(s1) + l2 := len(s2) + + if l2 == 0 { + return 0 + } else if l2 > l1 { + return -1 + } else { + init := strings.index_byte(s1, s2[0]) + end := init + l2 + + for end <= l1 && init != -1 { + init += 1 + + if s1[init - 1:end] == s2 { + return init - 1 + } else { + next := strings.index_byte(s1[init:], s2[0]) + + if next == -1 { + return -1 + } else { + init = init + next + end = init + l2 + } + } + } + } + + return -1 +} + +// find a pattern with in a haystack with an offset +// allow_memfind will speed up simple searches +find_aux :: proc( + haystack: string, + pattern: string, + offset: int, + allow_memfind: bool, + matches: ^[MAXCAPTURES]Match, +) -> (captures: int, err: Error) { + s := offset + p := 0 + + specials_idx := index_special(pattern) + if allow_memfind && specials_idx == -1 { + if index := lmemfind(haystack[s:], pattern); index != -1 { + matches[0] = { index + s, index + s + len(pattern) } + captures = 1 + return + } else { + return + } + } + + pattern := pattern + anchor: bool + if len(pattern) > 0 && pattern[0] == '^' { + anchor = true + pattern = pattern[1:] + } + + ms := MatchState { + src = haystack, + pattern = pattern, + } + + for { + res := match(&ms, s, p) or_return + + if res != INVALID { + // NOTE(Skytrias): first result is reserved for a full match + matches[0] = { s, res } + // rest are the actual captures + captures = push_captures(&ms, -1, -1, matches[1:]) or_return + captures += 1 + + return + } + + s += 1 + + if !(s < len(ms.src) && !anchor) { + break + } + } + + return +} + +// iterative matching which returns the 0th/1st match +// rest has to be used from captures +gmatch :: proc( + haystack: ^string, + pattern: string, + captures: ^[MAXCAPTURES]Match, +) -> (res: string, ok: bool) { + if len(haystack) > 0 { + length, err := find_aux(haystack^, pattern, 0, false, captures) + + if length != 0 && err == .OK { + ok = true + first := length > 1 ? 1 : 0 + cap := captures[first] + res = haystack[cap.start:cap.end] + haystack^ = haystack[cap.end:] + } + } + + return +} + +// gsub with builder +gsub_builder :: proc( + builder: ^strings.Builder, + haystack: string, + pattern: string, + replace: string, +) -> string { + // find matches + captures: [MAXCAPTURES]Match + haystack := haystack + + for { + length, err := find_aux(haystack, pattern, 0, false, &captures) + + // done + if length == 0 { + break + } + + if err != .OK { + return {} + } + + cap := captures[0] + + // write front till capture + strings.write_string(builder, haystack[:cap.start]) + + // write replacements + strings.write_string(builder, replace) + + // advance string till end + haystack = haystack[cap.end:] + } + + strings.write_string(builder, haystack[:]) + return strings.to_string(builder^) +} + +// uses temp builder to build initial string - then allocates the result +gsub_allocator :: proc( + haystack: string, + pattern: string, + replace: string, + allocator := context.allocator, +) -> string { + builder := strings.builder_make(0, 256, context.temp_allocator) + return gsub_builder(&builder, haystack, pattern, replace) +} + +gsub :: proc { gsub_builder, gsub_allocator } + +// iterative find with first capture only +gfind :: proc( + haystack: ^string, + pattern: string, + captures: ^[MAXCAPTURES]Match, +) -> (res: string, ok: bool) { + if len(haystack) > 0 { + length, err := find_aux(haystack^, pattern, 0, true, captures) + + if length != 0 && err == .OK { + ok = true + cap := captures[0] + res = haystack[cap.start:cap.end] + haystack^ = haystack[cap.end:] + } + } + + return +} \ No newline at end of file diff --git a/tests/core/Makefile b/tests/core/Makefile index 92f12cbe7..8a36f7ea3 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -2,7 +2,7 @@ ODIN=../../odin PYTHON=$(shell which python3) all: download_test_assets image_test compress_test strings_test hash_test crypto_test noise_test encoding_test \ - math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test c_libc_test + math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test lua_strlib_test c_libc_test download_test_assets: $(PYTHON) download_assets.py @@ -49,5 +49,8 @@ os_exit_test: i18n_test: $(ODIN) run text/i18n -out:test_core_i18n +lua_strlib_test: + $(ODIN) run text/lua -out:test_core_lua_strlib + c_libc_test: $(ODIN) run c/libc -out:test_core_libc \ No newline at end of file diff --git a/tests/core/build.bat b/tests/core/build.bat index 94f8e0fd8..66c1839bf 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -71,6 +71,11 @@ echo Running core:text/i18n tests echo --- %PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe +echo --- +echo Running core:text/lua tests +echo --- +%PATH_TO_ODIN% run text\lua %COMMON% -out:test_core_lua_strlib.exe + echo --- echo Running core:slice tests echo --- diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin new file mode 100644 index 000000000..63d8b5239 --- /dev/null +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -0,0 +1,325 @@ +package test_strlib + +import lua "core:text/lua" +import "core:testing" +import "core:fmt" +import "core:os" + +TEST_count: int +TEST_fail: int + +when ODIN_TEST { + expect :: testing.expect +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.printf("[%v] %v\n", loc, message) + return + } + } +} + +// find correct byte offsets +@test +test_find :: proc(t: ^testing.T) { + Entry :: struct { + s, p: string, + offset: int, + + match: struct { + start, end: int, // expected start/end + ok: bool, + }, + } + + ENTRIES :: [?]Entry { + { "", "", 0, { 0, 0, true } }, + { "alo", "", 0, { 0, 0, true } }, + { "a o a o a o", "a", 0, { 0, 1, true } }, + { "a o a o a o", "a o", 1, { 4, 7, true } }, + { "alo123alo", "12", 0, { 3, 5, true } }, + { "alo123alo", "^12", 0, {} }, + + // from https://riptutorial.com/lua/example/20535/string-find--introduction- + { "137'5 m47ch s0m3 d1g175", "m%d%d", 0, { 6, 9, true } }, + { "stack overflow", "[abc]", 0, { 2, 3, true } }, + { "stack overflow", "[^stack ]", 0, { 6, 7, true } }, + { "hello", "o%d?", 0, { 4, 5, true } }, + { "hello20", "o%d?", 0, { 4, 6, true } }, + { "helllllo", "el+", 0, { 1, 7, true } }, + { "heo", "el+", 0, {} }, + { "helelo", "h.+l", 0, { 0, 5, true } }, + { "helelo", "h.-l", 0, { 0, 3, true } }, + } + + captures: [lua.MAXCAPTURES]lua.Match + for entry in ENTRIES { + captures[0] = {} + length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures) + cap := captures[0] + ok := length > 0 && err == .OK + success := entry.match.ok == ok && entry.match.start == cap.start && entry.match.end == cap.end + + if !success { + fmt.eprintf("Find failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) + } + + expect(t, entry.match.start == cap.start, "match start didnt match") + expect(t, entry.match.end == cap.end, "match end didnt match",) + expect(t, entry.match.ok == ok, "find result didnt match") + } +} + +@test +test_match :: proc(t: ^testing.T) { + Entry :: struct { + s, p: string, + result: string, // expected start/end + ok: bool, + } + + ENTRIES :: [?]Entry { + // star + { "aaab", ".*b", "aaab", true }, + { "aaa", ".*a", "aaa", true }, + { "b", ".*b", "b", true }, + + // plus + { "aaab", ".+b", "aaab", true }, + { "aaa", ".+a", "aaa", true }, + { "b", ".+b", "", false }, + + // question + { "aaab", ".?b", "ab", true }, + { "aaa", ".?a", "aa", true }, + { "b", ".?b", "b", true }, + + // CLASSES, checking shorted invalid patterns + { "a", "%", "", false }, + + // %a letter (A-Z, a-z) + { "letterS", "%a+", "letterS", true }, + { "Let123", "%a+", "Let", true }, + { "Let123", "%A+", "123", true }, + + // %c control characters (\n, \t, \r) + { "\n", "%c", "\n", true }, + { "\t", "%c", "\t", true }, + { "\t", "%C", "", false }, + { "a", "%C", "a", true }, + + // %d digit characters (0-9) + { "0123", "%d+", "0123", true }, + { "abcd", "%D+", "abcd", true }, + { "ab23", "%d+", "23", true }, + + // %l lower characters (a-z) + { "lowerCASE", "%l+", "lower", true }, + { "LOWERcase", "%l+", "case", true }, + { "LOWERcase", "%L+", "LOWER", true }, + + // %p punctionation characters (!, ?, &, ...) + { "!?&", "%p+", "!?&", true }, + { "abc!abc", "%p", "!", true }, + { "!abc!", "%P+", "abc", true }, + + // %s space characters + { " ", "%s", " ", true }, + { "a", "%S", "a", true }, + { "abc ", "%s+", " ", true }, + + // %u upper characters (A-Z) + { "lowerCASE", "%u+", "CASE", true }, + { "LOWERcase", "%u+", "LOWER", true }, + { "LOWERcase", "%U+", "case", true }, + + // %w alpha numeric (A-Z, a-z, 0-9) + { "0123", "%w+", "0123", true }, + { "abcd", "%W+", "", false }, + { "ab23", "%w+", "ab23", true }, + + // %x hexadecimal digits (0x1A, ...) + { "3", "%x", "3", true }, + { "9f", "%x+", "9f", true }, + { "9g", "%x+", "9", true }, + { "9g", "%X+", "g", true }, + + // random tests + { "f123", "%D", "f", true }, + { "f123", "%d", "1", true }, + { "f123", "%d+", "123", true }, + { "foo 123 bar", "%d%d%d", "123", true }, + { "Uppercase", "%u", "U", true }, + { "abcd", "[bc][bc]", "bc", true }, + { "abcd", "[^ad]", "b", true }, + { "123", "[0-9]", "1", true }, + + // end of line + { "testing this", "this$", "this", true }, + { "testing this ", "this$", "", false }, + { "testing this$", "this%$$", "this$", true }, + + // start of line + { "testing this", "^testing", "testing", true }, + { " testing this", "^testing", "", false }, + { "testing this", "^%w+", "testing", true }, + { " testing this", "^%w+", "", false }, + } + + captures: [lua.MAXCAPTURES]lua.Match + for entry, i in ENTRIES { + captures[0] = {} + length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures) + ok := length > 0 && err == .OK + result := entry.s[captures[0].start:captures[0].end] + success := entry.ok == ok && result == entry.result + + if !success { + fmt.eprintf("Match failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) + fmt.eprintf("Match invalid result! |WANTED:| %s |GOT:| %s\n", entry.result, result) + } + + expect(t, entry.ok == ok, "find result didnt match") + expect(t, result == entry.result, "entry result didnt match") + } +} + +@test +test_captures :: proc(t: ^testing.T) { + Temp :: struct { + pattern: string, + captures: [lua.MAXCAPTURES]lua.Match, + } + + // match all captures + compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) { + length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) + expect(t, len(comp) == length, "didnt match input comparison strings", loc) + + for i in 0.. 0 && err == .OK + expect(t, result == ok, "result didnt eq", loc) + } + + temp := Temp { pattern = "(one).+" } + compare_captures(t, &temp, " one two", { "one two", "one" }) + compare_captures(t, &temp, "three", {}) + + matches(t, &temp, "one dog", true) + matches(t, &temp, "dog one ", true) + matches(t, &temp, "dog one", false) + + temp.pattern = "^(%a+)" + matches(t, &temp, "one dog", true) + matches(t, &temp, " one dog", false) + + // multiple captures + { + haystack := " 233 hello dolly" + pattern := "%s*(%d+)%s+(%S+)" + captures: [lua.MAXCAPTURES]lua.Match + lua.find_aux(haystack, pattern, 0, false, &captures) + cap1 := captures[1] + cap2 := captures[2] + text1 := haystack[cap1.start:cap1.end] + text2 := haystack[cap2.start:cap2.end] + expect(t, text1 == "233", "Multi-Capture failed at 1") + expect(t, text2 == "hello", "Multi-Capture failed at 2") + } +} + +@test +test_gmatch :: proc(t: ^testing.T) { + { + haystack := "testing this out 123" + pattern := "%w+" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "testing", "this", "out", "123" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } + + { + haystack := "#afdde6" + pattern := "%x%x" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "af", "dd", "e6" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } + + { + haystack := "testing outz captures yo outz outtz" + pattern := "(out)z" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "out", "out" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } +} + +@test +test_gsub :: proc(t: ^testing.T) { + result := lua.gsub("testing123testing", "%d+", " sup ", context.temp_allocator) + expect(t, result == "testing sup testing", "GSUB 0: failed") + result = lua.gsub("testing123testing", "%a+", "345", context.temp_allocator) + expect(t, result == "345123345", "GSUB 1: failed") +} + +@test +test_gfind :: proc(t: ^testing.T) { + { + haystack := "test1 123 test2 123 test3" + pattern := "%w+" + captures: [lua.MAXCAPTURES]lua.Match + s := &haystack + output := [?]string { "test1", "123", "test2", "123", "test3" } + index: int + + for word in lua.gfind(s, pattern, &captures) { + expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word)) + index += 1 + } + } +} + +main :: proc() { + t: testing.T + test_find(&t) + test_match(&t) + test_captures(&t) + test_gmatch(&t) + test_gsub(&t) + test_gfind(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} \ No newline at end of file