diff --git a/core/strings/ascii_set.odin b/core/strings/ascii_set.odin index 9b59666f3..7b6f46f5b 100644 --- a/core/strings/ascii_set.odin +++ b/core/strings/ascii_set.odin @@ -3,9 +3,22 @@ package strings import "core:unicode/utf8" +/* +Ascii_Set is designed to store ASCII characters efficiently as a bit-array +Each bit in the array corresponds to a specific ASCII character, where the value of the bit (0 or 1) +indicates if the character is present in the set or not. +*/ Ascii_Set :: distinct [8]u32 +/* +Creates an Ascii_Set with unique characters from the input string. -// create an ascii set of all unique characters in the string +Inputs: +- chars: A string containing characters to include in the Ascii_Set. + +Returns: +- as: An Ascii_Set with unique characters from the input string. +- ok: false if any character in the input string is not a valid ASCII character. +*/ ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check { for i in 0.. (as: Ascii_Set, ok: bool) #no_bounds_ch ok = true return } +/* +Determines if a given char is contained within an Ascii_Set. -// returns true when the `c` byte is contained in the `as` ascii set +Inputs: +- as: The Ascii_Set to search. +- c: The char to check for in the Ascii_Set. + +Returns: A boolean indicating if the byte is contained in the Ascii_Set (true) or not (false). 
+*/ ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check { return as[c>>5] & (1<<(c&31)) != 0 } \ No newline at end of file diff --git a/core/strings/builder.odin b/core/strings/builder.odin index a6d5b78b4..f405f285b 100644 --- a/core/strings/builder.odin +++ b/core/strings/builder.odin @@ -4,68 +4,126 @@ import "core:runtime" import "core:unicode/utf8" import "core:strconv" import "core:io" - -Builder_Flush_Proc :: #type proc(b: ^Builder) -> (do_reset: bool) - /* - dynamic byte buffer / string builder with helper procedures - the dynamic array is wrapped inside the struct to be more opaque - you can use `fmt.sbprint*` procedures with a `^strings.Builder` directly +Type definition for a procedure that flushes a Builder + +Inputs: +- b: A pointer to the Builder + +Returns: A boolean indicating whether the Builder should be reset +*/ +Builder_Flush_Proc :: #type proc(b: ^Builder) -> (do_reset: bool) +/* +A dynamic byte buffer / string builder with helper procedures +The dynamic array is wrapped inside the struct to be more opaque +You can use `fmt.sbprint*` procedures with a `^strings.Builder` directly */ Builder :: struct { buf: [dynamic]byte, } +/* +Produces a Builder with a default length of 0 and cap of 16 -// return a builder, default length 0 / cap 16 are done through make +*Allocates Using Provided Allocator* + +Inputs: +- allocator: (default is context.allocator) + +Returns: A new Builder +*/ builder_make_none :: proc(allocator := context.allocator) -> Builder { return Builder{buf=make([dynamic]byte, allocator)} } +/* +Produces a Builder with a specified length and cap of max(16,len) byte buffer -// return a builder, with a set length `len` and cap 16 byte buffer +*Allocates Using Provided Allocator* + +Inputs: +- len: The desired length of the Builder's buffer +- allocator: (default is context.allocator) + +Returns: A new Builder +*/ builder_make_len :: proc(len: int, allocator := context.allocator) -> Builder { return 
Builder{buf=make([dynamic]byte, len, allocator)} } +/* +Produces a Builder with a specified length and cap -// return a builder, with a set length `len` byte buffer and a custom `cap` +*Allocates Using Provided Allocator* + +Inputs: +- len: The desired length of the Builder's buffer +- cap: The desired capacity of the Builder's buffer, cap is max(cap, len) +- allocator: (default is context.allocator) + +Returns: A new Builder +*/ builder_make_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder { return Builder{buf=make([dynamic]byte, len, cap, allocator)} } - // overload simple `builder_make_*` with or without len / cap parameters builder_make :: proc{ builder_make_none, builder_make_len, builder_make_len_cap, } +/* +Initializes a Builder with a length of 0 and cap of 16 +It replaces the existing `buf` -// initialize a builder, default length 0 / cap 16 are done through make -// replaces the existing `buf` +*Allocates Using Provided Allocator* + +Inputs: +- b: A pointer to the Builder +- allocator: (default is context.allocator) + +Returns: initialized ^Builder +*/ builder_init_none :: proc(b: ^Builder, allocator := context.allocator) -> ^Builder { b.buf = make([dynamic]byte, allocator) return b } +/* +Initializes a Builder with a specified length and cap, which is max(len,16) +It replaces the existing `buf` -// initialize a builder, with a set length `len` and cap 16 byte buffer -// replaces the existing `buf` +*Allocates Using Provided Allocator* + +Inputs: +- b: A pointer to the Builder +- len: The desired length of the Builder's buffer +- allocator: (default is context.allocator) + +Returns: Initialized ^Builder +*/ builder_init_len :: proc(b: ^Builder, len: int, allocator := context.allocator) -> ^Builder { b.buf = make([dynamic]byte, len, allocator) return b } +/* +Initializes a Builder with a specified length and cap +It replaces the existing `buf` -// initialize a builder, with a set length `len` byte buffer and a custom `cap` -// 
replaces the existing `buf` +Inputs: +- b: A pointer to the Builder +- len: The desired length of the Builder's buffer +- cap: The desired capacity of the Builder's buffer, actual max(len,cap) +- allocator: (default is context.allocator) + +Returns: A pointer to the initialized Builder +*/ builder_init_len_cap :: proc(b: ^Builder, len, cap: int, allocator := context.allocator) -> ^Builder { b.buf = make([dynamic]byte, len, cap, allocator) return b } - // overload simple `builder_init_*` with or without len / ap parameters builder_init :: proc{ builder_init_none, builder_init_len, builder_init_len_cap, } - @(private) _builder_stream_vtable_obj := io.Stream_VTable{ impl_write = proc(s: io.Stream, p: []byte) -> (n: int, err: io.Error) { @@ -91,49 +149,80 @@ _builder_stream_vtable_obj := io.Stream_VTable{ impl_destroy = proc(s: io.Stream) -> io.Error { b := (^Builder)(s.stream_data) delete(b.buf) + b.buf=nil return .None }, } - // NOTE(dweiler): Work around a miscompilation bug on Linux still. 
@(private) _builder_stream_vtable := &_builder_stream_vtable_obj +/* +Returns an io.Stream from a Builder -// return an `io.Stream` from a builder +Inputs: +- b: A pointer to the Builder + +Returns: An io.Stream +*/ to_stream :: proc(b: ^Builder) -> io.Stream { return io.Stream{stream_vtable=_builder_stream_vtable, stream_data=b} } +/* +Returns an io.Writer from a Builder -// return an `io.Writer` from a builder +Inputs: +- b: A pointer to the Builder + +Returns: An io.Writer +*/ to_writer :: proc(b: ^Builder) -> io.Writer { return io.to_writer(to_stream(b)) } +/* +Deletes and clears the Builder byte buffer content -// delete and clear the builder byte buffer content +Inputs: +- b: A pointer to the Builder +*/ builder_destroy :: proc(b: ^Builder) { delete(b.buf) - clear(&b.buf) + b.buf = nil } +/* +Reserves the Builder byte buffer to a specific capacity, when it's higher than before -// reserve the builfer byte buffer to a specific cap, when it's higher than before +Inputs: +- b: A pointer to the Builder +- cap: The desired capacity for the Builder's buffer +*/ builder_grow :: proc(b: ^Builder, cap: int) { reserve(&b.buf, cap) } +/* +Clears the Builder byte buffer content (sets len to zero) -// clear the builder byte buffer content +Inputs: +- b: A pointer to the Builder +*/ builder_reset :: proc(b: ^Builder) { clear(&b.buf) } - /* - create an empty builder with the same slice length as its cap - uses the `mem.nil_allocator` to avoid allocation and keep a fixed length - used in `fmt.bprint*` - +Creates a Builder from a slice of bytes with the same slice length as its capacity. 
Used in fmt.bprint* + +*Uses Nil Allocator - Does NOT allocate* + +Inputs: +- backing: A slice of bytes to be used as the backing buffer + +Example: +```odin bytes: [8]byte // <-- gets filled builder := strings.builder_from_bytes(bytes[:]) - strings.write_byte(&builder, 'a') -> "a" - strings.write_byte(&builder, 'b') -> "ab" + strings.write_byte(&builder, 'a') // -> "a" + strings.write_byte(&builder, 'b') // -> "ab" +``` +Returns: A new Builder */ builder_from_bytes :: proc(backing: []byte) -> Builder { s := transmute(runtime.Raw_Slice)backing @@ -147,36 +236,69 @@ builder_from_bytes :: proc(backing: []byte) -> Builder { buf = transmute([dynamic]byte)d, } } +// Alias to `builder_from_bytes` builder_from_slice :: builder_from_bytes +/* +Casts the Builder byte buffer to a string and returns it -// cast the builder byte buffer to a string and return it +Inputs: +- b: A Builder + +Returns: The contents of the Builder's buffer, as a string +*/ to_string :: proc(b: Builder) -> string { return string(b.buf[:]) } +/* +Returns the length of the Builder's buffer, in bytes -// return the length of the builder byte buffer +Inputs: +- b: A Builder + +Returns: The length of the Builder's buffer +*/ builder_len :: proc(b: Builder) -> int { return len(b.buf) } +/* +Returns the capacity of the Builder's buffer, in bytes -// return the cap of the builder byte buffer +Inputs: +- b: A Builder + +Returns: The capacity of the Builder's buffer +*/ builder_cap :: proc(b: Builder) -> int { return cap(b.buf) } +/* +The free space left in the Builder's buffer, in bytes -// returns the space left in the builder byte buffer to use up +Inputs: +- b: A Builder + +Returns: The available space left in the Builder's buffer +*/ builder_space :: proc(b: Builder) -> int { return cap(b.buf) - len(b.buf) } - /* - appends a byte to the builder, returns the append diff +Appends a byte to the Builder and returns the number of bytes appended +Inputs: +- b: A pointer to the Builder +- x: The byte to be 
appended + +Example: +```odin builder := strings.builder_make() - strings.write_byte(&builder, 'a') // 1 - strings.write_byte(&builder, 'b') // 1 - strings.write_byte(&builder, 'c') // 1 - fmt.println(strings.to_string(builder)) // -> abc + strings.write_byte(&builder, 'a') // 1 + strings.write_byte(&builder, 'b') // 1 + fmt.println(strings.to_string(builder)) // -> ab +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of bytes appended */ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) { n0 := len(b.buf) @@ -184,14 +306,23 @@ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) { n1 := len(b.buf) return n1-n0 } - /* - appends a slice of bytes to the builder, returns the append diff +Appends a slice of bytes to the Builder and returns the number of bytes appended +Inputs: +- b: A pointer to the Builder +- x: The slice of bytes to be appended + +Example: +```odin builder := strings.builder_make() bytes := [?]byte { 'a', 'b', 'c' } strings.write_bytes(&builder, bytes[:]) // 3 fmt.println(strings.to_string(builder)) // -> abc +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of bytes appended */ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) { n0 := len(b.buf) @@ -199,42 +330,66 @@ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) { n1 := len(b.buf) return n1-n0 } - /* - appends a single rune into the builder, returns written rune size and an `io.Error` +Appends a single rune to the Builder and returns the number of bytes written and an `io.Error` +Inputs: +- b: A pointer to the Builder +- r: The rune to be appended + +Example: +```odin builder := strings.builder_make() - strings.write_rune(&builder, 'ä') // 2 None - strings.write_rune(&builder, 'b') // 1 None - strings.write_rune(&builder, 'c') // 1 None - fmt.println(strings.to_string(builder)) // -> äbc + strings.write_rune(&builder, 'ä') // 2 None + strings.write_rune(&builder, 'b') // 1 None + fmt.println(strings.to_string(builder)) // -> äb +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of bytes written and an io.Error (if any) */ write_rune :: proc(b: ^Builder, r: rune) -> (int, io.Error) { return io.write_rune(to_writer(b), r) } - /* - appends a quoted rune into the builder, returns written size +Appends a quoted rune to the Builder and returns the number of bytes written +Inputs: +- b: A pointer to the Builder +- r: The rune to be appended + +Example: +```odin builder := strings.builder_make() - strings.write_string(&builder, "abc") // 3 + strings.write_string(&builder, "abc") // 3 strings.write_quoted_rune(&builder, 'ä') // 4 - strings.write_string(&builder, "abc") // 3 - fmt.println(strings.to_string(builder)) // -> abc'ä'abc + strings.write_string(&builder, "abc") // 3 + fmt.println(strings.to_string(builder)) // -> abc'ä'abc +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of bytes written */ write_quoted_rune :: proc(b: ^Builder, r: rune) -> (n: int) { return io.write_quoted_rune(to_writer(b), r) } - - /* - appends a string to the builder, return the written byte size - +Appends a string to the Builder and returns the number of bytes written + +Inputs: +- b: A pointer to the Builder +- s: The string to be appended + +Example: +```odin builder := strings.builder_make() - strings.write_string(&builder, "a") // 1 - strings.write_string(&builder, "bc") // 2 - strings.write_string(&builder, "xyz") // 3 - fmt.println(strings.to_string(builder)) // -> abcxyz + strings.write_string(&builder, "a") // 1 + strings.write_string(&builder, "bc") // 2 + fmt.println(strings.to_string(builder)) // -> abc +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of bytes written */ write_string :: proc(b: ^Builder, s: string) -> (n: int) { n0 := len(b.buf) @@ -242,10 +397,14 @@ write_string :: proc(b: ^Builder, s: string) -> (n: int) { n1 := len(b.buf) return n1-n0 } +/* +Pops and returns the last byte in the Builder or 0 when the Builder is empty +Inputs: +- b: A pointer to the Builder -// pops and returns the last byte in the builder -// returns 0 when the builder is empty +Returns: The last byte in the Builder or 0 if empty +*/ pop_byte :: proc(b: ^Builder) -> (r: byte) { if len(b.buf) == 0 { return 0 @@ -256,9 +415,14 @@ pop_byte :: proc(b: ^Builder) -> (r: byte) { d.len = max(d.len-1, 0) return } +/* +Pops the last rune in the Builder and returns the popped rune and its rune width or (0, 0) if empty -// pops the last rune in the builder and returns the popped rune and its rune width -// returns 0, 0 when the builder is empty +Inputs: +- b: A pointer to the Builder + +Returns: The popped rune and its rune width or (0, 0) if empty +*/ pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) { if len(b.buf) == 0 { return 0, 0 @@ -269,41 
+433,84 @@ pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) { d.len = max(d.len-width, 0) return } - @(private) DIGITS_LOWER := "0123456789abcdefx" - /* - append a quoted string into the builder, return the written byte size +Inputs: +- b: A pointer to the Builder +- str: The string to be quoted and appended +- quote: The optional quote character (default is double quotes) +Example: +```odin builder := strings.builder_make() - strings.write_quoted_string(&builder, "a") // 3 - strings.write_quoted_string(&builder, "bc", '\'') // 4 - strings.write_quoted_string(&builder, "xyz") // 5 - fmt.println(strings.to_string(builder)) // -> "a"'bc'xyz" + strings.write_quoted_string(&builder, "a") // 3 + strings.write_quoted_string(&builder, "bc", '\'') // 4 + strings.write_quoted_string(&builder, "xyz") // 5 + fmt.println(strings.to_string(builder)) // -> "a"'bc'"xyz" +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of bytes written */ write_quoted_string :: proc(b: ^Builder, str: string, quote: byte = '"') -> (n: int) { n, _ = io.write_quoted_string(to_writer(b), str, quote) return } +/* +Appends an encoded rune to the Builder and returns the number of bytes written +Inputs: +- b: A pointer to the Builder +- r: The rune to be appended +- write_quote: Optional boolean flag to write the quote character (default is true) -// appends a rune to the builder, optional `write_quote` boolean tag, returns the written rune size +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of bytes written +*/ write_encoded_rune :: proc(b: ^Builder, r: rune, write_quote := true) -> (n: int) { n, _ = io.write_encoded_rune(to_writer(b), r, write_quote) return } +/* +Appends an escaped rune to the Builder and returns the number of bytes written -// appends a rune to the builder, fully written out in case of escaped runes e.g. '\a' will be written as such -// when `r` and `quote` match and `quote` is `\\` - they will be written as two slashes -// `html_safe` flag in case the runes '<', '>', '&' should be encoded as digits e.g. `\u0026` +Inputs: +- b: A pointer to the Builder +- r: The rune to be appended +- quote: The quote character +- html_safe: Optional boolean flag to encode '<', '>', '&' as digits (default is false) + +Examples: +- '\a' will be written as such +- `r` and `quote` match and `quote` is `\\` - they will be written as two slashes +- `html_safe` flag in case the runes '<', '>', '&' should be encoded as digits e.g. `\u0026` + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of bytes written +*/ write_escaped_rune :: proc(b: ^Builder, r: rune, quote: byte, html_safe := false) -> (n: int) { n, _ = io.write_escaped_rune(to_writer(b), r, quote, html_safe) return } +/* +Writes a f64 value to the Builder and returns the number of characters written -// writes a f64 value into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- f: The f64 value to be appended +- fmt: The format byte +- prec: The precision +- bit_size: The bit size +- always_signed: Optional boolean flag to always include the sign (default is false) + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of characters written +*/ write_float :: proc(b: ^Builder, f: f64, fmt: byte, prec, bit_size: int, always_signed := false) -> (n: int) { buf: [384]byte s := strconv.append_float(buf[:], f, fmt, prec, bit_size) @@ -314,8 +521,19 @@ write_float :: proc(b: ^Builder, f: f64, fmt: byte, prec, bit_size: int, always_ } return write_string(b, s) } +/* +Writes a f16 value to the Builder and returns the number of characters written -// writes a f16 value into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- f: The f16 value to be appended +- fmt: The format byte +- always_signed: Optional boolean flag to always include the sign + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of characters written +*/ write_f16 :: proc(b: ^Builder, f: f16, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) @@ -324,8 +542,27 @@ write_f16 :: proc(b: ^Builder, f: f16, fmt: byte, always_signed := false) -> (n: } return write_string(b, s) } +/* +Writes a f32 value to the Builder and returns the number of characters written -// writes a f32 value into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- f: The f32 value to be appended +- fmt: The format byte +- always_signed: Optional boolean flag to always include the sign + +Example: +```odin + builder := strings.builder_make() + strings.write_f32(&builder, 3.14159, 'f') // 10 + strings.write_string(&builder, " - ") // 3 + strings.write_f32(&builder, -0.123, 'e') // 15 + fmt.println(strings.to_string(builder)) // -> 3.14159012 - -1.23000003e-01 +``` +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of characters written +*/ write_f32 :: proc(b: ^Builder, f: f32, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) @@ -334,8 +571,19 @@ write_f32 :: proc(b: ^Builder, f: f32, fmt: byte, always_signed := false) -> (n: } return write_string(b, s) } +/* +Writes a f64 value to the Builder and returns the number of characters written -// writes a f64 value into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- f: The f64 value to be appended +- fmt: The format byte +- always_signed: Optional boolean flag to always include the sign + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of characters written +*/ write_f64 :: proc(b: ^Builder, f: f64, fmt: byte, always_signed := false) -> (n: int) { buf: [384]byte s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f)) @@ -344,30 +592,67 @@ write_f64 :: proc(b: ^Builder, f: f64, fmt: byte, always_signed := false) -> (n: } return write_string(b, s) } +/* +Writes a u64 value to the Builder and returns the number of characters written +Inputs: +- b: A pointer to the Builder +- i: The u64 value to be appended +- base: The optional base for the numeric representation +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
-// writes a u64 value `i` in `base` = 10 into the builder, returns the written amount of characters +Returns: The number of characters written +*/ write_u64 :: proc(b: ^Builder, i: u64, base: int = 10) -> (n: int) { buf: [32]byte s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil) return write_string(b, s) } +/* +Writes a i64 value to the Builder and returns the number of characters written -// writes a i64 value `i` in `base` = 10 into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- i: The i64 value to be appended +- base: The optional base for the numeric representation + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of characters written +*/ write_i64 :: proc(b: ^Builder, i: i64, base: int = 10) -> (n: int) { buf: [32]byte s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) return write_string(b, s) } +/* +Writes a uint value to the Builder and returns the number of characters written -// writes a uint value `i` in `base` = 10 into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- i: The uint value to be appended +- base: The optional base for the numeric representation + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. 
+ +Returns: The number of characters written +*/ write_uint :: proc(b: ^Builder, i: uint, base: int = 10) -> (n: int) { return write_u64(b, u64(i), base) } +/* +Writes a int value to the Builder and returns the number of characters written -// writes a int value `i` in `base` = 10 into the builder, returns the written amount of characters +Inputs: +- b: A pointer to the Builder +- i: The int value to be appended +- base: The optional base for the numeric representation + +NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written. + +Returns: The number of characters written +*/ write_int :: proc(b: ^Builder, i: int, base: int = 10) -> (n: int) { return write_i64(b, i64(i), base) } - diff --git a/core/strings/conversion.odin b/core/strings/conversion.odin index df03442c5..c952725fa 100644 --- a/core/strings/conversion.odin +++ b/core/strings/conversion.odin @@ -4,6 +4,20 @@ import "core:io" import "core:unicode" import "core:unicode/utf8" +/* +Converts invalid UTF-8 sequences in the input string `s` to the `replacement` string. + +*Allocates Using Provided Allocator* + +Inputs: +- s: Input string that may contain invalid UTF-8 sequences. +- replacement: String to replace invalid UTF-8 sequences with. +- allocator: (default: context.allocator). + +WARNING: Allocation does not occur when len(s) == 0 + +Returns: A valid UTF-8 string with invalid sequences replaced by `replacement`. 
+*/ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> string { if len(s) == 0 { return "" @@ -33,7 +47,7 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> invalid := false - for i := 0; i < len(s); /**/ { + for i := 0; i < len(s); /**/{ c := s[i] if c < utf8.RUNE_SELF { i += 1 @@ -57,13 +71,20 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> } return to_string(b) } - /* - returns the input string `s` with all runes set to lowered case - always allocates using the `allocator` +Converts the input string `s` to all lowercase characters. - strings.to_lower("test") -> test - strings.to_lower("Test") -> test +*Allocates Using Provided Allocator* + +Inputs: +- s: Input string to be converted. +- allocator: (default: context.allocator). + +Example: +```odin + strings.to_lower("TeST") -> test +``` +Returns: A new string with all characters converted to lowercase. */ to_lower :: proc(s: string, allocator := context.allocator) -> string { b: Builder @@ -73,13 +94,20 @@ to_lower :: proc(s: string, allocator := context.allocator) -> string { } return to_string(b) } - /* - returns the input string `s` with all runes set to upper case - always allocates using the `allocator` +Converts the input string `s` to all uppercase characters. - strings.to_upper("test") -> TEST +*Allocates Using Provided Allocator* + +Inputs: +- s: Input string to be converted. +- allocator: (default: context.allocator). + +Example: +```odin strings.to_upper("Test") -> TEST +``` +Returns: A new string with all characters converted to uppercase. */ to_upper :: proc(s: string, allocator := context.allocator) -> string { b: Builder @@ -89,21 +117,36 @@ to_upper :: proc(s: string, allocator := context.allocator) -> string { } return to_string(b) } +/* +Checks if the rune `c` is a delimiter (' ', '-', or '_'). 
-// returns true when the `c` rune is a space, '-' or '_' -// useful when treating strings like words in a text editor or html paths +Inputs: +- c: Rune to check for delimiter status. + +Returns: True if `c` is a delimiter, false otherwise. +*/ is_delimiter :: proc(c: rune) -> bool { return c == '-' || c == '_' || is_space(c) } +/* +Checks if the rune `r` is a non-alphanumeric or space character. -// returns true when the `r` rune is a non alpha or `unicode.is_space` rune +Inputs: +- r: Rune to check for separator status. + +Returns: True if `r` is a non-alpha or `unicode.is_space` rune. +*/ is_separator :: proc(r: rune) -> bool { if r <= 0x7f { switch r { - case '0'..='9': return false - case 'a'..='z': return false - case 'A'..='Z': return false - case '_': return false + case '0' ..= '9': + return false + case 'a' ..= 'z': + return false + case 'A' ..= 'Z': + return false + case '_': + return false } return true } @@ -115,12 +158,32 @@ is_separator :: proc(r: rune) -> bool { return unicode.is_space(r) } - /* - iterator that loops through the string and calls the callback with the `prev`, `curr` and `next` rune - on empty string `s` the callback gets called once with empty runes +Iterates over a string, calling a callback for each rune with the previous, current, and next runes as arguments. + +Inputs: +- w: An io.Writer to be used by the callback for writing output. +- s: The input string to be iterated over. +- callback: A procedure to be called for each rune in the string, with arguments (w: io.Writer, prev, curr, next: rune). +The callback can utilize the provided io.Writer to write output during the iteration. 
+ +Example: +```odin + my_callback :: proc(w: io.Writer, prev, curr, next: rune) { + fmt.println("my_callback", curr) // <-- Custom logic here + } + s := "hello world" + b: strings.Builder + strings.builder_init_len(&b, len(s)) + w := strings.to_writer(&b) + strings.string_case_iterator(w, s, my_callback) +``` */ -string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Writer, prev, curr, next: rune)) { +string_case_iterator :: proc( + w: io.Writer, + s: string, + callback: proc(w: io.Writer, prev, curr, next: rune), +) { prev, curr: rune for next in s { if curr == 0 { @@ -139,10 +202,19 @@ string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Write callback(w, prev, curr, 0) } } - +// Alias to `to_camel_case` to_lower_camel_case :: to_camel_case +/* +Converts the input string `s` to "lowerCamelCase". -// converts the `s` string to "lowerCamelCase" +*Allocates Using Provided Allocator* + +Inputs: +- s: Input string to be converted. +- allocator: (default: context.allocator). + +Returns: A "lowerCamelCase" formatted string. +*/ to_camel_case :: proc(s: string, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -164,10 +236,19 @@ to_camel_case :: proc(s: string, allocator := context.allocator) -> string { return to_string(b) } - +// Alias to `to_pascal_case` to_upper_camel_case :: to_pascal_case +/* +Converts the input string `s` to "UpperCamelCase" (PascalCase). -// converts the `s` string to "PascalCase" +*Allocates Using Provided Allocator* + +Inputs: +- s: Input string to be converted. +- allocator: (default: context.allocator). + +Returns: A "PascalCase" formatted string. 
+*/ to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -189,17 +270,31 @@ to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { return to_string(b) } +/* +Returns a string converted to a delimiter-separated case with configurable casing -/* - returns the `s` string to words seperated by the given `delimiter` rune - all runes will be upper or lowercased based on the `all_uppercase` bool +*Allocates Using Provided Allocator* - strings.to_delimiter_case("Hello World", '_', false) -> hello_world - strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD - strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD - strings.to_delimiter_case("aBC", '_', false) -> a_b_c +Inputs: +- s: The input string to be converted +- delimiter: The rune to be used as the delimiter between words +- all_upper_case: A boolean indicating if the output should be all uppercased (true) or lowercased (false) +- allocator: (default: context.allocator). 
+ +Example: +```odin + strings.to_delimiter_case("Hello World", '_', false) // -> "hello_world" + strings.to_delimiter_case("Hello World", ' ', true) // -> "HELLO WORLD" + strings.to_delimiter_case("aBC", '_', false) // -> "a_b_c" +``` +Returns: The converted string */ -to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string { +to_delimiter_case :: proc( + s: string, + delimiter: rune, + all_upper_case: bool, + allocator := context.allocator, +) -> string { s := s s = trim_space(s) b: Builder @@ -237,35 +332,96 @@ to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allo return to_string(b) } +/* +Converts a string to "snake_case" with all runes lowercased -/* - converts the `s` string to "snake_case" with all runes lowercased - - strings.to_snake_case("HelloWorld") -> hello_world - strings.to_snake_case("Hello World") -> hello_world +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to be converted +- allocator: (default: context.allocator). + +Example: +```odin + strings.to_snake_case("HelloWorld") // -> "hello_world" + strings.to_snake_case("Hello World") // -> "hello_world" +``` +Returns: The converted string */ to_snake_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '_', false, allocator) } - +// Alias for `to_upper_snake_case` to_screaming_snake_case :: to_upper_snake_case +/* +Converts a string to "SNAKE_CASE" with all runes uppercased -// converts the `s` string to "SNAKE_CASE" with all runes uppercased +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to be converted +- allocator: (default: context.allocator). 
+ +Example: +```odin + strings.to_upper_snake_case("HelloWorld") // -> "HELLO_WORLD" +``` +Returns: The converted string +*/ to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '_', true, allocator) } +/* +Converts a string to "kebab-case" with all runes lowercased -// converts the `s` string to "kebab-case" with all runes lowercased +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to be converted +- allocator: (default: context.allocator). + +Example: +```odin + strings.to_kebab_case("HelloWorld") // -> "hello-world" +``` +Returns: The converted string +*/ to_kebab_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '-', false, allocator) } +/* +Converts a string to "KEBAB-CASE" with all runes uppercased -// converts the `s` string to "KEBAB-CASE" with all runes uppercased +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to be converted +- allocator: (default: context.allocator). + +Example: +```odin + strings.to_upper_kebab_case("HelloWorld") // -> "HELLO-WORLD" +``` +Returns: The converted string +*/ to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '-', true, allocator) } +/* +Converts a string to "Ada_Case" -// converts the `s` string to "Ada_Case" +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to be converted +- allocator: (default: context.allocator). 
+ +Example: +```odin + strings.to_ada_case("HelloWorld") // -> "Hello_World" +``` +Returns: The converted string +*/ to_ada_case :: proc(s: string, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -275,7 +431,9 @@ to_ada_case :: proc(s: string, allocator := context.allocator) -> string { string_case_iterator(w, s, proc(w: io.Writer, prev, curr, next: rune) { if !is_delimiter(curr) { - if is_delimiter(prev) || prev == 0 || (unicode.is_lower(prev) && unicode.is_upper(curr)) { + if is_delimiter(prev) || + prev == 0 || + (unicode.is_lower(prev) && unicode.is_upper(curr)) { if prev != 0 { io.write_rune(w, '_') } diff --git a/core/strings/intern.odin b/core/strings/intern.odin index 5e9193a0d..5f849ed13 100644 --- a/core/strings/intern.odin +++ b/core/strings/intern.odin @@ -2,49 +2,96 @@ package strings import "core:runtime" -// custom string entry struct +// Custom string entry struct Intern_Entry :: struct { len: int, str: [1]byte, // string is allocated inline with the entry to keep allocations simple } +/* +Intern is a more memory efficient string map -// "intern" is a more memory efficient string map -// `allocator` is used to allocate the actual `Intern_Entry` strings +Uses Specified Allocator for `Intern_Entry` strings + +Fields: +- allocator: The allocator used for the Intern_Entry strings +- entries: A map of strings to interned string entries +*/ Intern :: struct { allocator: runtime.Allocator, entries: map[string]^Intern_Entry, } +/* +Initializes the entries map and sets the allocator for the string entries -// initialize the entries map and set the allocator for the string entries +*Allocates Using Provided Allocators* + +Inputs: +- m: A pointer to the Intern struct to be initialized +- allocator: The allocator for the Intern_Entry strings (Default: context.allocator) +- map_allocator: The allocator for the map of entries (Default: context.allocator) +*/ intern_init :: proc(m: ^Intern, allocator := context.allocator, map_allocator := 
context.allocator) { m.allocator = allocator m.entries = make(map[string]^Intern_Entry, 16, map_allocator) } +/* +Frees the map and all its content allocated using the `.allocator`. -// free the map and all its content allocated using the `.allocator` +Inputs: +- m: A pointer to the Intern struct to be destroyed +*/ intern_destroy :: proc(m: ^Intern) { for _, value in m.entries { free(value, m.allocator) } delete(m.entries) } +/* +Returns the interned string for the given text; it is added to the map if it didn't exist yet. -// returns the `text` string from the intern map - gets set if it didnt exist yet -// the returned string lives as long as the map entry lives +*MAY Allocate using the Intern's Allocator* + +Inputs: +- m: A pointer to the Intern struct +- text: The string to be interned + +NOTE: The returned string lives as long as the map entry lives. + +Returns: The interned string and an allocator error if any +*/ intern_get :: proc(m: ^Intern, text: string) -> (str: string, err: runtime.Allocator_Error) { entry := _intern_get_entry(m, text) or_return #no_bounds_check return string(entry.str[:entry.len]), nil } +/* +Returns the interned C-String for the given text; it is added to the map if it didn't exist yet.
-// returns the `text` cstring from the intern map - gets set if it didnt exist yet -// the returned cstring lives as long as the map entry lives +*MAY Allocate using the Intern's Allocator* + +Inputs: +- m: A pointer to the Intern struct +- text: The string to be interned + +NOTE: The returned C-String lives as long as the map entry lives + +Returns: The interned C-String and an allocator error if any +*/ intern_get_cstring :: proc(m: ^Intern, text: string) -> (str: cstring, err: runtime.Allocator_Error) { entry := _intern_get_entry(m, text) or_return return cstring(&entry.str[0]), nil } +/* +Internal function to lookup whether the text string exists in the map, returns the entry +Sets and allocates the entry if it wasn't set yet -// looks up wether the `text` string exists in the map, returns the entry -// sets & allocates the entry if it wasnt set yet +*MAY Allocate using the Intern's Allocator* + +Inputs: +- m: A pointer to the Intern struct +- text: The string to be looked up or interned + +Returns: The new or existing interned entry and an allocator error if any +*/ _intern_get_entry :: proc(m: ^Intern, text: string) -> (new_entry: ^Intern_Entry, err: runtime.Allocator_Error) #no_bounds_check { if prev, ok := m.entries[text]; ok { return prev, nil diff --git a/core/strings/reader.odin b/core/strings/reader.odin index 038740526..f4e3b213a 100644 --- a/core/strings/reader.odin +++ b/core/strings/reader.odin @@ -4,59 +4,104 @@ import "core:io" import "core:unicode/utf8" /* - io stream data for a string reader that can read based on bytes or runes - implements the vtable when using the io.Reader variants - "read" calls advance the current reading offset `i` +io stream data for a string reader that can read based on bytes or runes +implements the vtable when using the io.Reader variants +"read" calls advance the current reading offset `i` */ Reader :: struct { s: string, // read-only buffer i: i64, // current reading index prev_rune: int, // previous reading index 
of rune or < 0 } +/* +Initializes a string Reader with the provided string -// init the reader to the string `s` +Inputs: +- r: A pointer to a Reader struct +- s: The input string to be read +*/ reader_init :: proc(r: ^Reader, s: string) { r.s = s r.i = 0 r.prev_rune = -1 } +/* +Converts a Reader into an io.Stream -// returns a stream from the reader data +Inputs: +- r: A pointer to a Reader struct + +Returns: An io.Stream for the given Reader +*/ reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) { s.stream_data = r s.stream_vtable = &_reader_vtable return } +/* +Initializes a string Reader and returns an io.Reader for the given string -// init a reader to the string `s` and return an io.Reader +Inputs: +- r: A pointer to a Reader struct +- s: The input string to be read + +Returns: An io.Reader for the given string +*/ to_reader :: proc(r: ^Reader, s: string) -> io.Reader { reader_init(r, s) rr, _ := io.to_reader(reader_to_stream(r)) return rr } +/* +Initializes a string Reader and returns an io.Reader_At for the given string -// init a reader to the string `s` and return an io.Reader_At +Inputs: +- r: A pointer to a Reader struct +- s: The input string to be read + +Returns: An io.Reader_At for the given string +*/ to_reader_at :: proc(r: ^Reader, s: string) -> io.Reader_At { reader_init(r, s) rr, _ := io.to_reader_at(reader_to_stream(r)) return rr } +/* +Returns the remaining length of the Reader -// remaining length of the reader +Inputs: +- r: A pointer to a Reader struct + +Returns: The remaining length of the Reader +*/ reader_length :: proc(r: ^Reader) -> int { if r.i >= i64(len(r.s)) { return 0 } return int(i64(len(r.s)) - r.i) } +/* +Returns the length of the string stored in the Reader -// returns the string length stored by the reader +Inputs: +- r: A pointer to a Reader struct + +Returns: The length of the string stored in the Reader +*/ reader_size :: proc(r: ^Reader) -> i64 { return i64(len(r.s)) } +/* +Reads len(p) bytes from the Reader's 
string and copies into the provided slice. -// reads len(p) bytes into the slice from the string in the reader -// returns `n` amount of read bytes and an io.Error +Inputs: +- r: A pointer to a Reader struct +- p: A byte slice to copy data into + +Returns: +- n: The number of bytes read +- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success. +*/ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) { if r.i >= i64(len(r.s)) { return 0, .EOF @@ -66,9 +111,18 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) { r.i += i64(n) return } +/* +Reads len(p) bytes from the Reader's string and copies into the provided slice, at the specified offset from the current index. -// reads len(p) bytes into the slice from the string in the reader at an offset -// returns `n` amount of read bytes and an io.Error +Inputs: +- r: A pointer to a Reader struct +- p: A byte slice to copy data into +- off: The offset from which to read + +Returns: +- n: The number of bytes read +- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success. +*/ reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) { if off < 0 { return 0, .Invalid_Offset @@ -82,8 +136,16 @@ reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Erro } return } +/* +Reads and returns a single byte from the Reader's string -// reads and returns a single byte - error when out of bounds +Inputs: +- r: A pointer to a Reader struct + +Returns: +- The byte read from the Reader +- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success. 
+*/ reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) { r.prev_rune = -1 if r.i >= i64(len(r.s)) { @@ -93,8 +155,14 @@ reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) { r.i += 1 return b, nil } +/* +Decrements the Reader's index (i) by 1 -// decreases the reader offset - error when below 0 +Inputs: +- r: A pointer to a Reader struct + +Returns: An io.Error if `r.i <= 0` (.Invalid_Unread), otherwise nil denotes success. +*/ reader_unread_byte :: proc(r: ^Reader) -> io.Error { if r.i <= 0 { return .Invalid_Unread @@ -103,8 +171,17 @@ reader_unread_byte :: proc(r: ^Reader) -> io.Error { r.i -= 1 return nil } +/* +Reads and returns a single rune and its size from the Reader's string -// reads and returns a single rune and the rune size - error when out bounds +Inputs: +- r: A pointer to a Reader struct + +Returns: +- ch: The rune read from the Reader +- size: The size of the rune in bytes +- err: An io.Error if an error occurs while reading +*/ reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) { if r.i >= i64(len(r.s)) { r.prev_rune = -1 @@ -119,9 +196,16 @@ reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) { r.i += i64(size) return } +/* +Decrements the Reader's index (i) by the size of the last read rune -// decreases the reader offset by the last rune -// can only be used once and after a valid read_rune call +Inputs: +- r: A pointer to a Reader struct + +WARNING: May only be used once and after a valid `read_rune` call + +Returns: An io.Error if an error occurs while unreading (.Invalid_Unread), else nil denotes success. 
+*/ reader_unread_rune :: proc(r: ^Reader) -> io.Error { if r.i <= 0 { return .Invalid_Unread @@ -133,8 +217,18 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error { r.prev_rune = -1 return nil } +/* +Seeks the Reader's index to a new position -// seeks the reader offset to a wanted offset +Inputs: +- r: A pointer to a Reader struct +- offset: The new offset position +- whence: The reference point for the new position (.Start, .Current, or .End) + +Returns: +- The absolute offset after seeking +- err: An io.Error if an error occurs while seeking (.Invalid_Whence, .Invalid_Offset) +*/ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) { r.prev_rune = -1 abs: i64 @@ -155,8 +249,19 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E r.i = abs return abs, nil } +/* +Writes the remaining content of the Reader's string into the provided io.Writer -// writes the string content left to read into the io.Writer `w` +Inputs: +- r: A pointer to a Reader struct +- w: The io.Writer to write the remaining content into + +WARNING: Panics if the writer writes more bytes than the remaining length of the string. + +Returns: +- n: The number of bytes written +- err: An io.Error if an error occurs while writing (.Short_Write) +*/ reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) { r.prev_rune = -1 if r.i >= i64(len(r.s)) { @@ -175,7 +280,12 @@ reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) { } return } +/* +VTable containing implementations for various io.Stream methods +This VTable is used by the Reader struct to provide its functionality +as an io.Stream.
+*/ @(private) _reader_vtable := io.Stream_VTable{ impl_size = proc(s: io.Stream) -> i64 { diff --git a/core/strings/strings.odin b/core/strings/strings.odin index 33cdafef3..bd2dcc004 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -1,4 +1,4 @@ -// simple procedures to manipulate UTF-8 encoded strings +// Procedures to manipulate UTF-8 encoded strings package strings import "core:io" @@ -6,58 +6,128 @@ import "core:mem" import "core:unicode" import "core:unicode/utf8" -// returns a clone of the string `s` allocated using the `allocator` +/* +Clones a string + +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to be cloned +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +Returns: A cloned string +*/ clone :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> string { c := make([]byte, len(s), allocator, loc) copy(c, s) return string(c[:len(s)]) } +/* +Clones a string safely (returns early with an allocation error on failure) -// returns a clone of the string `s` allocated using the `allocator` +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to be cloned +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +Returns: +- str: A cloned string +- err: A mem.Allocator_Error if an error occurs during allocation +*/ clone_safe :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> (str: string, err: mem.Allocator_Error) { c := make([]byte, len(s), allocator, loc) or_return copy(c, s) return string(c[:len(s)]), nil } +/* +Clones a string and appends a nul byte to make it a cstring -// returns a clone of the string `s` allocated using the `allocator` as a cstring -// a nul byte is appended to the clone, to make the cstring safe +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to be cloned +- allocator: 
(default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +Returns: A cloned cstring with an appended nul byte +*/ clone_to_cstring :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> cstring { c := make([]byte, len(s)+1, allocator, loc) copy(c, s) c[len(s)] = 0 return cstring(&c[0]) } +/* +Transmutes a raw pointer into a string. Non-allocating. -// returns a string from a byte pointer `ptr` and byte length `len` -// the string is valid as long as the parameters stay alive +Inputs: +- ptr: A pointer to the start of the byte sequence +- len: The length of the byte sequence + +NOTE: The created string is only valid as long as the pointer and length are valid. + +Returns: A string created from the byte pointer and length +*/ string_from_ptr :: proc(ptr: ^byte, len: int) -> string { return transmute(string)mem.Raw_String{ptr, len} } +/* +Transmutes a raw pointer (nul-terminated) into a string. Non-allocating. Searches for a nul byte from 0.. string { s := transmute(string)mem.Raw_String{ptr, len} s = truncate_to_byte(s, 0) return s } +/* +Gets the raw byte pointer for the start of a string `str` -// returns the raw ^byte start of the string `str` +Inputs: +- str: The input string + +Returns: A pointer to the start of the string's bytes +*/ ptr_from_string :: proc(str: string) -> ^byte { d := transmute(mem.Raw_String)str return d.data } +/* +Converts a string `str` to a cstring -// returns the transmute of string `str` to a cstring -// not safe since the origin string may not contain a nul byte +Inputs: +- str: The input string + +WARNING: This is unsafe because the original string may not contain a nul byte. 
+ +Returns: The converted cstring +*/ unsafe_string_to_cstring :: proc(str: string) -> cstring { d := transmute(mem.Raw_String)str return cstring(d.data) } +/* +Truncates a string `str` at the first occurrence of char/byte `b` -// returns a string truncated to the first time it finds the byte `b` -// uses the `len` of the string `str` when it couldn't find the input +Inputs: +- str: The input string +- b: The byte to truncate the string at + +NOTE: Failure to find the byte results in returning the entire string. + +Returns: The truncated string +*/ truncate_to_byte :: proc(str: string, b: byte) -> string { n := index_byte(str, b) if n < 0 { @@ -65,9 +135,15 @@ truncate_to_byte :: proc(str: string, b: byte) -> string { } return str[:n] } +/* +Truncates a string str at the first occurrence of rune r as a slice of the original, entire string if not found -// returns a string truncated to the first time it finds the rune `r` -// uses the `len` of the string `str` when it couldn't find the input +Inputs: +- str: The input string +- r: The rune to truncate the string at + +Returns: The truncated string +*/ truncate_to_rune :: proc(str: string, r: rune) -> string { n := index_rune(str, r) if n < 0 { @@ -75,51 +151,107 @@ truncate_to_rune :: proc(str: string, r: rune) -> string { } return str[:n] } +/* +Clones a byte array s and appends a nul byte -// returns a cloned string of the byte array `s` using the `allocator` -// appends a leading nul byte +*Allocates Using Provided Allocator* + +Inputs: +- s: The byte array to be cloned +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +Returns: A cloned string from the byte array with a nul byte +*/ clone_from_bytes :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> string { c := make([]byte, len(s)+1, allocator, loc) copy(c, s) c[len(s)] = 0 return string(c[:len(s)]) } +/* +Clones a cstring s as a string -// returns a clone of 
the cstring `s` using the `allocator` as a string +*Allocates Using Provided Allocator* + +Inputs: +- s: The cstring to be cloned +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +Returns: A cloned string from the cstring +*/ clone_from_cstring :: proc(s: cstring, allocator := context.allocator, loc := #caller_location) -> string { return clone(string(s), allocator, loc) } +/* +Clones a string from a byte pointer ptr and a byte length len -// returns a cloned string from the pointer `ptr` and a byte length `len` using the `allocator` -// same to `string_from_ptr` but allocates +*Allocates Using Provided Allocator* + +Inputs: +- ptr: A pointer to the start of the byte sequence +- len: The length of the byte sequence +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: #caller_location) + +NOTE: Same as `string_from_ptr`, but perform an additional `clone` operation + +Returns: A cloned string from the byte pointer and length +*/ clone_from_ptr :: proc(ptr: ^byte, len: int, allocator := context.allocator, loc := #caller_location) -> string { s := string_from_ptr(ptr, len) return clone(s, allocator, loc) } - -// overload to clone from a `string`, `[]byte`, `cstring` or a `^byte + length` to a string +// Overloaded procedure to clone from a string, []byte, cstring or a ^byte + length clone_from :: proc{ clone, clone_from_bytes, clone_from_cstring, clone_from_ptr, } +/* +Clones a string from a nul-terminated cstring ptr and a byte length len -// returns a cloned string from the cstring `ptr` and a byte length `len` using the `allocator` -// truncates till the first nul byte it finds or the byte len +*Allocates Using Provided Allocator* + +Inputs: +- ptr: A pointer to the start of the nul-terminated cstring +- len: The byte length of the cstring +- allocator: (default: context.allocator) +- loc: The caller location for debugging purposes (default: 
#caller_location) + +NOTE: Truncates at the first nul byte encountered or the byte length. + +Returns: A cloned string from the nul-terminated cstring and byte length +*/ clone_from_cstring_bounded :: proc(ptr: cstring, len: int, allocator := context.allocator, loc := #caller_location) -> string { s := string_from_ptr((^u8)(ptr), len) s = truncate_to_byte(s, 0) return clone(s, allocator, loc) } +/* +Compares two strings, returning a value representing which one comes first lexicographically. +-1 for lhs; 1 for rhs, or 0 if they are equal. -// Compares two strings, returning a value representing which one comes first lexiographically. -// -1 for `lhs`; 1 for `rhs`, or 0 if they are equal. +Inputs: +- lhs: First string for comparison +- rhs: Second string for comparison + +Returns: -1 if lhs comes first, 1 if rhs comes first, or 0 if they are equal +*/ compare :: proc(lhs, rhs: string) -> int { return mem.compare(transmute([]byte)lhs, transmute([]byte)rhs) } +/* +Returns the byte offset of the rune r in the string s, -1 when not found -// returns the byte offset of the rune `r` in the string `s`, -1 when not found +Inputs: +- s: The input string +- r: The rune to search for + +Returns: The byte offset of the rune r in the string s, or -1 if not found +*/ contains_rune :: proc(s: string, r: rune) -> int { for c, offset in s { if c == r { @@ -128,48 +260,79 @@ contains_rune :: proc(s: string, r: rune) -> int { } return -1 } - /* - returns true when the string `substr` is contained inside the string `s` +Returns true when the string substr is contained inside the string s - strings.contains("testing", "test") -> true - strings.contains("testing", "ing") -> true - strings.contains("testing", "text") -> false +Inputs: +- s: The input string +- substr: The substring to search for + +Example: +```odin + strings.contains("testing", "test") // -> true + strings.contains("testing", "ing") // -> true + strings.contains("testing", "text") // -> false +``` + +Returns: true if 
substr is contained inside the string s, false otherwise */ contains :: proc(s, substr: string) -> bool { return index(s, substr) >= 0 } - /* - returns true when the string `s` contains any of the characters inside the string `chars` - - strings.contains_any("test", "test") -> true - strings.contains_any("test", "ts") -> true - strings.contains_any("test", "et") -> true - strings.contains_any("test", "a") -> false +Returns true when the string s contains any of the characters inside the string chars + +Inputs: +- s: The input string +- chars: The characters to search for + +Example: +```odin + strings.contains_any("test", "test") // -> true + strings.contains_any("test", "ts") // -> true + strings.contains_any("test", "et") // -> true + strings.contains_any("test", "a") // -> false +``` + +Returns: true if the string s contains any of the characters in chars, false otherwise */ contains_any :: proc(s, chars: string) -> bool { return index_any(s, chars) >= 0 } - /* - returns the utf8 rune count of the string `s` +Returns the UTF-8 rune count of the string s - strings.rune_count("test") -> 4 - strings.rune_count("testö") -> 5, where len("testö") -> 6 +Inputs: +- s: The input string + +Example: +```odin + strings.rune_count("test") // -> 4 + strings.rune_count("testö") // -> 5, where len("testö") -> 6 +``` + +Returns: The UTF-8 rune count of the string s */ rune_count :: proc(s: string) -> int { return utf8.rune_count_in_string(s) } - /* - returns wether the strings `u` and `v` are the same alpha characters - works with utf8 string content and ignores different casings +Returns whether the strings u and v are the same alpha characters, ignoring different casings +Works with UTF-8 string content - strings.equal_fold("test", "test") -> true - strings.equal_fold("Test", "test") -> true - strings.equal_fold("Test", "tEsT") -> true - strings.equal_fold("test", "tes") -> false +Inputs: +- u: The first string for comparison +- v: The second string for comparison + +Example: 
+```odin + strings.equal_fold("test", "test") // -> true + strings.equal_fold("Test", "test") // -> true + strings.equal_fold("Test", "tEsT") // -> true + strings.equal_fold("test", "tes") // -> false +``` + +Returns: true if the strings u and v are the same alpha characters (ignoring case), false otherwise */ equal_fold :: proc(u, v: string) -> bool { s, t := u, v @@ -213,14 +376,22 @@ equal_fold :: proc(u, v: string) -> bool { return s == t } - /* - return the prefix length common between strings `a` and `b`. +Returns the prefix length common between strings a and b - strings.prefix_length("testing", "test") -> 4 - strings.prefix_length("testing", "te") -> 2 - strings.prefix_length("telephone", "te") -> 2 - strings.prefix_length("testing", "est") -> 0 +Inputs: +- a: The first input string +- b: The second input string + +Example: +```odin + strings.prefix_length("testing", "test") // -> 4 + strings.prefix_length("testing", "te") // -> 2 + strings.prefix_length("telephone", "te") // -> 2 + strings.prefix_length("testing", "est") // -> 0 +``` + +Returns: The prefix length common between strings a and b */ prefix_length :: proc(a, b: string) -> (n: int) { _len := min(len(a), len(b)) @@ -245,39 +416,61 @@ prefix_length :: proc(a, b: string) -> (n: int) { } return } - /* - return true when the string `prefix` is contained at the start of the string `s` +Determines if a string s starts with a given prefix - strings.has_prefix("testing", "test") -> true - strings.has_prefix("testing", "te") -> true - strings.has_prefix("telephone", "te") -> true - strings.has_prefix("testing", "est") -> false +Example: +```odin + strings.has_prefix("testing", "test") // -> true + strings.has_prefix("testing", "te") // -> true + strings.has_prefix("telephone", "te") // -> true + strings.has_prefix("testing", "est") // -> false +``` +Inputs: +- s: The string to check for the prefix +- prefix: The prefix to look for + +Returns: true if the string s starts with the prefix, otherwise false */
has_prefix :: proc(s, prefix: string) -> bool { return len(s) >= len(prefix) && s[0:len(prefix)] == prefix } - /* - returns true when the string `suffix` is contained at the end of the string `s` - good example to use this is for file extensions +Determines if a string s ends with a given suffix - strings.has_suffix("todo.txt", ".txt") -> true - strings.has_suffix("todo.doc", ".txt") -> false - strings.has_suffix("todo.doc.txt", ".txt") -> true +Example: +```odin + strings.has_suffix("todo.txt", ".txt") // -> true + strings.has_suffix("todo.doc", ".txt") // -> false + strings.has_suffix("todo.doc.txt", ".txt") // -> true +``` +Inputs: +- s: The string to check for the suffix +- suffix: The suffix to look for + +Returns: true if the string s ends with the suffix, otherwise false */ has_suffix :: proc(s, suffix: string) -> bool { return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix } - /* - returns a combined string from the slice of strings `a` seperated with the `sep` string - allocates the string using the `allocator` +Joins a slice of strings a with a sep string +*Allocates Using Provided Allocator* + +Example: +```odin a := [?]string { "a", "b", "c" } - b := strings.join(a[:], " ") -> "a b c" - c := strings.join(a[:], "-") -> "a-b-c" - d := strings.join(a[:], "...") -> "a...b...c" + b := strings.join(a[:], " ") // -> "a b c" + c := strings.join(a[:], "-") // -> "a-b-c" + d := strings.join(a[:], "...") // -> "a...b...c" +``` +Inputs: +- a: A slice of strings to join +- sep: The separator string +- allocator: (default is context.allocator) + +Returns: A combined string from the slice of strings a separated with the sep string */ join :: proc(a: []string, sep: string, allocator := context.allocator) -> string { if len(a) == 0 { @@ -297,7 +490,20 @@ join :: proc(a: []string, sep: string, allocator := context.allocator) -> string } return string(b) } +/* +Joins a slice of strings a with a sep string, returns an error on allocation failure +*Allocates 
Using Provided Allocator* + +Inputs: +- a: A slice of strings to join +- sep: The separator string +- allocator: (default is context.allocator) + +Returns: +- str: A combined string from the slice of strings a separated with the sep string +- err: An error if allocation failed, otherwise nil +*/ join_safe :: proc(a: []string, sep: string, allocator := context.allocator) -> (str: string, err: mem.Allocator_Error) { if len(a) == 0 { return "", nil @@ -316,14 +522,21 @@ join_safe :: proc(a: []string, sep: string, allocator := context.allocator) -> ( } return string(b), nil } - /* - returns a combined string from the slice of strings `a` without a seperator - allocates the string using the `allocator` - +Returns a combined string from the slice of strings `a` without a separator +*Allocates Using Provided Allocator* + +Inputs: +- a: A slice of strings to concatenate +- allocator: An optional custom allocator (default is context.allocator) + +Example: +```odin a := [?]string { "a", "b", "c" } - b := strings.concatenate(a[:]) -> "abc" + b := strings.concatenate(a[:]) // -> "abc" +``` +Returns: The concatenated string */ concatenate :: proc(a: []string, allocator := context.allocator) -> string { if len(a) == 0 { @@ -341,7 +554,17 @@ concatenate :: proc(a: []string, allocator := context.allocator) -> string { } return string(b) } +/* +Returns a combined string from the slice of strings `a` without a separator, or an error if allocation fails +*Allocates Using Provided Allocator* + +Inputs: +- a: A slice of strings to concatenate +- allocator: An optional custom allocator (default is context.allocator) + +Returns: The concatenated string, and an error if allocation fails +*/ concatenate_safe :: proc(a: []string, allocator := context.allocator) -> (res: string, err: mem.Allocator_Error) { if len(a) == 0 { return "", nil @@ -358,14 +581,24 @@ concatenate_safe :: proc(a: []string, allocator := context.allocator) -> (res: s } return string(b), nil } - /* - `rune_offset` and 
`rune_length` are in runes, not bytes. - If `rune_length` <= 0, then it'll return the remainder of the string starting at `rune_offset`. +Returns a substring of the input string `s` with the specified rune offset and length - strings.cut("some example text", 0, 4) -> "some" - strings.cut("some example text", 2, 2) -> "me" - strings.cut("some example text", 5, 7) -> "example" +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to cut +- rune_offset: The starting rune index (default is 0). In runes, not bytes. +- rune_length: The number of runes to include in the substring (default is 0, which returns the remainder of the string). In runes, not bytes. +- allocator: An optional custom allocator (default is context.allocator) + +Example: +```odin + strings.cut("some example text", 0, 4) // -> "some" + strings.cut("some example text", 2, 2) // -> "me" + strings.cut("some example text", 5, 7) // -> "example" +``` +Returns: The substring */ cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator := context.allocator) -> (res: string) { s := s; rune_length := rune_length @@ -418,7 +651,20 @@ cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator : } return string(buf[:byte_offset]) } +/* +Splits the input string `s` into a slice of substrings separated by the specified `sep` string +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to split +- sep: The separator string +- sep_save: A flag determining if the separator should be saved in the resulting substrings +- n: The maximum number of substrings to return, returns nil without alloc when n=0 +- allocator: An optional custom allocator (default is context.allocator) + +Returns: A slice of substrings +*/ @private _split :: proc(s_, sep: string, sep_save, n_: int, allocator := context.allocator) -> []string { s, n := s_, n_ @@ -466,58 +712,110 @@ _split :: proc(s_, sep: string, sep_save, n_: int, allocator := context.allocato return 
res[:i+1] } - /* - Splits a string into parts, based on a separator. - Returned strings are substrings of 's'. - ``` - s := "aaa.bbb.ccc.ddd.eee" // 5 parts - ss := split(s, ".") - fmt.println(ss) // [aaa, bbb, ccc, ddd, eee] - ``` +Splits a string into parts based on a separator. + +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to split. +- sep: The separator string used to split the input string. +- allocator: (default is context.allocator). + +Example: +```odin + s := "aaa.bbb.ccc.ddd.eee" // 5 parts + ss := strings.split(s, ".") + fmt.println(ss) // [aaa, bbb, ccc, ddd, eee] +``` + +Returns: A slice of strings, each representing a part of the split string. */ split :: proc(s, sep: string, allocator := context.allocator) -> []string { return _split(s, sep, 0, -1, allocator) } - /* - Splits a string into a total of 'n' parts, based on a separator. - Returns fewer parts if there wasn't enough occurrences of the separator. - Returned strings are substrings of 's'. - ``` - s := "aaa.bbb.ccc.ddd.eee" // 5 parts present - ss := split_n(s, ".", 3) // total of 3 wanted - fmt.println(ss) // [aaa, bbb, ccc.ddd.eee] - ``` +Splits a string into a total of 'n' parts based on a separator. If n < count of separators, the remainder of the string is returned in the last entry. + +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to split. +- sep: The separator string used to split the input string. +- allocator: (default is context.allocator) + +Example: +```odin + s := "aaa.bbb.ccc.ddd.eee" // 5 parts present + ss := strings.split_n(s, ".", 3) // total of 3 wanted + fmt.println(ss) // [aaa, bbb, ccc.ddd.eee] +``` + +Returns: A slice of strings, each representing a part of the split string. 
*/ split_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []string { return _split(s, sep, 0, n, allocator) } - /* - splits the string `s` after the seperator string `sep` appears - returns the slice of split strings allocated using `allocator` +Splits a string into parts after the separator, retaining it in the substrings. - a := "aaa.bbb.ccc.ddd.eee" +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to split. +- sep: The separator string used to split the input string. +- allocator: (Optional) The allocator used for allocation (default is context.allocator). + +Example: +```odin + a := "aaa.bbb.ccc.ddd.eee" // 5 parts aa := strings.split_after(a, ".") - fmt.eprintln(aa) // [aaa., bbb., ccc., ddd., eee] + fmt.println(aa) // [aaa., bbb., ccc., ddd., eee] +``` + +Returns: A slice of strings, each representing a part of the split string after the separator. */ split_after :: proc(s, sep: string, allocator := context.allocator) -> []string { return _split(s, sep, len(sep), -1, allocator) } - /* - splits the string `s` after the seperator string `sep` appears into a total of `n` parts - returns the slice of split strings allocated using `allocator` +Splits a string into a total of 'n' parts after the separator. +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to split. +- sep: The separator string used to split the input string. +- n: The maximum number of parts to split the string into. 
+- allocator: (default is context.allocator) + +Example: +```odin a := "aaa.bbb.ccc.ddd.eee" - aa := strings.split_after(a, ".") - fmt.eprintln(aa) // [aaa., bbb., ccc., ddd., eee] + aa := strings.split_after_n(a, ".", 3) + fmt.println(aa) // [aaa., bbb., ccc.ddd.eee] +``` + +Returns: A slice of strings with 'n' parts or fewer if there weren't enough occurrences of the separator */ split_after_n :: proc(s, sep: string, n: int, allocator := context.allocator) -> []string { return _split(s, sep, len(sep), n, allocator) } +/* +Searches for the first occurrence of 'sep' in the given string and returns the substring +up to (but not including) the separator, as well as a boolean indicating success. +*Used Internally - Private Function* + +Inputs: +- s: Pointer to the input string, which is modified during the search. +- sep: The separator string to search for. +- sep_save: Number of characters from the separator to include in the result. + +NOTE: Destructively consumes the string + +Returns: A tuple containing the resulting substring and a boolean indicating success. +*/ @private _split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string, ok: bool) { // stop once the string is empty or nil @@ -545,15 +843,22 @@ _split_iterator :: proc(s: ^string, sep: string, sep_save: int) -> (res: string, } return } - /* - split the ^string `s` by the byte seperator `sep` in an iterator fashion - consumes the original string till the end, leaving the string `s` with len == 0 +Splits the input string by the byte separator in an iterator fashion. +Destructively consumes the original string until the end. +Inputs: +- s: Pointer to the input string, which is modified during the search. +- sep: The byte separator to search for. 
+ +Example: +```odin text := "a.b.c.d.e" for str in strings.split_by_byte_iterator(&text, '.') { - fmt.eprintln(str) // every loop -> a b c d e + fmt.println(str) // every loop -> a b c d e } +``` +Returns: A tuple containing the resulting substring and a boolean indicating success. */ split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) { m := index_byte(s^, sep) @@ -569,34 +874,56 @@ split_by_byte_iterator :: proc(s: ^string, sep: u8) -> (res: string, ok: bool) { } return } - /* - split the ^string `s` by the seperator string `sep` in an iterator fashion - consumes the original string till the end +Splits the input string by the separator string in an iterator fashion. +Destructively consumes the original string until the end. +Inputs: +- s: Pointer to the input string, which is modified during the search. +- sep: The separator string to search for. + +Example: +```odin text := "a.b.c.d.e" for str in strings.split_iterator(&text, ".") { - fmt.eprintln(str) // every loop -> a b c d e + fmt.println(str) // every loop -> a b c d e } +``` +Returns: A tuple containing the resulting substring and a boolean indicating success. */ split_iterator :: proc(s: ^string, sep: string) -> (string, bool) { return _split_iterator(s, sep, 0) } - /* - split the ^string `s` after every seperator string `sep` in an iterator fashion - consumes the original string till the end +Splits the input string after every separator string in an iterator fashion. +Destructively consumes the original string until the end. +Inputs: +- s: Pointer to the input string, which is modified during the search. +- sep: The separator string to search for. + +Example: +```odin text := "a.b.c.d.e" for str in strings.split_after_iterator(&text, ".") { - fmt.eprintln(str) // every loop -> a. b. c. d. e + fmt.println(str) // every loop -> a. b. c. d. e } +``` +Returns: A tuple containing the resulting substring and a boolean indicating success. 
*/ split_after_iterator :: proc(s: ^string, sep: string) -> (string, bool) { return _split_iterator(s, sep, len(sep)) } +/* +Trims the carriage return character from the end of the input string. +*Used Internally - Private Function* +Inputs: +- s: The input string to trim. + +Returns: The trimmed string as a slice. +*/ @(private) _trim_cr :: proc(s: string) -> string { n := len(s) @@ -607,14 +934,22 @@ _trim_cr :: proc(s: string) -> string { } return s } - /* - split the string `s` at every line break '\n' - return an allocated slice of strings +Splits the input string at every line break '\n'. +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to split. +- allocator: (default is context.allocator) + +Example: +```odin a := "a\nb\nc\nd\ne" b := strings.split_lines(a) fmt.eprintln(b) // [a, b, c, d, e] +``` +Returns: An allocated slice of strings split by line breaks. */ split_lines :: proc(s: string, allocator := context.allocator) -> []string { sep :: "\n" @@ -624,14 +959,23 @@ split_lines :: proc(s: string, allocator := context.allocator) -> []string { } return lines } - /* - split the string `s` at every line break '\n' for `n` parts - return an allocated slice of strings +Splits the input string at every line break '\n' for n parts. +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to split. +- n: The number of parts to split into. +- allocator: (default is context.allocator) + +Example: +```odin a := "a\nb\nc\nd\ne" b := strings.split_lines_n(a, 3) - fmt.eprintln(b) // [a, b, c, d\ne\n] + fmt.println(b) // [a, b, c\nd\ne] +``` +Returns: An allocated slice of strings split by line breaks. 
*/ split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []string { sep :: "\n" @@ -641,14 +985,22 @@ split_lines_n :: proc(s: string, n: int, allocator := context.allocator) -> []st } return lines } - /* - split the string `s` at every line break '\n' leaving the '\n' in the resulting strings - return an allocated slice of strings +Splits the input string at every line break '\n' leaving the '\n' in the resulting strings. +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to split. +- allocator: (default is context.allocator) + +Example: +```odin a := "a\nb\nc\nd\ne" b := strings.split_lines_after(a) - fmt.eprintln(b) // [a\n, b\n, c\n, d\n, e\n] + fmt.println(b) // [a\n, b\n, c\n, d\n, e\n] +``` +Returns: An allocated slice of strings split by line breaks with line breaks included. */ split_lines_after :: proc(s: string, allocator := context.allocator) -> []string { sep :: "\n" @@ -658,15 +1010,24 @@ split_lines_after :: proc(s: string, allocator := context.allocator) -> []string } return lines } - /* - split the string `s` at every line break '\n' leaving the '\n' in the resulting strings - only runs for `n` parts - return an allocated slice of strings +Splits the input string at every line break '\n' leaving the '\n' in the resulting strings. +Only runs for n parts. +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string to split. +- n: The number of parts to split into. +- allocator: (default is context.allocator) + +Example: +```odin a := "a\nb\nc\nd\ne" b := strings.split_lines_after_n(a, 3) - fmt.eprintln(b) // [a\n, b\n, c\n, d\ne\n] + fmt.println(b) // [a\n, b\n, c\n, d\ne\n] +``` +Returns: An allocated slice of strings split by line breaks with line breaks included. 
*/ split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) -> []string { sep :: "\n" @@ -676,45 +1037,64 @@ split_lines_after_n :: proc(s: string, n: int, allocator := context.allocator) - } return lines } - /* - split the string `s` at every line break '\n' - returns the current split string every iteration till the string is consumed +Splits the input string at every line break '\n'. +Returns the current split string every iteration until the string is consumed. +Inputs: +- s: Pointer to the input string, which is modified during the search. + +Example: +```odin text := "a\nb\nc\nd\ne" for str in strings.split_lines_iterator(&text) { - fmt.eprintln(text) // every loop -> a b c d e + fmt.println(str) // every loop -> a b c d e } +``` +Returns: A tuple containing the resulting substring and a boolean indicating success. */ split_lines_iterator :: proc(s: ^string) -> (line: string, ok: bool) { sep :: "\n" line = _split_iterator(s, sep, 0) or_return return _trim_cr(line), true } - /* - split the string `s` at every line break '\n' - returns the current split string every iteration till the string is consumed +Splits the input string at every line break '\n'. +Returns the current split string with line breaks included every iteration until the string is consumed. +Inputs: +- s: Pointer to the input string, which is modified during the search. + +Example: +```odin text := "a\nb\nc\nd\ne" for str in strings.split_lines_after_iterator(&text) { - fmt.eprintln(text) // every loop -> a\n b\n c\n d\n e\n + fmt.println(str) // every loop -> a\n b\n c\n d\n e\n } +``` +Returns: A tuple containing the resulting substring with line breaks included and a boolean indicating success. 
*/ split_lines_after_iterator :: proc(s: ^string) -> (line: string, ok: bool) { sep :: "\n" line = _split_iterator(s, sep, len(sep)) or_return return _trim_cr(line), true } - /* - returns the byte offset of the first byte `c` in the string `s` it finds, -1 when not found - can't find utf8 based runes +Returns the byte offset of the first byte c in the string s it finds, -1 when not found. +NOTE: Can't find UTF-8 based runes. - strings.index_byte("test", 't') -> 0 - strings.index_byte("test", 'e') -> 1 - strings.index_byte("test", 'x') -> -1 - strings.index_byte("teäst", 'ä') -> -1 +Inputs: +- s: The input string to search in. +- c: The byte to search for. + +Example: +```odin + strings.index_byte("test", 't') // -> 0 + strings.index_byte("test", 'e') // -> 1 + strings.index_byte("test", 'x') // -> -1 + strings.index_byte("teäst", 'ä') // -> -1 +``` +Returns: The byte offset of the first occurrence of c in s, or -1 if not found. */ index_byte :: proc(s: string, c: byte) -> int { for i := 0; i < len(s); i += 1 { @@ -724,15 +1104,18 @@ index_byte :: proc(s: string, c: byte) -> int { } return -1 } - /* - returns the byte offset of the last byte `c` in the string `s` it finds, -1 when not found - can't find utf8 based runes +Returns the byte offset of the last byte `c` in the string `s`, -1 when not found. +NOTE: Can't find UTF-8 based runes. - strings.index_byte("test", 't') -> 3 - strings.index_byte("test", 'e') -> 1 - strings.index_byte("test", 'x') -> -1 - strings.index_byte("teäst", 'ä') -> -1 +Example: +```odin + strings.last_index_byte("test", 't') // -> 3 + strings.last_index_byte("test", 'e') // -> 1 + strings.last_index_byte("test", 'x') // -> -1 + strings.last_index_byte("teäst", 'ä') // -> -1 +``` +Returns: The byte offset of the last occurrence of `c` in `s`, or -1 if not found. 
*/ last_index_byte :: proc(s: string, c: byte) -> int { for i := len(s)-1; i >= 0; i -= 1 { @@ -742,20 +1125,22 @@ last_index_byte :: proc(s: string, c: byte) -> int { } return -1 } - - /* - returns the byte offset of the first rune `r` in the string `s` it finds, -1 when not found - avoids invalid runes +Returns the byte offset of the first rune `r` in the string `s` it finds, -1 when not found. +Invalid runes return -1 - strings.index_rune("abcädef", 'x') -> -1 - strings.index_rune("abcädef", 'a') -> 0 - strings.index_rune("abcädef", 'b') -> 1 - strings.index_rune("abcädef", 'c') -> 2 - strings.index_rune("abcädef", 'ä') -> 3 - strings.index_rune("abcädef", 'd') -> 5 - strings.index_rune("abcädef", 'e') -> 6 - strings.index_rune("abcädef", 'f') -> 7 +Example: +```odin + strings.index_rune("abcädef", 'x') // -> -1 + strings.index_rune("abcädef", 'a') // -> 0 + strings.index_rune("abcädef", 'b') // -> 1 + strings.index_rune("abcädef", 'c') // -> 2 + strings.index_rune("abcädef", 'ä') // -> 3 + strings.index_rune("abcädef", 'd') // -> 5 + strings.index_rune("abcädef", 'e') // -> 6 + strings.index_rune("abcädef", 'f') // -> 7 +``` +Returns: The byte offset of the first occurrence of `r` in `s`, or -1 if not found. */ index_rune :: proc(s: string, r: rune) -> int { switch { @@ -779,14 +1164,17 @@ index_rune :: proc(s: string, r: rune) -> int { } @private PRIME_RABIN_KARP :: 16777619 - /* - returns the byte offset of the string `substr` in the string `s`, -1 when not found - - strings.index("test", "t") -> 0 - strings.index("test", "te") -> 0 - strings.index("test", "st") -> 2 - strings.index("test", "tt") -> -1 +Returns the byte offset of the string `substr` in the string `s`, -1 when not found. + +Example: +```odin + strings.index("test", "t") // -> 0 + strings.index("test", "te") // -> 0 + strings.index("test", "st") // -> 2 + strings.index("test", "tt") // -> -1 +``` +Returns: The byte offset of the first occurrence of `substr` in `s`, or -1 if not found. 
*/ index :: proc(s, substr: string) -> int { hash_str_rabin_karp :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) { @@ -837,14 +1225,17 @@ index :: proc(s, substr: string) -> int { } return -1 } - /* - returns the last byte offset of the string `substr` in the string `s`, -1 when not found - - strings.index("test", "t") -> 3 - strings.index("test", "te") -> 0 - strings.index("test", "st") -> 2 - strings.index("test", "tt") -> -1 +Returns the last byte offset of the string `substr` in the string `s`, -1 when not found. + +Example: +```odin + strings.last_index("test", "t") // -> 3 + strings.last_index("test", "te") // -> 0 + strings.last_index("test", "st") // -> 2 + strings.last_index("test", "tt") // -> -1 +``` +Returns: The byte offset of the last occurrence of `substr` in `s`, or -1 if not found. */ last_index :: proc(s, substr: string) -> int { hash_str_rabin_karp_reverse :: proc(s: string) -> (hash: u32 = 0, pow: u32 = 1) { @@ -893,15 +1284,18 @@ last_index :: proc(s, substr: string) -> int { } return -1 } - /* - returns the index of any first char of `chars` found in `s`, -1 if not found - - strings.index_any("test", "s") -> 2 - strings.index_any("test", "se") -> 1 - strings.index_any("test", "et") -> 0 - strings.index_any("test", "set") -> 0 - strings.index_any("test", "x") -> -1 +Returns the index of any first char of `chars` found in `s`, -1 if not found. + +Example: +```odin + strings.index_any("test", "s") // -> 2 + strings.index_any("test", "se") // -> 1 + strings.index_any("test", "et") // -> 0 + strings.index_any("test", "set") // -> 0 + strings.index_any("test", "x") // -> -1 +``` +Returns: The index of the first character of `chars` found in `s`, or -1 if not found. 
*/ index_any :: proc(s, chars: string) -> int { if chars == "" { @@ -934,16 +1328,22 @@ index_any :: proc(s, chars: string) -> int { } return -1 } - /* - returns the last matching index in `s` of any char in `chars` found in `s`, -1 if not found - iterates the string in reverse +Finds the last occurrence of any character in 'chars' within 's'. Iterates in reverse. - strings.last_index_any("test", "s") -> 2 - strings.last_index_any("test", "se") -> 2 - strings.last_index_any("test", "et") -> 3 - strings.last_index_any("test", "set") -> 3 - strings.last_index_any("test", "x") -> -1 +Inputs: +- s: The string to search in +- chars: The characters to look for + +Example: +```odin + strings.last_index_any("test", "s") // -> 2 + strings.last_index_any("test", "se") // -> 2 + strings.last_index_any("test", "et") // -> 3 + strings.last_index_any("test", "set") // -> 3 + strings.last_index_any("test", "x") // -> -1 +``` +Returns: The index of the last matching character, or -1 if not found */ last_index_any :: proc(s, chars: string) -> int { if chars == "" { @@ -993,7 +1393,15 @@ last_index_any :: proc(s, chars: string) -> int { } return -1 } +/* +Finds the first occurrence of any substring in 'substrs' within 's' +Inputs: +- s: The string to search in +- substrs: The substrings to look for + +Returns: A tuple containing the index of the first matching substring, and its length (width) +*/ index_multi :: proc(s: string, substrs: []string) -> (idx: int, width: int) { idx = -1 if s == "" || len(substrs) <= 0 { @@ -1023,16 +1431,22 @@ index_multi :: proc(s: string, substrs: []string) -> (idx: int, width: int) { } return } - /* - returns the count of the string `substr` found in the string `s` - returns the rune_count + 1 of the string `s` on empty `substr` +Counts the number of non-overlapping occurrences of 'substr' in 's' - strings.count("abbccc", "a") -> 1 - strings.count("abbccc", "b") -> 2 - strings.count("abbccc", "c") -> 3 - strings.count("abbccc", "ab") -> 1 - 
strings.count("abbccc", " ") -> 0 +Inputs: +- s: The string to search in +- substr: The substring to count + +Example: +```odin + strings.count("abbccc", "a") // -> 1 + strings.count("abbccc", "b") // -> 2 + strings.count("abbccc", "c") // -> 3 + strings.count("abbccc", "ab") // -> 1 + strings.count("abbccc", " ") // -> 0 +``` +Returns: The number of occurrences of 'substr' in 's', returns the rune_count + 1 of the string `s` on empty `substr` */ count :: proc(s, substr: string) -> int { if len(substr) == 0 { // special case @@ -1068,12 +1482,23 @@ count :: proc(s, substr: string) -> int { } return n } - /* - repeats the string `s` multiple `count` times and returns the allocated string - panics when `count` is below 0 +Repeats the string 's' 'count' times, concatenating the result - strings.repeat("abc", 2) -> "abcabc" +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to repeat +- count: The number of times to repeat 's' +- allocator: (default is context.allocator) + +WARNING: Panics if count < 0 + +Example: +```odin + strings.repeat("abc", 2) // -> "abcabc" +``` +Returns: The concatenated repeated string */ repeat :: proc(s: string, count: int, allocator := context.allocator) -> string { if count < 0 { @@ -1090,28 +1515,48 @@ repeat :: proc(s: string, count: int, allocator := context.allocator) -> string } return string(b) } - /* - replaces all instances of `old` in the string `s` with the `new` string - returns the `output` string and true when an a allocation through a replace happened +Replaces all occurrences of 'old' in 's' with 'new' - strings.replace_all("xyzxyz", "xyz", "abc") -> "abcabc", true - strings.replace_all("xyzxyz", "abc", "xyz") -> "xyzxyz", false - strings.replace_all("xyzxyz", "xy", "z") -> "zzzz", true +*Allocates Using Provided Allocator* + +Inputs: +- s: The string to modify +- old: The substring to replace +- new: The substring to replace 'old' with +- allocator: The allocator to use for the new string (default is 
context.allocator) + +Example: +```odin + strings.replace_all("xyzxyz", "xyz", "abc") // -> "abcabc", true + strings.replace_all("xyzxyz", "abc", "xyz") // -> "xyzxyz", false + strings.replace_all("xyzxyz", "xy", "z") // -> "zzzz", true +``` +Returns: A tuple containing the modified string and a boolean indicating if an allocation occurred during the replacement */ replace_all :: proc(s, old, new: string, allocator := context.allocator) -> (output: string, was_allocation: bool) { return replace(s, old, new, -1, allocator) } - /* - replaces `n` instances of `old` in the string `s` with the `new` string - if n < 0, no limit on the number of replacements - returns the `output` string and true when an a allocation through a replace happened +Replaces n instances of old in the string s with the new string - strings.replace("xyzxyz", "xyz", "abc", 2) -> "abcabc", true - strings.replace("xyzxyz", "xyz", "abc", 1) -> "abcxyz", true - strings.replace("xyzxyz", "abc", "xyz", -1) -> "xyzxyz", false - strings.replace("xyzxyz", "xy", "z", -1) -> "zzzz", true +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- old: The substring to be replaced +- new: The replacement string +- n: The number of instances to replace (if n < 0, no limit on the number of replacements) +- allocator: (default: context.allocator) + +Example: +```odin + strings.replace("xyzxyz", "xyz", "abc", 2) // -> "abcabc", true + strings.replace("xyzxyz", "xyz", "abc", 1) // -> "abcxyz", true + strings.replace("xyzxyz", "abc", "xyz", -1) // -> "xyzxyz", false + strings.replace("xyzxyz", "xy", "z", -1) // -> "zzzz", true +``` +Returns: A tuple containing the modified string and a boolean indicating if an allocation occurred during the replacement */ replace :: proc(s, old, new: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) { if old == new || n == 0 { @@ -1152,44 +1597,61 @@ replace :: proc(s, old, new: string, n: int, allocator := context.allocator) 
-> output = string(t[0:w]) return } - /* - removes the `key` string `n` times from the `s` string - if n < 0, no limit on the number of removes - returns the `output` string and true when an a allocation through a remove happened +Removes the key string n times from the s string - strings.remove("abcabc", "abc", 1) -> "abc", true - strings.remove("abcabc", "abc", -1) -> "", true - strings.remove("abcabc", "a", -1) -> "bcbc", true - strings.remove("abcabc", "x", -1) -> "abcabc", false +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- key: The substring to be removed +- n: The number of instances to remove (if n < 0, no limit on the number of removes) +- allocator: (default: context.allocator) + +Example: +```odin + strings.remove("abcabc", "abc", 1) // -> "abc", true + strings.remove("abcabc", "abc", -1) // -> "", true + strings.remove("abcabc", "a", -1) // -> "bcbc", true + strings.remove("abcabc", "x", -1) // -> "abcabc", false +``` +Returns: A tuple containing the modified string and a boolean indicating if an allocation occurred during the removal */ remove :: proc(s, key: string, n: int, allocator := context.allocator) -> (output: string, was_allocation: bool) { return replace(s, key, "", n, allocator) } - /* - removes all the `key` string instanes from the `s` string - returns the `output` string and true when an a allocation through a remove happened +Removes all the key string instances from the s string - strings.remove("abcabc", "abc") -> "", true - strings.remove("abcabc", "a") -> "bcbc", true - strings.remove("abcabc", "x") -> "abcabc", false +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- key: The substring to be removed +- allocator: (default: context.allocator) + +Example: +```odin + strings.remove_all("abcabc", "abc") // -> "", true + strings.remove_all("abcabc", "a") // -> "bcbc", true + strings.remove_all("abcabc", "x") // -> "abcabc", false +``` +Returns: A tuple containing the modified string 
and a boolean indicating if an allocation occurred during the removal */ remove_all :: proc(s, key: string, allocator := context.allocator) -> (output: string, was_allocation: bool) { return remove(s, key, -1, allocator) } - +// Returns true if the r rune is an ASCII space character ('\t', '\n', '\v', '\f', '\r', ' ') @(private) _ascii_space := [256]bool{'\t' = true, '\n' = true, '\v' = true, '\f' = true, '\r' = true, ' ' = true} -// return true when the `r` rune is '\t', '\n', '\v', '\f', '\r' or ' ' +// Returns true when the `r` rune is '\t', '\n', '\v', '\f', '\r' or ' ' is_ascii_space :: proc(r: rune) -> bool { if r < utf8.RUNE_SELF { return _ascii_space[u8(r)] } return false } - -// returns true when the `r` rune is any asci or utf8 based whitespace +// Returns true if the r rune is any ASCII or UTF-8 based whitespace character is_space :: proc(r: rune) -> bool { if r < 0x2000 { switch r { @@ -1207,24 +1669,30 @@ is_space :: proc(r: rune) -> bool { } return false } - -// returns true when the `r` rune is a nul byte +// Returns true if the `r` rune is a null byte (0x0) is_null :: proc(r: rune) -> bool { return r == 0x0000 } - /* - runs trough the `s` string linearly and watches wether the `p` procedure matches the `truth` bool - returns the rune offset or -1 when no match was found +Finds the index of the first rune in the string s for which the procedure p returns the same value as truth +Inputs: +- s: The input string +- p: A procedure that takes a rune and returns a boolean +- truth: The boolean value to be matched (default: true) + +Example: +```odin call :: proc(r: rune) -> bool { return r == 'a' } - strings.index_proc("abcabc", call) -> 0 - strings.index_proc("cbacba", call) -> 2 - strings.index_proc("cbacba", call, false) -> 0 - strings.index_proc("abcabc", call, false) -> 1 - strings.index_proc("xyz", call) -> -1 + strings.index_proc("abcabc", call) // -> 0 + strings.index_proc("cbacba", call) // -> 2 + strings.index_proc("cbacba", call, false) // -> 0 
+ strings.index_proc("abcabc", call, false) // -> 1 + strings.index_proc("xyz", call) // -> -1 +``` +Returns: The index of the first matching rune, or -1 if no match was found */ index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int { for r, i in s { @@ -1234,8 +1702,7 @@ index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int { } return -1 } - -// same as `index_proc` but with a `p` procedure taking a rawptr for state +// Same as `index_proc`, but the procedure p takes a raw pointer for state index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int { for r, i in s { if p(state, r) == truth { @@ -1244,8 +1711,7 @@ index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: r } return -1 } - -// same as `index_proc` but runs through the string in reverse +// Finds the index of the *last* rune in the string s for which the procedure p returns the same value as truth last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int { // TODO(bill): Probably use Rabin-Karp Search for i := len(s); i > 0; { @@ -1257,8 +1723,7 @@ last_index_proc :: proc(s: string, p: proc(rune) -> bool, truth := true) -> int } return -1 } - -// same as `index_proc_with_state` but runs through the string in reverse +// Same as `index_proc_with_state`, runs through the string in reverse last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr, truth := true) -> int { // TODO(bill): Probably use Rabin-Karp Search for i := len(s); i > 0; { @@ -1270,16 +1735,21 @@ last_index_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta } return -1 } - /* - trims the input string `s` until the procedure `p` returns false - does not allocate - only returns a cut variant of the input string - returns an empty string when no match was found at all +Trims the input string s from the left until the procedure p returns false +Inputs: +- 
s: The input string +- p: A procedure that takes a rune and returns a boolean + +Example: +```odin find :: proc(r: rune) -> bool { return r != 'i' } - strings.trim_left_proc("testing", find) -> "ing" + strings.trim_left_proc("testing", find) // -> "ing" +``` +Returns: The trimmed string as a slice of the original, empty when no match */ trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string { i := index_proc(s, p, false) @@ -1288,10 +1758,15 @@ trim_left_proc :: proc(s: string, p: proc(rune) -> bool) -> string { } return s[i:] } - /* - trims the input string `s` until the procedure `p` with state returns false - returns an empty string when no match was found at all +Trims the input string s from the left until the procedure p with state returns false + +Inputs: +- s: The input string +- p: A procedure that takes a raw pointer and a rune and returns a boolean +- state: The raw pointer to be passed to the procedure p + +Returns: The trimmed string as a slice of the original, empty when no match */ trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string { i := index_proc_with_state(s, p, state, false) @@ -1300,16 +1775,21 @@ trim_left_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, stat } return s[i:] } - /* - trims the input string `s` from the right until the procedure `p` returns false - does not allocate - only returns a cut variant of the input string - returns an empty string when no match was found at all +Trims the input string s from the right until the procedure p returns false +Inputs: +- s: The input string +- p: A procedure that takes a rune and returns a boolean + +Example: +```odin find :: proc(r: rune) -> bool { return r != 't' } - strings.trim_left_proc("testing", find) -> "test" + strings.trim_right_proc("testing", find) // -> "test" +``` +Returns: The trimmed string as a slice of the original, empty when no match */ trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string { i := last_index_proc(s, p, false) @@ -1321,10 +1801,15 
@@ trim_right_proc :: proc(s: string, p: proc(rune) -> bool) -> string { } return s[0:i] } - /* - trims the input string `s` from the right until the procedure `p` with state returns false - returns an empty string when no match was found at all +Trims the input string s from the right until the procedure p with state returns false + +Inputs: +- s: The input string +- p: A procedure that takes a raw pointer and a rune and returns a boolean +- state: The raw pointer to be passed to the procedure p + +Returns: The trimmed string as a slice of the original, empty when no match */ trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, state: rawptr) -> string { i := last_index_proc_with_state(s, p, state, false) @@ -1336,8 +1821,7 @@ trim_right_proc_with_state :: proc(s: string, p: proc(rawptr, rune) -> bool, sta } return s[0:i] } - -// procedure for `trim_*_proc` variants, which has a string rawptr cast + rune comparison +// Procedure for `trim_*_proc` variants, which has a string rawptr cast + rune comparison is_in_cutset :: proc(state: rawptr, r: rune) -> bool { if state == nil { return false @@ -1350,8 +1834,15 @@ is_in_cutset :: proc(state: rawptr, r: rune) -> bool { } return false } +/* +Trims the cutset string from the s string from the left -// trims the `cutset` string from the `s` string +Inputs: +- s: The input string +- cutset: The set of characters to be trimmed from the left of the input string + +Returns: The trimmed string as a slice of the original +*/ trim_left :: proc(s: string, cutset: string) -> string { if s == "" || cutset == "" { return s @@ -1359,8 +1850,15 @@ trim_left :: proc(s: string, cutset: string) -> string { state := cutset return trim_left_proc_with_state(s, is_in_cutset, &state) } +/* +Trims the cutset string from the s string from the right -// trims the `cutset` string from the `s` string from the right +Inputs: +- s: The input string +- cutset: The set of characters to be trimmed from the right of the input string +
+Returns: The trimmed string as a slice of the original +*/ trim_right :: proc(s: string, cutset: string) -> string { if s == "" || cutset == "" { return s @@ -1368,48 +1866,96 @@ trim_right :: proc(s: string, cutset: string) -> string { state := cutset return trim_right_proc_with_state(s, is_in_cutset, &state) } +/* +Trims the cutset string from the s string, both from left and right -// trims the `cutset` string from the `s` string, both from left and right +Inputs: +- s: The input string +- cutset: The set of characters to be trimmed from both sides of the input string + +Returns: The trimmed string as a slice of the original +*/ trim :: proc(s: string, cutset: string) -> string { return trim_right(trim_left(s, cutset), cutset) } +/* +Trims until a valid non-space rune from the left, "\t\txyz\t\t" -> "xyz\t\t" -// trims until a valid non space rune: "\t\txyz\t\t" -> "xyz\t\t" +Inputs: +- s: The input string + +Returns: The trimmed string as a slice of the original +*/ trim_left_space :: proc(s: string) -> string { return trim_left_proc(s, is_space) } +/* +Trims from the right until a valid non-space rune, "\t\txyz\t\t" -> "\t\txyz" -// trims from the right until a valid non space rune: "\t\txyz\t\t" -> "\t\txyz" +Inputs: +- s: The input string + +Returns: The trimmed string as a slice of the original +*/ trim_right_space :: proc(s: string) -> string { return trim_right_proc(s, is_space) } +/* +Trims from both sides until a valid non-space rune, "\t\txyz\t\t" -> "xyz" -// trims from both sides until a valid non space rune: "\t\txyz\t\t" -> "xyz" +Inputs: +- s: The input string + +Returns: The trimmed string as a slice of the original +*/ trim_space :: proc(s: string) -> string { return trim_right_space(trim_left_space(s)) } +/* +Trims null runes from the left, "\x00\x00testing\x00\x00" -> "testing\x00\x00" -// trims nul runes from the left: "\x00\x00testing\x00\x00" -> "testing\x00\x00" +Inputs: +- s: The input string + +Returns: The trimmed string as a slice of 
the original +*/ trim_left_null :: proc(s: string) -> string { return trim_left_proc(s, is_null) } +/* +Trims null runes from the right, "\x00\x00testing\x00\x00" -> "\x00\x00testing" -// trims nul runes from the right: "\x00\x00testing\x00\x00" -> "\x00\x00testing" +Inputs: +- s: The input string + +Returns: The trimmed string as a slice of the original +*/ trim_right_null :: proc(s: string) -> string { return trim_right_proc(s, is_null) } +/* +Trims null runes from both sides, "\x00\x00testing\x00\x00" -> "testing" -// trims nul runes from both sides: "\x00\x00testing\x00\x00" -> "testing" +Inputs: +- s: The input string +Returns: The trimmed string as a slice of the original +*/ trim_null :: proc(s: string) -> string { return trim_right_null(trim_left_null(s)) } - /* - trims a `prefix` string from the start of the `s` string and returns the trimmed string - returns the input string `s` when no prefix was found +Trims a prefix string from the start of the s string and returns the trimmed string - strings.trim_prefix("testing", "test") -> "ing" - strings.trim_prefix("testing", "abc") -> "testing" +Inputs: +- s: The input string +- prefix: The prefix string to be removed + +Example: +```odin + strings.trim_prefix("testing", "test") // -> "ing" + strings.trim_prefix("testing", "abc") // -> "testing" +``` +Returns: The trimmed string as a slice of original, or the input string if no prefix was found */ trim_prefix :: proc(s, prefix: string) -> string { if has_prefix(s, prefix) { @@ -1417,13 +1963,19 @@ trim_prefix :: proc(s, prefix: string) -> string { } return s } - /* - trims a `suffix` string from the end of the `s` string and returns the trimmed string - returns the input string `s` when no suffix was found +Trims a suffix string from the end of the s string and returns the trimmed string - strings.trim_suffix("todo.txt", ".txt") -> "todo" - strings.trim_suffix("todo.doc", ".txt") -> "todo.doc" +Inputs: +- s: The input string +- suffix: The suffix string to be 
removed + +Example: +```odin + strings.trim_suffix("todo.txt", ".txt") // -> "todo" + strings.trim_suffix("todo.doc", ".txt") // -> "todo.doc" +``` +Returns: The trimmed string as a slice of original, or the input string if no suffix was found */ trim_suffix :: proc(s, suffix: string) -> string { if has_suffix(s, suffix) { @@ -1431,14 +1983,25 @@ trim_suffix :: proc(s, suffix: string) -> string { } return s } - /* - splits the input string `s` by all possible `substrs` []string - returns the allocated []string, nil on any empty substring or no matches +Splits the input string s by all possible substrs and returns an allocated array of strings +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- substrs: An array of substrings used for splitting +- allocator: (default is context.allocator) + +NOTE: Allocation occurs for the array, the splits are all slices of the original string. + +Example: +```odin splits := [?]string { "---", "~~~", ".", "_", "," } res := strings.split_multi("testing,this.out_nice---done~~~last", splits[:]) - fmt.eprintln(res) // -> [testing, this, out, nice, done, last] + fmt.println(res) // -> [testing, this, out, nice, done, last] +``` +Returns: An array of strings, or nil on empty substring or no matches */ split_multi :: proc(s: string, substrs: []string, allocator := context.allocator) -> []string #no_bounds_check { if s == "" || len(substrs) <= 0 { @@ -1480,15 +2043,21 @@ split_multi :: proc(s: string, substrs: []string, allocator := context.allocator assert(len(results) == n) return results[:] } - /* - splits the input string `s` by all possible `substrs` []string in an iterator fashion - returns the split string every iteration, the full string on no match - splits := [?]string { "---", "~~~", ".", "_", "," } +Splits the input string s by all possible substrs in an iterator fashion. The full string is returned if no match. 
+ +Inputs: +- it: A pointer to the input string +- substrs: An array of substrings used for splitting + +Example: +```odin it := "testing,this.out_nice---done~~~last" for str in strings.split_multi_iterate(&it, splits[:]) { - fmt.eprintln(str) // every iteration -> [testing, this, out, nice, done, last] + fmt.println(str) // -> one piece per iteration: testing, this, out, nice, done, last } +``` +Returns: A tuple containing the split string and a boolean indicating success or failure */ split_multi_iterate :: proc(it: ^string, substrs: []string) -> (res: string, ok: bool) #no_bounds_check { if it == nil || len(it) == 0 || len(substrs) <= 0 { @@ -1515,9 +2084,24 @@ split_multi_iterate :: proc(it: ^string, substrs: []string) -> (res: string, ok: ok = true return } +/* +Replaces invalid UTF-8 characters in the input string with a specified replacement string. Adjacent invalid bytes are only replaced once. -// scrub scruvs invalid utf-8 characters and replaces them with the replacement string -// Adjacent invalid bytes are only replaced once +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- replacement: The string used to replace invalid UTF-8 characters +- allocator: (default is context.allocator) + +Example: +```odin + text := "Hello\xC0\x80World" + result := strings.scrub(text, "?") + fmt.println(result) // -> "Hello?World" +``` +Returns: A new string with invalid UTF-8 characters replaced +*/ scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string { str := s b: Builder @@ -1549,13 +2133,22 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> return to_string(b) } - /* - returns a reversed version of the `s` string +Reverses the input string s +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- allocator: (default is context.allocator) + +Example: +```odin a := "abcxyz" b := strings.reverse(a) - fmt.eprintln(a, b) // abcxyz zyxcba + fmt.println(a, b) // ->
abcxyz zyxcba +``` +Returns: A reversed version of the input string */ reverse :: proc(s: string, allocator := context.allocator) -> string { str := s @@ -1571,14 +2164,25 @@ reverse :: proc(s: string, allocator := context.allocator) -> string { } return string(buf) } - /* - expands the string to a grid spaced by `tab_size` whenever a `\t` character appears - returns the tabbed string, panics on tab_size <= 0 +Expands the input string by replacing tab characters with spaces to align to a specified tab size - strings.expand_tabs("abc1\tabc2\tabc3", 4) -> abc1 abc2 abc3 - strings.expand_tabs("abc1\tabc2\tabc3", 5) -> abc1 abc2 abc3 - strings.expand_tabs("abc1\tabc2\tabc3", 6) -> abc1 abc2 abc3 +*Allocates Using Provided Allocator* + +Inputs: +- s: The input string +- tab_size: The number of spaces to use for each tab character +- allocator: (default is context.allocator) + +Example: +```odin + text := "abc1\tabc2\tabc3" + result := strings.expand_tabs(text, 4) + fmt.println(result) // -> "abc1 abc2 abc3" +``` +WARNING: Panics if tab_size <= 0 + +Returns: A new string with tab characters expanded to the specified tab size */ expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) -> string { if tab_size <= 0 { @@ -1621,16 +2225,21 @@ expand_tabs :: proc(s: string, tab_size: int, allocator := context.allocator) -> return to_string(b) } - /* - splits the `str` string by the seperator `sep` string and returns 3 parts - `head`: before the split, `match`: the seperator, `tail`: the end of the split - returns the input string when the `sep` was not found +Splits the input string str by the separator sep string and returns 3 parts. The values are slices of the original string. 
+Inputs: +- str: The input string +- sep: The separator string + +Example: +```odin text := "testing this out" - strings.partition(text, " this ") -> head: "testing", match: " this ", tail: "out" - strings.partition(text, "hi") -> head: "testing t", match: "hi", tail: "s out" - strings.partition(text, "xyz") -> head: "testing this out", match: "", tail: "" + strings.partition(text, " this ") // -> head: "testing", match: " this ", tail: "out" + strings.partition(text, "hi") // -> head: "testing t", match: "hi", tail: "s out" + strings.partition(text, "xyz") // -> head: "testing this out", match: "", tail: "" +``` +Returns: A tuple with head (before the split), match (the separator), and tail (the end of the split) strings */ partition :: proc(str, sep: string) -> (head, match, tail: string) { i := index(str, sep) @@ -1644,10 +2253,21 @@ partition :: proc(str, sep: string) -> (head, match, tail: string) { tail = str[i+len(sep):] return } - +// Alias for centre_justify center_justify :: centre_justify // NOTE(bill): Because Americans exist +/* +Centers the input string within a field of specified length by adding pad string on both sides, if its length is less than the target length. 
-// centre_justify returns a string with a pad string at boths sides if the str's rune length is smaller than length +*Allocates Using Provided Allocator* + +Inputs: +- str: The input string +- length: The desired length of the centered string +- pad: The string used for padding on both sides +- allocator: (default is context.allocator) + +Returns: A new string centered within a field of the specified length +*/ centre_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string { n := rune_count(str) if n >= length || pad == "" { @@ -1669,8 +2289,19 @@ centre_justify :: proc(str: string, length: int, pad: string, allocator := conte return to_string(b) } +/* +Left-justifies the input string within a field of specified length by adding pad string on the right side, if its length is less than the target length. -// left_justify returns a string with a pad string at right side if the str's rune length is smaller than length +*Allocates Using Provided Allocator* + +Inputs: +- str: The input string +- length: The desired length of the left-justified string +- pad: The string used for padding on the right side +- allocator: (default is context.allocator) + +Returns: A new string left-justified within a field of the specified length +*/ left_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string { n := rune_count(str) if n >= length || pad == "" { @@ -1691,8 +2322,19 @@ left_justify :: proc(str: string, length: int, pad: string, allocator := context return to_string(b) } +/* +Right-justifies the input string within a field of specified length by adding pad string on the left side, if its length is less than the target length. 
-// right_justify returns a string with a pad string at left side if the str's rune length is smaller than length +*Allocates Using Provided Allocator* + +Inputs: +- str: The input string +- length: The desired length of the right-justified string +- pad: The string used for padding on the left side +- allocator: (default is context.allocator) + +Returns: A new string right-justified within a field of the specified length +*/ right_justify :: proc(str: string, length: int, pad: string, allocator := context.allocator) -> string { n := rune_count(str) if n >= length || pad == "" { @@ -1713,10 +2355,15 @@ right_justify :: proc(str: string, length: int, pad: string, allocator := contex return to_string(b) } +/* +Writes a pad string repeatedly to an io.Writer until a given amount of padding has been filled - - - +Inputs: +- w: The io.Writer to write the pad string to +- pad: The pad string to be written +- pad_len: The length of the pad string +- remains: The total amount of padding to fill, in the same units as pad_len; the pad string is written remains / pad_len whole times +*/ @private write_pad_string :: proc(w: io.Writer, pad: string, pad_len, remains: int) { repeats := remains / pad_len @@ -1734,10 +2381,17 @@ write_pad_string :: proc(w: io.Writer, pad: string, pad_len, remains: int) { p = p[width:] } } +/* +Splits a string into a slice of substrings at each instance of one or more consecutive white space characters, as defined by unicode.is_space +*Allocates Using Provided Allocator* -// fields splits the string s around each instance of one or more consecutive white space character, defined by unicode.is_space -// returning a slice of substrings of s or an empty slice if s only contains white space +Inputs: +- s: The input string +- allocator: (default is context.allocator) + +Returns: A slice of substrings of the input string, or an empty slice if the input string only contains white space +*/ fields :: proc(s: string, allocator := context.allocator) -> []string #no_bounds_check { n := 0 was_space := 1 @@ -1786,14 +2440,20 @@ fields :: proc(s: string, allocator
:= context.allocator) -> []string #no_bounds } return a } +/* +Splits a string into a slice of substrings at each run of unicode code points `ch` satisfying the predicate f(ch) +*Allocates Using Provided Allocator* -// fields_proc splits the string s at each run of unicode code points `ch` satisfying f(ch) -// returns a slice of substrings of s -// If all code points in s satisfy f(ch) or string is empty, an empty slice is returned -// -// fields_proc makes no guarantee about the order in which it calls f(ch) -// it assumes that `f` always returns the same value for a given ch +Inputs: +- s: The input string +- f: A predicate function to determine the split points +- allocator: (default is context.allocator) + +NOTE: fields_proc makes no guarantee about the order in which it calls f(ch), it assumes that `f` always returns the same value for a given ch + +Returns: A slice of substrings of the input string, or an empty slice if all code points in the input string satisfy the predicate or if the input string is empty +*/ fields_proc :: proc(s: string, f: proc(rune) -> bool, allocator := context.allocator) -> []string #no_bounds_check { substrings := make([dynamic]string, 0, 32, allocator) @@ -1820,10 +2480,16 @@ fields_proc :: proc(s: string, f: proc(rune) -> bool, allocator := context.alloc return substrings[:] } +/* +Retrieves the first non-space substring from a mutable string reference and advances the reference. 
s then starts at the first space after the substring, or becomes an empty string if the substring was the remaining characters +Inputs: +- s: A mutable string reference to be iterated -// `fields_iterator` returns the first run of characters in `s` that does not contain white space, defined by `unicode.is_space` -// `s` will then start from any space after the substring, or be an empty string if the substring was the remaining characters +Returns: +- field: The first non-space substring found +- ok: A boolean indicating if a non-space substring was found +*/ fields_iterator :: proc(s: ^string) -> (field: string, ok: bool) { start, end := -1, -1 for r, offset in s { @@ -1852,10 +2518,21 @@ fields_iterator :: proc(s: ^string) -> (field: string, ok: bool) { s^ = s[len(s):] return } +/* +Computes the Levenshtein edit distance between two strings -// `levenshtein_distance` returns the Levenshtein edit distance between 2 strings. -// This is a single-row-version of the Wagner–Fischer algorithm, based on C code by Martin Ettl. -// Note: allocator isn't used if the length of string b in runes is smaller than 64. +*Allocates Using Provided Allocator (deletion occurs internal to proc)* + +NOTE: Does not perform internal allocation if the length of string b in runes is smaller than 64 + +Inputs: +- a, b: The two strings to compare +- allocator: (default is context.allocator) + +Returns: The Levenshtein edit distance between the two strings + +NOTE: This implementation is a single-row-version of the Wagner–Fischer algorithm, based on C code by Martin Ettl. +*/ levenshtein_distance :: proc(a, b: string, allocator := context.allocator) -> int { LEVENSHTEIN_DEFAULT_COSTS: []int : { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,