string code docs

This commit is contained in:
Jon Lipstate
2023-03-27 20:09:51 -07:00
parent aa6299f114
commit bf82c40964
6 changed files with 1810 additions and 513 deletions
+22 -2
View File
@@ -3,9 +3,22 @@ package strings
import "core:unicode/utf8"
/*
Ascii_Set is designed to store ASCII characters efficiently as a bit-array
Each bit in the array corresponds to a specific ASCII character, where the value of the bit (0 or 1)
indicates if the character is present in the set or not.
*/
Ascii_Set :: distinct [8]u32
/*
Creates an Ascii_Set with unique characters from the input string.
// create an ascii set of all unique characters in the string
Inputs:
- chars: A string containing characters to include in the Ascii_Set.
Returns:
- as: An Ascii_Set with unique characters from the input string.
- ok: false if any character in the input string is not a valid ASCII character.
*/
ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check {
for i in 0..<len(chars) {
c := chars[i]
@@ -17,8 +30,15 @@ ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_ch
ok = true
return
}
/*
Reports whether a byte is a member of an Ascii_Set.

Inputs:
- as: The Ascii_Set to search.
- c: The byte to look up.

Returns: true when the bit for `c` is set in `as`, false otherwise.
*/
ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check {
	word := as[c>>5]         // the upper 3 bits of `c` pick one of the 8 words
	mask := u32(1) << (c&31) // the lower 5 bits pick the bit inside that word
	return word & mask != 0
}
+372 -87
View File
@@ -4,68 +4,126 @@ import "core:runtime"
import "core:unicode/utf8"
import "core:strconv"
import "core:io"
/*
Type definition for a procedure that flushes a Builder

Inputs:
- b: A pointer to the Builder

Returns: A boolean indicating whether the Builder should be reset
*/
Builder_Flush_Proc :: #type proc(b: ^Builder) -> (do_reset: bool)
/*
A dynamic byte buffer / string builder with helper procedures
The dynamic array is wrapped inside the struct to be more opaque
You can use `fmt.sbprint*` procedures with a `^strings.Builder` directly
*/
Builder :: struct {
buf: [dynamic]byte, // backing storage; `builder_len` / `to_string` report its current contents
}
/*
Produces an empty Builder

The buffer is created by `make` with no explicit size; the previous notes on this
procedure state that this yields length 0 and capacity 16 (confirm against `make`).

*Allocates Using Provided Allocator*

Inputs:
- allocator: (default is context.allocator)

Returns: A new Builder
*/
builder_make_none :: proc(allocator := context.allocator) -> Builder {
	buf := make([dynamic]byte, allocator)
	return Builder{buf = buf}
}
/*
Produces a Builder whose buffer starts with the specified length

The capacity is whatever `make` chooses for that length.
NOTE(review): earlier notes disagree on the resulting capacity (16 vs. max(16, len)) - confirm against `make`.

*Allocates Using Provided Allocator*

Inputs:
- len: The desired length of the Builder's buffer
- allocator: (default is context.allocator)

Returns: A new Builder
*/
builder_make_len :: proc(len: int, allocator := context.allocator) -> Builder {
	buf := make([dynamic]byte, len, allocator)
	return Builder{buf = buf}
}
/*
Produces a Builder with a specified length and capacity

*Allocates Using Provided Allocator*

Inputs:
- len: The desired length of the Builder's buffer
- cap: The desired capacity of the Builder's buffer
  (previously documented as effectively max(cap, len) - confirm against `make`)
- allocator: (default is context.allocator)

Returns: A new Builder
*/
builder_make_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder {
	buf := make([dynamic]byte, len, cap, allocator)
	return Builder{buf = buf}
}
// Overload group for the `builder_make_*` procedures:
// call it with no size, with `len`, or with `len` and `cap`
builder_make :: proc{
builder_make_none,
builder_make_len,
builder_make_len_cap,
}
/*
Initializes a Builder with an empty buffer, replacing the existing `buf`

The buffer is created by `make` with no explicit size; the previous notes on this
procedure state that this yields length 0 and capacity 16 (confirm against `make`).
The previous `buf` is overwritten, not freed, by this procedure.

*Allocates Using Provided Allocator*

Inputs:
- b: A pointer to the Builder
- allocator: (default is context.allocator)

Returns: The same pointer `b`, now initialized
*/
builder_init_none :: proc(b: ^Builder, allocator := context.allocator) -> ^Builder {
	buf := make([dynamic]byte, allocator)
	b.buf = buf
	return b
}
/*
Initializes a Builder with a buffer of the specified length, replacing the existing `buf`

The capacity is whatever `make` chooses for that length.
NOTE(review): earlier notes disagree on the resulting capacity (16 vs. max(len, 16)) - confirm against `make`.
The previous `buf` is overwritten, not freed, by this procedure.

*Allocates Using Provided Allocator*

Inputs:
- b: A pointer to the Builder
- len: The desired length of the Builder's buffer
- allocator: (default is context.allocator)

Returns: The same pointer `b`, now initialized
*/
builder_init_len :: proc(b: ^Builder, len: int, allocator := context.allocator) -> ^Builder {
	buf := make([dynamic]byte, len, allocator)
	b.buf = buf
	return b
}
/*
Initializes a Builder with a specified length and capacity, replacing the existing `buf`

The previous `buf` is overwritten, not freed, by this procedure.

*Allocates Using Provided Allocator*

Inputs:
- b: A pointer to the Builder
- len: The desired length of the Builder's buffer
- cap: The desired capacity of the Builder's buffer
  (previously documented as effectively max(len, cap) - confirm against `make`)
- allocator: (default is context.allocator)

Returns: The same pointer `b`, now initialized
*/
builder_init_len_cap :: proc(b: ^Builder, len, cap: int, allocator := context.allocator) -> ^Builder {
	buf := make([dynamic]byte, len, cap, allocator)
	b.buf = buf
	return b
}
// Overload group for the `builder_init_*` procedures:
// call it with or without the `len` / `cap` parameters
builder_init :: proc{
builder_init_none,
builder_init_len,
builder_init_len_cap,
}
@(private)
_builder_stream_vtable_obj := io.Stream_VTable{
impl_write = proc(s: io.Stream, p: []byte) -> (n: int, err: io.Error) {
@@ -91,49 +149,80 @@ _builder_stream_vtable_obj := io.Stream_VTable{
impl_destroy = proc(s: io.Stream) -> io.Error {
b := (^Builder)(s.stream_data)
delete(b.buf)
b.buf=nil
return .None
},
}
// NOTE(dweiler): Work around a miscompilation bug on Linux still.
@(private)
_builder_stream_vtable := &_builder_stream_vtable_obj
/*
Wraps a Builder in an io.Stream

The stream uses the package's Builder stream vtable and carries `b` as its stream data.

Inputs:
- b: A pointer to the Builder

Returns: An io.Stream
*/
to_stream :: proc(b: ^Builder) -> io.Stream {
	stream: io.Stream
	stream.stream_vtable = _builder_stream_vtable
	stream.stream_data = b
	return stream
}
/*
Wraps a Builder in an io.Writer

Inputs:
- b: A pointer to the Builder

Returns: An io.Writer built from the Builder's io.Stream
*/
to_writer :: proc(b: ^Builder) -> io.Writer {
	stream := to_stream(b)
	return io.to_writer(stream)
}
/*
Frees the Builder's byte buffer and resets it to nil

Inputs:
- b: A pointer to the Builder
*/
builder_destroy :: proc(b: ^Builder) {
	delete(b.buf)
	// Drop the reference to the freed storage; this also leaves the buffer empty,
	// so no separate `clear` is needed.
	b.buf = nil
}
/*
Reserves capacity in the Builder's byte buffer

Forwards to `reserve`; the existing notes describe this as taking effect only when
`cap` is higher than the current capacity.

Inputs:
- b: A pointer to the Builder
- cap: The desired capacity for the Builder's buffer
*/
builder_grow :: proc(b: ^Builder, cap: int) {
	buf := &b.buf
	reserve(buf, cap)
}
/*
Clears the Builder byte buffer content (sets len to zero)

Inputs:
- b: A pointer to the Builder
*/
builder_reset :: proc(b: ^Builder) {
	buf := &b.buf
	clear(buf)
}
/*
create an empty builder with the same slice length as its cap
uses the `mem.nil_allocator` to avoid allocation and keep a fixed length
used in `fmt.bprint*`
Creates a Builder from a slice of bytes with the same slice length as its capacity. Used in fmt.bprint*
*Uses Nil Allocator - Does NOT allocate*
Inputs:
- backing: A slice of bytes to be used as the backing buffer
Example:
```odin
bytes: [8]byte // <-- gets filled
builder := strings.builder_from_bytes(bytes[:])
strings.write_byte(&builder, 'a') -> "a"
strings.write_byte(&builder, 'b') -> "ab"
strings.write_byte(&builder, 'a') // -> "a"
strings.write_byte(&builder, 'b') // -> "ab"
```
Returns: A new Builder
*/
builder_from_bytes :: proc(backing: []byte) -> Builder {
s := transmute(runtime.Raw_Slice)backing
@@ -147,36 +236,69 @@ builder_from_bytes :: proc(backing: []byte) -> Builder {
buf = transmute([dynamic]byte)d,
}
}
// Alias to `builder_from_bytes`
builder_from_slice :: builder_from_bytes
/*
Casts the Builder byte buffer to a string and returns it

Inputs:
- b: A Builder

Returns: The contents of the Builder's buffer, as a string
*/
to_string :: proc(b: Builder) -> string {
	bytes := b.buf[:]
	return string(bytes)
}
/*
Returns the length of the Builder's buffer, in bytes

Inputs:
- b: A Builder

Returns: The length of the Builder's buffer
*/
builder_len :: proc(b: Builder) -> int {
	length := len(b.buf)
	return length
}
/*
Returns the capacity of the Builder's buffer, in bytes

Inputs:
- b: A Builder

Returns: The capacity of the Builder's buffer
*/
builder_cap :: proc(b: Builder) -> int {
	capacity := cap(b.buf)
	return capacity
}
/*
Returns the free space left in the Builder's buffer, in bytes

Inputs:
- b: A Builder

Returns: The capacity of the buffer minus its current length
*/
builder_space :: proc(b: Builder) -> int {
	capacity := cap(b.buf)
	used := len(b.buf)
	return capacity - used
}
/*
appends a byte to the builder, returns the append diff
Appends a byte to the Builder and returns the number of bytes appended
Inputs:
- b: A pointer to the Builder
- x: The byte to be appended
Example:
```odin
builder := strings.builder_make()
strings.write_byte(&builder, 'a') // 1
strings.write_byte(&builder, 'b') // 1
strings.write_byte(&builder, 'c') // 1
fmt.println(strings.to_string(builder)) // -> abc
strings.write_byte(&builder, 'a') // 1
strings.write_byte(&builder, 'b') // 1
fmt.println(strings.to_string(builder)) // -> ab
```
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of bytes appended
*/
write_byte :: proc(b: ^Builder, x: byte) -> (n: int) {
n0 := len(b.buf)
@@ -184,14 +306,23 @@ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) {
n1 := len(b.buf)
return n1-n0
}
/*
appends a slice of bytes to the builder, returns the append diff
Appends a slice of bytes to the Builder and returns the number of bytes appended
Inputs:
- b: A pointer to the Builder
- x: The slice of bytes to be appended
Example:
```odin
builder := strings.builder_make()
bytes := [?]byte { 'a', 'b', 'c' }
strings.write_bytes(&builder, bytes[:]) // 3
fmt.println(strings.to_string(builder)) // -> abc
```
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of bytes appended
*/
write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) {
n0 := len(b.buf)
@@ -199,42 +330,66 @@ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) {
n1 := len(b.buf)
return n1-n0
}
/*
Appends a single rune to the Builder and returns the number of bytes written and an `io.Error`

Inputs:
- b: A pointer to the Builder
- r: The rune to be appended

Example:
```odin
builder := strings.builder_make()
strings.write_rune(&builder, 'ä') // 2 None
strings.write_rune(&builder, 'b') // 1 None
fmt.println(strings.to_string(builder)) // -> äb
```

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, the returned size states the number actually written.

Returns: The number of bytes written and an io.Error (if any)
*/
write_rune :: proc(b: ^Builder, r: rune) -> (int, io.Error) {
	w := to_writer(b)
	size, err := io.write_rune(w, r)
	return size, err
}
/*
Appends a quoted rune to the Builder and returns the number of bytes written

Inputs:
- b: A pointer to the Builder
- r: The rune to be appended

Example:
```odin
builder := strings.builder_make()
strings.write_string(&builder, "abc") // 3
strings.write_quoted_rune(&builder, 'ä') // 4
strings.write_string(&builder, "abc") // 3
fmt.println(strings.to_string(builder)) // -> abc'ä'abc
```

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.

Returns: The number of bytes written
*/
write_quoted_rune :: proc(b: ^Builder, r: rune) -> (n: int) {
	w := to_writer(b)
	n = io.write_quoted_rune(w, r)
	return
}
/*
appends a string to the builder, return the written byte size
Appends a string to the Builder and returns the number of bytes written
Inputs:
- b: A pointer to the Builder
- s: The string to be appended
Example:
```odin
builder := strings.builder_make()
strings.write_string(&builder, "a") // 1
strings.write_string(&builder, "bc") // 2
strings.write_string(&builder, "xyz") // 3
fmt.println(strings.to_string(builder)) // -> abcxyz
strings.write_string(&builder, "a") // 1
strings.write_string(&builder, "bc") // 2
fmt.println(strings.to_string(builder)) // -> abc
```
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of bytes written
*/
write_string :: proc(b: ^Builder, s: string) -> (n: int) {
n0 := len(b.buf)
@@ -242,10 +397,14 @@ write_string :: proc(b: ^Builder, s: string) -> (n: int) {
n1 := len(b.buf)
return n1-n0
}
/*
Pops and returns the last byte in the Builder or 0 when the Builder is empty
Inputs:
- b: A pointer to the Builder
// pops and returns the last byte in the builder
// returns 0 when the builder is empty
Returns: The last byte in the Builder or 0 if empty
*/
pop_byte :: proc(b: ^Builder) -> (r: byte) {
if len(b.buf) == 0 {
return 0
@@ -256,9 +415,14 @@ pop_byte :: proc(b: ^Builder) -> (r: byte) {
d.len = max(d.len-1, 0)
return
}
/*
Pops the last rune in the Builder and returns the popped rune and its rune width or (0, 0) if empty
// pops the last rune in the builder and returns the popped rune and its rune width
// returns 0, 0 when the builder is empty
Inputs:
- b: A pointer to the Builder
Returns: The popped rune and its rune width or (0, 0) if empty
*/
pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) {
if len(b.buf) == 0 {
return 0, 0
@@ -269,41 +433,84 @@ pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) {
d.len = max(d.len-width, 0)
return
}
@(private)
DIGITS_LOWER := "0123456789abcdefx"
/*
Appends a quoted string to the Builder and returns the number of bytes written

Inputs:
- b: A pointer to the Builder
- str: The string to be quoted and appended
- quote: The optional quote character (default is double quotes)

Example:
```odin
builder := strings.builder_make()
strings.write_quoted_string(&builder, "a") // 3
strings.write_quoted_string(&builder, "bc", '\'') // 4
strings.write_quoted_string(&builder, "xyz") // 5
fmt.println(strings.to_string(builder)) // -> "a"'bc'"xyz"
```

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
The error reported by the underlying writer is discarded.

Returns: The number of bytes written
*/
write_quoted_string :: proc(b: ^Builder, str: string, quote: byte = '"') -> (n: int) {
	w := to_writer(b)
	written, _ := io.write_quoted_string(w, str, quote)
	return written
}
/*
Appends an encoded rune to the Builder and returns the number of bytes written

Inputs:
- b: A pointer to the Builder
- r: The rune to be appended
- write_quote: Optional boolean flag to write the quote character (default is true)

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
The error reported by the underlying writer is discarded.

Returns: The number of bytes written
*/
write_encoded_rune :: proc(b: ^Builder, r: rune, write_quote := true) -> (n: int) {
	w := to_writer(b)
	written, _ := io.write_encoded_rune(w, r, write_quote)
	return written
}
/*
Appends an escaped rune to the Builder and returns the number of bytes written

Inputs:
- b: A pointer to the Builder
- r: The rune to be appended
- quote: The quote character
- html_safe: Optional boolean flag to encode '<', '>', '&' as digits (default is false)

Examples:
- Escaped runes are fully written out, e.g. '\a' will be written as such
- When `r` and `quote` match and `quote` is `\\`, they will be written as two slashes
- With `html_safe` set, the runes '<', '>', '&' are encoded as digits, e.g. `\u0026`

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
The error reported by the underlying writer is discarded.

Returns: The number of bytes written
*/
write_escaped_rune :: proc(b: ^Builder, r: rune, quote: byte, html_safe := false) -> (n: int) {
	w := to_writer(b)
	written, _ := io.write_escaped_rune(w, r, quote, html_safe)
	return written
}
/*
Writes a f64 value to the Builder and returns the number of characters written
// writes a f64 value into the builder, returns the written amount of characters
Inputs:
- b: A pointer to the Builder
- f: The f64 value to be appended
- fmt: The format byte
- prec: The precision
- bit_size: The bit size
- always_signed: Optional boolean flag to always include the sign (default is false)
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of characters written
*/
write_float :: proc(b: ^Builder, f: f64, fmt: byte, prec, bit_size: int, always_signed := false) -> (n: int) {
buf: [384]byte
s := strconv.append_float(buf[:], f, fmt, prec, bit_size)
@@ -314,8 +521,19 @@ write_float :: proc(b: ^Builder, f: f64, fmt: byte, prec, bit_size: int, always_
}
return write_string(b, s)
}
/*
Writes a f16 value to the Builder and returns the number of characters written
// writes a f16 value into the builder, returns the written amount of characters
Inputs:
- b: A pointer to the Builder
- f: The f16 value to be appended
- fmt: The format byte
- always_signed: Optional boolean flag to always include the sign
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of characters written
*/
write_f16 :: proc(b: ^Builder, f: f16, fmt: byte, always_signed := false) -> (n: int) {
buf: [384]byte
s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f))
@@ -324,8 +542,27 @@ write_f16 :: proc(b: ^Builder, f: f16, fmt: byte, always_signed := false) -> (n:
}
return write_string(b, s)
}
/*
Writes a f32 value to the Builder and returns the number of characters written
// writes a f32 value into the builder, returns the written amount of characters
Inputs:
- b: A pointer to the Builder
- f: The f32 value to be appended
- fmt: The format byte
- always_signed: Optional boolean flag to always include the sign
Example:
```odin
builder := strings.builder_make()
strings.write_f32(&builder, 3.14159, 'f') // 10
strings.write_string(&builder, " - ") // 3
strings.write_f32(&builder, -0.123, 'e') // 15
fmt.println(strings.to_string(builder)) // -> 3.14159012 - -1.23000003e-01
```
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of characters written
*/
write_f32 :: proc(b: ^Builder, f: f32, fmt: byte, always_signed := false) -> (n: int) {
buf: [384]byte
s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f))
@@ -334,8 +571,19 @@ write_f32 :: proc(b: ^Builder, f: f32, fmt: byte, always_signed := false) -> (n:
}
return write_string(b, s)
}
/*
Writes a f64 value to the Builder and returns the number of characters written
// writes a f64 value into the builder, returns the written amount of characters
Inputs:
- b: A pointer to the Builder
- f: The f64 value to be appended
- fmt: The format byte
- always_signed: Optional boolean flag to always include the sign
NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.
Returns: The number of characters written
*/
write_f64 :: proc(b: ^Builder, f: f64, fmt: byte, always_signed := false) -> (n: int) {
buf: [384]byte
s := strconv.append_float(buf[:], f64(f), fmt, 2*size_of(f), 8*size_of(f))
@@ -344,30 +592,67 @@ write_f64 :: proc(b: ^Builder, f: f64, fmt: byte, always_signed := false) -> (n:
}
return write_string(b, s)
}
/*
Writes a u64 value to the Builder and returns the number of characters written

Inputs:
- b: A pointer to the Builder
- i: The u64 value to be appended
- base: The optional base for the numeric representation (default is 10)

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.

Returns: The number of characters written
*/
write_u64 :: proc(b: ^Builder, i: u64, base: int = 10) -> (n: int) {
	// Sized for the longest possible rendering: a 64-bit value in base 2 needs 64 digits.
	buf: [64]byte
	s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil)
	return write_string(b, s)
}
/*
Writes a i64 value to the Builder and returns the number of characters written

Inputs:
- b: A pointer to the Builder
- i: The i64 value to be appended
- base: The optional base for the numeric representation (default is 10)

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.

Returns: The number of characters written
*/
write_i64 :: proc(b: ^Builder, i: i64, base: int = 10) -> (n: int) {
	// Sized for the longest possible rendering: 64 base-2 digits plus a leading sign.
	buf: [65]byte
	s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil)
	return write_string(b, s)
}
/*
Writes a uint value to the Builder and returns the number of characters written

The value is widened to u64 and handed to `write_u64`.

Inputs:
- b: A pointer to the Builder
- i: The uint value to be appended
- base: The optional base for the numeric representation (default is 10)

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.

Returns: The number of characters written
*/
write_uint :: proc(b: ^Builder, i: uint, base: int = 10) -> (n: int) {
	widened := u64(i)
	n = write_u64(b, widened, base)
	return
}
/*
Writes a int value to the Builder and returns the number of characters written

The value is widened to i64 and handed to `write_i64`.

Inputs:
- b: A pointer to the Builder
- i: The int value to be appended
- base: The optional base for the numeric representation (default is 10)

NOTE: The backing dynamic array may be fixed in capacity or fail to resize, `n` states the number actually written.

Returns: The number of characters written
*/
write_int :: proc(b: ^Builder, i: int, base: int = 10) -> (n: int) {
	widened := i64(i)
	n = write_i64(b, widened, base)
	return
}
+202 -44
View File
@@ -4,6 +4,20 @@ import "core:io"
import "core:unicode"
import "core:unicode/utf8"
/*
Converts invalid UTF-8 sequences in the input string `s` to the `replacement` string.
*Allocates Using Provided Allocator*
Inputs:
- s: Input string that may contain invalid UTF-8 sequences.
- replacement: String to replace invalid UTF-8 sequences with.
- allocator: (default: context.allocator).
WARNING: Allocation does not occur when len(s) == 0
Returns: A valid UTF-8 string with invalid sequences replaced by `replacement`.
*/
to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> string {
if len(s) == 0 {
return ""
@@ -33,7 +47,7 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) ->
invalid := false
for i := 0; i < len(s); /**/ {
for i := 0; i < len(s); /**/{
c := s[i]
if c < utf8.RUNE_SELF {
i += 1
@@ -57,13 +71,20 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) ->
}
return to_string(b)
}
/*
returns the input string `s` with all runes set to lowered case
always allocates using the `allocator`
Converts the input string `s` to all lowercase characters.
strings.to_lower("test") -> test
strings.to_lower("Test") -> test
*Allocates Using Provided Allocator*
Inputs:
- s: Input string to be converted.
- allocator: (default: context.allocator).
Example:
```odin
strings.to_lower("TeST") -> test
```
Returns: A new string with all characters converted to lowercase.
*/
to_lower :: proc(s: string, allocator := context.allocator) -> string {
b: Builder
@@ -73,13 +94,20 @@ to_lower :: proc(s: string, allocator := context.allocator) -> string {
}
return to_string(b)
}
/*
returns the input string `s` with all runes set to upper case
always allocates using the `allocator`
Converts the input string `s` to all uppercase characters.
strings.to_upper("test") -> TEST
*Allocates Using Provided Allocator*
Inputs:
- s: Input string to be converted.
- allocator: (default: context.allocator).
Example:
```odin
strings.to_upper("Test") -> TEST
```
Returns: A new string with all characters converted to uppercase.
*/
to_upper :: proc(s: string, allocator := context.allocator) -> string {
b: Builder
@@ -89,21 +117,36 @@ to_upper :: proc(s: string, allocator := context.allocator) -> string {
}
return to_string(b)
}
/*
Checks if the rune `c` is a delimiter: '-', '_', or anything `is_space` accepts.
Useful when treating strings like words in a text editor or html paths.

Inputs:
- c: Rune to check for delimiter status.

Returns: True if `c` is a delimiter, false otherwise.
*/
is_delimiter :: proc(c: rune) -> bool {
	switch c {
	case '-', '_':
		return true
	}
	return is_space(c)
}
/*
Checks if the rune `r` is a non-alphanumeric or space character.
// returns true when the `r` rune is a non alpha or `unicode.is_space` rune
Inputs:
- r: Rune to check for separator status.
Returns: True if `r` is a non-alpha or `unicode.is_space` rune.
*/
is_separator :: proc(r: rune) -> bool {
if r <= 0x7f {
switch r {
case '0'..='9': return false
case 'a'..='z': return false
case 'A'..='Z': return false
case '_': return false
case '0' ..= '9':
return false
case 'a' ..= 'z':
return false
case 'A' ..= 'Z':
return false
case '_':
return false
}
return true
}
@@ -115,12 +158,32 @@ is_separator :: proc(r: rune) -> bool {
return unicode.is_space(r)
}
/*
iterator that loops through the string and calls the callback with the `prev`, `curr` and `next` rune
on empty string `s` the callback gets called once with empty runes
Iterates over a string, calling a callback for each rune with the previous, current, and next runes as arguments.
Inputs:
- w: An io.Writer to be used by the callback for writing output.
- s: The input string to be iterated over.
- callback: A procedure to be called for each rune in the string, with arguments (w: io.Writer, prev, curr, next: rune).
The callback can utilize the provided io.Writer to write output during the iteration.
Example:
```odin
my_callback :: proc(w: io.Writer, prev, curr, next: rune) {
fmt.println("my_callback", curr) // <-- Custom logic here
}
s := "hello world"
b: strings.Builder
strings.builder_init_len(&b, len(s))
w := strings.to_writer(&b)
strings.string_case_iterator(w, s, my_callback)
```
*/
string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Writer, prev, curr, next: rune)) {
string_case_iterator :: proc(
w: io.Writer,
s: string,
callback: proc(w: io.Writer, prev, curr, next: rune),
) {
prev, curr: rune
for next in s {
if curr == 0 {
@@ -139,10 +202,19 @@ string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Write
callback(w, prev, curr, 0)
}
}
// Alias to `to_camel_case`
to_lower_camel_case :: to_camel_case
/*
Converts the input string `s` to "lowerCamelCase".
// converts the `s` string to "lowerCamelCase"
*Allocates Using Provided Allocator*
Inputs:
- s: Input string to be converted.
- allocator: (default: context.allocator).
Returns: A "lowerCamelCase" formatted string.
*/
to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -164,10 +236,19 @@ to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
return to_string(b)
}
// Alias to `to_pascal_case`
to_upper_camel_case :: to_pascal_case
/*
Converts the input string `s` to "UpperCamelCase" (PascalCase).
// converts the `s` string to "PascalCase"
*Allocates Using Provided Allocator*
Inputs:
- s: Input string to be converted.
- allocator: (default: context.allocator).
Returns: A "PascalCase" formatted string.
*/
to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -189,17 +270,31 @@ to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
return to_string(b)
}
/*
Returns a string converted to a delimiter-separated case with configurable casing
Splits the `s` string into words separated by the given `delimiter` rune
All runes will be upper or lowercased based on the `all_upper_case` bool
*Allocates Using Provided Allocator*
strings.to_delimiter_case("Hello World", '_', false) -> hello_world
strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD
strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD
strings.to_delimiter_case("aBC", '_', false) -> a_b_c
Inputs:
- s: The input string to be converted
- delimiter: The rune to be used as the delimiter between words
- all_upper_case: A boolean indicating if the output should be all uppercased (true) or lowercased (false)
- allocator: (default: context.allocator).
Example:
```odin
strings.to_delimiter_case("Hello World", '_', false) // -> "hello_world"
strings.to_delimiter_case("Hello World", ' ', true) // -> "HELLO WORLD"
strings.to_delimiter_case("aBC", '_', false) // -> "a_b_c"
```
Returns: The converted string
*/
to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string {
to_delimiter_case :: proc(
s: string,
delimiter: rune,
all_upper_case: bool,
allocator := context.allocator,
) -> string {
s := s
s = trim_space(s)
b: Builder
@@ -237,35 +332,96 @@ to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allo
return to_string(b)
}
/*
Converts a string to "snake_case" with all runes lowercased

*Allocates Using Provided Allocator*

Inputs:
- s: The input string to be converted
- allocator: (default: context.allocator).

Example:
```odin
strings.to_snake_case("HelloWorld") // -> "hello_world"
strings.to_snake_case("Hello World") // -> "hello_world"
```

Returns: The converted string
*/
to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	DELIMITER :: '_'
	UPPERCASE :: false
	return to_delimiter_case(s, DELIMITER, UPPERCASE, allocator)
}
// Alias for `to_upper_snake_case`
to_screaming_snake_case :: to_upper_snake_case
/*
Converts a string to "SNAKE_CASE" with all runes uppercased

*Allocates Using Provided Allocator*

Inputs:
- s: The input string to be converted
- allocator: (default: context.allocator).

Example:
```odin
strings.to_upper_snake_case("HelloWorld") // -> "HELLO_WORLD"
```

Returns: The converted string
*/
to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	DELIMITER :: '_'
	UPPERCASE :: true
	return to_delimiter_case(s, DELIMITER, UPPERCASE, allocator)
}
/*
Converts a string to "kebab-case" with all runes lowercased
// converts the `s` string to "kebab-case" with all runes lowercased
*Allocates Using Provided Allocator*
Inputs:
- s: The input string to be converted
- allocator: (default: context.allocator).
Example:
```odin
strings.to_kebab_case("HelloWorld") // -> "hello-world"
```
Returns: The converted string
*/
to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
	// kebab-case is delimiter case with '-' as the separator and no upper-casing.
	return to_delimiter_case(
		s              = s,
		delimiter      = '-',
		all_upper_case = false,
		allocator      = allocator,
	)
}
/*
Converts a string to "KEBAB-CASE" with all runes uppercased
// converts the `s` string to "KEBAB-CASE" with all runes uppercased
*Allocates Using Provided Allocator*
Inputs:
- s: The input string to be converted
- allocator: (default: context.allocator).
Example:
```odin
strings.to_upper_kebab_case("HelloWorld") // -> "HELLO-WORLD"
```
Returns: The converted string
*/
to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
	// KEBAB-CASE is delimiter case with '-' as the separator and upper-casing enabled.
	return to_delimiter_case(
		s              = s,
		delimiter      = '-',
		all_upper_case = true,
		allocator      = allocator,
	)
}
/*
Converts a string to "Ada_Case"
// converts the `s` string to "Ada_Case"
*Allocates Using Provided Allocator*
Inputs:
- s: The input string to be converted
- allocator: (default: context.allocator).
Example:
```odin
strings.to_ada_case("HelloWorld") // -> "Hello_World"
```
Returns: The converted string
*/
to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -275,7 +431,9 @@ to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
string_case_iterator(w, s, proc(w: io.Writer, prev, curr, next: rune) {
if !is_delimiter(curr) {
if is_delimiter(prev) || prev == 0 || (unicode.is_lower(prev) && unicode.is_upper(curr)) {
if is_delimiter(prev) ||
prev == 0 ||
(unicode.is_lower(prev) && unicode.is_upper(curr)) {
if prev != 0 {
io.write_rune(w, '_')
}
+58 -11
View File
@@ -2,49 +2,96 @@ package strings
import "core:runtime"
// custom string entry struct
// Custom string entry struct
Intern_Entry :: struct {
	len: int, // upper bound used when slicing `str` back into a string (`entry.str[:entry.len]` in `intern_get`)
	str: [1]byte, // string is allocated inline with the entry to keep allocations simple
}
/*
Intern is a more memory efficient string map
// "intern" is a more memory efficient string map
// `allocator` is used to allocate the actual `Intern_Entry` strings
Uses Specified Allocator for `Intern_Entry` strings
Fields:
- allocator: The allocator used for the Intern_Entry strings
- entries: A map of strings to interned string entries
*/
Intern :: struct {
	allocator: runtime.Allocator, // set by `intern_init`; `intern_destroy` frees every entry with it
	entries: map[string]^Intern_Entry, // looked up by the text being interned (`m.entries[text]`)
}
/*
Initializes the entries map and sets the allocator for the string entries
// initialize the entries map and set the allocator for the string entries
*Allocates Using Provided Allocators*
Inputs:
- m: A pointer to the Intern struct to be initialized
- allocator: The allocator for the Intern_Entry strings (Default: context.allocator)
- map_allocator: The allocator for the map of entries (Default: context.allocator)
*/
intern_init :: proc(m: ^Intern, allocator := context.allocator, map_allocator := context.allocator) {
	// The map itself lives in `map_allocator` and is created with a capacity argument of 16.
	m.entries = make(map[string]^Intern_Entry, 16, map_allocator)
	// Entry storage uses `allocator`; it is remembered so `intern_destroy` can free entries with it.
	m.allocator = allocator
}
/*
Frees the map and all its content allocated using the `.allocator`.
// free the map and all its content allocated using the `.allocator`
Inputs:
- m: A pointer to the Intern struct to be destroyed
*/
intern_destroy :: proc(m: ^Intern) {
	// Release every entry with the allocator recorded on the Intern...
	for _, entry in m.entries {
		free(entry, m.allocator)
	}
	// ...then release the map's own storage.
	delete(m.entries)
}
/*
Returns the interned string for the given text; the text is added to the map if it didn't exist yet.
// returns the `text` string from the intern map - gets set if it didnt exist yet
// the returned string lives as long as the map entry lives
*MAY Allocate using the Intern's Allocator*
Inputs:
- m: A pointer to the Intern struct
- text: The string to be interned
NOTE: The returned string lives as long as the map entry lives.
Returns: The interned string and an allocator error if any
*/
intern_get :: proc(m: ^Intern, text: string) -> (str: string, err: runtime.Allocator_Error) {
	// An allocator error from the lookup is handed straight back to the caller.
	entry := _intern_get_entry(m, text) or_return
	// `str` is declared as a single byte, so the slice must skip bounds checking
	// to reach the full `len` bytes stored inline with the entry.
	#no_bounds_check {
		str = string(entry.str[:entry.len])
	}
	return str, nil
}
/*
Returns the interned C-String for the given text; the text is added to the map if it didn't exist yet.
// returns the `text` cstring from the intern map - gets set if it didnt exist yet
// the returned cstring lives as long as the map entry lives
*MAY Allocate using the Intern's Allocator*
Inputs:
- m: A pointer to the Intern struct
- text: The string to be interned
NOTE: The returned C-String lives as long as the map entry lives
Returns: The interned C-String and an allocator error if any
*/
intern_get_cstring :: proc(m: ^Intern, text: string) -> (str: cstring, err: runtime.Allocator_Error) {
	// An allocator error from the lookup is handed straight back to the caller.
	entry := _intern_get_entry(m, text) or_return
	// The cstring points at the first byte of the entry's inline storage.
	first_byte := &entry.str[0]
	return cstring(first_byte), nil
}
/*
Internal function to lookup whether the text string exists in the map, returns the entry
Sets and allocates the entry if it wasn't set yet
// looks up wether the `text` string exists in the map, returns the entry
// sets & allocates the entry if it wasnt set yet
*MAY Allocate using the Intern's Allocator*
Inputs:
- m: A pointer to the Intern struct
- text: The string to be looked up or interned
Returns: The new or existing interned entry and an allocator error if any
*/
_intern_get_entry :: proc(m: ^Intern, text: string) -> (new_entry: ^Intern_Entry, err: runtime.Allocator_Error) #no_bounds_check {
if prev, ok := m.entries[text]; ok {
return prev, nil
+130 -20
View File
@@ -4,59 +4,104 @@ import "core:io"
import "core:unicode/utf8"
/*
io stream data for a string reader that can read based on bytes or runes
implements the vtable when using the io.Reader variants
"read" calls advance the current reading offset `i`
io stream data for a string reader that can read based on bytes or runes
implements the vtable when using the io.Reader variants
"read" calls advance the current reading offset `i`
*/
Reader :: struct {
	s: string, // read-only buffer
	i: i64, // current reading index; `reader_init` resets it to 0
	prev_rune: int, // previous reading index of rune or < 0; `reader_init` sets it to -1
}
/*
Initializes a string Reader with the provided string
// init the reader to the string `s`
Inputs:
- r: A pointer to a Reader struct
- s: The input string to be read
*/
reader_init :: proc(r: ^Reader, s: string) {
	// Overwrite every field at once: start reading `s` from index 0
	// with no previously read rune recorded.
	r^ = Reader{
		s         = s,
		i         = 0,
		prev_rune = -1,
	}
}
/*
Converts a Reader into an io.Stream
// returns a stream from the reader data
Inputs:
- r: A pointer to a Reader struct
Returns: An io.Stream for the given Reader
*/
reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) {
	// The stream carries the Reader as its opaque data pointer and dispatches
	// through the package-private reader vtable.
	s = io.Stream{
		stream_data   = r,
		stream_vtable = &_reader_vtable,
	}
	return
}
/*
Initializes a string Reader and returns an io.Reader for the given string
// init a reader to the string `s` and return an io.Reader
Inputs:
- r: A pointer to a Reader struct
- s: The input string to be read
Returns: An io.Reader for the given string
*/
to_reader :: proc(r: ^Reader, s: string) -> io.Reader {
	// Point the Reader at `s` before wrapping it.
	reader_init(r, s)
	stream := reader_to_stream(r)
	// The second result of the conversion is deliberately discarded.
	reader, _ := io.to_reader(stream)
	return reader
}
/*
Initializes a string Reader and returns an io.Reader_At for the given string
// init a reader to the string `s` and return an io.Reader_At
Inputs:
- r: A pointer to a Reader struct
- s: The input string to be read
Returns: An io.Reader_At for the given string
*/
to_reader_at :: proc(r: ^Reader, s: string) -> io.Reader_At {
	// Point the Reader at `s` before wrapping it.
	reader_init(r, s)
	stream := reader_to_stream(r)
	// The second result of the conversion is deliberately discarded.
	reader_at, _ := io.to_reader_at(stream)
	return reader_at
}
/*
Returns the remaining length of the Reader
// remaining length of the reader
Inputs:
- r: A pointer to a Reader struct
Returns: The remaining length of the Reader
*/
reader_length :: proc(r: ^Reader) -> int {
	total := i64(len(r.s))
	// Bytes remain only while the index is still inside the string.
	if r.i < total {
		return int(total - r.i)
	}
	return 0
}
/*
Returns the length of the string stored in the Reader
// returns the string length stored by the reader
Inputs:
- r: A pointer to a Reader struct
Returns: The length of the string stored in the Reader
*/
reader_size :: proc(r: ^Reader) -> i64 {
	// Full length of the underlying string, independent of the current index.
	size := len(r.s)
	return i64(size)
}
/*
Reads len(p) bytes from the Reader's string and copies into the provided slice.
// reads len(p) bytes into the slice from the string in the reader
// returns `n` amount of read bytes and an io.Error
Inputs:
- r: A pointer to a Reader struct
- p: A byte slice to copy data into
Returns:
- n: The number of bytes read
- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success.
*/
reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
if r.i >= i64(len(r.s)) {
return 0, .EOF
@@ -66,9 +111,18 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
r.i += i64(n)
return
}
/*
Reads len(p) bytes from the Reader's string and copies into the provided slice, at the specified offset from the current index.
// reads len(p) bytes into the slice from the string in the reader at an offset
// returns `n` amount of read bytes and an io.Error
Inputs:
- r: A pointer to a Reader struct
- p: A byte slice to copy data into
- off: The offset from which to read
Returns:
- n: The number of bytes read
- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success.
*/
reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) {
if off < 0 {
return 0, .Invalid_Offset
@@ -82,8 +136,16 @@ reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Erro
}
return
}
/*
Reads and returns a single byte from the Reader's string
// reads and returns a single byte - error when out of bounds
Inputs:
- r: A pointer to a Reader struct
Returns:
- The byte read from the Reader
- err: An io.Error if an error occurs while reading, including .EOF, otherwise nil denotes success.
*/
reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) {
r.prev_rune = -1
if r.i >= i64(len(r.s)) {
@@ -93,8 +155,14 @@ reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) {
r.i += 1
return b, nil
}
/*
Decrements the Reader's index (i) by 1
// decreases the reader offset - error when below 0
Inputs:
- r: A pointer to a Reader struct
Returns: An io.Error if `r.i <= 0` (.Invalid_Unread), otherwise nil denotes success.
*/
reader_unread_byte :: proc(r: ^Reader) -> io.Error {
if r.i <= 0 {
return .Invalid_Unread
@@ -103,8 +171,17 @@ reader_unread_byte :: proc(r: ^Reader) -> io.Error {
r.i -= 1
return nil
}
/*
Reads and returns a single rune and its size from the Reader's string
// reads and returns a single rune and the rune size - error when out bounds
Inputs:
- r: A pointer to a Reader struct
Returns:
- ch: The rune read from the Reader
- size: The size of the rune in bytes
- err: An io.Error if an error occurs while reading
*/
reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) {
if r.i >= i64(len(r.s)) {
r.prev_rune = -1
@@ -119,9 +196,16 @@ reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) {
r.i += i64(size)
return
}
/*
Decrements the Reader's index (i) by the size of the last read rune
// decreases the reader offset by the last rune
// can only be used once and after a valid read_rune call
Inputs:
- r: A pointer to a Reader struct
WARNING: May only be used once and after a valid `read_rune` call
Returns: An io.Error if an error occurs while unreading (.Invalid_Unread), else nil denotes success.
*/
reader_unread_rune :: proc(r: ^Reader) -> io.Error {
if r.i <= 0 {
return .Invalid_Unread
@@ -133,8 +217,18 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error {
r.prev_rune = -1
return nil
}
/*
Seeks the Reader's index to a new position
// seeks the reader offset to a wanted offset
Inputs:
- r: A pointer to a Reader struct
- offset: The new offset position
- whence: The reference point for the new position (.Start, .Current, or .End)
Returns:
- The absolute offset after seeking
- err: An io.Error if an error occurs while seeking (.Invalid_Whence, .Invalid_Offset)
*/
reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
r.prev_rune = -1
abs: i64
@@ -155,8 +249,19 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E
r.i = abs
return abs, nil
}
/*
Writes the remaining content of the Reader's string into the provided io.Writer
// writes the string content left to read into the io.Writer `w`
Inputs:
- r: A pointer to a Reader struct
- w: The io.Writer to write the remaining content into
WARNING: Panics if the writer writes more bytes than the remaining length of the string.
Returns:
- n: The number of bytes written
- err: An io.Error if an error occurs while writing (.Short_Write)
*/
reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
r.prev_rune = -1
if r.i >= i64(len(r.s)) {
@@ -175,7 +280,12 @@ reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
}
return
}
/*
VTable containing implementations for various io.Stream methods
This VTable is used by the Reader struct to provide its functionality
as an io.Stream.
*/
@(private)
_reader_vtable := io.Stream_VTable{
impl_size = proc(s: io.Stream) -> i64 {
+1026 -349
View File
File diff suppressed because it is too large Load Diff