Add string16 and cstring16 (UTF-16 based strings)

This commit is contained in:
gingerBill
2025-08-02 11:00:15 +01:00
parent 710203eadb
commit 2561427dd3
25 changed files with 873 additions and 62 deletions
+12 -1
View File
@@ -73,7 +73,7 @@ Type_Info_Rune :: struct {}
Type_Info_Float :: struct {endianness: Platform_Endianness}
Type_Info_Complex :: struct {}
Type_Info_Quaternion :: struct {}
Type_Info_String :: struct {is_cstring: bool}
Type_Info_String :: struct {is_cstring: bool, is_utf16: bool}
Type_Info_Boolean :: struct {}
Type_Info_Any :: struct {}
Type_Info_Type_Id :: struct {}
@@ -397,6 +397,11 @@ Raw_String :: struct {
len: int,
}
// Raw_String16 is the raw layout used when a `string16` is transmuted apart:
// a multi-pointer to `u16` elements followed by a length.
// `len` is an element count, not a byte count - the runtime comparison helpers
// multiply it by `size_of(u16)` before handing it to the memory routines.
Raw_String16 :: struct {
data: [^]u16,
len: int,
}
Raw_Slice :: struct {
data: rawptr,
len: int,
@@ -450,6 +455,12 @@ Raw_Cstring :: struct {
}
#assert(size_of(Raw_Cstring) == size_of(cstring))
// Raw_Cstring16 is the raw layout of a `cstring16`: a single multi-pointer to `u16` elements.
Raw_Cstring16 :: struct {
data: [^]u16,
}
// Compile-time check that the raw struct and `cstring16` have the same size.
#assert(size_of(Raw_Cstring16) == size_of(cstring16))
Raw_Soa_Pointer :: struct {
data: rawptr,
index: int,
+27 -1
View File
@@ -86,11 +86,26 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int
}
return n
}
// `copy_from_string16` is a built-in procedure which copies `u16` elements from the UTF-16 string `src`
// into the slice `dst`. The source and destination may overlap. The result is the number of elements
// copied, i.e. the smaller of len(src) and len(dst).
//
// Prefer the procedure group `copy`.
@builtin
copy_from_string16 :: proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int {
	count := min(len(dst), len(src))
	if count <= 0 {
		// Nothing to transfer when either side is empty
		return count
	}
	// `mem_copy` takes a size in bytes, so scale the element count
	intrinsics.mem_copy(raw_data(dst), raw_data(src), count*size_of(u16))
	return count
}
// `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`.
// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
// of len(src) and len(dst).
@builtin
copy :: proc{copy_slice, copy_from_string}
copy :: proc{copy_slice, copy_from_string, copy_from_string16}
@@ -285,6 +300,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error
}
// `delete_string16` frees the memory behind `str` with `allocator`,
// reporting the allocation size to the allocator in bytes.
@builtin
delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
	ptr := raw_data(str)
	size_in_bytes := len(str)*size_of(u16)
	return mem_free_with_size(ptr, size_in_bytes, allocator, loc)
}
// `delete_cstring16` frees the pointer behind `str` with `allocator`.
// No size is passed along: only the pointer itself is handed to `mem_free`.
@builtin
delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
	ptr := (^u16)(str)
	return mem_free(ptr, allocator, loc)
}
// `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation.
//
// Note: Prefer `delete` over the specific `delete_*` procedures where possible.
@@ -297,6 +321,8 @@ delete :: proc{
delete_map,
delete_soa_slice,
delete_soa_dynamic_array,
delete_string16,
delete_cstring16,
}
+87
View File
@@ -493,12 +493,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int {
return ret
}
// string16_eq reports whether `lhs` and `rhs` have the same length and identical contents.
string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool {
	a := transmute(Raw_String16)lhs
	b := transmute(Raw_String16)rhs
	if a.len == b.len {
		// Lengths are in `u16` elements; `memory_equal` is given a byte count
		byte_count := a.len*size_of(u16)
		return #force_inline memory_equal(a.data, b.data, byte_count)
	}
	return false
}
// string16_cmp orders two UTF-16 strings by their `u16` code units.
//
// Returns -1 if `a` sorts before `b`, +1 if it sorts after, and 0 if they are equal.
// When one string is a prefix of the other, the shorter one sorts first.
string16_cmp :: proc "contextless" (a, b: string16) -> int {
	x, y := raw_data(a), raw_data(b)
	xn, yn := len(a), len(b)
	// Compare whole code units rather than raw bytes: a byte-wise comparison
	// looks at the low byte first on little-endian targets, which would put
	// 0x0100 before 0x00ff.
	for i in 0..<min(xn, yn) {
		if x[i] != y[i] {
			return -1 if x[i] < y[i] else +1
		}
	}
	if xn != yn {
		return -1 if xn < yn else +1
	}
	return 0
}
string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) }
string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }
// Remaining `string16` comparison operators.
// `string16_ne` negates `string16_eq`; the ordering operators test the sign of `string16_cmp`.
string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) }
string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 }
string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 }
string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 }
string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 }
cstring_len :: proc "contextless" (s: cstring) -> int {
p0 := uintptr((^byte)(s))
p := p0
@@ -508,6 +536,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int {
return int(p - p0)
}
// cstring16_len returns the number of `u16` elements that precede the first
// zero element of `s`. A nil `s` has length 0.
cstring16_len :: proc "contextless" (s: cstring16) -> int {
	units := ([^]u16)(s)
	if units == nil {
		return 0
	}
	count := 0
	for units[count] != 0 {
		count += 1
	}
	return count
}
cstring_to_string :: proc "contextless" (s: cstring) -> string {
if s == nil {
return ""
@@ -517,6 +555,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string {
return transmute(string)Raw_String{ptr, n}
}
// cstring16_to_string16 views a zero-terminated `cstring16` as a `string16`.
// The result points at the same memory as `s`; its length comes from `cstring16_len`.
// A nil `s` yields the empty string.
cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 {
	if s != nil {
		raw := Raw_String16{
			data = (^u16)(s),
			len  = cstring16_len(s),
		}
		return transmute(string16)raw
	}
	return ""
}
cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
x := ([^]byte)(lhs)
@@ -559,6 +606,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return
cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }
// cstring16_eq reports whether two `cstring16` values are equal.
// Identical pointers (including two nils) are equal; a nil never equals a non-nil.
// Otherwise the strings must have the same length and the same contents.
cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool {
	a := ([^]u16)(lhs)
	b := ([^]u16)(rhs)
	switch {
	case a == b:
		return true
	case (a == nil) != (b == nil):
		// Exactly one side is nil
		return false
	}
	a_len := cstring16_len(lhs)
	b_len := cstring16_len(rhs)
	if a_len == b_len {
		return #force_inline memory_equal(a, b, a_len*size_of(u16))
	}
	return false
}
// cstring16_cmp orders two zero-terminated UTF-16 strings by their `u16` code units.
//
// Returns -1 if `lhs` sorts before `rhs`, +1 if it sorts after, and 0 if they are equal.
// Identical pointers compare equal, a nil string sorts before any non-nil string,
// and a string that is a prefix of the other sorts first.
cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int {
	x := ([^]u16)(lhs)
	y := ([^]u16)(rhs)
	if x == y {
		return 0
	}
	if (x == nil) ~ (y == nil) {
		return -1 if x == nil else +1
	}
	// Walk both strings once, comparing whole code units. Comparing raw bytes
	// instead would look at the low byte first on little-endian targets and
	// put 0x0100 before 0x00ff.
	i := 0
	for x[i] == y[i] {
		if x[i] == 0 {
			// Both terminators reached together: equal
			return 0
		}
		i += 1
	}
	// A terminator (0) is smaller than any other unit, so the shorter string sorts first
	return -1 if x[i] < y[i] else +1
}
// Remaining `cstring16` comparison operators.
// `cstring16_ne` negates `cstring16_eq`; the ordering operators test the sign of `cstring16_cmp`.
cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) }
cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 }
cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 }
cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 }
cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 }
complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) }
complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) }
+76
View File
@@ -1551,6 +1551,79 @@ fmt_string :: proc(fi: ^Info, s: string, verb: rune) {
fmt_cstring :: proc(fi: ^Info, s: cstring, verb: rune) {
fmt_string(fi, string(s), verb)
}
// Formats a UTF-16 string according to the given verb.
//
// Inputs:
// - fi: Pointer to the Info struct containing format settings.
// - s: The UTF-16 string to format.
// - verb: The format specifier character (e.g. 's', 'v', 'q', 'x', 'X').
//
// If `fi.optional_len` is set, `s` is first truncated to that many elements (clamped to its length).
// The verb 'v' is treated as 'q' when `fi.record_level > 0` and `fi.in_bad` is false.
//
fmt_string16 :: proc(fi: ^Info, s: string16, verb: rune) {
	s, verb := s, verb
	if limit, has_limit := fi.optional_len.?; has_limit {
		s = s[:clamp(limit, 0, len(s))]
	}

	if verb == 'v' && fi.record_level > 0 && !fi.in_bad {
		verb = 'q'
	}

	switch verb {
	case 's', 'v':
		// Padding is only needed when a width was given and it exceeds the string's length
		needs_padding := fi.width_set && fi.width > len(s)
		if needs_padding {
			// With `minus` the text goes first and the padding after it; otherwise the reverse
			if fi.minus {
				io.write_string16(fi.writer, s, &fi.n)
			}
			for _ in 0..<fi.width - len(s) {
				io.write_byte(fi.writer, ' ', &fi.n)
			}
			if !fi.minus {
				io.write_string16(fi.writer, s, &fi.n)
			}
		} else {
			io.write_string16(fi.writer, s, &fi.n)
		}

	case 'q', 'w': // quoted string
		io.write_quoted_string16(fi.writer, s, '"', &fi.n)

	case 'x', 'X':
		// `fi.space` is cleared while the units are printed and restored on exit;
		// the saved value decides whether units are separated by a space.
		saved_space := fi.space
		fi.space = false
		defer fi.space = saved_space

		digits := __DIGITS_UPPER
		if verb == 'x' {
			digits = __DIGITS_LOWER
		}

		for i in 0..<len(s) {
			if saved_space && i > 0 {
				io.write_byte(fi.writer, ' ', &fi.n)
			}
			_fmt_int(fi, u64(s[i]), 16, false, bit_size=16, digits=digits)
		}

	case:
		fmt_bad_verb(fi, verb)
	}
}
// Formats a C-style (zero-terminated) UTF-16 string according to the given verb.
// The value is converted to a `string16` and formatted by `fmt_string16`.
//
// Inputs:
// - fi: Pointer to the Info struct containing format settings.
// - s: The C-style UTF-16 string to format.
// - verb: The format specifier character (Ref fmt_string16).
//
fmt_cstring16 :: proc(fi: ^Info, s: cstring16, verb: rune) {
	as_string16 := string16(s)
	fmt_string16(fi, as_string16, verb)
}
// Formats a raw pointer with a specific format.
//
// Inputs:
@@ -3210,6 +3283,9 @@ fmt_arg :: proc(fi: ^Info, arg: any, verb: rune) {
case string: fmt_string(fi, a, verb)
case cstring: fmt_cstring(fi, a, verb)
case string16: fmt_string16(fi, a, verb)
case cstring16: fmt_cstring16(fi, a, verb)
case typeid: reflect.write_typeid(fi.writer, a, &fi.n)
case i16le: fmt_int(fi, u64(a), true, 16, verb)
+24
View File
@@ -5,6 +5,7 @@ package io
import "base:intrinsics"
import "core:unicode/utf8"
import "core:unicode/utf16"
// Seek whence values
Seek_From :: enum {
@@ -314,6 +315,29 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int,
return write(s, transmute([]byte)str, n_written)
}
// write_string16 writes the contents of the UTF-16 string `str` to `s`, re-encoded as UTF-8.
//
// Each code unit outside the surrogate range is written as its own rune. A high
// surrogate immediately followed by a low surrogate is decoded as one pair and
// written as a single rune. Any other surrogate is written as the replacement character.
//
// Returns the total number of bytes reported by `write_rune` and the first
// error encountered, at which point writing stops.
write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) {
	for i := 0; i < len(str); i += 1 {
		r := rune(utf16.REPLACEMENT_CHAR)
		switch c := str[i]; {
		case c < utf16._surr1, utf16._surr3 <= c:
			r = rune(c)
		case utf16._surr1 <= c && c < utf16._surr2 && i+1 < len(str) &&
		     utf16._surr2 <= str[i+1] && str[i+1] < utf16._surr3:
			r = utf16.decode_surrogate_pair(rune(c), rune(str[i+1]))
			i += 1 // the low surrogate has been consumed as well
		}

		// Assign to the named result `err` instead of declaring a new `err` with `:=`;
		// a loop-local `err` would shadow the result and the bare `return` below
		// could not report the failure to the caller.
		w: int
		w, err = write_rune(s, r, n_written)
		n += w
		if err != nil {
			return
		}
	}
	return
}
// write_rune writes a UTF-8 encoded rune to w.
write_rune :: proc(s: Writer, r: rune, n_written: ^int = nil) -> (size: int, err: Error) {
defer if err == nil && n_written != nil {
+27
View File
@@ -264,6 +264,33 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written
return
}
// write_quoted_string16 writes the UTF-16 string `str` to `w` surrounded by `quote`,
// escaping each decoded rune with `write_escaped_rune`.
//
// A code unit that cannot be decoded on its own (it decodes to the error rune
// with a width of 1) is written as a `\uXXXX` escape of the raw 16-bit value.
//
// Returns the number of bytes written and the first error encountered.
// If `n_written` is non-nil, the byte count is also added to it on return.
write_quoted_string16 :: proc(w: Writer, str: string16, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) {
	defer if n_written != nil {
		n_written^ += n
	}
	write_byte(w, quote, &n) or_return
	for width, s := 0, str; len(s) > 0; s = s[width:] {
		r := rune(s[0])
		width = 1
		if r >= utf8.RUNE_SELF {
			r, width = utf16.decode_rune_in_string(s)
		}
		if width == 1 && r == utf8.RUNE_ERROR {
			// `s[0]` is a full 16-bit unit here, so emit all four nibbles and mask
			// each one: indexing the digit table with `s[0]>>4` would run far past
			// its end.
			c := s[0]
			write_byte(w, '\\', &n) or_return
			write_byte(w, 'u', &n) or_return
			write_byte(w, DIGITS_LOWER[(c>>12)&0xf], &n) or_return
			write_byte(w, DIGITS_LOWER[(c>>8)&0xf], &n) or_return
			write_byte(w, DIGITS_LOWER[(c>>4)&0xf], &n) or_return
			write_byte(w, DIGITS_LOWER[c&0xf], &n) or_return
			continue
		}

		n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return

	}
	write_byte(w, quote, &n) or_return
	return
}
// writer append a quoted rune into the byte buffer, return the written size
write_quoted_rune :: proc(w: Writer, r: rune) -> (n: int) {
_write_byte :: #force_inline proc(w: Writer, c: byte) -> int {
+20
View File
@@ -106,6 +106,26 @@ decode :: proc(d: []rune, s: []u16) -> (n: int) {
return
}
// decode_rune_in_string decodes the first rune of the UTF-16 string `s`.
//
// Returns the rune and the number of `u16` elements it occupies:
// - an empty string yields (REPLACEMENT_CHAR, 0)
// - a unit outside the surrogate range yields (that unit, 1)
// - a high surrogate followed by a low surrogate yields (the decoded pair, 2)
// - any other surrogate yields (REPLACEMENT_CHAR, 1)
decode_rune_in_string :: proc(s: string16) -> (r: rune, width: int) {
	r = rune(REPLACEMENT_CHAR)
	if len(s) < 1 {
		return
	}
	width = 1

	lead := s[0]
	in_surrogate_range := _surr1 <= lead && lead < _surr3
	if !in_surrogate_range {
		r = rune(lead)
	} else if lead < _surr2 && len(s) > 1 && _surr2 <= s[1] && s[1] < _surr3 {
		r = decode_surrogate_pair(rune(lead), rune(s[1]))
		width += 1
	}
	return
}
rune_count :: proc(s: []u16) -> (n: int) {
for i := 0; i < len(s); i += 1 {
c := s[i]
+4 -4
View File
@@ -1089,7 +1089,7 @@ gb_internal String internal_odin_root_dir(void) {
text = gb_alloc_array(permanent_allocator(), wchar_t, len+1);
GetModuleFileNameW(nullptr, text, cast(int)len);
path = string16_to_string(heap_allocator(), make_string16(text, len));
path = string16_to_string(heap_allocator(), make_string16(cast(u16 *)text, len));
for (i = path.len-1; i >= 0; i--) {
u8 c = path[i];
@@ -1387,14 +1387,14 @@ gb_internal String path_to_fullpath(gbAllocator a, String s, bool *ok_) {
mutex_lock(&fullpath_mutex);
len = GetFullPathNameW(&string16[0], 0, nullptr, nullptr);
len = GetFullPathNameW(cast(wchar_t *)&string16[0], 0, nullptr, nullptr);
if (len != 0) {
wchar_t *text = gb_alloc_array(permanent_allocator(), wchar_t, len+1);
GetFullPathNameW(&string16[0], len, text, nullptr);
GetFullPathNameW(cast(wchar_t *)&string16[0], len, text, nullptr);
mutex_unlock(&fullpath_mutex);
text[len] = 0;
result = string16_to_string(a, make_string16(text, len));
result = string16_to_string(a, make_string16(cast(u16 *)text, len));
result = string_trim_whitespace(result);
// Replace Windows style separators
+1 -1
View File
@@ -231,7 +231,7 @@ Array<String> cache_gather_envs() {
wchar_t *curr_string = strings;
while (curr_string && *curr_string) {
String16 wstr = make_string16_c(curr_string);
String16 wstr = make_string16_c(cast(u16 *)curr_string);
curr_string += wstr.len+1;
String str = string16_to_string(temporary_allocator(), wstr);
if (string_starts_with(str, str_lit("CURR_DATE_TIME="))) {
+8 -1
View File
@@ -2327,6 +2327,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
if (is_type_string(op_type) && id == BuiltinProc_len) {
if (operand->mode == Addressing_Constant) {
mode = Addressing_Constant;
GB_ASSERT_MSG(!is_type_string16(op_type), "TODO(bill): constant utf-16 string len");
String str = operand->value.value_string;
value = exact_value_i64(str.len);
type = t_untyped_integer;
@@ -2334,6 +2337,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
mode = Addressing_Value;
if (is_type_cstring(op_type)) {
add_package_dependency(c, "runtime", "cstring_len");
} else if (is_type_cstring16(op_type)) {
add_package_dependency(c, "runtime", "cstring16_len");
}
}
} else if (is_type_array(op_type)) {
@@ -4683,7 +4688,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
break;
case Type_Basic:
if (t->Basic.kind == Basic_string) {
operand->type = alloc_type_multi_pointer(t_u8);
operand->type = t_u8_multi_ptr;
} else if (t->Basic.kind == Basic_string16) {
operand->type = t_u16_multi_ptr;
}
break;
case Type_Pointer:
+6
View File
@@ -815,6 +815,12 @@ gb_internal bool signature_parameter_similar_enough(Type *x, Type *y) {
if (sig_compare(is_type_cstring, is_type_u8_multi_ptr, x, y)) {
return true;
}
if (sig_compare(is_type_cstring16, is_type_u16_ptr, x, y)) {
return true;
}
if (sig_compare(is_type_cstring16, is_type_u16_multi_ptr, x, y)) {
return true;
}
if (sig_compare(is_type_uintptr, is_type_rawptr, x, y)) {
return true;
+88
View File
@@ -2862,6 +2862,14 @@ gb_internal void add_comparison_procedures_for_fields(CheckerContext *c, Type *t
add_package_dependency(c, "runtime", "string_eq");
add_package_dependency(c, "runtime", "string_ne");
break;
case Basic_cstring16:
add_package_dependency(c, "runtime", "cstring16_eq");
add_package_dependency(c, "runtime", "cstring16_ne");
break;
case Basic_string16:
add_package_dependency(c, "runtime", "string16_eq");
add_package_dependency(c, "runtime", "string16_ne");
break;
}
break;
case Type_Struct:
@@ -3035,6 +3043,24 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
case Token_LtEq: add_package_dependency(c, "runtime", "cstring_le"); break;
case Token_GtEq: add_package_dependency(c, "runtime", "cstring_gt"); break;
}
} else if (is_type_cstring16(x->type) && is_type_cstring16(y->type)) {
	// Register the runtime procedure that matches each comparison operator.
	// `>=` needs `cstring16_ge`, not `cstring16_gt`.
	switch (op) {
	case Token_CmpEq: add_package_dependency(c, "runtime", "cstring16_eq"); break;
	case Token_NotEq: add_package_dependency(c, "runtime", "cstring16_ne"); break;
	case Token_Lt:    add_package_dependency(c, "runtime", "cstring16_lt"); break;
	case Token_Gt:    add_package_dependency(c, "runtime", "cstring16_gt"); break;
	case Token_LtEq:  add_package_dependency(c, "runtime", "cstring16_le"); break;
	case Token_GtEq:  add_package_dependency(c, "runtime", "cstring16_ge"); break;
	}
} else if (is_type_string16(x->type) || is_type_string16(y->type)) {
	// Same mapping for `string16`; `>=` needs `string16_ge`.
	switch (op) {
	case Token_CmpEq: add_package_dependency(c, "runtime", "string16_eq"); break;
	case Token_NotEq: add_package_dependency(c, "runtime", "string16_ne"); break;
	case Token_Lt:    add_package_dependency(c, "runtime", "string16_lt"); break;
	case Token_Gt:    add_package_dependency(c, "runtime", "string16_gt"); break;
	case Token_LtEq:  add_package_dependency(c, "runtime", "string16_le"); break;
	case Token_GtEq:  add_package_dependency(c, "runtime", "string16_ge"); break;
	}
} else if (is_type_string(x->type) || is_type_string(y->type)) {
switch (op) {
case Token_CmpEq: add_package_dependency(c, "runtime", "string_eq"); break;
@@ -3340,6 +3366,11 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
return true;
}
// []u16 <-> string16 (not cstring16)
if (is_type_u16_slice(src) && (is_type_string16(dst) && !is_type_cstring16(dst))) {
return true;
}
// cstring -> string
if (are_types_identical(src, t_cstring) && are_types_identical(dst, t_string)) {
if (operand->mode != Addressing_Constant) {
@@ -3347,6 +3378,14 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
}
return true;
}
// cstring16 -> string16
if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) {
if (operand->mode != Addressing_Constant) {
add_package_dependency(c, "runtime", "cstring16_to_string16");
}
return true;
}
// cstring -> ^u8
if (are_types_identical(src, t_cstring) && is_type_u8_ptr(dst)) {
return !is_constant;
@@ -3372,6 +3411,34 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type
if (is_type_rawptr(src) && are_types_identical(dst, t_cstring)) {
return !is_constant;
}
// Pointer-level conversions to and from cstring16.
// Each one is allowed only when `is_constant` is false.
// cstring16 -> ^u16
if (are_types_identical(src, t_cstring16) && is_type_u16_ptr(dst)) {
return !is_constant;
}
// cstring16 -> [^]u16
if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) {
return !is_constant;
}
// cstring16 -> rawptr
if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) {
return !is_constant;
}
// ^u16 -> cstring16
if (is_type_u16_ptr(src) && are_types_identical(dst, t_cstring16)) {
return !is_constant;
}
// [^]u16 -> cstring16
if (is_type_u16_multi_ptr(src) && are_types_identical(dst, t_cstring16)) {
return !is_constant;
}
// rawptr -> cstring16
if (is_type_rawptr(src) && are_types_identical(dst, t_cstring16)) {
return !is_constant;
}
// proc <-> proc
if (is_type_proc(src) && is_type_proc(dst)) {
if (is_type_polymorphic(dst)) {
@@ -4558,6 +4625,8 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar
// target_type = t_untyped_nil;
} else if (is_type_cstring(target_type)) {
// target_type = t_untyped_nil;
} else if (is_type_cstring16(target_type)) {
// target_type = t_untyped_nil;
} else if (!type_has_nil(target_type)) {
operand->mode = Addressing_Invalid;
convert_untyped_error(c, operand, target_type);
@@ -8226,6 +8295,7 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64
case Type_Basic:
if (t->Basic.kind == Basic_string) {
if (o->mode == Addressing_Constant) {
GB_ASSERT(o->value.kind == ExactValue_String);
*max_count = o->value.value_string.len;
}
if (o->mode != Addressing_Constant) {
@@ -8233,6 +8303,16 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64
}
o->type = t_u8;
return true;
} else if (t->Basic.kind == Basic_string16) {
if (o->mode == Addressing_Constant) {
GB_ASSERT(o->value.kind == ExactValue_String16);
*max_count = o->value.value_string16.len;
}
if (o->mode != Addressing_Constant) {
o->mode = Addressing_Value;
}
o->type = t_u16;
return true;
} else if (t->Basic.kind == Basic_UntypedString) {
if (o->mode == Addressing_Constant) {
*max_count = o->value.value_string.len;
@@ -10879,9 +10959,17 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node,
if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) {
valid = true;
if (o->mode == Addressing_Constant) {
GB_ASSERT(o->value.kind == ExactValue_String);
max_count = o->value.value_string.len;
}
o->type = type_deref(o->type);
} else if (t->Basic.kind == Basic_string16) {
valid = true;
if (o->mode == Addressing_Constant) {
GB_ASSERT(o->value.kind == ExactValue_String16);
max_count = o->value.value_string16.len;
}
o->type = type_deref(o->type);
}
break;
+9 -7
View File
@@ -1363,13 +1363,15 @@ gb_internal void init_universal(void) {
}
t_u8_ptr = alloc_type_pointer(t_u8);
t_u8_multi_ptr = alloc_type_multi_pointer(t_u8);
t_int_ptr = alloc_type_pointer(t_int);
t_i64_ptr = alloc_type_pointer(t_i64);
t_f64_ptr = alloc_type_pointer(t_f64);
t_u8_slice = alloc_type_slice(t_u8);
t_string_slice = alloc_type_slice(t_string);
t_u8_ptr = alloc_type_pointer(t_u8);
t_u8_multi_ptr = alloc_type_multi_pointer(t_u8);
t_u16_ptr = alloc_type_pointer(t_u16);
t_u16_multi_ptr = alloc_type_multi_pointer(t_u16);
t_int_ptr = alloc_type_pointer(t_int);
t_i64_ptr = alloc_type_pointer(t_i64);
t_f64_ptr = alloc_type_pointer(t_f64);
t_u8_slice = alloc_type_slice(t_u8);
t_string_slice = alloc_type_slice(t_string);
// intrinsics types for objective-c stuff
{
+1 -1
View File
@@ -669,7 +669,7 @@ gb_internal gb_inline f64 gb_sqrt(f64 x) {
gb_internal wchar_t **command_line_to_wargv(wchar_t *cmd_line, int *_argc) {
u32 i, j;
u32 len = cast(u32)string16_len(cmd_line);
u32 len = cast(u32)string16_len(cast(u16 *)cmd_line);
i = ((len+2)/2)*gb_size_of(void *) + gb_size_of(void *);
wchar_t **argv = cast(wchar_t **)GlobalAlloc(GMEM_FIXED, i + (len+2)*gb_size_of(wchar_t));
+51 -1
View File
@@ -29,6 +29,7 @@ enum ExactValueKind {
ExactValue_Compound = 8,
ExactValue_Procedure = 9,
ExactValue_Typeid = 10,
ExactValue_String16 = 11,
ExactValue_Count,
};
@@ -46,6 +47,7 @@ struct ExactValue {
Ast * value_compound;
Ast * value_procedure;
Type * value_typeid;
String16 value_string16;
};
};
@@ -66,6 +68,9 @@ gb_internal uintptr hash_exact_value(ExactValue v) {
case ExactValue_String:
res = gb_fnv32a(v.value_string.text, v.value_string.len);
break;
case ExactValue_String16:
	// Read the UTF-16 payload through its own union member.
	// `len` counts u16 units, so scale it to a byte count for the hash.
	res = gb_fnv32a(v.value_string16.text, v.value_string16.len*gb_size_of(u16));
	break;
case ExactValue_Integer:
{
u32 key = gb_fnv32a(v.value_integer.dp, gb_size_of(*v.value_integer.dp) * v.value_integer.used);
@@ -118,6 +123,11 @@ gb_internal ExactValue exact_value_string(String string) {
result.value_string = string;
return result;
}
// Wraps a UTF-16 string in an ExactValue tagged as ExactValue_String16.
// The String16 is stored as given.
gb_internal ExactValue exact_value_string16(String16 string) {
	ExactValue value = {ExactValue_String16};
	value.value_string16 = string;
	return value;
}
gb_internal ExactValue exact_value_i64(i64 i) {
ExactValue result = {ExactValue_Integer};
@@ -656,6 +666,7 @@ gb_internal i32 exact_value_order(ExactValue const &v) {
return 0;
case ExactValue_Bool:
case ExactValue_String:
case ExactValue_String16:
return 1;
case ExactValue_Integer:
return 2;
@@ -689,6 +700,7 @@ gb_internal void match_exact_values(ExactValue *x, ExactValue *y) {
case ExactValue_Bool:
case ExactValue_String:
case ExactValue_String16:
case ExactValue_Quaternion:
case ExactValue_Pointer:
case ExactValue_Compound:
@@ -891,7 +903,18 @@ gb_internal ExactValue exact_binary_operator_value(TokenKind op, ExactValue x, E
gb_memmove(data, sx.text, sx.len);
gb_memmove(data+sx.len, sy.text, sy.len);
return exact_value_string(make_string(data, len));
break;
}
case ExactValue_String16: {
	// Only concatenation is defined for constant UTF-16 strings
	if (op != Token_Add) goto error;
	// NOTE(bill): How do you minimize this over allocation?
	// Read both operands through the String16 union member so that
	// `text` is a u16 pointer and `len` is a count of u16 units.
	String16 sx = x.value_string16;
	String16 sy = y.value_string16;
	isize len = sx.len+sy.len;
	u16 *data = gb_alloc_array(permanent_allocator(), u16, len);
	gb_memmove(data, sx.text, sx.len*gb_size_of(u16));
	gb_memmove(data+sx.len, sy.text, sy.len*gb_size_of(u16));
	return exact_value_string16(make_string16(data, len));
}
}
@@ -994,6 +1017,19 @@ gb_internal bool compare_exact_values(TokenKind op, ExactValue x, ExactValue y)
}
break;
}
case ExactValue_String16: {
String16 a = x.value_string16;
String16 b = y.value_string16;
switch (op) {
case Token_CmpEq: return a == b;
case Token_NotEq: return a != b;
case Token_Lt: return a < b;
case Token_LtEq: return a <= b;
case Token_Gt: return a > b;
case Token_GtEq: return a >= b;
}
break;
}
case ExactValue_Pointer: {
switch (op) {
@@ -1050,6 +1086,20 @@ gb_internal gbString write_exact_value_to_string(gbString str, ExactValue const
gb_free(heap_allocator(), s.text);
return str;
}
case ExactValue_String16: {
String s = quote_to_ascii(heap_allocator(), v.value_string16);
string_limit = gb_max(string_limit, 36);
if (s.len <= string_limit) {
str = gb_string_append_length(str, s.text, s.len);
} else {
isize n = string_limit/5;
str = gb_string_append_length(str, s.text, n);
str = gb_string_append_fmt(str, "\"..%lld chars..\"", s.len-(2*n));
str = gb_string_append_length(str, s.text+s.len-n, n);
}
gb_free(heap_allocator(), s.text);
return str;
}
case ExactValue_Integer: {
String s = big_int_to_string(heap_allocator(), &v.value_integer);
str = gb_string_append_length(str, s.text, s.len);
+72
View File
@@ -1656,6 +1656,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
res.type = t;
res.value = llvm_cstring(m, str);
return res;
} else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) {
GB_PANIC("TODO(bill): UTF-16 string");
}
// if (is_type_float(dst)) {
// return value;
@@ -1795,6 +1797,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
}
if (is_type_cstring16(src) && is_type_u16_ptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_u16_ptr(src) && is_type_cstring16(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
// [^]u16 -> cstring16 (the element type must be u16, matching the other cstring16 conversions)
if (is_type_u16_multi_ptr(src) && is_type_cstring16(dst)) {
	return lb_emit_transmute(p, value, dst);
}
if (is_type_cstring16(src) && is_type_rawptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_rawptr(src) && is_type_cstring16(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) {
TEMPORARY_ALLOCATOR_GUARD();
lbValue c = lb_emit_conv(p, value, t_cstring16);
auto args = array_make<lbValue>(temporary_allocator(), 1);
args[0] = c;
lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args);
return lb_emit_conv(p, s, dst);
}
// integer -> boolean
if (is_type_integer(src) && is_type_boolean(dst)) {
lbValue res = {};
@@ -2296,6 +2330,14 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return res;
}
// []u16 <-> string16
if (is_type_u16_slice(src) && is_type_string16(dst)) {
return lb_emit_transmute(p, value, t);
}
if (is_type_string16(src) && is_type_u16_slice(dst)) {
return lb_emit_transmute(p, value, t);
}
// []byte/[]u8 <-> string
if (is_type_u8_slice(src) && is_type_string(dst)) {
return lb_emit_transmute(p, value, t);
@@ -2304,6 +2346,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return lb_emit_transmute(p, value, t);
}
if (is_type_array_like(dst)) {
Type *elem = base_array_type(dst);
isize index_count = cast(isize)get_array_type_count(dst);
@@ -2483,6 +2526,12 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
if (is_type_untyped(src)) {
if (is_type_string(src) && is_type_string16(dst)) {
GB_PANIC("TODO(bill): UTF-16 string");
lbAddr result = lb_add_local_generated(p, t, false);
lb_addr_store(p, result, value);
return lb_addr_load(p, result);
}
if (is_type_string(src) && is_type_string(dst)) {
lbAddr result = lb_add_local_generated(p, t, false);
lb_addr_store(p, result, value);
@@ -3056,6 +3105,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind,
res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
}
return res;
case Basic_cstring16:
if (op_kind == Token_CmpEq) {
res.value = LLVMBuildIsNull(p->builder, x.value, "");
} else if (op_kind == Token_NotEq) {
res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
}
return res;
case Basic_any:
{
// TODO(bill): is this correct behaviour for nil comparison for any?
@@ -4432,6 +4488,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) {
}
case Type_Basic: {
if (is_type_string16(type)) {
GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type));
lbValue len = lb_string_len(p, base);
if (high.value == nullptr) high = len;
if (!no_indices) {
lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr);
}
lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low);
lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int);
lbAddr str = lb_add_local_generated(p, t_string16, false);
lb_fill_string(p, str, elem, new_len);
return str;
}
GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type));
lbValue len = lb_string_len(p, base);
if (high.value == nullptr) high = len;
+31
View File
@@ -1812,6 +1812,37 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) {
return type;
}
case Basic_cstring: return LLVMPointerType(LLVMInt8TypeInContext(ctx), 0);
case Basic_string16:
{
char const *name = "..string16";
LLVMTypeRef type = LLVMGetTypeByName(m->mod, name);
if (type != nullptr) {
return type;
}
type = LLVMStructCreateNamed(ctx, name);
if (build_context.metrics.ptr_size < build_context.metrics.int_size) {
GB_ASSERT(build_context.metrics.ptr_size == 4);
GB_ASSERT(build_context.metrics.int_size == 8);
LLVMTypeRef fields[3] = {
LLVMPointerType(lb_type(m, t_u16), 0),
lb_type(m, t_i32),
lb_type(m, t_int),
};
LLVMStructSetBody(type, fields, 3, false);
} else {
LLVMTypeRef fields[2] = {
LLVMPointerType(lb_type(m, t_u16), 0),
lb_type(m, t_int),
};
LLVMStructSetBody(type, fields, 2, false);
}
return type;
}
case Basic_cstring16: return LLVMPointerType(LLVMInt16TypeInContext(ctx), 0);
case Basic_any:
{
char const *name = "..any";
+9
View File
@@ -2289,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
}
if (is_type_cstring(t)) {
return lb_cstring_len(p, v);
} else if (is_type_cstring16(t)) {
return lb_cstring16_len(p, v);
} else if (is_type_string16(t)) {
return lb_string_len(p, v);
} else if (is_type_string(t)) {
return lb_string_len(p, v);
} else if (is_type_array(t)) {
@@ -2728,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu
res = lb_emit_conv(p, res, tv.type);
} else if (t->Basic.kind == Basic_cstring) {
res = lb_emit_conv(p, x, tv.type);
} else if (t->Basic.kind == Basic_string16) {
res = lb_string_elem(p, x);
res = lb_emit_conv(p, res, tv.type);
} else if (t->Basic.kind == Basic_cstring16) {
res = lb_emit_conv(p, x, tv.type);
}
break;
case Type_Pointer:
+27 -1
View File
@@ -531,7 +531,33 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
case Basic_cstring:
{
tag_type = t_type_info_string;
LLVMValueRef vals[1] = {
LLVMValueRef vals[2] = {
lb_const_bool(m, t_bool, true).value,
lb_const_bool(m, t_bool, false).value,
};
variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
}
break;
case Basic_string16:
{
tag_type = t_type_info_string;
LLVMValueRef vals[2] = {
lb_const_bool(m, t_bool, false).value,
lb_const_bool(m, t_bool, true).value,
};
variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals));
}
break;
case Basic_cstring16:
{
tag_type = t_type_info_string;
LLVMValueRef vals[2] = {
lb_const_bool(m, t_bool, true).value,
lb_const_bool(m, t_bool, true).value,
};
+12
View File
@@ -1626,11 +1626,17 @@ gb_internal void lb_fill_string(lbProcedure *p, lbAddr const &string, lbValue ba
// Extracts field 0 of a `string` or `string16` value.
// Any other base type trips the assertion.
gb_internal lbValue lb_string_elem(lbProcedure *p, lbValue string) {
	Type *t = base_type(string.type);
	bool const is_utf16 = (t->kind == Type_Basic) && (t->Basic.kind == Basic_string16);
	if (!is_utf16) {
		// Not a string16, so it has to be a plain `string`.
		GB_ASSERT(t->kind == Type_Basic && t->Basic.kind == Basic_string);
	}
	return lb_emit_struct_ev(p, string, 0);
}
// Extracts field 1 of a `string` or `string16` value.
// Any other base type trips the assertion, which prints the offending type.
gb_internal lbValue lb_string_len(lbProcedure *p, lbValue string) {
	Type *t = base_type(string.type);
	bool const is_utf16 = (t->kind == Type_Basic) && (t->Basic.kind == Basic_string16);
	if (!is_utf16) {
		// Not a string16, so it has to be a plain `string`.
		GB_ASSERT_MSG(t->kind == Type_Basic && t->Basic.kind == Basic_string, "%s", type_to_string(t));
	}
	return lb_emit_struct_ev(p, string, 1);
}
@@ -1641,6 +1647,12 @@ gb_internal lbValue lb_cstring_len(lbProcedure *p, lbValue value) {
args[0] = lb_emit_conv(p, value, t_cstring);
return lb_emit_runtime_call(p, "cstring_len", args);
}
// Emits a call to the runtime procedure "cstring16_len" for a `cstring16` value
// and returns the call's result.
gb_internal lbValue lb_cstring16_len(lbProcedure *p, lbValue value) {
	GB_ASSERT(is_type_cstring16(value.type));

	// The runtime call takes exactly one argument.
	auto call_args = array_make<lbValue>(permanent_allocator(), 1);
	lbValue converted = lb_emit_conv(p, value, t_cstring16);
	call_args[0] = converted;

	return lb_emit_runtime_call(p, "cstring16_len", call_args);
}
gb_internal lbValue lb_array_elem(lbProcedure *p, lbValue array_ptr) {
+4 -4
View File
@@ -142,9 +142,9 @@ gb_internal i32 system_exec_command_line_app_internal(bool exit_on_err, char con
}
wcmd = string_to_string16(permanent_allocator(), make_string(cast(u8 *)cmd_line, cmd_len-1));
if (CreateProcessW(nullptr, wcmd.text,
nullptr, nullptr, true, 0, nullptr, nullptr,
&start_info, &pi)) {
if (CreateProcessW(nullptr, cast(wchar_t *)wcmd.text,
nullptr, nullptr, true, 0, nullptr, nullptr,
&start_info, &pi)) {
WaitForSingleObject(pi.hProcess, INFINITE);
GetExitCodeProcess(pi.hProcess, cast(DWORD *)&exit_code);
@@ -232,7 +232,7 @@ gb_internal Array<String> setup_args(int argc, char const **argv) {
wchar_t **wargv = command_line_to_wargv(GetCommandLineW(), &wargc);
auto args = array_make<String>(a, 0, wargc);
for (isize i = 0; i < wargc; i++) {
wchar_t *warg = wargv[i];
u16 *warg = cast(u16 *)wargv[i];
isize wlen = string16_len(warg);
String16 wstr = make_string16(warg, wlen);
String arg = string16_to_string(a, wstr);
+2 -2
View File
@@ -59,7 +59,7 @@ struct Find_Result {
};
gb_internal String mc_wstring_to_string(wchar_t const *str) {
return string16_to_string(mc_allocator, make_string16_c(str));
return string16_to_string(mc_allocator, make_string16_c(cast(u16 *)str));
}
gb_internal String16 mc_string_to_wstring(String str) {
@@ -103,7 +103,7 @@ gb_internal HANDLE mc_find_first(String wildcard, MC_Find_Data *find_data) {
String16 wildcard_wide = mc_string_to_wstring(wildcard);
defer (mc_free(wildcard_wide));
HANDLE handle = FindFirstFileW(wildcard_wide.text, &_find_data);
HANDLE handle = FindFirstFileW(cast(wchar_t *)wildcard_wide.text, &_find_data);
if (handle == INVALID_HANDLE_VALUE) return INVALID_HANDLE_VALUE;
find_data->file_attributes = _find_data.dwFileAttributes;
+4 -4
View File
@@ -130,7 +130,7 @@ gb_internal String directory_from_path(String const &s) {
String16 wstr = string_to_string16(a, path);
defer (gb_free(a, wstr.text));
i32 attribs = GetFileAttributesW(wstr.text);
i32 attribs = GetFileAttributesW(cast(wchar_t *)wstr.text);
if (attribs < 0) return false;
return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0;
@@ -360,7 +360,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi)
defer (gb_free(a, wstr.text));
WIN32_FIND_DATAW file_data = {};
HANDLE find_file = FindFirstFileW(wstr.text, &file_data);
HANDLE find_file = FindFirstFileW(cast(wchar_t *)wstr.text, &file_data);
if (find_file == INVALID_HANDLE_VALUE) {
return ReadDirectory_Unknown;
}
@@ -372,7 +372,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array<FileInfo> *fi)
wchar_t *filename_w = file_data.cFileName;
u64 size = cast(u64)file_data.nFileSizeLow;
size |= (cast(u64)file_data.nFileSizeHigh) << 32;
String name = string16_to_string(a, make_string16_c(filename_w));
String name = string16_to_string(a, make_string16_c(cast(u16 *)filename_w));
if (name == "." || name == "..") {
gb_free(a, name.text);
continue;
@@ -494,7 +494,7 @@ gb_internal bool write_directory(String path) {
#else
gb_internal bool write_directory(String path) {
String16 wstr = string_to_string16(heap_allocator(), path);
LPCWSTR wdirectory_name = wstr.text;
LPCWSTR wdirectory_name = cast(wchar_t *)wstr.text;
HANDLE directory = CreateFileW(wdirectory_name,
GENERIC_WRITE,
+155 -17
View File
@@ -26,15 +26,14 @@ struct String_Iterator {
// NOTE(bill): String16 is only used for Windows due to its file directories
struct String16 {
wchar_t *text;
isize len;
wchar_t const &operator[](isize i) const {
u16 * text;
isize len;
u16 const &operator[](isize i) const {
GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i);
return text[i];
}
};
gb_internal gb_inline String make_string(u8 const *text, isize len) {
String s;
s.text = cast(u8 *)text;
@@ -45,19 +44,19 @@ gb_internal gb_inline String make_string(u8 const *text, isize len) {
return s;
}
gb_internal gb_inline String16 make_string16(wchar_t const *text, isize len) {
gb_internal gb_inline String16 make_string16(u16 const *text, isize len) {
String16 s;
s.text = cast(wchar_t *)text;
s.text = cast(u16 *)text;
s.len = len;
return s;
}
gb_internal isize string16_len(wchar_t const *s) {
gb_internal isize string16_len(u16 const *s) {
if (s == nullptr) {
return 0;
}
wchar_t const *p = s;
u16 const *p = s;
while (*p) {
p++;
}
@@ -69,7 +68,7 @@ gb_internal gb_inline String make_string_c(char const *text) {
return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
}
gb_internal gb_inline String16 make_string16_c(wchar_t const *text) {
gb_internal gb_inline String16 make_string16_c(u16 const *text) {
return make_string16(text, string16_len(text));
}
@@ -145,6 +144,27 @@ gb_internal int string_compare(String const &a, String const &b) {
return res;
}
// Three-way comparison of two String16 values.
//
// Returns a negative value, zero, or a positive value when `a` orders before,
// equal to, or after `b`. Strings sharing the same `text` pointer are ordered
// by length alone; a null `text` orders before a non-null one. Otherwise the
// common prefix is compared with memcmp and ties are broken by length.
//
// NOTE(review): memcmp orders by bytes, not by u16 value, so on a little-endian
// host the result for differing code units may not match numeric code-unit
// order - confirm whether callers rely on the ordering or only on equality.
gb_internal int string16_compare(String16 const &a, String16 const &b) {
	if (a.text == b.text) {
		return cast(int)(a.len - b.len);
	}
	if (a.text == nullptr) return -1;
	if (b.text == nullptr) return +1;

	uintptr common_len = gb_min(a.len, b.len);
	int order = memcmp(a.text, b.text, common_len*gb_size_of(u16));
	if (order != 0) {
		return order;
	}
	return cast(int)(a.len - b.len);
}
gb_internal isize string_index_byte(String const &s, u8 x) {
for (isize i = 0; i < s.len; i++) {
if (s.text[i] == x) {
@@ -182,6 +202,26 @@ template <isize N> gb_internal bool operator >= (String const &a, char const (&b
template <> bool operator == (String const &a, char const (&b)[1]) { return a.len == 0; }
template <> bool operator != (String const &a, char const (&b)[1]) { return a.len != 0; }
// Returns true when `a` and `b` have the same length and identical code units.
// Two empty strings compare equal regardless of their `text` pointers.
gb_internal gb_inline bool str_eq(String16 const &a, String16 const &b) {
	if (a.len != b.len) return false;
	if (a.len == 0) return true;
	// `len` counts u16 code units while memcmp takes a byte count; without the
	// scale only the first half of each string would be compared.
	return memcmp(a.text, b.text, a.len*gb_size_of(u16)) == 0;
}
// Named relational helpers for String16.
// Inequality is the negation of str_eq; the ordering helpers test the sign of
// string16_compare.
gb_internal gb_inline bool str_ne(String16 const &a, String16 const &b) {
	bool equal = str_eq(a, b);
	return !equal;
}
gb_internal gb_inline bool str_lt(String16 const &a, String16 const &b) {
	return 0 > string16_compare(a, b);
}
gb_internal gb_inline bool str_gt(String16 const &a, String16 const &b) {
	return 0 < string16_compare(a, b);
}
gb_internal gb_inline bool str_le(String16 const &a, String16 const &b) {
	return 0 >= string16_compare(a, b);
}
gb_internal gb_inline bool str_ge(String16 const &a, String16 const &b) {
	return 0 <= string16_compare(a, b);
}

// Operator forms of the helpers above.
gb_internal gb_inline bool operator == (String16 const &a, String16 const &b) {
	return str_eq(a, b);
}
gb_internal gb_inline bool operator != (String16 const &a, String16 const &b) {
	return !str_eq(a, b);
}
gb_internal gb_inline bool operator <  (String16 const &a, String16 const &b) {
	return string16_compare(a, b) < 0;
}
gb_internal gb_inline bool operator >  (String16 const &a, String16 const &b) {
	return string16_compare(a, b) > 0;
}
gb_internal gb_inline bool operator <= (String16 const &a, String16 const &b) {
	return string16_compare(a, b) <= 0;
}
gb_internal gb_inline bool operator >= (String16 const &a, String16 const &b) {
	return string16_compare(a, b) >= 0;
}
gb_internal gb_inline bool string_starts_with(String const &s, String const &prefix) {
if (prefix.len > s.len) {
return false;
@@ -614,7 +654,7 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons
// TODO(bill): Make this non-windows specific
gb_internal String16 string_to_string16(gbAllocator a, String s) {
int len, len1;
wchar_t *text;
u16 *text;
if (s.len < 1) {
return make_string16(nullptr, 0);
@@ -625,9 +665,9 @@ gb_internal String16 string_to_string16(gbAllocator a, String s) {
return make_string16(nullptr, 0);
}
text = gb_alloc_array(a, wchar_t, len+1);
text = gb_alloc_array(a, u16, len+1);
len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len);
len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, cast(wchar_t *)text, cast(int)len);
if (len1 == 0) {
gb_free(a, text);
return make_string16(nullptr, 0);
@@ -646,7 +686,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) {
return make_string(nullptr, 0);
}
len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0);
len = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, nullptr, 0);
if (len == 0) {
return make_string(nullptr, 0);
}
@@ -654,7 +694,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) {
text = gb_alloc_array(a, u8, len+1);
len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len);
len1 = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, cast(char *)text, cast(int)len);
if (len1 == 0) {
gb_free(a, text);
return make_string(nullptr, 0);
@@ -674,9 +714,9 @@ gb_internal String temporary_directory(gbAllocator allocator) {
return String{0};
}
DWORD len = gb_max(MAX_PATH, n);
wchar_t *b = gb_alloc_array(heap_allocator(), wchar_t, len+1);
u16 *b = gb_alloc_array(heap_allocator(), u16, len+1);
defer (gb_free(heap_allocator(), b));
n = GetTempPathW(len, b);
n = GetTempPathW(len, cast(wchar_t *)b);
if (n == 3 && b[1] == ':' && b[2] == '\\') {
} else if (n > 0 && b[n-1] == '\\') {
@@ -791,6 +831,104 @@ gb_internal String quote_to_ascii(gbAllocator a, String str, u8 quote='"') {
return res;
}
// Combines a UTF-16 surrogate pair into a single rune.
//
// `r1` must lie in [0xd800, 0xdc00) and `r2` in [0xdc00, 0xe000); otherwise
// GB_RUNE_INVALID is returned.
gb_internal Rune decode_surrogate_pair(u16 r1, u16 r2) {
	static Rune const high_begin = 0xd800;
	static Rune const low_begin  = 0xdc00;
	static Rune const low_end    = 0xe000;
	static Rune const plane_base = 0x10000;

	bool r1_is_high = high_begin <= r1 && r1 < low_begin;
	bool r2_is_low  = low_begin  <= r2 && r2 < low_end;
	if (!(r1_is_high && r2_is_low)) {
		return GB_RUNE_INVALID;
	}

	// Upper ten bits come from r1, lower ten bits from r2.
	Rune upper = (r1 - high_begin) << 10;
	Rune lower = r2 - low_begin;
	return (upper | lower) + plane_base;
}
// Produces a quoted, ASCII-only rendering of a UTF-16 string.
//
// The result starts and ends with `quote`. Each decoded rune is written as:
//   - `\` followed by the character, for `quote` and backslash;
//   - the character itself, when it is below 0x80 and is_printable;
//   - `\xNN` for the remaining runes below ' ';
//   - `\uNNNN` for runes below 0x10000, `\UNNNNNNNN` otherwise.
// A high surrogate followed by a valid low surrogate is decoded as one rune.
// Any surrogate that cannot be paired is written as the `\uNNNN` escape of the
// raw code unit.
//
// The returned String aliases the buffer allocated from `a`.
gb_internal String quote_to_ascii(gbAllocator a, String16 str, u8 quote='"') {
	static Rune const _surr1 = 0xd800; // start of the high-surrogate range
	static Rune const _surr2 = 0xdc00; // end of the high-surrogate range (exclusive)

	u16 *s = cast(u16 *)str.text;
	isize n = str.len;
	auto buf = array_make<u8>(a, 0, n*2);
	array_add(&buf, quote);

	for (isize width = 0; n > 0; s += width, n -= width) {
		Rune r = cast(Rune)s[0];
		width = 1;

		if (_surr1 <= r && r < _surr2 && n > 1) {
			Rune pair = decode_surrogate_pair(s[0], s[1]);
			if (pair != GB_RUNE_INVALID) {
				r = pair;
				width = 2;
			}
			// On failure `r` stays the raw code unit and is escaped as \uNNNN
			// below. A two-digit \x escape cannot represent a 16-bit unit, and
			// indexing lower_hex with s[0]>>4 would read out of bounds.
		}

		if (r == quote || r == '\\') {
			array_add(&buf, cast(u8)'\\');
			array_add(&buf, u8(r));
			continue;
		}
		if (r < 0x80 && is_printable(r)) {
			array_add(&buf, u8(r));
			continue;
		}
		if (r < ' ') {
			u8 b = cast(u8)r;
			array_add(&buf, cast(u8)'\\');
			array_add(&buf, cast(u8)'x');
			array_add(&buf, cast(u8)lower_hex[b>>4]);
			array_add(&buf, cast(u8)lower_hex[b&0xf]);
			// Stop here so the same rune is not written a second time as \uNNNN.
			continue;
		}

		if (r > GB_RUNE_MAX) {
			r = 0XFFFD;
		}
		if (r < 0x10000) {
			array_add(&buf, cast(u8)'\\');
			array_add(&buf, cast(u8)'u');
			for (isize i = 12; i >= 0; i -= 4) {
				array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
			}
		} else {
			array_add(&buf, cast(u8)'\\');
			array_add(&buf, cast(u8)'U');
			for (isize i = 28; i >= 0; i -= 4) {
				array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]);
			}
		}
	}

	array_add(&buf, quote);
	String res = {};
	res.text = buf.data;
	res.len = buf.count;
	return res;
}
+116 -17
View File
@@ -41,8 +41,13 @@ enum BasicKind {
Basic_uint,
Basic_uintptr,
Basic_rawptr,
Basic_string, // ^u8 + int
Basic_cstring, // ^u8
Basic_string, // [^]u8 + int
Basic_cstring, // [^]u8
Basic_string16, // [^]u16 + int
Basic_cstring16, // [^]u16
Basic_any, // rawptr + ^Type_Info
Basic_typeid,
@@ -500,8 +505,14 @@ gb_global Type basic_types[] = {
{Type_Basic, {Basic_uintptr, BasicFlag_Integer | BasicFlag_Unsigned, -1, STR_LIT("uintptr")}},
{Type_Basic, {Basic_rawptr, BasicFlag_Pointer, -1, STR_LIT("rawptr")}},
{Type_Basic, {Basic_string, BasicFlag_String, -1, STR_LIT("string")}},
{Type_Basic, {Basic_cstring, BasicFlag_String, -1, STR_LIT("cstring")}},
{Type_Basic, {Basic_string16, BasicFlag_String, -1, STR_LIT("string16")}},
{Type_Basic, {Basic_cstring16, BasicFlag_String, -1, STR_LIT("cstring16")}},
{Type_Basic, {Basic_any, 0, 16, STR_LIT("any")}},
{Type_Basic, {Basic_typeid, 0, 8, STR_LIT("typeid")}},
@@ -591,8 +602,12 @@ gb_global Type *t_uint = &basic_types[Basic_uint];
gb_global Type *t_uintptr = &basic_types[Basic_uintptr];
gb_global Type *t_rawptr = &basic_types[Basic_rawptr];
gb_global Type *t_string = &basic_types[Basic_string];
gb_global Type *t_cstring = &basic_types[Basic_cstring];
gb_global Type *t_string16 = &basic_types[Basic_string16];
gb_global Type *t_cstring16 = &basic_types[Basic_cstring16];
gb_global Type *t_any = &basic_types[Basic_any];
gb_global Type *t_typeid = &basic_types[Basic_typeid];
@@ -630,6 +645,8 @@ gb_global Type *t_untyped_uninit = &basic_types[Basic_UntypedUninit];
gb_global Type *t_u8_ptr = nullptr;
gb_global Type *t_u8_multi_ptr = nullptr;
gb_global Type *t_u16_ptr = nullptr;
gb_global Type *t_u16_multi_ptr = nullptr;
gb_global Type *t_int_ptr = nullptr;
gb_global Type *t_i64_ptr = nullptr;
gb_global Type *t_f64_ptr = nullptr;
@@ -1292,6 +1309,14 @@ gb_internal bool is_type_string(Type *t) {
}
return false;
}
// Reports whether the base type of `t` is the basic type `string16`.
// Returns false when base_type yields nullptr.
gb_internal bool is_type_string16(Type *t) {
	Type *bt = base_type(t);
	if (bt == nullptr) {
		return false;
	}
	return bt->kind == Type_Basic && bt->Basic.kind == Basic_string16;
}
gb_internal bool is_type_cstring(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1300,6 +1325,14 @@ gb_internal bool is_type_cstring(Type *t) {
}
return false;
}
// Reports whether the base type of `t` is the basic type `cstring16`.
// Returns false when base_type yields nullptr.
gb_internal bool is_type_cstring16(Type *t) {
	Type *bt = base_type(t);
	if (bt == nullptr) {
		return false;
	}
	return bt->kind == Type_Basic && bt->Basic.kind == Basic_cstring16;
}
gb_internal bool is_type_typed(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1429,6 +1462,12 @@ gb_internal bool is_type_u8(Type *t) {
}
return false;
}
// Reports whether `t` itself is the basic type `u16`.
// The type is inspected as given: base_type is not applied here.
// A nullptr argument yields false.
gb_internal bool is_type_u16(Type *t) {
	// Guard against nullptr like the other is_type_* predicates do, so a
	// missing element type cannot be dereferenced.
	if (t == nullptr) { return false; }
	if (t->kind == Type_Basic) {
		return t->Basic.kind == Basic_u16;
	}
	return false;
}
gb_internal bool is_type_array(Type *t) {
t = base_type(t);
if (t == nullptr) { return false; }
@@ -1690,6 +1729,39 @@ gb_internal bool is_type_rune_array(Type *t) {
return false;
}
// Reports whether the base type of `t` is a slice whose element type
// satisfies is_type_u16.
gb_internal bool is_type_u16_slice(Type *t) {
	Type *bt = base_type(t);
	if (bt == nullptr || bt->kind != Type_Slice) {
		return false;
	}
	return is_type_u16(bt->Slice.elem);
}
// Reports whether the base type of `t` is a fixed array whose element type
// satisfies is_type_u16.
gb_internal bool is_type_u16_array(Type *t) {
	Type *bt = base_type(t);
	if (bt == nullptr || bt->kind != Type_Array) {
		return false;
	}
	return is_type_u16(bt->Array.elem);
}
// Reports whether the base type of `t` is a pointer whose element type
// satisfies is_type_u16. Returns false when base_type yields nullptr.
gb_internal bool is_type_u16_ptr(Type *t) {
	t = base_type(t);
	if (t == nullptr) { return false; }
	if (t->kind == Type_Pointer) {
		// Read the union member that matches the checked kind rather than
		// going through the Slice member.
		return is_type_u16(t->Pointer.elem);
	}
	return false;
}
// Reports whether the base type of `t` is a multi-pointer whose element type
// satisfies is_type_u16. Returns false when base_type yields nullptr.
gb_internal bool is_type_u16_multi_ptr(Type *t) {
	t = base_type(t);
	if (t == nullptr) { return false; }
	if (t->kind == Type_MultiPointer) {
		// Read the union member that matches the checked kind rather than
		// going through the Slice member.
		return is_type_u16(t->MultiPointer.elem);
	}
	return false;
}
gb_internal bool is_type_array_like(Type *t) {
return is_type_array(t) || is_type_enumerated_array(t);
@@ -2109,7 +2181,7 @@ gb_internal bool is_type_indexable(Type *t) {
Type *bt = base_type(t);
switch (bt->kind) {
case Type_Basic:
return bt->Basic.kind == Basic_string;
return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16;
case Type_Array:
case Type_Slice:
case Type_DynamicArray:
@@ -2129,7 +2201,7 @@ gb_internal bool is_type_sliceable(Type *t) {
Type *bt = base_type(t);
switch (bt->kind) {
case Type_Basic:
return bt->Basic.kind == Basic_string;
return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16;
case Type_Array:
case Type_Slice:
case Type_DynamicArray:
@@ -2376,6 +2448,7 @@ gb_internal bool type_has_nil(Type *t) {
case Basic_any:
return true;
case Basic_cstring:
case Basic_cstring16:
return true;
case Basic_typeid:
return true;
@@ -2443,8 +2516,9 @@ gb_internal bool is_type_comparable(Type *t) {
case Basic_rune:
return true;
case Basic_string:
return true;
case Basic_cstring:
case Basic_string16:
case Basic_cstring16:
return true;
case Basic_typeid:
return true;
@@ -3774,10 +3848,12 @@ gb_internal i64 type_size_of(Type *t) {
if (t->kind == Type_Basic) {
GB_ASSERT_MSG(is_type_typed(t), "%s", type_to_string(t));
switch (t->Basic.kind) {
case Basic_string: size = 2*build_context.int_size; break;
case Basic_cstring: size = build_context.ptr_size; break;
case Basic_any: size = 16; break;
case Basic_typeid: size = 8; break;
case Basic_string: size = 2*build_context.int_size; break;
case Basic_cstring: size = build_context.ptr_size; break;
case Basic_string16: size = 2*build_context.int_size; break;
case Basic_cstring16: size = build_context.ptr_size; break;
case Basic_any: size = 16; break;
case Basic_typeid: size = 8; break;
case Basic_int: case Basic_uint:
size = build_context.int_size;
@@ -3837,10 +3913,12 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) {
case Type_Basic: {
GB_ASSERT(is_type_typed(t));
switch (t->Basic.kind) {
case Basic_string: return build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_any: return 8;
case Basic_typeid: return 8;
case Basic_string: return build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_string16: return build_context.int_size;
case Basic_cstring16: return build_context.ptr_size;
case Basic_any: return 8;
case Basic_typeid: return 8;
case Basic_int: case Basic_uint:
return build_context.int_size;
@@ -4088,10 +4166,12 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
return size;
}
switch (kind) {
case Basic_string: return 2*build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_any: return 16;
case Basic_typeid: return 8;
case Basic_string: return 2*build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_string16: return 2*build_context.int_size;
case Basic_cstring16: return build_context.ptr_size;
case Basic_any: return 16;
case Basic_typeid: return 8;
case Basic_int: case Basic_uint:
return build_context.int_size;
@@ -4320,6 +4400,15 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) {
if (field_type_) *field_type_ = t_int;
return build_context.int_size; // len
}
} else if (t->Basic.kind == Basic_string16) {
switch (index) {
case 0:
if (field_type_) *field_type_ = t_u16_ptr;
return 0; // data
case 1:
if (field_type_) *field_type_ = t_int;
return build_context.int_size; // len
}
} else if (t->Basic.kind == Basic_any) {
switch (index) {
case 0:
@@ -4396,6 +4485,11 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) {
case 0: t = t_rawptr; break;
case 1: t = t_int; break;
}
} else if (t->Basic.kind == Basic_string16) {
switch (index) {
case 0: t = t_rawptr; break;
case 1: t = t_int; break;
}
} else if (t->Basic.kind == Basic_any) {
switch (index) {
case 0: t = t_rawptr; break;
@@ -4637,6 +4731,11 @@ gb_internal Type *type_internal_index(Type *t, isize index) {
GB_ASSERT(index == 0 || index == 1);
return index == 0 ? t_u8_ptr : t_int;
}
case Basic_string16:
{
GB_ASSERT(index == 0 || index == 1);
return index == 0 ? t_u16_ptr : t_int;
}
case Basic_any:
{
GB_ASSERT(index == 0 || index == 1);